From cee9ef6ba986cc804e3ba333209d21d2c77390b0 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Fri, 26 Apr 2019 12:36:31 +0800 Subject: [PATCH 01/14] added virtualenv to python executable --- crawlab/config/config.py | 2 +- crawlab/config/config_local.py | 2 +- crawlab/tasks/spider.py | 44 ++++++++++++++++++++++------------ crawlab/utils/file.py | 12 ++++++++-- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/crawlab/config/config.py b/crawlab/config/config.py index afbcb9bf..08ab113c 100644 --- a/crawlab/config/config.py +++ b/crawlab/config/config.py @@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders") # 配置python虚拟环境的路径 -PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python' +PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python' # 爬虫部署路径 # PROJECT_DEPLOY_FILE_FOLDER = '../deployfile' diff --git a/crawlab/config/config_local.py b/crawlab/config/config_local.py index afbcb9bf..08ab113c 100644 --- a/crawlab/config/config_local.py +++ b/crawlab/config/config_local.py @@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders") # 配置python虚拟环境的路径 -PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python' +PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python' # 爬虫部署路径 # PROJECT_DEPLOY_FILE_FOLDER = '../deployfile' diff --git a/crawlab/tasks/spider.py b/crawlab/tasks/spider.py index 0d843e22..57dab0c8 100644 --- a/crawlab/tasks/spider.py +++ b/crawlab/tasks/spider.py @@ -1,4 +1,5 @@ import os +import sys from datetime import datetime from time import sleep @@ -35,8 +36,16 @@ def execute_spider(self, id: str, params: str = None): hostname = self.request.hostname spider = db_manager.get('spiders', id=id) command = spider.get('cmd') - if command.startswith("env"): - command = PYTHON_ENV_PATH + command.replace("env", "") + + # if start with python, then use sys.executable to execute in the virtualenv + if command.startswith('python '): + command = command.replace('python ', sys.executable + ' ') + + # if start with scrapy, then use sys.executable to execute scrapy as module in the virtualenv + elif command.startswith('scrapy '): + command = command.replace('scrapy ', sys.executable + ' -m scrapy ') + + # pass params to the command if params is not None: command += ' ' + params @@ -95,21 +104,26 @@ def execute_spider(self, id: str, params: str = None): # start process cmd_arr = command.split(' ') cmd_arr = list(filter(lambda x: x != '', cmd_arr)) - p = subprocess.Popen(cmd_arr, - stdout=stdout.fileno(), - stderr=stderr.fileno(), - cwd=current_working_directory, - env=env, - bufsize=1) + try: + p = subprocess.Popen(cmd_arr, + stdout=stdout.fileno(), + stderr=stderr.fileno(), + cwd=current_working_directory, + env=env, + bufsize=1) - # get output from the process - _stdout, _stderr = p.communicate() + # get output from the process + _stdout, _stderr = p.communicate() - # get return code - code = p.poll() - if code == 0: - status = TaskStatus.SUCCESS - else: + # get return code + code = p.poll() + if code == 0: + status = TaskStatus.SUCCESS + else: + status = TaskStatus.FAILURE + except Exception as err: + logger.error(err) + stderr.write(str(err)) status = TaskStatus.FAILURE # save task when the task is finished diff --git a/crawlab/utils/file.py b/crawlab/utils/file.py index d549c62d..06163d49 100644 --- a/crawlab/utils/file.py +++ b/crawlab/utils/file.py @@ -11,6 +11,7 
@@ SUFFIX_LANG_MAPPING = { 'sh': 'shell', 'java': 'java', 'c': 'c', + 'go': 'go', } @@ -48,11 +49,18 @@ def get_file_suffix_stats(path) -> dict: Get suffix stats of given file :param path: file path """ - stats = defaultdict(int) + _stats = defaultdict(int) for file_path in get_file_list(path): suffix = get_file_suffix(file_path) if suffix is not None: - stats[suffix] += 1 + _stats[suffix] += 1 + + # only return suffixes with languages + stats = {} + for suffix, count in _stats.items(): + if SUFFIX_LANG_MAPPING.get(suffix) is not None: + stats[suffix] = count + return stats From a41415a0416607c497ba8f88ba3a8062ba391dd4 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 13:17:40 +0800 Subject: [PATCH 02/14] added analytics for spider --- crawlab/config/__init__.py | 9 +- crawlab/config/config_local.py | 55 ------ frontend/src/api/login 2.js | 27 --- frontend/src/api/request 2.js | 46 ----- frontend/src/api/request.js | 1 + frontend/src/components/Stats/MetricCard.vue | 89 +++++++++ frontend/src/components/Stats/SpiderStats.vue | 175 ++++++++++++++++++ frontend/src/i18n/zh.js | 1 + frontend/src/store/modules/spider.js | 28 ++- frontend/src/views/spider/SpiderDetail.vue | 10 + 10 files changed, 304 insertions(+), 137 deletions(-) delete mode 100644 crawlab/config/config_local.py delete mode 100644 frontend/src/api/login 2.js delete mode 100644 frontend/src/api/request 2.js create mode 100644 frontend/src/components/Stats/MetricCard.vue create mode 100644 frontend/src/components/Stats/SpiderStats.vue diff --git a/crawlab/config/__init__.py b/crawlab/config/__init__.py index 609b69de..4d2d8d10 100644 --- a/crawlab/config/__init__.py +++ b/crawlab/config/__init__.py @@ -1,10 +1,3 @@ # encoding: utf-8 -import os - -run_env = os.environ.get("RUNENV", "local") - -if run_env == "local": # 加载本地配置 - from config.config_local import * -else: - from config.config import * +from config.config import * diff --git a/crawlab/config/config_local.py b/crawlab/config/config_local.py deleted file mode 100644 index 08ab113c..00000000 --- a/crawlab/config/config_local.py +++ /dev/null @@ -1,55 +0,0 @@ -import os - -BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -# 爬虫源码路径 -PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders") - -# 配置python虚拟环境的路径 -PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python' - -# 爬虫部署路径 -# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile' -PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab' - -# 爬虫日志路径 -PROJECT_LOGS_FOLDER = '../deployfile/logs' - -# 打包临时文件夹 -PROJECT_TMP_FOLDER = '/tmp' - -# Celery中间者URL -BROKER_URL = 'redis://127.0.0.1:6379/0' - -# Celery后台URL -CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:27017/' - -# Celery MongoDB设置 -CELERY_MONGODB_BACKEND_SETTINGS = { - 'database': 'crawlab_test', - 'taskmeta_collection': 'tasks_celery', -} - -# Celery时区 -CELERY_TIMEZONE = 'Asia/Shanghai' - -# 是否启用UTC -CELERY_ENABLE_UTC = True - -# Celery Scheduler Redis URL -CELERY_BEAT_SCHEDULER = 'utils.redisbeat.RedisScheduler' -CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379' -CELERY_REDIS_SCHEDULER_KEY = 'celery:beat:order_tasks' - -# flower variables -FLOWER_API_ENDPOINT = 'http://localhost:5555/api' - -# MongoDB 变量 -MONGO_HOST = '127.0.0.1' -MONGO_PORT = 27017 -MONGO_DB = 'crawlab_test' - -# Flask 变量 -DEBUG = True -FLASK_HOST = '127.0.0.1' -FLASK_PORT = 8000 diff --git a/frontend/src/api/login 2.js b/frontend/src/api/login 2.js deleted file mode 100644 index 4699f07e..00000000 --- a/frontend/src/api/login 2.js +++ /dev/null @@ -1,27 
+0,0 @@ -import request from '@/utils/request' - -export function login (username, password) { - return request({ - url: '/user/login', - method: 'post', - data: { - username, - password - } - }) -} - -export function getInfo (token) { - return request({ - url: '/user/info', - method: 'get', - params: { token } - }) -} - -export function logout () { - return request({ - url: '/user/logout', - method: 'post' - }) -} diff --git a/frontend/src/api/request 2.js b/frontend/src/api/request 2.js deleted file mode 100644 index 6ec95917..00000000 --- a/frontend/src/api/request 2.js +++ /dev/null @@ -1,46 +0,0 @@ -import axios from 'axios' - -let baseUrl = 'http://localhost:8000/api' -if (process.env.NODE_ENV === 'production') { - baseUrl = 'http://139.129.230.98:8000/api' -} -// const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api' - -const request = (method, path, params, data) => { - return new Promise((resolve, reject) => { - const url = `${baseUrl}${path}` - axios({ - method, - url, - params, - data - }) - .then(resolve) - .catch(reject) - }) -} - -const get = (path, params) => { - return request('GET', path, params) -} - -const post = (path, data) => { - return request('POST', path, {}, data) -} - -const put = (path, data) => { - return request('PUT', path, {}, data) -} - -const del = (path, data) => { - return request('DELETE', path) -} - -export default { - baseUrl, - request, - get, - post, - put, - delete: del -} diff --git a/frontend/src/api/request.js b/frontend/src/api/request.js index 6ec95917..53603af8 100644 --- a/frontend/src/api/request.js +++ b/frontend/src/api/request.js @@ -4,6 +4,7 @@ let baseUrl = 'http://localhost:8000/api' if (process.env.NODE_ENV === 'production') { baseUrl = 'http://139.129.230.98:8000/api' } +// console.log(process.env) // const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api' const request = (method, path, params, data) => { diff --git a/frontend/src/components/Stats/MetricCard.vue b/frontend/src/components/Stats/MetricCard.vue new file mode 100644 index 00000000..14658dbd --- /dev/null +++ b/frontend/src/components/Stats/MetricCard.vue @@ -0,0 +1,89 @@ + + + + + diff --git a/frontend/src/components/Stats/SpiderStats.vue b/frontend/src/components/Stats/SpiderStats.vue new file mode 100644 index 00000000..6abb9527 --- /dev/null +++ b/frontend/src/components/Stats/SpiderStats.vue @@ -0,0 +1,175 @@ + + + + + diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 13baddc7..d56cf69a 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -18,6 +18,7 @@ export default { 'Log': '日志', 'Results': '结果', 'Environment': '环境', + 'Analytics': '分析', // 选择 Spider: '爬虫', diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index 1e18ecde..bb7d948f 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -14,7 +14,16 @@ const state = { importForm: { url: '', type: 'github' - } + }, + + // spider overview stats + overviewStats: {}, + + // spider status stats + statusStats: [], + + // spider daily stats + dailyStats: [] } const getters = {} @@ -31,6 +40,15 @@ const mutations = { }, SET_IMPORT_FORM (state, value) { state.importForm = value + }, + SET_OVERVIEW_STATS (state, value) { + state.overviewStats = value + }, + SET_STATUS_STATS (state, value) { + state.statusStats = value + }, + SET_DAILY_STATS (state, value) { + state.dailyStats = value } } @@ -138,6 +156,14 @@ const actions = { .then(response => { console.log(response) }) + }, + 
getSpiderStats ({ state, commit }) { + return request.get('/stats/get_spider_stats?spider_id=' + state.spiderForm._id) + .then(response => { + commit('SET_OVERVIEW_STATS', response.data.overview) + commit('SET_STATUS_STATS', response.data.task_count_by_status) + commit('SET_DAILY_STATS', response.data.daily_stats) + }) } } diff --git a/frontend/src/views/spider/SpiderDetail.vue b/frontend/src/views/spider/SpiderDetail.vue index 56cf7982..dac7931e 100644 --- a/frontend/src/views/spider/SpiderDetail.vue +++ b/frontend/src/views/spider/SpiderDetail.vue @@ -19,6 +19,9 @@ + + + @@ -30,10 +33,12 @@ import { import FileList from '../../components/FileList/FileList' import SpiderOverview from '../../components/Overview/SpiderOverview' import EnvironmentList from '../../components/Environment/EnvironmentList' +import SpiderStats from '../../components/Stats/SpiderStats' export default { name: 'NodeDetail', components: { + SpiderStats, EnvironmentList, FileList, SpiderOverview @@ -57,6 +62,11 @@ export default { }, methods: { onTabClick () { + if (this.activeTabName === 'analytics') { + setTimeout(() => { + this.$refs['spider-stats'].update() + }, 0) + } }, onSpiderChange (id) { this.$router.push(`/spiders/${id}`) From 21ac67256487864311e195c9957d877ab891092e Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 13:18:03 +0800 Subject: [PATCH 03/14] added stats for spider --- crawlab/routes/stats.py | 107 +++++++++++++++++++++++++++++++++++++++- crawlab/tasks/spider.py | 4 +- 2 files changed, 109 insertions(+), 2 deletions(-) diff --git a/crawlab/routes/stats.py b/crawlab/routes/stats.py index aa7432c6..3d5eb7db 100644 --- a/crawlab/routes/stats.py +++ b/crawlab/routes/stats.py @@ -1,13 +1,20 @@ import os +from collections import defaultdict from datetime import datetime, timedelta from flask_restful import reqparse, Resource +from constants.task import TaskStatus from db.manager import db_manager +from routes.base import BaseApi from utils import jsonify -class StatsApi(Resource): +class StatsApi(BaseApi): + arguments = [ + ['spider_id', str], + ] + def get(self, action: str = None) -> (dict, tuple): """ GET method of StatsApi. 
@@ -87,3 +94,101 @@ class StatsApi(Resource): }, 'daily_tasks': daily_tasks } + + def get_spider_stats(self): + args = self.parser.parse_args() + spider_id = args.get('spider_id') + spider = db_manager.get('spiders', id=spider_id) + tasks = db_manager.list( + col_name='tasks', + cond={ + 'spider_id': spider['_id'], + 'create_ts': { + '$gte': datetime.now() - timedelta(30) + } + }, + limit=9999999 + ) + + # task count + task_count = len(tasks) + + # calculate task count by status + task_count_by_status = defaultdict(int) + total_seconds = 0 + for task in tasks: + task_count_by_status[task['status']] += 1 + if task['status'] == TaskStatus.SUCCESS and task.get('finish_ts'): + duration = (task['finish_ts'] - task['create_ts']).total_seconds() + total_seconds += duration + + task_count_by_status_ = [] + for status, value in task_count_by_status.items(): + task_count_by_status_.append({ + 'name': status, + 'value': value + }) + + # success rate + success_rate = task_count_by_status[TaskStatus.SUCCESS] / task_count + + # average duration + avg_duration = total_seconds / task_count + + # calculate task count by date + cur = db_manager.aggregate('tasks', [ + { + '$match': { + 'spider_id': spider['_id'] + } + }, + { + '$project': { + 'date': { + '$dateToString': { + 'format': '%Y-%m-%d', + 'date': '$create_ts' + } + } + } + }, + { + '$group': { + '_id': '$date', + 'count': { + '$sum': 1 + } + } + }, + { + '$sort': { + '_id': 1 + } + } + ]) + date_cache = {} + for item in cur: + date_cache[item['_id']] = item['count'] + start_date = datetime.now() - timedelta(31) + end_date = datetime.now() - timedelta(1) + date = start_date + daily_tasks = [] + while date < end_date: + date = date + timedelta(1) + date_str = date.strftime('%Y-%m-%d') + daily_tasks.append({ + 'date': date_str, + 'count': date_cache.get(date_str) or 0, + }) + + return { + 'status': 'ok', + 'overview': { + 'task_count': task_count, + 'result_count': 800, + 'success_rate': success_rate, + 'avg_duration': avg_duration + }, + 'task_count_by_status': task_count_by_status_, + 'daily_stats': daily_tasks, + } diff --git a/crawlab/tasks/spider.py b/crawlab/tasks/spider.py index 57dab0c8..48cafc27 100644 --- a/crawlab/tasks/spider.py +++ b/crawlab/tasks/spider.py @@ -127,8 +127,10 @@ def execute_spider(self, id: str, params: str = None): status = TaskStatus.FAILURE # save task when the task is finished + finish_ts = datetime.utcnow() db_manager.update_one('tasks', id=task_id, values={ - 'finish_ts': datetime.utcnow(), + 'finish_ts': finish_ts, + 'duration': (finish_ts - task['create_ts']).total_seconds(), 'status': status }) task = db_manager.get('tasks', id=id) From 16a958a6e4fd7e65bda29be30bf39fd08a35b281 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 19:23:21 +0800 Subject: [PATCH 04/14] updated stats for spider --- crawlab/routes/stats.py | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/crawlab/routes/stats.py b/crawlab/routes/stats.py index 3d5eb7db..abc6f7d4 100644 --- a/crawlab/routes/stats.py +++ b/crawlab/routes/stats.py @@ -149,6 +149,12 @@ class StatsApi(BaseApi): 'format': '%Y-%m-%d', 'date': '$create_ts' } + }, + 'duration': { + '$subtract': [ + '$finish_ts', + '$create_ts' + ] } } }, @@ -157,6 +163,9 @@ class StatsApi(BaseApi): '_id': '$date', 'count': { '$sum': 1 + }, + 'duration': { + '$avg': '$duration' } } }, @@ -168,7 +177,10 @@ class StatsApi(BaseApi): ]) date_cache = {} for item in cur: - date_cache[item['_id']] = item['count'] + 
date_cache[item['_id']] = { + 'duration': item['duration'] / 1000, + 'count': item['count'] + } start_date = datetime.now() - timedelta(31) end_date = datetime.now() - timedelta(1) date = start_date @@ -176,16 +188,33 @@ class StatsApi(BaseApi): while date < end_date: date = date + timedelta(1) date_str = date.strftime('%Y-%m-%d') - daily_tasks.append({ + d = date_cache.get(date_str) + row = { 'date': date_str, - 'count': date_cache.get(date_str) or 0, - }) + } + if d is None: + row['count'] = 0 + row['duration'] = 0 + else: + row['count'] = d['count'] + row['duration'] = d['duration'] + daily_tasks.append(row) + + # calculate total results + result_count = 0 + col_name = spider.get('col') + if col_name is not None: + for task in tasks: + result_count += db_manager.count(col_name, {'task_id': task['_id']}) + + # top tasks + # top_10_tasks = db_manager.list('tasks', {'spider_id': spider['_id']}) return { 'status': 'ok', 'overview': { 'task_count': task_count, - 'result_count': 800, + 'result_count': result_count, 'success_rate': success_rate, 'avg_duration': avg_duration }, From 70d392ef62ee70f28bbf1e3071eeebdddb270de8 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 19:24:03 +0800 Subject: [PATCH 05/14] updated analytics for spider --- frontend/src/components/Stats/SpiderStats.vue | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/Stats/SpiderStats.vue b/frontend/src/components/Stats/SpiderStats.vue index 6abb9527..5bc5608b 100644 --- a/frontend/src/components/Stats/SpiderStats.vue +++ b/frontend/src/components/Stats/SpiderStats.vue @@ -41,13 +41,13 @@ -

{{$t('Tasks by Status')}}
+          {{$t('Long Tasks')}}
-          {{$t('Tasks by Status')}}
+          {{$t('Daily Duration')}}
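Note: the script hunk that follows picks a fixed color for each task status
with an if/else chain. An equivalent lookup table, shown here only as a sketch
of an alternative and not the code this patch adds, keeps the status-to-color
mapping in one place:

    // hex values and the 'grey' fallback are the ones the patch below uses
    const STATUS_COLORS = {
      SUCCESS: '#67c23a', // green for finished tasks
      STARTED: '#e6a23c', // amber for tasks still running
      FAILURE: '#f56c6c'  // red for failed tasks
    }
    const colorFor = name => STATUS_COLORS[name] || 'grey'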

@@ -74,9 +74,22 @@ export default { type: 'pie', radius: ['50%', '70%'], data: this.statusStats.map(d => { + let color + if (d.name === 'SUCCESS') { + color = '#67c23a' + } else if (d.name === 'STARTED') { + color = '#e6a23c' + } else if (d.name === 'FAILURE') { + color = '#f56c6c' + } else { + color = 'grey' + } return { name: this.$t(d.name), - value: d.value + value: d.value, + itemStyle: { + color + } } }) }] @@ -112,9 +125,38 @@ export default { chart.setOption(option) }, + renderDurationLine () { + const chart = echarts.init(this.$el.querySelector('#duration-line')) + const option = { + grid: { + top: 20, + bottom: 40 + }, + xAxis: { + type: 'category', + data: this.dailyStats.map(d => d.date) + }, + yAxis: { + type: 'value' + }, + series: [{ + type: 'line', + data: this.dailyStats.map(d => d.duration), + areaStyle: {}, + smooth: true + }], + tooltip: { + trigger: 'axis', + show: true + } + } + chart.setOption(option) + }, + render () { this.renderTaskPie() this.renderTaskLine() + this.renderDurationLine() }, update () { From efc32886ae324cf24001e3d655639a889e8fa85a Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 19:32:18 +0800 Subject: [PATCH 06/14] updated analytics for spider --- frontend/src/components/Stats/SpiderStats.vue | 35 +++++++++++++++---- frontend/src/store/modules/spider.js | 11 ++++-- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/frontend/src/components/Stats/SpiderStats.vue b/frontend/src/components/Stats/SpiderStats.vue index 5bc5608b..24a9b257 100644 --- a/frontend/src/components/Stats/SpiderStats.vue +++ b/frontend/src/components/Stats/SpiderStats.vue @@ -27,7 +27,7 @@

{{$t('Tasks by Status')}}
-          <div id="task-pie"></div>
+          <div id="task-pie-status"></div>
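Note: the status pie above and the node pie added below end up with
near-identical ECharts options. A shared helper is one way to keep them in
sync; this is a sketch of the pattern only, assuming the echarts import the
component already has, while the patch itself keeps two separate methods
(renderTaskPieStatus and renderTaskPieNode):

    // initialize a pie chart in the element matching `selector` under `root`;
    // `data` is an array of { name, value } rows from the Vuex store
    function renderPie (root, selector, data) {
      const chart = echarts.init(root.querySelector(selector))
      chart.setOption({
        tooltip: { show: true },
        series: [{ name: '', type: 'pie', data }]
      })
      return chart
    }

    // usage inside the component: renderPie(this.$el, '#task-pie-status', pieData)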
@@ -41,8 +41,8 @@ -

{{$t('Long Tasks')}}
+          {{$t('Tasks by Node')}}
+          <div id="task-pie-node"></div>
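Note: the node pie is fed by task_count_by_node, which the backend (PATCH 07
below) builds by grouping on task.get('node_id'), so tasks that never reached
a node fall under a null key and would render as an unnamed slice. A defensive
mapping for that case; illustrative only, and the 'Unknown' label is
hypothetical rather than something these patches add:

    // turn store rows { name, value } into ECharts pie data,
    // giving the null node_id bucket a readable label
    function toNodePieData (nodeStats) {
      return nodeStats.map(d => ({
        name: d.name || 'Unknown',
        value: d.value
      }))
    }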
@@ -66,8 +66,8 @@ export default { name: 'SpiderStats', components: { MetricCard }, methods: { - renderTaskPie () { - const chart = echarts.init(this.$el.querySelector('#task-pie')) + renderTaskPieStatus () { + const chart = echarts.init(this.$el.querySelector('#task-pie-status')) const option = { series: [{ name: '', @@ -97,6 +97,27 @@ export default { chart.setOption(option) }, + renderTaskPieNode () { + const chart = echarts.init(this.$el.querySelector('#task-pie-node')) + const option = { + series: [{ + name: '', + type: 'pie', + radius: ['50%', '70%'], + data: this.nodeStats.map(d => { + return { + name: d.name, + value: d.value + // itemStyle: { + // color + // } + } + }) + }] + } + chart.setOption(option) + }, + renderTaskLine () { const chart = echarts.init(this.$el.querySelector('#task-line')) const option = { @@ -154,8 +175,9 @@ export default { }, render () { - this.renderTaskPie() + this.renderTaskPieStatus() this.renderTaskLine() + this.renderTaskPieNode() this.renderDurationLine() }, @@ -180,6 +202,7 @@ export default { ...mapState('spider', [ 'overviewStats', 'statusStats', + 'nodeStats', 'dailyStats' ]) }, diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index bb7d948f..b8345082 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -23,7 +23,10 @@ const state = { statusStats: [], // spider daily stats - dailyStats: [] + dailyStats: [], + + // spider node stats + nodeStats: [] } const getters = {} @@ -49,7 +52,10 @@ const mutations = { }, SET_DAILY_STATS (state, value) { state.dailyStats = value - } + }, + SET_NODE_STATS (state, value) { + state.nodeStats = value + }, } const actions = { @@ -163,6 +169,7 @@ const actions = { commit('SET_OVERVIEW_STATS', response.data.overview) commit('SET_STATUS_STATS', response.data.task_count_by_status) commit('SET_DAILY_STATS', response.data.daily_stats) + commit('SET_NODE_STATS', response.data.task_count_by_node) }) } } From c497cf2c8b31b113fd83d6509e820650e476e64b Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 19:32:23 +0800 Subject: [PATCH 07/14] updated stats for spider --- crawlab/routes/stats.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/crawlab/routes/stats.py b/crawlab/routes/stats.py index abc6f7d4..885ab595 100644 --- a/crawlab/routes/stats.py +++ b/crawlab/routes/stats.py @@ -113,15 +113,27 @@ class StatsApi(BaseApi): # task count task_count = len(tasks) - # calculate task count by status + # calculate task count stats task_count_by_status = defaultdict(int) + task_count_by_node = defaultdict(int) total_seconds = 0 for task in tasks: task_count_by_status[task['status']] += 1 + task_count_by_node[task.get('node_id')] += 1 if task['status'] == TaskStatus.SUCCESS and task.get('finish_ts'): duration = (task['finish_ts'] - task['create_ts']).total_seconds() total_seconds += duration + + # task count by node + task_count_by_node_ = [] + for status, value in task_count_by_node.items(): + task_count_by_node_.append({ + 'name': status, + 'value': value + }) + + # task count by status task_count_by_status_ = [] for status, value in task_count_by_status.items(): task_count_by_status_.append({ @@ -219,5 +231,6 @@ class StatsApi(BaseApi): 'avg_duration': avg_duration }, 'task_count_by_status': task_count_by_status_, + 'task_count_by_node': task_count_by_node_, 'daily_stats': daily_tasks, } From 2445868102975f02fec11020a66413416cdf1b25 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 
19:37:48 +0800 Subject: [PATCH 08/14] updated stats for spider --- crawlab/routes/stats.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crawlab/routes/stats.py b/crawlab/routes/stats.py index 885ab595..93264522 100644 --- a/crawlab/routes/stats.py +++ b/crawlab/routes/stats.py @@ -124,7 +124,6 @@ class StatsApi(BaseApi): duration = (task['finish_ts'] - task['create_ts']).total_seconds() total_seconds += duration - # task count by node task_count_by_node_ = [] for status, value in task_count_by_node.items(): @@ -217,7 +216,12 @@ class StatsApi(BaseApi): col_name = spider.get('col') if col_name is not None: for task in tasks: - result_count += db_manager.count(col_name, {'task_id': task['_id']}) + result_count += db_manager.count(col_name, { + 'task_id': task['_id'], + 'create_ts': { + '$gte': datetime.now() - timedelta(30) + } + }) # top tasks # top_10_tasks = db_manager.list('tasks', {'spider_id': spider['_id']}) From 8a34aae3e6d8f355b5e9f0b508d5533a3600cd59 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 19:38:17 +0800 Subject: [PATCH 09/14] updated stats for spider --- crawlab/routes/stats.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/crawlab/routes/stats.py b/crawlab/routes/stats.py index 93264522..319867f9 100644 --- a/crawlab/routes/stats.py +++ b/crawlab/routes/stats.py @@ -216,12 +216,7 @@ class StatsApi(BaseApi): col_name = spider.get('col') if col_name is not None: for task in tasks: - result_count += db_manager.count(col_name, { - 'task_id': task['_id'], - 'create_ts': { - '$gte': datetime.now() - timedelta(30) - } - }) + result_count += db_manager.count(col_name, {'task_id': task['_id']}) # top tasks # top_10_tasks = db_manager.list('tasks', {'spider_id': spider['_id']}) From 60d658de5fdeef6363353e4b9b681c807189b839 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 19:41:44 +0800 Subject: [PATCH 10/14] updated analytics for spider --- frontend/src/components/Stats/SpiderStats.vue | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/frontend/src/components/Stats/SpiderStats.vue b/frontend/src/components/Stats/SpiderStats.vue index 24a9b257..be2a3de0 100644 --- a/frontend/src/components/Stats/SpiderStats.vue +++ b/frontend/src/components/Stats/SpiderStats.vue @@ -47,7 +47,7 @@ -

{{$t('Daily Duration')}}
+          {{$t('Daily Duration (sec)')}}
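Note: the "(sec)" unit holds because MongoDB's $subtract on two date fields
yields milliseconds, which stats.py divides by 1000 (the date_cache block in
PATCH 04) before the series reaches this chart. A mongo-shell sketch of the
aggregation that PATCH 03 adds and PATCH 04 extends; collection and field
names are as in those patches, and spiderId stands in for the spider's
ObjectId:

    db.tasks.aggregate([
      { $match: { spider_id: spiderId } },
      { $project: {
        date: { $dateToString: { format: '%Y-%m-%d', date: '$create_ts' } },
        duration: { $subtract: ['$finish_ts', '$create_ts'] } // milliseconds
      } },
      { $group: { _id: '$date', count: { $sum: 1 }, duration: { $avg: '$duration' } } },
      { $sort: { _id: 1 } }
    ])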

@@ -69,10 +69,13 @@ export default { renderTaskPieStatus () { const chart = echarts.init(this.$el.querySelector('#task-pie-status')) const option = { + tooltip: { + show: true + }, series: [{ name: '', type: 'pie', - radius: ['50%', '70%'], + // radius: ['50%', '70%'], data: this.statusStats.map(d => { let color if (d.name === 'SUCCESS') { @@ -100,10 +103,13 @@ export default { renderTaskPieNode () { const chart = echarts.init(this.$el.querySelector('#task-pie-node')) const option = { + tooltip: { + show: true + }, series: [{ name: '', type: 'pie', - radius: ['50%', '70%'], + // radius: ['50%', '70%'], data: this.nodeStats.map(d => { return { name: d.name, From 9bd819c384a4cecdbe4e16305c81e9009bfc1b6a Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 19:52:10 +0800 Subject: [PATCH 11/14] updated analytics for spider --- frontend/src/components/Stats/SpiderStats.vue | 2 +- frontend/src/i18n/zh.js | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/frontend/src/components/Stats/SpiderStats.vue b/frontend/src/components/Stats/SpiderStats.vue index be2a3de0..0227ef2c 100644 --- a/frontend/src/components/Stats/SpiderStats.vue +++ b/frontend/src/components/Stats/SpiderStats.vue @@ -47,7 +47,7 @@
-          {{$t('Daily Duration (sec)')}}
+          {{$t('Daily Avg Duration (sec)')}}
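Note: each $t() key in these templates is the English string itself, so a
title rename is only complete once frontend/src/i18n/zh.js gains the matching
entry, which the zh.js hunk below does for 'Daily Avg Duration (sec)'. A
minimal sketch of the vue-i18n wiring this relies on; the setup module shown
is an assumption, not part of these patches:

    import Vue from 'vue'
    import VueI18n from 'vue-i18n'
    import zh from './zh' // the dictionary the hunk below extends

    Vue.use(VueI18n)

    // with no 'en' messages registered, a missing key renders as the key
    // text itself, i.e. the English string written in the template
    export default new VueI18n({
      locale: 'zh',
      messages: { zh }
    })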

diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index d56cf69a..c3d3dc6d 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -89,6 +89,14 @@ export default { 'Add Environment Variables': '添加环境变量', 'Last 7-Day Tasks': '最近7天任务数', 'Last 5-Run Errors': '最近5次运行错误数', + '30-Day Tasks': '最近30天任务数', + '30-Day Results': '最近30天结果数', + 'Success Rate': '运行成功率', + 'Avg Duration (sec)': '平均运行时长(秒)', + 'Tasks by Status': '分状态任务数', + 'Tasks by Node': '分节点任务数', + 'Daily Tasks': '每日任务数', + 'Daily Avg Duration (sec)': '每日平均运行时长(秒)', // 爬虫列表 'Name': '名称', From 21f7864c68e69c682c00eb786ca9b5fce154cb4b Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 28 Apr 2019 21:16:31 +0800 Subject: [PATCH 12/14] updated analytics for spider --- frontend/src/components/Stats/SpiderStats.vue | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/frontend/src/components/Stats/SpiderStats.vue b/frontend/src/components/Stats/SpiderStats.vue index 0227ef2c..e98e6f3e 100644 --- a/frontend/src/components/Stats/SpiderStats.vue +++ b/frontend/src/components/Stats/SpiderStats.vue @@ -1,5 +1,5 @@