diff --git a/crawlab/config/__init__.py b/crawlab/config/__init__.py
index 609b69de..4d2d8d10 100644
--- a/crawlab/config/__init__.py
+++ b/crawlab/config/__init__.py
@@ -1,10 +1,3 @@
# encoding: utf-8
-import os
-
-run_env = os.environ.get("RUNENV", "local")
-
-if run_env == "local": # load local config
- from config.config_local import *
-else:
- from config.config import *
+from config.config import *
diff --git a/crawlab/config/config.py b/crawlab/config/config.py
index afbcb9bf..08ab113c 100644
--- a/crawlab/config/config.py
+++ b/crawlab/config/config.py
@@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
# path to the python virtual environment
-PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
+PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python'
# spider deploy path
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
diff --git a/crawlab/config/config_local.py b/crawlab/config/config_local.py
deleted file mode 100644
index afbcb9bf..00000000
--- a/crawlab/config/config_local.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import os
-
-BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-# spider source code path
-PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
-
-# path to the python virtual environment
-PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
-
-# spider deploy path
-# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
-PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
-
-# spider logs path
-PROJECT_LOGS_FOLDER = '../deployfile/logs'
-
-# temp folder for packaging
-PROJECT_TMP_FOLDER = '/tmp'
-
-# Celery broker URL
-BROKER_URL = 'redis://127.0.0.1:6379/0'
-
-# Celery result backend URL
-CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:27017/'
-
-# Celery MongoDB settings
-CELERY_MONGODB_BACKEND_SETTINGS = {
- 'database': 'crawlab_test',
- 'taskmeta_collection': 'tasks_celery',
-}
-
-# Celery timezone
-CELERY_TIMEZONE = 'Asia/Shanghai'
-
-# whether to enable UTC
-CELERY_ENABLE_UTC = True
-
-# Celery Scheduler Redis URL
-CELERY_BEAT_SCHEDULER = 'utils.redisbeat.RedisScheduler'
-CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379'
-CELERY_REDIS_SCHEDULER_KEY = 'celery:beat:order_tasks'
-
-# flower variables
-FLOWER_API_ENDPOINT = 'http://localhost:5555/api'
-
-# MongoDB variables
-MONGO_HOST = '127.0.0.1'
-MONGO_PORT = 27017
-MONGO_DB = 'crawlab_test'
-
-# Flask variables
-DEBUG = True
-FLASK_HOST = '127.0.0.1'
-FLASK_PORT = 8000
diff --git a/crawlab/routes/stats.py b/crawlab/routes/stats.py
index aa7432c6..fe43e7a9 100644
--- a/crawlab/routes/stats.py
+++ b/crawlab/routes/stats.py
@@ -1,13 +1,20 @@
import os
+from collections import defaultdict
from datetime import datetime, timedelta
from flask_restful import reqparse, Resource
+from constants.task import TaskStatus
from db.manager import db_manager
+from routes.base import BaseApi
from utils import jsonify
-class StatsApi(Resource):
+class StatsApi(BaseApi):
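+ # query arguments registered with the BaseApi request parser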
+ arguments = [
+ ['spider_id', str],
+ ]
+
def get(self, action: str = None) -> (dict, tuple):
"""
GET method of StatsApi.
@@ -87,3 +94,142 @@ class StatsApi(Resource):
},
'daily_tasks': daily_tasks
}
+
+ def get_spider_stats(self):
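+ """
+ Aggregate stats for a single spider over the last 30 days: task counts
+ by status and by node, success rate, average duration, daily counts and
+ the total number of results.
+ """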
+ args = self.parser.parse_args()
+ spider_id = args.get('spider_id')
+ spider = db_manager.get('spiders', id=spider_id)
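+ # fetch all of this spider's tasks created in the last 30 days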
+ tasks = db_manager.list(
+ col_name='tasks',
+ cond={
+ 'spider_id': spider['_id'],
+ 'create_ts': {
+ '$gte': datetime.now() - timedelta(30)
+ }
+ },
+ limit=9999999
+ )
+
+ # task count
+ task_count = len(tasks)
+
+ # calculate task count stats
+ task_count_by_status = defaultdict(int)
+ task_count_by_node = defaultdict(int)
+ total_seconds = 0
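+ # count tasks per status and per node; accumulate runtime of successful tasks only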
+ for task in tasks:
+ task_count_by_status[task['status']] += 1
+ task_count_by_node[task.get('node_id')] += 1
+ if task['status'] == TaskStatus.SUCCESS and task.get('finish_ts'):
+ duration = (task['finish_ts'] - task['create_ts']).total_seconds()
+ total_seconds += duration
+
+ # task count by node
+ task_count_by_node_ = []
+ for node_id, value in task_count_by_node.items():
+ task_count_by_node_.append({
+ 'name': node_id,
+ 'value': value
+ })
+
+ # task count by status
+ task_count_by_status_ = []
+ for status, value in task_count_by_status.items():
+ task_count_by_status_.append({
+ 'name': status,
+ 'value': value
+ })
+
+ # success rate (guard against division by zero when there are no tasks)
+ success_rate = task_count_by_status[TaskStatus.SUCCESS] / task_count if task_count > 0 else 0
+
+ # average duration
+ avg_duration = total_seconds / task_count if task_count > 0 else 0
+
+ # calculate task count by date
+ cur = db_manager.aggregate('tasks', [
+ {
+ '$match': {
+ 'spider_id': spider['_id']
+ }
+ },
+ {
+ '$project': {
+ 'date': {
+ '$dateToString': {
+ 'format': '%Y-%m-%d',
+ 'date': '$create_ts'
+ }
+ },
+ 'duration': {
+ '$subtract': [
+ '$finish_ts',
+ '$create_ts'
+ ]
+ }
+ }
+ },
+ {
+ '$group': {
+ '_id': '$date',
+ 'count': {
+ '$sum': 1
+ },
+ 'duration': {
+ '$avg': '$duration'
+ }
+ }
+ },
+ {
+ '$sort': {
+ '_id': 1
+ }
+ }
+ ])
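+ # $subtract on two dates yields milliseconds; convert to seconds when caching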
+ date_cache = {}
+ for item in cur:
+ date_cache[item['_id']] = {
+ 'duration': (item['duration'] or 0) / 1000,
+ 'count': item['count']
+ }
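+ # walk the last 30 days, zero-filling dates that had no tasks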
+ start_date = datetime.now() - timedelta(31)
+ end_date = datetime.now() - timedelta(1)
+ date = start_date
+ daily_tasks = []
+ while date < end_date:
+ date = date + timedelta(1)
+ date_str = date.strftime('%Y-%m-%d')
+ d = date_cache.get(date_str)
+ row = {
+ 'date': date_str,
+ }
+ if d is None:
+ row['count'] = 0
+ row['duration'] = 0
+ else:
+ row['count'] = d['count']
+ row['duration'] = d['duration']
+ daily_tasks.append(row)
+
+ # calculate total results
+ result_count = 0
+ col_name = spider.get('col')
+ if col_name is not None:
+ for task in tasks:
+ result_count += db_manager.count(col_name, {'task_id': task['_id']})
+
+ # top tasks
+ # top_10_tasks = db_manager.list('tasks', {'spider_id': spider['_id']})
+
+ return {
+ 'status': 'ok',
+ 'overview': {
+ 'task_count': task_count,
+ 'result_count': result_count,
+ 'success_rate': success_rate,
+ 'avg_duration': avg_duration
+ },
+ 'task_count_by_status': task_count_by_status_,
+ 'task_count_by_node': task_count_by_node_,
+ 'daily_stats': daily_tasks,
+ }
diff --git a/crawlab/tasks/spider.py b/crawlab/tasks/spider.py
index 0d843e22..48cafc27 100644
--- a/crawlab/tasks/spider.py
+++ b/crawlab/tasks/spider.py
@@ -1,4 +1,5 @@
import os
+import sys
from datetime import datetime
from time import sleep
@@ -35,8 +36,16 @@ def execute_spider(self, id: str, params: str = None):
hostname = self.request.hostname
spider = db_manager.get('spiders', id=id)
command = spider.get('cmd')
- if command.startswith("env"):
- command = PYTHON_ENV_PATH + command.replace("env", "")
+
+ # if the command starts with python, run it with sys.executable so it uses the current virtualenv
+ if command.startswith('python '):
+ command = command.replace('python ', sys.executable + ' ', 1)
+
+ # if the command starts with scrapy, run scrapy as a module with sys.executable in the virtualenv
+ elif command.startswith('scrapy '):
+ command = command.replace('scrapy ', sys.executable + ' -m scrapy ', 1)
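+ # e.g. 'scrapy crawl foo' -> '<sys.executable> -m scrapy crawl foo'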
+
+ # pass params to the command
if params is not None:
command += ' ' + params
@@ -95,26 +104,33 @@ def execute_spider(self, id: str, params: str = None):
# start process
cmd_arr = command.split(' ')
cmd_arr = list(filter(lambda x: x != '', cmd_arr))
- p = subprocess.Popen(cmd_arr,
- stdout=stdout.fileno(),
- stderr=stderr.fileno(),
- cwd=current_working_directory,
- env=env,
- bufsize=1)
+ try:
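+ # spawn the spider process; any exception (e.g. executable not found) marks the task failed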
+ p = subprocess.Popen(cmd_arr,
+ stdout=stdout.fileno(),
+ stderr=stderr.fileno(),
+ cwd=current_working_directory,
+ env=env,
+ bufsize=1)
- # get output from the process
- _stdout, _stderr = p.communicate()
+ # get output from the process
+ _stdout, _stderr = p.communicate()
- # get return code
- code = p.poll()
- if code == 0:
- status = TaskStatus.SUCCESS
- else:
+ # get return code
+ code = p.poll()
+ if code == 0:
+ status = TaskStatus.SUCCESS
+ else:
+ status = TaskStatus.FAILURE
+ except Exception as err:
+ logger.error(err)
+ stderr.write(str(err))
status = TaskStatus.FAILURE
# save task when the task is finished
+ finish_ts = datetime.utcnow()
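+ # store duration in seconds, measured from create_ts to finish_ts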
db_manager.update_one('tasks', id=task_id, values={
- 'finish_ts': datetime.utcnow(),
+ 'finish_ts': finish_ts,
+ 'duration': (finish_ts - task['create_ts']).total_seconds(),
'status': status
})
task = db_manager.get('tasks', id=id)
diff --git a/crawlab/utils/file.py b/crawlab/utils/file.py
index d549c62d..06163d49 100644
--- a/crawlab/utils/file.py
+++ b/crawlab/utils/file.py
@@ -11,6 +11,7 @@ SUFFIX_LANG_MAPPING = {
'sh': 'shell',
'java': 'java',
'c': 'c',
+ 'go': 'go',
}
@@ -48,11 +49,18 @@ def get_file_suffix_stats(path) -> dict:
Get suffix stats of given file
:param path: file path
"""
- stats = defaultdict(int)
+ _stats = defaultdict(int)
for file_path in get_file_list(path):
suffix = get_file_suffix(file_path)
if suffix is not None:
- stats[suffix] += 1
+ _stats[suffix] += 1
+
+ # keep only suffixes that map to a known language
+ stats = {}
+ for suffix, count in _stats.items():
+ if SUFFIX_LANG_MAPPING.get(suffix) is not None:
+ stats[suffix] = count
+
return stats
diff --git a/frontend/src/api/login 2.js b/frontend/src/api/login 2.js
deleted file mode 100644
index 4699f07e..00000000
--- a/frontend/src/api/login 2.js
+++ /dev/null
@@ -1,27 +0,0 @@
-import request from '@/utils/request'
-
-export function login (username, password) {
- return request({
- url: '/user/login',
- method: 'post',
- data: {
- username,
- password
- }
- })
-}
-
-export function getInfo (token) {
- return request({
- url: '/user/info',
- method: 'get',
- params: { token }
- })
-}
-
-export function logout () {
- return request({
- url: '/user/logout',
- method: 'post'
- })
-}
diff --git a/frontend/src/api/request 2.js b/frontend/src/api/request 2.js
deleted file mode 100644
index 6ec95917..00000000
--- a/frontend/src/api/request 2.js
+++ /dev/null
@@ -1,46 +0,0 @@
-import axios from 'axios'
-
-let baseUrl = 'http://localhost:8000/api'
-if (process.env.NODE_ENV === 'production') {
- baseUrl = 'http://139.129.230.98:8000/api'
-}
-// const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api'
-
-const request = (method, path, params, data) => {
- return new Promise((resolve, reject) => {
- const url = `${baseUrl}${path}`
- axios({
- method,
- url,
- params,
- data
- })
- .then(resolve)
- .catch(reject)
- })
-}
-
-const get = (path, params) => {
- return request('GET', path, params)
-}
-
-const post = (path, data) => {
- return request('POST', path, {}, data)
-}
-
-const put = (path, data) => {
- return request('PUT', path, {}, data)
-}
-
-const del = (path, data) => {
- return request('DELETE', path)
-}
-
-export default {
- baseUrl,
- request,
- get,
- post,
- put,
- delete: del
-}
diff --git a/frontend/src/api/request.js b/frontend/src/api/request.js
index 6ec95917..53603af8 100644
--- a/frontend/src/api/request.js
+++ b/frontend/src/api/request.js
@@ -4,6 +4,7 @@ let baseUrl = 'http://localhost:8000/api'
if (process.env.NODE_ENV === 'production') {
baseUrl = 'http://139.129.230.98:8000/api'
}
+// console.log(process.env)
// const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api'
const request = (method, path, params, data) => {
diff --git a/frontend/src/components/Stats/MetricCard.vue b/frontend/src/components/Stats/MetricCard.vue
new file mode 100644
index 00000000..14658dbd
--- /dev/null
+++ b/frontend/src/components/Stats/MetricCard.vue
@@ -0,0 +1,89 @@
+<!-- MetricCard.vue (89 lines): Vue SFC markup stripped during extraction -->
+<!-- Recoverable template content: a metric card rendering {{value}} with a label -->
diff --git a/frontend/src/components/Stats/SpiderStats.vue b/frontend/src/components/Stats/SpiderStats.vue
new file mode 100644
index 00000000..516c6b0e
--- /dev/null
+++ b/frontend/src/components/Stats/SpiderStats.vue
@@ -0,0 +1,258 @@
+<!-- SpiderStats.vue (258 lines): Vue SFC markup stripped during extraction -->
+<!-- Recoverable template content: metric cards plus four chart panels titled: -->
+ {{$t('Tasks by Status')}}
+ {{$t('Daily Tasks')}}
+ {{$t('Tasks by Node')}}
+ {{$t('Daily Avg Duration (sec)')}}
diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js
index 13baddc7..c3d3dc6d 100644
--- a/frontend/src/i18n/zh.js
+++ b/frontend/src/i18n/zh.js
@@ -18,6 +18,7 @@ export default {
'Log': '日志',
'Results': '结果',
'Environment': '环境',
+ 'Analytics': '分析',
// selection
Spider: '爬虫',
@@ -88,6 +89,14 @@ export default {
'Add Environment Variables': '添加环境变量',
'Last 7-Day Tasks': '最近7天任务数',
'Last 5-Run Errors': '最近5次运行错误数',
+ '30-Day Tasks': '最近30天任务数',
+ '30-Day Results': '最近30天结果数',
+ 'Success Rate': '运行成功率',
+ 'Avg Duration (sec)': '平均运行时长(秒)',
+ 'Tasks by Status': '分状态任务数',
+ 'Tasks by Node': '分节点任务数',
+ 'Daily Tasks': '每日任务数',
+ 'Daily Avg Duration (sec)': '每日平均运行时长(秒)',
// spider list
'Name': '名称',
diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js
index 1e18ecde..b8345082 100644
--- a/frontend/src/store/modules/spider.js
+++ b/frontend/src/store/modules/spider.js
@@ -14,7 +14,19 @@ const state = {
importForm: {
url: '',
type: 'github'
- }
+ },
+
+ // spider overview stats
+ overviewStats: {},
+
+ // spider status stats
+ statusStats: [],
+
+ // spider daily stats
+ dailyStats: [],
+
+ // spider node stats
+ nodeStats: []
}
const getters = {}
@@ -31,7 +43,19 @@ const mutations = {
},
SET_IMPORT_FORM (state, value) {
state.importForm = value
- }
+ },
+ SET_OVERVIEW_STATS (state, value) {
+ state.overviewStats = value
+ },
+ SET_STATUS_STATS (state, value) {
+ state.statusStats = value
+ },
+ SET_DAILY_STATS (state, value) {
+ state.dailyStats = value
+ },
+ SET_NODE_STATS (state, value) {
+ state.nodeStats = value
+ }
}
const actions = {
@@ -138,6 +162,15 @@ const actions = {
.then(response => {
console.log(response)
})
+ },
+ getSpiderStats ({ state, commit }) {
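+ // fetch 30-day stats for the current spider and commit them into the store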
+ return request.get('/stats/get_spider_stats?spider_id=' + state.spiderForm._id)
+ .then(response => {
+ commit('SET_OVERVIEW_STATS', response.data.overview)
+ commit('SET_STATUS_STATS', response.data.task_count_by_status)
+ commit('SET_DAILY_STATS', response.data.daily_stats)
+ commit('SET_NODE_STATS', response.data.task_count_by_node)
+ })
}
}
diff --git a/frontend/src/views/spider/SpiderDetail.vue b/frontend/src/views/spider/SpiderDetail.vue
index 56cf7982..dac7931e 100644
--- a/frontend/src/views/spider/SpiderDetail.vue
+++ b/frontend/src/views/spider/SpiderDetail.vue
@@ -19,6 +19,9 @@
+ <!-- Analytics tab pane (markup stripped during extraction): hosts the SpiderStats component via ref="spider-stats" -->
@@ -30,10 +33,12 @@ import {
import FileList from '../../components/FileList/FileList'
import SpiderOverview from '../../components/Overview/SpiderOverview'
import EnvironmentList from '../../components/Environment/EnvironmentList'
+import SpiderStats from '../../components/Stats/SpiderStats'
export default {
name: 'NodeDetail',
components: {
+ SpiderStats,
EnvironmentList,
FileList,
SpiderOverview
@@ -57,6 +62,11 @@ export default {
},
methods: {
onTabClick () {
+ if (this.activeTabName === 'analytics') {
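+ // defer the refresh until the analytics tab pane has rendered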
+ setTimeout(() => {
+ this.$refs['spider-stats'].update()
+ }, 0)
+ }
},
onSpiderChange (id) {
this.$router.push(`/spiders/${id}`)