Merge pull request #24 from tikazyq/develop

Develop
This commit is contained in:
Marvin Zhang
2019-05-08 12:41:55 +08:00
committed by GitHub
14 changed files with 593 additions and 158 deletions

View File

@@ -1,10 +1,3 @@
# encoding: utf-8
import os
run_env = os.environ.get("RUNENV", "local")
if run_env == "local": # 加载本地配置
from config.config_local import *
else:
from config.config import *
from config.config import *

View File

@@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
# 配置python虚拟环境的路径
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python'
# 爬虫部署路径
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'

View File

@@ -1,55 +0,0 @@
import os
# Local-environment settings: filesystem paths, Celery, MongoDB and Flask values.
# Project root: three directory levels above this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Spider source code path
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
# Path of the Python interpreter inside the virtual environment
# NOTE(review): this is a developer-specific absolute path — confirm before reuse.
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
# Spider deployment path
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
# Spider log path
PROJECT_LOGS_FOLDER = '../deployfile/logs'
# Temporary folder used for packaging
PROJECT_TMP_FOLDER = '/tmp'
# Celery broker URL
BROKER_URL = 'redis://127.0.0.1:6379/0'
# Celery result backend URL
CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:27017/'
# Celery MongoDB settings
CELERY_MONGODB_BACKEND_SETTINGS = {
'database': 'crawlab_test',
'taskmeta_collection': 'tasks_celery',
}
# Celery timezone
CELERY_TIMEZONE = 'Asia/Shanghai'
# Whether to enable UTC
CELERY_ENABLE_UTC = True
# Celery Scheduler Redis URL
CELERY_BEAT_SCHEDULER = 'utils.redisbeat.RedisScheduler'
CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379'
CELERY_REDIS_SCHEDULER_KEY = 'celery:beat:order_tasks'
# flower variables
FLOWER_API_ENDPOINT = 'http://localhost:5555/api'
# MongoDB variables
MONGO_HOST = '127.0.0.1'
MONGO_PORT = 27017
MONGO_DB = 'crawlab_test'
# Flask variables
DEBUG = True
FLASK_HOST = '127.0.0.1'
FLASK_PORT = 8000

View File

@@ -1,13 +1,20 @@
import os
from collections import defaultdict
from datetime import datetime, timedelta
from flask_restful import reqparse, Resource
from constants.task import TaskStatus
from db.manager import db_manager
from routes.base import BaseApi
from utils import jsonify
class StatsApi(Resource):
class StatsApi(BaseApi):
arguments = [
['spider_id', str],
]
def get(self, action: str = None) -> (dict, tuple):
"""
GET method of StatsApi.
@@ -87,3 +94,142 @@ class StatsApi(Resource):
},
'daily_tasks': daily_tasks
}
def get_spider_stats(self):
    """
    Get the analytics of a single spider.

    Reads ``spider_id`` from the parsed request arguments. The overview and
    the per-status / per-node counts are computed from the spider's tasks
    created within the last 30 days; the daily series covers the 30 days
    ending yesterday.
    :return: dict with keys ``status``, ``overview``, ``task_count_by_status``,
        ``task_count_by_node`` and ``daily_stats``
    """
    args = self.parser.parse_args()
    spider_id = args.get('spider_id')
    spider = db_manager.get('spiders', id=spider_id)

    # One reference time for the whole request. Previously the daily series
    # was built from two separate datetime.now() calls, so it had 30 or 31
    # rows depending on whether both calls fell in the same microsecond.
    # NOTE(review): local time is used here, as in the original — confirm it
    # matches the timezone in which create_ts is stored.
    now = datetime.now()

    tasks = db_manager.list(
        col_name='tasks',
        cond={
            'spider_id': spider['_id'],
            'create_ts': {
                '$gte': now - timedelta(30)
            }
        },
        limit=9999999
    )

    # task count
    task_count = len(tasks)

    # calculate task count stats
    task_count_by_status = defaultdict(int)
    task_count_by_node = defaultdict(int)
    total_seconds = 0
    measured_count = 0  # tasks that actually contributed a duration
    for task in tasks:
        task_count_by_status[task['status']] += 1
        task_count_by_node[task.get('node_id')] += 1
        if task['status'] == TaskStatus.SUCCESS and task.get('finish_ts'):
            total_seconds += (task['finish_ts'] - task['create_ts']).total_seconds()
            measured_count += 1

    # task count by node
    task_count_by_node_ = [
        {'name': node_id, 'value': value}
        for node_id, value in task_count_by_node.items()
    ]

    # task count by status
    task_count_by_status_ = [
        {'name': status, 'value': value}
        for status, value in task_count_by_status.items()
    ]

    # success rate (0 when there is no task, instead of ZeroDivisionError)
    success_count = task_count_by_status.get(TaskStatus.SUCCESS, 0)
    success_rate = success_count / task_count if task_count else 0

    # average duration over the tasks whose duration was summed above;
    # dividing by all tasks would understate it whenever some tasks failed
    # or are still running
    avg_duration = total_seconds / measured_count if measured_count else 0

    # calculate task count and average duration by date
    cur = db_manager.aggregate('tasks', [
        {
            '$match': {
                'spider_id': spider['_id']
            }
        },
        {
            '$project': {
                'date': {
                    '$dateToString': {
                        'format': '%Y-%m-%d',
                        'date': '$create_ts'
                    }
                },
                'duration': {
                    '$subtract': [
                        '$finish_ts',
                        '$create_ts'
                    ]
                }
            }
        },
        {
            '$group': {
                '_id': '$date',
                'count': {
                    '$sum': 1
                },
                'duration': {
                    '$avg': '$duration'
                }
            }
        },
        {
            '$sort': {
                '_id': 1
            }
        }
    ])
    date_cache = {}
    for item in cur:
        date_cache[item['_id']] = {
            # subtracting two dates in MongoDB yields milliseconds
            'duration': (item['duration'] or 0) / 1000,
            'count': item['count']
        }

    # one row per day, oldest first: 30 days ago up to yesterday;
    # days without any task are filled with zeros
    daily_tasks = []
    for days_ago in range(30, 0, -1):
        date_str = (now - timedelta(days_ago)).strftime('%Y-%m-%d')
        d = date_cache.get(date_str)
        daily_tasks.append({
            'date': date_str,
            'count': 0 if d is None else d['count'],
            'duration': 0 if d is None else d['duration'],
        })

    # calculate total results stored in the spider's result collection
    result_count = 0
    col_name = spider.get('col')
    if col_name is not None:
        for task in tasks:
            result_count += db_manager.count(col_name, {'task_id': task['_id']})

    return {
        'status': 'ok',
        'overview': {
            'task_count': task_count,
            'result_count': result_count,
            'success_rate': success_rate,
            'avg_duration': avg_duration
        },
        'task_count_by_status': task_count_by_status_,
        'task_count_by_node': task_count_by_node_,
        'daily_stats': daily_tasks,
    }

View File

@@ -1,4 +1,5 @@
import os
import sys
from datetime import datetime
from time import sleep
@@ -35,8 +36,16 @@ def execute_spider(self, id: str, params: str = None):
hostname = self.request.hostname
spider = db_manager.get('spiders', id=id)
command = spider.get('cmd')
if command.startswith("env"):
command = PYTHON_ENV_PATH + command.replace("env", "")
# if start with python, then use sys.executable to execute in the virtualenv
if command.startswith('python '):
command = command.replace('python ', sys.executable + ' ')
# if start with scrapy, then use sys.executable to execute scrapy as module in the virtualenv
elif command.startswith('scrapy '):
command = command.replace('scrapy ', sys.executable + ' -m scrapy ')
# pass params to the command
if params is not None:
command += ' ' + params
@@ -95,26 +104,33 @@ def execute_spider(self, id: str, params: str = None):
# start process
cmd_arr = command.split(' ')
cmd_arr = list(filter(lambda x: x != '', cmd_arr))
p = subprocess.Popen(cmd_arr,
stdout=stdout.fileno(),
stderr=stderr.fileno(),
cwd=current_working_directory,
env=env,
bufsize=1)
try:
p = subprocess.Popen(cmd_arr,
stdout=stdout.fileno(),
stderr=stderr.fileno(),
cwd=current_working_directory,
env=env,
bufsize=1)
# get output from the process
_stdout, _stderr = p.communicate()
# get output from the process
_stdout, _stderr = p.communicate()
# get return code
code = p.poll()
if code == 0:
status = TaskStatus.SUCCESS
else:
# get return code
code = p.poll()
if code == 0:
status = TaskStatus.SUCCESS
else:
status = TaskStatus.FAILURE
except Exception as err:
logger.error(err)
stderr.write(str(err))
status = TaskStatus.FAILURE
# save task when the task is finished
finish_ts = datetime.utcnow()
db_manager.update_one('tasks', id=task_id, values={
'finish_ts': datetime.utcnow(),
'finish_ts': finish_ts,
'duration': (finish_ts - task['create_ts']).total_seconds(),
'status': status
})
task = db_manager.get('tasks', id=id)

View File

@@ -11,6 +11,7 @@ SUFFIX_LANG_MAPPING = {
'sh': 'shell',
'java': 'java',
'c': 'c',
'go': 'go',
}
@@ -48,11 +49,18 @@ def get_file_suffix_stats(path) -> dict:
Get suffix stats of given file
:param path: file path
"""
stats = defaultdict(int)
_stats = defaultdict(int)
for file_path in get_file_list(path):
suffix = get_file_suffix(file_path)
if suffix is not None:
stats[suffix] += 1
_stats[suffix] += 1
# only return suffixes with languages
stats = {}
for suffix, count in _stats.items():
if SUFFIX_LANG_MAPPING.get(suffix) is not None:
stats[suffix] = count
return stats

View File

@@ -1,27 +0,0 @@
import request from '@/utils/request'
/**
 * Send the credentials to `/user/login` with a POST request.
 * @param {string} username
 * @param {string} password
 * @returns whatever `request` returns for the call
 */
export function login (username, password) {
  const credentials = { username, password }
  return request({
    url: '/user/login',
    method: 'post',
    data: credentials
  })
}
/**
 * Request `/user/info` with GET, passing the token as a query parameter.
 * @param {string} token
 * @returns whatever `request` returns for the call
 */
export function getInfo (token) {
  const query = { token }
  return request({
    url: '/user/info',
    method: 'get',
    params: query
  })
}
/**
 * Issue a POST request to `/user/logout`.
 * @returns whatever `request` returns for the call
 */
export function logout () {
  const options = { url: '/user/logout', method: 'post' }
  return request(options)
}

View File

@@ -1,46 +0,0 @@
import axios from 'axios'
// API root: the fixed remote host for production builds, localhost otherwise.
const baseUrl = process.env.NODE_ENV === 'production'
  ? 'http://139.129.230.98:8000/api'
  : 'http://localhost:8000/api'
// const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api'
/**
 * Perform an HTTP call against `baseUrl + path` through axios.
 * @param {string} method HTTP verb
 * @param {string} path path appended to `baseUrl`
 * @param {object} [params] query-string parameters
 * @param {object} [data] request body
 * @returns {Promise} settles with the axios response or rejects with its error
 */
const request = (method, path, params, data) => {
  const config = {
    method,
    url: `${baseUrl}${path}`,
    params,
    data
  }
  return new Promise((resolve, reject) => {
    axios(config).then(resolve, reject)
  })
}
// GET `path` with `params` sent as the query string.
const get = (path, params) => request('GET', path, params)
// POST `data` as the request body to `path` (no query parameters).
const post = (path, data) => request('POST', path, {}, data)
// PUT `data` as the request body to `path` (no query parameters).
const put = (path, data) => request('PUT', path, {}, data)
// DELETE `path`. The `data` argument is accepted but not forwarded.
const del = (path, data) => request('DELETE', path)
// Public API of this module: the resolved base URL, the generic `request`
// function and one shortcut per HTTP verb (`del` is exposed as `delete`).
export default {
baseUrl,
request,
get,
post,
put,
delete: del
}

View File

@@ -4,6 +4,7 @@ let baseUrl = 'http://localhost:8000/api'
if (process.env.NODE_ENV === 'production') {
baseUrl = 'http://139.129.230.98:8000/api'
}
// console.log(process.env)
// const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api'
const request = (method, path, params, data) => {

View File

@@ -0,0 +1,89 @@
<template>
<el-card class="metric-card">
<el-col :span="6" class="icon-col">
<i :class="icon" :style="{color:color}"></i>
</el-col>
<el-col :span="18" class="text-col">
<el-row>
<label class="label">{{$t(label)}}</label>
</el-row>
<el-row>
<div class="value">{{value}}</div>
</el-row>
</el-col>
</el-card>
</template>
<script>
// Icon colors keyed by the card `type`; any other type falls back to grey.
const TYPE_COLORS = {
  primary: '#409EFF',
  warning: '#e6a23c',
  success: '#67c23a',
  danger: '#f56c6c'
}

/**
 * Card showing an icon, a translated label and a single value.
 */
export default {
  name: 'MetricCard',
  props: {
    // CSS class(es) of the icon element
    icon: {
      type: String,
      default: ''
    },
    // label text; the template passes it through $t
    label: {
      type: String,
      default: ''
    },
    // value to display; numbers are accepted as well as strings so that
    // binding a numeric count does not trigger a prop type warning
    value: {
      type: [String, Number],
      default: ''
    },
    // one of 'primary' | 'warning' | 'success' | 'danger'; anything else is grey
    type: {
      type: String,
      default: 'default'
    }
  },
  computed: {
    // color applied to the icon, derived from `type`
    color () {
      const known = Object.prototype.hasOwnProperty.call(TYPE_COLORS, this.type)
      return known ? TYPE_COLORS[this.type] : 'grey'
    }
  }
}
</script>
<style scoped>
.metric-card {
margin-right: 20px;
}
.metric-card:last-child {
margin-right: 0;
}
.metric-card .icon-col i {
margin-bottom: 15px;
font-size: 56px;
}
.metric-card .text-col {
padding-left: 10px;
height: 76px;
text-align: center;
}
.metric-card .text-col label {
font-size: 16px;
display: block;
height: 24px;
color: grey;
font-weight: 900;
}
.metric-card .text-col .value {
font-size: 24px;
display: block;
height: 32px;
}
</style>

View File

@@ -0,0 +1,258 @@
<template>
<div class="spider-stats" v-loading="loading">
<!--overall stats-->
<el-row>
<div class="metric-list">
<metric-card :label="$t('30-Day Tasks')"
icon="fa fa-play"
:value="overviewStats.task_count"
type="danger"/>
<metric-card :label="$t('30-Day Results')"
icon="fa fa-table"
:value="overviewStats.result_count"
type="primary"/>
<metric-card :label="$t('Success Rate')"
icon="fa fa-check"
:value="getPercentStr(overviewStats.success_rate)"
type="success"/>
<metric-card :label="$t('Avg Duration (sec)')"
icon="fa fa-hourglass"
:value="getDecimal(overviewStats.avg_duration)"
type="warning"/>
</div>
</el-row>
<!--./overall stats-->
<el-row>
<el-col :span="8">
<el-card class="chart-wrapper" style="margin-right: 20px;">
<h4>{{$t('Tasks by Status')}}</h4>
<div id="task-pie-status" class="chart"></div>
</el-card>
</el-col>
<el-col :span="16">
<el-card class="chart-wrapper">
<h4>{{$t('Daily Tasks')}}</h4>
<div id="task-line" class="chart"></div>
</el-card>
</el-col>
</el-row>
<el-row>
<el-col :span="8">
<el-card class="chart-wrapper" style="margin-right: 20px;">
<h4>{{$t('Tasks by Node')}}</h4>
<div id="task-pie-node" class="chart"></div>
</el-card>
</el-col>
<el-col :span="16">
<el-card class="chart-wrapper">
<h4>{{$t('Daily Avg Duration (sec)')}}</h4>
<div id="duration-line" class="chart"></div>
</el-card>
</el-col>
</el-row>
</div>
</template>
<script>
import {
mapState
} from 'vuex'
import MetricCard from './MetricCard'
import echarts from 'echarts'
// Pie-slice color for a raw task status name; unknown statuses are grey.
const statusColor = (status) => {
  switch (status) {
    case 'SUCCESS':
      return '#67c23a'
    case 'STARTED':
      return '#e6a23c'
    case 'FAILURE':
      return '#f56c6c'
    default:
      return 'grey'
  }
}

/**
 * Analytics panel of a spider: four overview metric cards, two pie charts
 * (tasks by status / by node) and two line charts (daily task count /
 * daily average duration). Data comes from the `spider` store module and is
 * fetched on demand through `update()`.
 */
export default {
  name: 'SpiderStats',
  components: { MetricCard },
  data () {
    return {
      // true while the stats request is in flight (bound to v-loading)
      loading: false
    }
  },
  methods: {
    // Return the chart bound to the element matching `selector` inside this
    // component, creating it only on first use so that repeated refreshes do
    // not call echarts.init on an element that already holds a chart.
    getChart (selector) {
      const el = this.$el.querySelector(selector)
      return echarts.getInstanceByDom(el) || echarts.init(el)
    },
    // Draw a pie chart with the given series data into `selector`.
    renderPie (selector, data) {
      this.getChart(selector).setOption({
        tooltip: {
          show: true
        },
        series: [{
          name: '',
          type: 'pie',
          data
        }]
      })
    },
    // Draw a smoothed area line chart into `selector`, with one point per
    // entry of `dailyStats` (x = date, y = the matching entry of `values`).
    renderDailyLine (selector, values) {
      this.getChart(selector).setOption({
        grid: {
          top: 20,
          bottom: 40
        },
        xAxis: {
          type: 'category',
          data: this.dailyStats.map(d => d.date)
        },
        yAxis: {
          type: 'value'
        },
        series: [{
          type: 'line',
          data: values,
          areaStyle: {},
          smooth: true
        }],
        tooltip: {
          trigger: 'axis',
          show: true
        }
      })
    },
    // Pie of task counts per status, with translated names and status colors.
    renderTaskPieStatus () {
      this.renderPie('#task-pie-status', this.statusStats.map(d => ({
        name: this.$t(d.name),
        value: d.value,
        itemStyle: {
          color: statusColor(d.name)
        }
      })))
    },
    // Pie of task counts per node.
    renderTaskPieNode () {
      this.renderPie('#task-pie-node', this.nodeStats.map(d => ({
        name: d.name,
        value: d.value
      })))
    },
    // Line chart of the daily task count.
    renderTaskLine () {
      this.renderDailyLine('#task-line', this.dailyStats.map(d => d.count))
    },
    // Line chart of the daily average duration.
    renderDurationLine () {
      this.renderDailyLine('#duration-line', this.dailyStats.map(d => d.duration))
    },
    // Redraw all four charts from the current store state.
    render () {
      this.renderTaskPieStatus()
      this.renderTaskLine()
      this.renderTaskPieNode()
      this.renderDurationLine()
    },
    // Fetch the stats through the store, then redraw; shows an error message
    // on failure and always clears the loading flag.
    update () {
      this.loading = true
      this.$store.dispatch('spider/getSpiderStats')
        .then(() => {
          this.render()
        })
        .catch(() => {
          this.$message.error(this.$t('An error happened when fetching the data'))
        })
        .finally(() => {
          this.loading = false
        })
    },
    // Format a ratio as a percentage with two decimals; 'NA' when the value
    // is missing (undefined, or null as it may arrive from JSON).
    getPercentStr (value) {
      if (value === undefined || value === null) return 'NA'
      return (value * 100).toFixed(2) + '%'
    },
    // Format a number with two decimals; 'NA' when the value is missing
    // (undefined, or null as it may arrive from JSON).
    getDecimal (value) {
      if (value === undefined || value === null) return 'NA'
      return value.toFixed(2)
    }
  },
  computed: {
    ...mapState('spider', [
      'overviewStats',
      'statusStats',
      'nodeStats',
      'dailyStats'
    ])
  }
}
</script>
<style scoped>
.metric-list {
display: flex;
}
.metric-list .metric-card {
flex-basis: 25%;
}
.chart-wrapper {
margin-top: 20px;
}
.chart {
width: 100%;
height: 240px;
}
.table {
height: 240px;
}
h4 {
display: inline-block;
margin: 0
}
</style>

View File

@@ -18,6 +18,7 @@ export default {
'Log': '日志',
'Results': '结果',
'Environment': '环境',
'Analytics': '分析',
// 选择
Spider: '爬虫',
@@ -88,6 +89,14 @@ export default {
'Add Environment Variables': '添加环境变量',
'Last 7-Day Tasks': '最近7天任务数',
'Last 5-Run Errors': '最近5次运行错误数',
'30-Day Tasks': '最近30天任务数',
'30-Day Results': '最近30天结果数',
'Success Rate': '运行成功率',
'Avg Duration (sec)': '平均运行时长(秒)',
'Tasks by Status': '分状态任务数',
'Tasks by Node': '分节点任务数',
'Daily Tasks': '每日任务数',
'Daily Avg Duration (sec)': '每日平均运行时长(秒)',
// 爬虫列表
'Name': '名称',

View File

@@ -14,7 +14,19 @@ const state = {
importForm: {
url: '',
type: 'github'
}
},
// spider overview stats
overviewStats: {},
// spider status stats
statusStats: [],
// spider daily stats
dailyStats: [],
// spider node stats
nodeStats: []
}
const getters = {}
@@ -31,7 +43,19 @@ const mutations = {
},
SET_IMPORT_FORM (state, value) {
state.importForm = value
}
},
SET_OVERVIEW_STATS (state, value) {
state.overviewStats = value
},
SET_STATUS_STATS (state, value) {
state.statusStats = value
},
SET_DAILY_STATS (state, value) {
state.dailyStats = value
},
SET_NODE_STATS (state, value) {
state.nodeStats = value
},
}
const actions = {
@@ -138,6 +162,15 @@ const actions = {
.then(response => {
console.log(response)
})
},
getSpiderStats ({ state, commit }) {
return request.get('/stats/get_spider_stats?spider_id=' + state.spiderForm._id)
.then(response => {
commit('SET_OVERVIEW_STATS', response.data.overview)
commit('SET_STATUS_STATS', response.data.task_count_by_status)
commit('SET_DAILY_STATS', response.data.daily_stats)
commit('SET_NODE_STATS', response.data.task_count_by_node)
})
}
}

View File

@@ -19,6 +19,9 @@
<el-tab-pane :label="$t('Environment')" name="environment">
<environment-list/>
</el-tab-pane>
<el-tab-pane :label="$t('Analytics')" name="analytics">
<spider-stats ref="spider-stats"/>
</el-tab-pane>
</el-tabs>
</div>
</template>
@@ -30,10 +33,12 @@ import {
import FileList from '../../components/FileList/FileList'
import SpiderOverview from '../../components/Overview/SpiderOverview'
import EnvironmentList from '../../components/Environment/EnvironmentList'
import SpiderStats from '../../components/Stats/SpiderStats'
export default {
name: 'NodeDetail',
components: {
SpiderStats,
EnvironmentList,
FileList,
SpiderOverview
@@ -57,6 +62,11 @@ export default {
},
methods: {
onTabClick () {
if (this.activeTabName === 'analytics') {
setTimeout(() => {
this.$refs['spider-stats'].update()
}, 0)
}
},
onSpiderChange (id) {
this.$router.push(`/spiders/${id}`)