mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
added results stats for tasks
This commit is contained in:
@@ -1,5 +1,9 @@
|
||||
import os
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Spider source code path
|
||||
PROJECT_SOURCE_FILE_FOLDER = '../spiders'
|
||||
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
|
||||
|
||||
# Path of the Python virtual environment
|
||||
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
|
||||
|
||||
@@ -1,38 +1,55 @@
|
||||
# encoding: utf-8
|
||||
|
||||
import os
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Spider source code path
|
||||
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
|
||||
|
||||
# Path of the Python virtual environment
|
||||
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
|
||||
|
||||
# Spider deployment path
|
||||
PROJECT_DEPLOY_FILE_FOLDER = os.path.join(BASE_DIR, 'deployfile')
|
||||
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
|
||||
PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
|
||||
|
||||
PROJECT_LOGS_FOLDER = os.path.join(BASE_DIR, 'deployfile/logs')
|
||||
# Spider log path
|
||||
PROJECT_LOGS_FOLDER = '../deployfile/logs'
|
||||
|
||||
# Temporary folder used for packaging
|
||||
PROJECT_TMP_FOLDER = '/tmp'
|
||||
|
||||
# celery variables
|
||||
BROKER_URL = 'redis://127.0.0.1:56379/0'
|
||||
CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:57017/'
|
||||
# Celery broker URL
|
||||
BROKER_URL = 'redis://127.0.0.1:6379/0'
|
||||
|
||||
# Celery result backend URL
|
||||
CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:27017/'
|
||||
|
||||
# Celery MongoDB settings
|
||||
CELERY_MONGODB_BACKEND_SETTINGS = {
|
||||
'database': 'crawlab_test',
|
||||
'taskmeta_collection': 'tasks_celery',
|
||||
}
|
||||
|
||||
# Celery timezone
|
||||
CELERY_TIMEZONE = 'Asia/Shanghai'
|
||||
|
||||
# Whether to enable UTC
|
||||
CELERY_ENABLE_UTC = True
|
||||
|
||||
# Celery Scheduler Redis URL
|
||||
CELERY_BEAT_SCHEDULER = 'utils.redisbeat.RedisScheduler'
|
||||
CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379'
|
||||
CELERY_REDIS_SCHEDULER_KEY = 'celery:beat:order_tasks'
|
||||
|
||||
# flower variables
|
||||
FLOWER_API_ENDPOINT = 'http://localhost:5555/api'
|
||||
|
||||
# database variables
|
||||
# MongoDB variables
|
||||
MONGO_HOST = '127.0.0.1'
|
||||
MONGO_PORT = 57017
|
||||
MONGO_PORT = 27017
|
||||
MONGO_DB = 'crawlab_test'
|
||||
|
||||
# flask variables
|
||||
# Flask variables
|
||||
DEBUG = True
|
||||
FLASK_HOST = '127.0.0.1'
|
||||
FLASK_PORT = 8000
|
||||
|
||||
@@ -42,9 +42,21 @@ class TaskApi(BaseApi):
|
||||
elif id is not None:
|
||||
task = db_manager.get(col_name=self.col_name, id=id)
|
||||
spider = db_manager.get(col_name='spiders', id=str(task['spider_id']))
|
||||
task['spider_name'] = spider['name']
|
||||
|
||||
# spider
|
||||
task['num_results'] = 0
|
||||
if spider:
|
||||
task['spider_name'] = spider['name']
|
||||
if spider.get('col'):
|
||||
col = spider.get('col')
|
||||
num_results = db_manager.count(col, {'task_id': task['_id']})
|
||||
task['num_results'] = num_results
|
||||
|
||||
# duration
|
||||
if task.get('finish_ts') is not None:
|
||||
task['duration'] = (task['finish_ts'] - task['create_ts']).total_seconds()
|
||||
task['avg_num_results'] = round(task['num_results'] / task['duration'], 1)
|
||||
|
||||
try:
|
||||
with open(task['log_file_path']) as f:
|
||||
task['log'] = f.read()
|
||||
@@ -76,13 +88,22 @@ class TaskApi(BaseApi):
|
||||
if task.get('status') is None:
|
||||
task['status'] = TaskStatus.UNAVAILABLE
|
||||
|
||||
# spider name
|
||||
# spider
|
||||
task['num_results'] = 0
|
||||
if _spider:
|
||||
# spider name
|
||||
task['spider_name'] = _spider['name']
|
||||
|
||||
# number of results
|
||||
if _spider.get('col'):
|
||||
col = _spider.get('col')
|
||||
num_results = db_manager.count(col, {'task_id': task['_id']})
|
||||
task['num_results'] = num_results
|
||||
|
||||
# duration
|
||||
if task.get('finish_ts') is not None:
|
||||
task['duration'] = (task['finish_ts'] - task['create_ts']).total_seconds()
|
||||
task['avg_num_results'] = round(task['num_results'] / task['duration'], 1)
|
||||
|
||||
items.append(task)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user