diff --git a/crawlab/config/config.py b/crawlab/config/config.py
index bad08ee2..afbcb9bf 100644
--- a/crawlab/config/config.py
+++ b/crawlab/config/config.py
@@ -1,5 +1,9 @@
+import os
+
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 # Spider source code path
-PROJECT_SOURCE_FILE_FOLDER = '../spiders'
+PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
 
 # Path to the Python virtual environment
 PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
diff --git a/crawlab/config/config_local.py b/crawlab/config/config_local.py
index 69d30277..afbcb9bf 100644
--- a/crawlab/config/config_local.py
+++ b/crawlab/config/config_local.py
@@ -1,38 +1,55 @@
-# encoding: utf-8
-
 import os
+
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+# Spider source code path
 PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
 
 # Path to the Python virtual environment
 PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
 
 # Spider deployment path
-PROJECT_DEPLOY_FILE_FOLDER = os.path.join(BASE_DIR, 'deployfile')
+# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
+PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
 
-PROJECT_LOGS_FOLDER = os.path.join(BASE_DIR, 'deployfile/logs')
+# Spider log path
+PROJECT_LOGS_FOLDER = '../deployfile/logs'
+
+# Temporary folder for packaging
 PROJECT_TMP_FOLDER = '/tmp'
 
-# celery variables
-BROKER_URL = 'redis://127.0.0.1:56379/0'
-CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:57017/'
+# Celery broker URL
+BROKER_URL = 'redis://127.0.0.1:6379/0'
+
+# Celery backend URL
+CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:27017/'
+
+# Celery MongoDB settings
 CELERY_MONGODB_BACKEND_SETTINGS = {
     'database': 'crawlab_test',
     'taskmeta_collection': 'tasks_celery',
 }
+
+# Celery timezone
 CELERY_TIMEZONE = 'Asia/Shanghai'
+
+# Whether to enable UTC
 CELERY_ENABLE_UTC = True
 
+# Celery Scheduler Redis URL
+CELERY_BEAT_SCHEDULER = 'utils.redisbeat.RedisScheduler'
+CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379'
+CELERY_REDIS_SCHEDULER_KEY = 'celery:beat:order_tasks'
+
 # flower variables
 FLOWER_API_ENDPOINT = 'http://localhost:5555/api'
 
-# database variables
+# MongoDB variables
 MONGO_HOST = '127.0.0.1'
-MONGO_PORT = 57017
+MONGO_PORT = 27017
 MONGO_DB = 'crawlab_test'
 
-# flask variables
+# Flask variables
 DEBUG = True
 FLASK_HOST = '127.0.0.1'
 FLASK_PORT = 8000
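The `BASE_DIR` expression used in both config files climbs three directory levels up from the config module, so every derived path is absolute and independent of whatever working directory the server is launched from. A minimal sketch of how the resolution plays out, assuming a hypothetical install location of `/opt/crawlab`:

```python
import os

# Hypothetical location of crawlab/config/config.py, for illustration only.
config_file = '/opt/crawlab/crawlab/config/config.py'

one_up = os.path.dirname(os.path.abspath(config_file))  # /opt/crawlab/crawlab/config
two_up = os.path.dirname(one_up)                        # /opt/crawlab/crawlab
base_dir = os.path.dirname(two_up)                      # /opt/crawlab  (BASE_DIR)

# PROJECT_SOURCE_FILE_FOLDER therefore resolves to an absolute path:
print(os.path.join(base_dir, 'spiders'))                # /opt/crawlab/spiders
```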
diff --git a/crawlab/routes/tasks.py b/crawlab/routes/tasks.py
index 86e75dab..2afb0cf9 100644
--- a/crawlab/routes/tasks.py
+++ b/crawlab/routes/tasks.py
@@ -42,9 +42,21 @@ class TaskApi(BaseApi):
         elif id is not None:
             task = db_manager.get(col_name=self.col_name, id=id)
             spider = db_manager.get(col_name='spiders', id=str(task['spider_id']))
-            task['spider_name'] = spider['name']
+
+            # spider
+            task['num_results'] = 0
+            if spider:
+                task['spider_name'] = spider['name']
+                if spider.get('col'):
+                    col = spider.get('col')
+                    num_results = db_manager.count(col, {'task_id': task['_id']})
+                    task['num_results'] = num_results
+
+            # duration
             if task.get('finish_ts') is not None:
                 task['duration'] = (task['finish_ts'] - task['create_ts']).total_seconds()
+                task['avg_num_results'] = round(task['num_results'] / task['duration'], 1)
+
             try:
                 with open(task['log_file_path']) as f:
                     task['log'] = f.read()
@@ -76,13 +88,22 @@ class TaskApi(BaseApi):
             if task.get('status') is None:
                 task['status'] = TaskStatus.UNAVAILABLE
 
-            # spider name
+            # spider
+            task['num_results'] = 0
             if _spider:
+                # spider name
                 task['spider_name'] = _spider['name']
 
+                # number of results
+                if _spider.get('col'):
+                    col = _spider.get('col')
+                    num_results = db_manager.count(col, {'task_id': task['_id']})
+                    task['num_results'] = num_results
+
+            # duration
             if task.get('finish_ts') is not None:
                 task['duration'] = (task['finish_ts'] - task['create_ts']).total_seconds()
+                task['avg_num_results'] = round(task['num_results'] / task['duration'], 1)
 
             items.append(task)
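One caveat about the `avg_num_results` lines added in both branches above: `task['duration']` is a float number of seconds and can be `0.0` when `finish_ts` equals `create_ts`, in which case the division raises `ZeroDivisionError`. A zero-safe variant might look like the following sketch (a suggestion, not part of the patch):

```python
# Sketch of a guarded average; assumes task['num_results'] and the
# finish_ts/create_ts timestamps are set as in the patch above.
if task.get('finish_ts') is not None:
    task['duration'] = (task['finish_ts'] - task['create_ts']).total_seconds()
    if task['duration'] > 0:
        task['avg_num_results'] = round(task['num_results'] / task['duration'], 1)
    else:
        task['avg_num_results'] = 0
```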