mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-25 17:42:25 +01:00
@@ -88,17 +88,17 @@ def monitor_nodes_status(celery_app):
|
||||
recv.capture(limit=None, timeout=None, wakeup=True)
|
||||
|
||||
|
||||
# run scheduler as a separate process
|
||||
scheduler.run()
|
||||
|
||||
# monitor node status
|
||||
p_monitor = Process(target=monitor_nodes_status, args=(celery_app,))
|
||||
p_monitor.start()
|
||||
|
||||
# create folder if it does not exist
|
||||
if not os.path.exists(PROJECT_LOGS_FOLDER):
|
||||
os.makedirs(PROJECT_LOGS_FOLDER)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# create folder if it does not exist
|
||||
if not os.path.exists(PROJECT_LOGS_FOLDER):
|
||||
os.makedirs(PROJECT_LOGS_FOLDER)
|
||||
|
||||
# run scheduler as a separate process
|
||||
scheduler.run()
|
||||
|
||||
# monitor node status
|
||||
p_monitor = Process(target=monitor_nodes_status, args=(celery_app,))
|
||||
p_monitor.start()
|
||||
|
||||
# run app instance
|
||||
app.run(host=FLASK_HOST, port=FLASK_PORT, threaded=True)
|
||||
|
||||
10
crawlab/config/__init__.py
Normal file
10
crawlab/config/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
# encoding: utf-8
|
||||
|
||||
import os
|
||||
|
||||
run_env = os.environ.get("RUNENV", "local")
|
||||
|
||||
if run_env == "local": # 加载本地配置
|
||||
from config.config_local import *
|
||||
else:
|
||||
from config.config import *
|
||||
@@ -1,33 +1,55 @@
|
||||
# project variables
|
||||
import os
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# 爬虫源码路径
|
||||
PROJECT_SOURCE_FILE_FOLDER = '../spiders'
|
||||
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
|
||||
|
||||
# 配置python虚拟环境的路径
|
||||
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
|
||||
# 爬虫部署路径
|
||||
PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
|
||||
|
||||
# 爬虫部署路径
|
||||
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
|
||||
PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
|
||||
|
||||
# 爬虫日志路径
|
||||
PROJECT_LOGS_FOLDER = '../deployfile/logs'
|
||||
|
||||
# 打包临时文件夹
|
||||
PROJECT_TMP_FOLDER = '/tmp'
|
||||
|
||||
# celery variables
|
||||
# Celery中间者URL
|
||||
BROKER_URL = 'redis://127.0.0.1:6379/0'
|
||||
|
||||
# Celery后台URL
|
||||
CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:27017/'
|
||||
|
||||
# Celery MongoDB设置
|
||||
CELERY_MONGODB_BACKEND_SETTINGS = {
|
||||
'database': 'crawlab_test',
|
||||
'taskmeta_collection': 'tasks_celery',
|
||||
}
|
||||
|
||||
# Celery时区
|
||||
CELERY_TIMEZONE = 'Asia/Shanghai'
|
||||
|
||||
# 是否启用UTC
|
||||
CELERY_ENABLE_UTC = True
|
||||
|
||||
# Celery Scheduler Redis URL
|
||||
CELERY_BEAT_SCHEDULER = 'utils.redisbeat.RedisScheduler'
|
||||
CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379'
|
||||
CELERY_REDIS_SCHEDULER_KEY = 'celery:beat:order_tasks'
|
||||
|
||||
# flower variables
|
||||
FLOWER_API_ENDPOINT = 'http://localhost:5555/api'
|
||||
|
||||
# database variables
|
||||
# MongoDB 变量
|
||||
MONGO_HOST = '127.0.0.1'
|
||||
MONGO_PORT = 27017
|
||||
MONGO_DB = 'crawlab_test'
|
||||
|
||||
# flask variables
|
||||
# Flask 变量
|
||||
DEBUG = True
|
||||
FLASK_HOST = '127.0.0.1'
|
||||
FLASK_PORT = 8000
|
||||
55
crawlab/config/config_local.py
Normal file
55
crawlab/config/config_local.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import os
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# 爬虫源码路径
|
||||
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
|
||||
|
||||
# 配置python虚拟环境的路径
|
||||
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
|
||||
|
||||
# 爬虫部署路径
|
||||
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
|
||||
PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
|
||||
|
||||
# 爬虫日志路径
|
||||
PROJECT_LOGS_FOLDER = '../deployfile/logs'
|
||||
|
||||
# 打包临时文件夹
|
||||
PROJECT_TMP_FOLDER = '/tmp'
|
||||
|
||||
# Celery中间者URL
|
||||
BROKER_URL = 'redis://127.0.0.1:6379/0'
|
||||
|
||||
# Celery后台URL
|
||||
CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:27017/'
|
||||
|
||||
# Celery MongoDB设置
|
||||
CELERY_MONGODB_BACKEND_SETTINGS = {
|
||||
'database': 'crawlab_test',
|
||||
'taskmeta_collection': 'tasks_celery',
|
||||
}
|
||||
|
||||
# Celery时区
|
||||
CELERY_TIMEZONE = 'Asia/Shanghai'
|
||||
|
||||
# 是否启用UTC
|
||||
CELERY_ENABLE_UTC = True
|
||||
|
||||
# Celery Scheduler Redis URL
|
||||
CELERY_BEAT_SCHEDULER = 'utils.redisbeat.RedisScheduler'
|
||||
CELERY_REDIS_SCHEDULER_URL = 'redis://localhost:6379'
|
||||
CELERY_REDIS_SCHEDULER_KEY = 'celery:beat:order_tasks'
|
||||
|
||||
# flower variables
|
||||
FLOWER_API_ENDPOINT = 'http://localhost:5555/api'
|
||||
|
||||
# MongoDB 变量
|
||||
MONGO_HOST = '127.0.0.1'
|
||||
MONGO_PORT = 27017
|
||||
MONGO_DB = 'crawlab_test'
|
||||
|
||||
# Flask 变量
|
||||
DEBUG = True
|
||||
FLASK_HOST = '127.0.0.1'
|
||||
FLASK_PORT = 8000
|
||||
@@ -28,7 +28,7 @@ class DbManager(object):
|
||||
if item.get('stats') is not None:
|
||||
item.pop('stats')
|
||||
|
||||
col.save(item, **kwargs)
|
||||
return col.save(item, **kwargs)
|
||||
|
||||
def remove(self, col_name: str, cond: dict, **kwargs) -> None:
|
||||
"""
|
||||
@@ -175,5 +175,9 @@ class DbManager(object):
|
||||
col = self.db[col_name]
|
||||
return col.aggregate(pipelines, **kwargs)
|
||||
|
||||
def create_index(self, col_name: str, keys: dict, **kwargs):
|
||||
col = self.db[col_name]
|
||||
col.create_index(keys=keys, **kwargs)
|
||||
|
||||
|
||||
db_manager = DbManager()
|
||||
|
||||
@@ -23,7 +23,7 @@ class BaseApi(Resource):
|
||||
super(BaseApi).__init__()
|
||||
self.parser.add_argument('page_num', type=int)
|
||||
self.parser.add_argument('page_size', type=int)
|
||||
self.parser.add_argument('filter', type=dict)
|
||||
self.parser.add_argument('filter', type=str)
|
||||
|
||||
for arg, type in self.arguments:
|
||||
self.parser.add_argument(arg, type=type)
|
||||
@@ -109,7 +109,7 @@ class BaseApi(Resource):
|
||||
item[k] = args.get(k)
|
||||
item = db_manager.save(col_name=self.col_name, item=item)
|
||||
|
||||
self.after_update(item._id)
|
||||
self.after_update()
|
||||
|
||||
return item
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import requests
|
||||
from constants.task import TaskStatus
|
||||
from db.manager import db_manager
|
||||
from routes.base import BaseApi
|
||||
from tasks.scheduler import scheduler
|
||||
from utils import jsonify
|
||||
from utils.spider import get_spider_col_fields
|
||||
|
||||
@@ -16,5 +17,9 @@ class ScheduleApi(BaseApi):
|
||||
('name', str),
|
||||
('description', str),
|
||||
('cron', str),
|
||||
('spider_id', str)
|
||||
('spider_id', str),
|
||||
('params', str)
|
||||
)
|
||||
|
||||
def after_update(self, id: str = None):
|
||||
scheduler.update()
|
||||
|
||||
@@ -21,7 +21,7 @@ from tasks.spider import execute_spider
|
||||
from utils import jsonify
|
||||
from utils.deploy import zip_file, unzip_file
|
||||
from utils.file import get_file_suffix_stats, get_file_suffix
|
||||
from utils.spider import get_lang_by_stats
|
||||
from utils.spider import get_lang_by_stats, get_last_n_run_errors_count, get_last_n_day_tasks_count
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument('file', type=FileStorage, location='files')
|
||||
@@ -106,7 +106,7 @@ class SpiderApi(BaseApi):
|
||||
if spider is None:
|
||||
stats = get_file_suffix_stats(dir_path)
|
||||
lang = get_lang_by_stats(stats)
|
||||
db_manager.save('spiders', {
|
||||
spider = db_manager.save('spiders', {
|
||||
'name': dir_name,
|
||||
'src': dir_path,
|
||||
'lang': lang,
|
||||
@@ -137,6 +137,13 @@ class SpiderApi(BaseApi):
|
||||
'suffix_stats': stats,
|
||||
})
|
||||
|
||||
# ---------
|
||||
# stats
|
||||
# ---------
|
||||
# last 5-run errors
|
||||
spider['last_5_errors'] = get_last_n_run_errors_count(spider_id=spider['_id'], n=5)
|
||||
spider['last_7d_tasks'] = get_last_n_day_tasks_count(spider_id=spider['_id'], n=5)
|
||||
|
||||
# append spider
|
||||
items.append(spider)
|
||||
|
||||
@@ -193,12 +200,19 @@ class SpiderApi(BaseApi):
|
||||
:param id: spider_id
|
||||
:return:
|
||||
"""
|
||||
job = execute_spider.delay(id)
|
||||
args = self.parser.parse_args()
|
||||
params = args.get('params')
|
||||
|
||||
spider = db_manager.get('spiders', id=ObjectId(id))
|
||||
|
||||
job = execute_spider.delay(id, params)
|
||||
|
||||
# create a new task
|
||||
db_manager.save('tasks', {
|
||||
'_id': job.id,
|
||||
'spider_id': ObjectId(id),
|
||||
'cmd': spider.get('cmd'),
|
||||
'params': params,
|
||||
'create_ts': datetime.utcnow(),
|
||||
'status': TaskStatus.PENDING
|
||||
})
|
||||
|
||||
@@ -42,9 +42,21 @@ class TaskApi(BaseApi):
|
||||
elif id is not None:
|
||||
task = db_manager.get(col_name=self.col_name, id=id)
|
||||
spider = db_manager.get(col_name='spiders', id=str(task['spider_id']))
|
||||
task['spider_name'] = spider['name']
|
||||
|
||||
# spider
|
||||
task['num_results'] = 0
|
||||
if spider:
|
||||
task['spider_name'] = spider['name']
|
||||
if spider.get('col'):
|
||||
col = spider.get('col')
|
||||
num_results = db_manager.count(col, {'task_id': task['_id']})
|
||||
task['num_results'] = num_results
|
||||
|
||||
# duration
|
||||
if task.get('finish_ts') is not None:
|
||||
task['duration'] = (task['finish_ts'] - task['create_ts']).total_seconds()
|
||||
task['avg_num_results'] = round(task['num_results'] / task['duration'], 1)
|
||||
|
||||
try:
|
||||
with open(task['log_file_path']) as f:
|
||||
task['log'] = f.read()
|
||||
@@ -56,20 +68,48 @@ class TaskApi(BaseApi):
|
||||
args = self.parser.parse_args()
|
||||
page_size = args.get('page_size') or 10
|
||||
page_num = args.get('page_num') or 1
|
||||
tasks = db_manager.list(col_name=self.col_name, cond={}, limit=page_size, skip=page_size * (page_num - 1),
|
||||
filter_str = args.get('filter')
|
||||
filter_ = {}
|
||||
if filter_str is not None:
|
||||
filter_ = json.loads(filter_str)
|
||||
if filter_.get('spider_id'):
|
||||
filter_['spider_id'] = ObjectId(filter_['spider_id'])
|
||||
tasks = db_manager.list(col_name=self.col_name, cond=filter_, limit=page_size, skip=page_size * (page_num - 1),
|
||||
sort_key='create_ts')
|
||||
items = []
|
||||
for task in tasks:
|
||||
# celery tasks
|
||||
# _task = db_manager.get('tasks_celery', id=task['_id'])
|
||||
|
||||
# get spider
|
||||
_spider = db_manager.get(col_name='spiders', id=str(task['spider_id']))
|
||||
|
||||
# status
|
||||
if task.get('status') is None:
|
||||
task['status'] = TaskStatus.UNAVAILABLE
|
||||
|
||||
# spider
|
||||
task['num_results'] = 0
|
||||
if _spider:
|
||||
# spider name
|
||||
task['spider_name'] = _spider['name']
|
||||
|
||||
# number of results
|
||||
if _spider.get('col'):
|
||||
col = _spider.get('col')
|
||||
num_results = db_manager.count(col, {'task_id': task['_id']})
|
||||
task['num_results'] = num_results
|
||||
|
||||
# duration
|
||||
if task.get('finish_ts') is not None:
|
||||
task['duration'] = (task['finish_ts'] - task['create_ts']).total_seconds()
|
||||
task['avg_num_results'] = round(task['num_results'] / task['duration'], 1)
|
||||
|
||||
items.append(task)
|
||||
|
||||
return {
|
||||
'status': 'ok',
|
||||
'total_count': db_manager.count('tasks', {}),
|
||||
'total_count': db_manager.count('tasks', filter_),
|
||||
'page_num': page_num,
|
||||
'page_size': page_size,
|
||||
'items': jsonify(items)
|
||||
|
||||
@@ -2,7 +2,6 @@ import requests
|
||||
from apscheduler.schedulers.background import BackgroundScheduler
|
||||
from apscheduler.jobstores.mongodb import MongoDBJobStore
|
||||
from pymongo import MongoClient
|
||||
from flask import current_app
|
||||
|
||||
from config import MONGO_DB, MONGO_HOST, MONGO_PORT, FLASK_HOST, FLASK_PORT
|
||||
from constants.spider import CronEnabled
|
||||
@@ -11,37 +10,36 @@ from db.manager import db_manager
|
||||
|
||||
class Scheduler(object):
|
||||
mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
|
||||
task_col = 'apscheduler_jobs'
|
||||
|
||||
# scheduler jobstore
|
||||
jobstores = {
|
||||
'mongo': MongoDBJobStore(database=MONGO_DB,
|
||||
collection='apscheduler_jobs',
|
||||
collection=task_col,
|
||||
client=mongo)
|
||||
}
|
||||
|
||||
# scheduler instance
|
||||
scheduler = BackgroundScheduler(jobstores=jobstores)
|
||||
|
||||
def execute_spider(self, id: str):
|
||||
def execute_spider(self, id: str, params: str = None):
|
||||
query = {}
|
||||
if params is not None:
|
||||
query['params'] = params
|
||||
r = requests.get('http://%s:%s/api/spiders/%s/on_crawl' % (
|
||||
FLASK_HOST,
|
||||
FLASK_PORT,
|
||||
id
|
||||
))
|
||||
|
||||
def restart(self):
|
||||
self.scheduler.shutdown()
|
||||
self.scheduler.start()
|
||||
current_app.logger.info('restarted')
|
||||
), query)
|
||||
|
||||
def update(self):
|
||||
current_app.logger.info('updating...')
|
||||
|
||||
# remove all existing periodic jobs
|
||||
self.scheduler.remove_all_jobs()
|
||||
self.mongo[MONGO_DB][self.task_col].remove()
|
||||
|
||||
# add new periodic jobs from database
|
||||
spiders = db_manager.list('spiders', {'cron_enabled': CronEnabled.ON})
|
||||
for spider in spiders:
|
||||
cron = spider.get('cron')
|
||||
periodical_tasks = db_manager.list('schedules', {})
|
||||
for task in periodical_tasks:
|
||||
cron = task.get('cron')
|
||||
cron_arr = cron.split(' ')
|
||||
second = cron_arr[0]
|
||||
minute = cron_arr[1]
|
||||
@@ -49,13 +47,17 @@ class Scheduler(object):
|
||||
day = cron_arr[3]
|
||||
month = cron_arr[4]
|
||||
day_of_week = cron_arr[5]
|
||||
self.scheduler.add_job(func=self.execute_spider, trigger='cron', args=(str(spider['_id']),),
|
||||
self.scheduler.add_job(func=self.execute_spider,
|
||||
args=(str(task['spider_id']), task.get('params'),),
|
||||
trigger='cron',
|
||||
jobstore='mongo',
|
||||
day_of_week=day_of_week, month=month, day=day, hour=hour, minute=minute,
|
||||
day_of_week=day_of_week,
|
||||
month=month,
|
||||
day=day,
|
||||
hour=hour,
|
||||
minute=minute,
|
||||
second=second)
|
||||
|
||||
current_app.logger.info('updated')
|
||||
|
||||
def run(self):
|
||||
self.update()
|
||||
self.scheduler.start()
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
from time import sleep
|
||||
|
||||
from bson import ObjectId
|
||||
from pymongo import ASCENDING, DESCENDING
|
||||
|
||||
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER, PYTHON_ENV_PATH
|
||||
from constants.task import TaskStatus
|
||||
from db.manager import db_manager
|
||||
@@ -10,8 +13,19 @@ import subprocess
|
||||
from utils.log import other as logger
|
||||
|
||||
|
||||
def get_task(id: str):
|
||||
i = 0
|
||||
while i < 5:
|
||||
task = db_manager.get('tasks', id=id)
|
||||
if task is not None:
|
||||
return task
|
||||
i += 1
|
||||
sleep(1)
|
||||
return None
|
||||
|
||||
|
||||
@celery_app.task(bind=True)
|
||||
def execute_spider(self, id: str):
|
||||
def execute_spider(self, id: str, params: str = None):
|
||||
"""
|
||||
Execute spider task.
|
||||
:param self:
|
||||
@@ -23,7 +37,15 @@ def execute_spider(self, id: str):
|
||||
command = spider.get('cmd')
|
||||
if command.startswith("env"):
|
||||
command = PYTHON_ENV_PATH + command.replace("env", "")
|
||||
if params is not None:
|
||||
command += ' ' + params
|
||||
|
||||
# get task object and return if not found
|
||||
task = get_task(task_id)
|
||||
if task is None:
|
||||
return
|
||||
|
||||
# current working directory
|
||||
current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))
|
||||
|
||||
# log info
|
||||
@@ -43,7 +65,7 @@ def execute_spider(self, id: str):
|
||||
stdout = open(log_file_path, 'a')
|
||||
stderr = open(log_file_path, 'a')
|
||||
|
||||
# create a new task
|
||||
# update task status as started
|
||||
db_manager.update_one('tasks', id=task_id, values={
|
||||
'start_ts': datetime.utcnow(),
|
||||
'node_id': hostname,
|
||||
@@ -67,8 +89,13 @@ def execute_spider(self, id: str):
|
||||
if spider.get('col'):
|
||||
env['CRAWLAB_COLLECTION'] = spider.get('col')
|
||||
|
||||
# create index to speed results data retrieval
|
||||
db_manager.create_index(spider.get('col'), [('task_id', ASCENDING)])
|
||||
|
||||
# start process
|
||||
p = subprocess.Popen(command.split(' '),
|
||||
cmd_arr = command.split(' ')
|
||||
cmd_arr = list(filter(lambda x: x != '', cmd_arr))
|
||||
p = subprocess.Popen(cmd_arr,
|
||||
stdout=stdout.fileno(),
|
||||
stderr=stderr.fileno(),
|
||||
cwd=current_working_directory,
|
||||
@@ -87,9 +114,6 @@ def execute_spider(self, id: str):
|
||||
|
||||
# save task when the task is finished
|
||||
db_manager.update_one('tasks', id=task_id, values={
|
||||
'node_id': hostname,
|
||||
'hostname': hostname,
|
||||
'log_file_path': log_file_path,
|
||||
'finish_ts': datetime.utcnow(),
|
||||
'status': status
|
||||
})
|
||||
|
||||
@@ -24,7 +24,11 @@ def update_nodes_status(refresh=False):
|
||||
url = '%s/workers?status=1' % FLOWER_API_ENDPOINT
|
||||
if refresh:
|
||||
url += '&refresh=1'
|
||||
|
||||
res = requests.get(url)
|
||||
if res.status_code != 200:
|
||||
return online_node_ids
|
||||
|
||||
for k, v in json.loads(res.content.decode('utf-8')).items():
|
||||
node_name = k
|
||||
node_status = NodeStatus.ONLINE if v else NodeStatus.OFFLINE
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import os
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from bson import ObjectId
|
||||
|
||||
from constants.spider import FILE_SUFFIX_LANG_MAPPING, LangType, SUFFIX_IGNORE, SpiderType
|
||||
from constants.task import TaskStatus
|
||||
from db.manager import db_manager
|
||||
|
||||
|
||||
@@ -43,3 +47,25 @@ def get_spider_col_fields(col_name: str) -> list:
|
||||
for k in item.keys():
|
||||
fields.add(k)
|
||||
return list(fields)
|
||||
|
||||
|
||||
def get_last_n_run_errors_count(spider_id: ObjectId, n: int) -> list:
|
||||
tasks = db_manager.list(col_name='tasks',
|
||||
cond={'spider_id': spider_id},
|
||||
sort_key='create_ts',
|
||||
limit=n)
|
||||
count = 0
|
||||
for task in tasks:
|
||||
if task['status'] == TaskStatus.FAILURE:
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
def get_last_n_day_tasks_count(spider_id: ObjectId, n: int) -> list:
|
||||
return db_manager.count(col_name='tasks',
|
||||
cond={
|
||||
'spider_id': spider_id,
|
||||
'create_ts': {
|
||||
'$gte': (datetime.now() - timedelta(n))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -38,26 +38,26 @@
|
||||
<el-option value="go" label="Go"></el-option>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Schedule Enabled')">
|
||||
<el-switch v-model="spiderForm.cron_enabled" :disabled="isView">
|
||||
</el-switch>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Schedule Cron')" v-if="spiderForm.cron_enabled"
|
||||
prop="cron"
|
||||
:rules="cronRules"
|
||||
:inline-message="true">
|
||||
<template slot="label">
|
||||
<el-tooltip :content="$t('Cron Format: [second] [minute] [hour] [day of month] [month] [day of week]')"
|
||||
placement="top">
|
||||
<span>
|
||||
{{$t('Schedule Cron')}}
|
||||
<i class="fa fa-exclamation-circle"></i>
|
||||
</span>
|
||||
</el-tooltip>
|
||||
</template>
|
||||
<el-input v-model="spiderForm.cron" :placeholder="$t('Schedule Cron')"
|
||||
:disabled="isView"></el-input>
|
||||
</el-form-item>
|
||||
<!--<el-form-item :label="$t('Schedule Enabled')">-->
|
||||
<!--<el-switch v-model="spiderForm.cron_enabled" :disabled="isView">-->
|
||||
<!--</el-switch>-->
|
||||
<!--</el-form-item>-->
|
||||
<!--<el-form-item :label="$t('Schedule Cron')" v-if="spiderForm.cron_enabled"-->
|
||||
<!--prop="cron"-->
|
||||
<!--:rules="cronRules"-->
|
||||
<!--:inline-message="true">-->
|
||||
<!--<template slot="label">-->
|
||||
<!--<el-tooltip :content="$t('Cron Format: [second] [minute] [hour] [day of month] [month] [day of week]')"-->
|
||||
<!--placement="top">-->
|
||||
<!--<span>-->
|
||||
<!--{{$t('Schedule Cron')}}-->
|
||||
<!--<i class="fa fa-exclamation-circle"></i>-->
|
||||
<!--</span>-->
|
||||
<!--</el-tooltip>-->
|
||||
<!--</template>-->
|
||||
<!--<el-input v-model="spiderForm.cron" :placeholder="$t('Schedule Cron')"-->
|
||||
<!--:disabled="isView"></el-input>-->
|
||||
<!--</el-form-item>-->
|
||||
</el-form>
|
||||
</el-row>
|
||||
<el-row class="button-container" v-if="!isView">
|
||||
|
||||
@@ -30,6 +30,13 @@
|
||||
<el-form-item :label="$t('Duration (sec)')">
|
||||
<el-input v-model="taskForm.duration" placeholder="Duration" disabled></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Results Count')">
|
||||
<el-input v-model="taskForm.num_results" placeholder="Results Count" disabled></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Average Results Count per Second')">
|
||||
<el-input v-model="taskForm.avg_num_results" placeholder="Average Results Count per Second" disabled>
|
||||
</el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Error Message')" v-if="taskForm.status === 'FAILURE'">
|
||||
<div class="error-message">
|
||||
{{taskForm.log}}
|
||||
|
||||
@@ -43,6 +43,12 @@ import {
|
||||
|
||||
export default {
|
||||
name: 'TaskTableView',
|
||||
data () {
|
||||
return {
|
||||
// setInterval handle
|
||||
handle: undefined
|
||||
}
|
||||
},
|
||||
props: {
|
||||
title: String
|
||||
},
|
||||
@@ -71,6 +77,14 @@ export default {
|
||||
this.$store.dispatch('node/getTaskList', this.$route.params.id)
|
||||
}
|
||||
}
|
||||
},
|
||||
mounted () {
|
||||
this.handle = setInterval(() => {
|
||||
this.onRefresh()
|
||||
}, 5000)
|
||||
},
|
||||
destroyed () {
|
||||
clearInterval(this.handle)
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -86,6 +86,8 @@ export default {
|
||||
'Variable': '变量',
|
||||
'Value': '值',
|
||||
'Add Environment Variables': '添加环境变量',
|
||||
'Last 7-Day Tasks': '最近7天任务数',
|
||||
'Last 5-Run Errors': '最近5次运行错误数',
|
||||
|
||||
// 爬虫列表
|
||||
'Name': '名称',
|
||||
@@ -101,6 +103,8 @@ export default {
|
||||
'Finish Timestamp': '完成时间',
|
||||
'Duration (sec)': '用时(秒)',
|
||||
'Error Message': '错误信息',
|
||||
'Results Count': '结果数',
|
||||
'Average Results Count per Second': '抓取速度(个/秒)',
|
||||
|
||||
// 任务列表
|
||||
'Node': '节点',
|
||||
@@ -111,6 +115,12 @@ export default {
|
||||
// 部署
|
||||
'Time': '时间',
|
||||
|
||||
// 定时任务
|
||||
'Schedule Name': '定时任务名称',
|
||||
'Schedule Description': '定时任务描述',
|
||||
'Parameters': '参数',
|
||||
'Add Schedule': '添加定时任务',
|
||||
|
||||
// 文件
|
||||
'Choose Folder': '选择文件',
|
||||
|
||||
|
||||
@@ -9,6 +9,11 @@ const state = {
|
||||
taskResultsData: [],
|
||||
taskResultsColumns: [],
|
||||
taskResultsTotalCount: 0,
|
||||
// filter
|
||||
filter: {
|
||||
node_id: '',
|
||||
spider_id: ''
|
||||
},
|
||||
// pagination
|
||||
pageNum: 0,
|
||||
pageSize: 10,
|
||||
@@ -68,7 +73,11 @@ const actions = {
|
||||
getTaskList ({ state, commit }) {
|
||||
return request.get('/tasks', {
|
||||
page_num: state.pageNum,
|
||||
page_size: state.pageSize
|
||||
page_size: state.pageSize,
|
||||
filter: {
|
||||
node_id: state.filter.node_id || undefined,
|
||||
spider_id: state.filter.spider_id || undefined
|
||||
}
|
||||
})
|
||||
.then(response => {
|
||||
commit('SET_TASK_LIST', response.data.items)
|
||||
|
||||
@@ -31,6 +31,15 @@
|
||||
</template>
|
||||
<el-input v-model="scheduleForm.cron" :placeholder="$t('Cron')"></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Execute Command')" prop="params">
|
||||
<el-input v-model="spider.cmd"
|
||||
:placeholder="$t('Execute Command')"
|
||||
disabled></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Parameters')" prop="params">
|
||||
<el-input v-model="scheduleForm.params"
|
||||
:placeholder="$t('Parameters')"></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Schedule Description')" prop="description">
|
||||
<el-input v-model="scheduleForm.description" type="textarea"
|
||||
:placeholder="$t('Schedule Description')"></el-input>
|
||||
@@ -130,6 +139,14 @@ export default {
|
||||
]),
|
||||
filteredTableData () {
|
||||
return this.scheduleList
|
||||
},
|
||||
spider () {
|
||||
for (let i = 0; i < this.spiderList.length; i++) {
|
||||
if (this.spiderList[i]._id === this.scheduleForm.spider_id) {
|
||||
return this.spiderList[i]
|
||||
}
|
||||
}
|
||||
return {}
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
|
||||
@@ -84,6 +84,17 @@
|
||||
<el-tag type="success" v-else-if="scope.row.lang">{{scope.row.lang}}</el-tag>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column v-else-if="col.name === 'last_5_errors'"
|
||||
:key="col.name"
|
||||
:label="$t(col.label)"
|
||||
:width="col.width"
|
||||
align="center">
|
||||
<template slot-scope="scope">
|
||||
<div :style="{color:scope.row[col.name]>0?'red':''}">
|
||||
{{scope.row[col.name]}}
|
||||
</div>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column v-else
|
||||
:key="col.name"
|
||||
:property="col.name"
|
||||
@@ -93,7 +104,7 @@
|
||||
:width="col.width">
|
||||
</el-table-column>
|
||||
</template>
|
||||
<el-table-column :label="$t('Action')" align="left" width="250">
|
||||
<el-table-column :label="$t('Action')" align="left" width="200">
|
||||
<template slot-scope="scope">
|
||||
<el-tooltip :content="$t('View')" placement="top">
|
||||
<el-button type="primary" icon="el-icon-search" size="mini" @click="onView(scope.row)"></el-button>
|
||||
@@ -151,7 +162,9 @@ export default {
|
||||
{ name: 'name', label: 'Name', width: 'auto' },
|
||||
{ name: 'type', label: 'Spider Type', width: '160', sortable: true },
|
||||
{ name: 'lang', label: 'Language', width: '160', sortable: true },
|
||||
{ name: 'task_ts', label: 'Last Run', width: '160' }
|
||||
{ name: 'task_ts', label: 'Last Run', width: '160' },
|
||||
{ name: 'last_7d_tasks', label: 'Last 7-Day Tasks', width: '80' },
|
||||
{ name: 'last_5_errors', label: 'Last 5-Run Errors', width: '80' }
|
||||
],
|
||||
spiderFormRules: {
|
||||
name: [{ required: true, message: 'Required Field', trigger: 'change' }]
|
||||
|
||||
@@ -2,20 +2,22 @@
|
||||
<div class="app-container">
|
||||
<!--filter-->
|
||||
<div class="filter">
|
||||
<el-input prefix-icon="el-icon-search"
|
||||
:placeholder="$t('Search')"
|
||||
class="filter-search"
|
||||
v-model="filter.keyword"
|
||||
@change="onSearch">
|
||||
</el-input>
|
||||
<div class="right">
|
||||
<div class="left">
|
||||
<el-select class="filter-select" v-model="filter.node_id" :placeholder="$t('Node')" filterable clearable>
|
||||
<el-option v-for="op in nodeList" :key="op._id" :value="op._id" :label="op.name"></el-option>
|
||||
</el-select>
|
||||
<el-select class="filter-select" v-model="filter.spider_id" :placeholder="$t('Spider')" filterable clearable>
|
||||
<el-option v-for="op in spiderList" :key="op._id" :value="op._id" :label="op.name"></el-option>
|
||||
</el-select>
|
||||
<el-button type="success"
|
||||
icon="el-icon-refresh"
|
||||
icon="el-icon-search"
|
||||
class="refresh"
|
||||
@click="onRefresh">
|
||||
{{$t('Refresh')}}
|
||||
{{$t('Search')}}
|
||||
</el-button>
|
||||
</div>
|
||||
<!--<div class="right">-->
|
||||
<!--</div>-->
|
||||
</div>
|
||||
|
||||
<!--table list-->
|
||||
@@ -100,28 +102,42 @@ export default {
|
||||
name: 'TaskList',
|
||||
data () {
|
||||
return {
|
||||
// setInterval handle
|
||||
handle: undefined,
|
||||
|
||||
// determine if is edit mode
|
||||
isEditMode: false,
|
||||
|
||||
// dialog visibility
|
||||
dialogVisible: false,
|
||||
filter: {
|
||||
keyword: ''
|
||||
},
|
||||
// tableData,
|
||||
|
||||
// table columns
|
||||
columns: [
|
||||
{ name: 'create_ts', label: 'Create Time', width: '150' },
|
||||
{ name: 'start_ts', label: 'Start Time', width: '150' },
|
||||
{ name: 'finish_ts', label: 'Finish Time', width: '150' },
|
||||
{ name: 'spider_name', label: 'Spider', width: '160' },
|
||||
{ name: 'create_ts', label: 'Create Time', width: '100' },
|
||||
{ name: 'start_ts', label: 'Start Time', width: '100' },
|
||||
{ name: 'finish_ts', label: 'Finish Time', width: '100' },
|
||||
{ name: 'duration', label: 'Duration (sec)', width: '80' },
|
||||
{ name: 'spider_name', label: 'Spider', width: '120' },
|
||||
{ name: 'node_id', label: 'Node', width: '160' },
|
||||
{ name: 'status', label: 'Status', width: '160', sortable: true }
|
||||
{ name: 'num_results', label: 'Results Count', width: '80' },
|
||||
{ name: 'avg_num_results', label: 'Average Results Count per Second', width: '80' },
|
||||
{ name: 'status', label: 'Status', width: '80' }
|
||||
]
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
...mapState('task', [
|
||||
'filter',
|
||||
'taskList',
|
||||
'taskListTotalCount',
|
||||
'taskForm'
|
||||
]),
|
||||
...mapState('spider', [
|
||||
'spiderList'
|
||||
]),
|
||||
...mapState('node', [
|
||||
'nodeList'
|
||||
]),
|
||||
pageNum: {
|
||||
get () {
|
||||
return this.$store.state.task.pageNum
|
||||
@@ -200,6 +216,17 @@ export default {
|
||||
},
|
||||
created () {
|
||||
this.$store.dispatch('task/getTaskList')
|
||||
this.$store.dispatch('spider/getSpiderList')
|
||||
this.$store.dispatch('node/getNodeList')
|
||||
},
|
||||
mounted () {
|
||||
// request task list every 5 seconds
|
||||
this.handle = setInterval(() => {
|
||||
this.$store.dispatch('task/getTaskList')
|
||||
}, 5000)
|
||||
},
|
||||
destroyed () {
|
||||
clearInterval(this.handle)
|
||||
}
|
||||
}
|
||||
</script>
|
||||
@@ -215,6 +242,13 @@ export default {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
|
||||
.left {
|
||||
.filter-select {
|
||||
width: 180px;
|
||||
margin-right: 10px;
|
||||
}
|
||||
}
|
||||
|
||||
.filter-search {
|
||||
width: 240px;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user