From 278d6e72f5777e77a3e4acdcdea3c9a091b9b90b Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Sat, 23 Feb 2019 18:39:22 +0800
Subject: [PATCH] added TaskOverview

---
 routes/nodes.py                   | 13 +++++++++++++
 routes/spiders.py                 | 24 +++++++++++++++++++++++-
 routes/tasks.py                   | 11 +++++++++++
 spiders/meitui/app.py             |  1 +
 spiders/toutiao/toutiao_spider.js |  0
 tasks/spider.py                   |  3 ++-
 6 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 spiders/meitui/app.py
 create mode 100644 spiders/toutiao/toutiao_spider.js

diff --git a/routes/nodes.py b/routes/nodes.py
index 89aec371..26efa6c9 100644
--- a/routes/nodes.py
+++ b/routes/nodes.py
@@ -98,3 +98,16 @@ class NodeApi(BaseApi):
             'status': 'ok',
             'items': deploys
         })
+
+    def get_tasks(self, id):
+        items = db_manager.list('tasks', {'node_id': id})
+        for item in items:
+            spider_id = item['spider_id']
+            spider = db_manager.get('spiders', id=str(spider_id))
+            item['spider_name'] = spider['name']
+            task = db_manager.get('tasks_celery', id=item['_id'])
+            item['status'] = task['status']
+        return jsonify({
+            'status': 'ok',
+            'items': items
+        })
diff --git a/routes/spiders.py b/routes/spiders.py
index cbe90bc0..173edea8 100644
--- a/routes/spiders.py
+++ b/routes/spiders.py
@@ -78,7 +78,13 @@ class SpiderApi(BaseApi):
         })
 
     def crawl(self, id):
-        job = execute_spider.delay(id)
+        args = self.parser.parse_args()
+        node_id = args.get('node_id')
+
+        if node_id is None:
+            return {}, 400
+
+        job = execute_spider.delay(id, node_id)
         # print('crawl: %s' % id)
         return {
             'code': 200,
@@ -147,3 +153,19 @@ class SpiderApi(BaseApi):
             'status': 'ok',
             'items': deploys
         })
+
+    def get_tasks(self, id):
+        items = db_manager.list('tasks', {'spider_id': ObjectId(id)})
+        for item in items:
+            spider_id = item['spider_id']
+            spider = db_manager.get('spiders', id=str(spider_id))
+            item['spider_name'] = spider['name']
+            task = db_manager.get('tasks_celery', id=item['_id'])
+            if task is not None:
+                item['status'] = task['status']
+            else:
+                item['status'] = 'UNAVAILABLE'
+        return jsonify({
+            'status': 'ok',
+            'items': items
+        })
diff --git a/routes/tasks.py b/routes/tasks.py
index a7bac35d..9b5af97d 100644
--- a/routes/tasks.py
+++ b/routes/tasks.py
@@ -12,6 +12,17 @@ class TaskApi(BaseApi):
     )
 
     def get(self, id=None):
+        if id is not None:
+            task = db_manager.get('tasks', id=id)
+            _task = db_manager.get('tasks_celery', id=task['_id'])
+            _spider = db_manager.get('spiders', id=str(task['spider_id']))
+            task['status'] = _task['status']
+            task['result'] = _task['result']
+            task['spider_name'] = _spider['name']
+            with open(task['log_file_path']) as f:
+                task['log'] = f.read()
+            return jsonify(task)
+
         tasks = db_manager.list('tasks', {}, limit=1000)
         items = []
         for task in tasks:
diff --git a/spiders/meitui/app.py b/spiders/meitui/app.py
new file mode 100644
index 00000000..493ffc46
--- /dev/null
+++ b/spiders/meitui/app.py
@@ -0,0 +1 @@
+# /Users/yeqing/projects/crawlab/spiders
diff --git a/spiders/toutiao/toutiao_spider.js b/spiders/toutiao/toutiao_spider.js
new file mode 100644
index 00000000..e69de29b
diff --git a/tasks/spider.py b/tasks/spider.py
index 070ff5db..a253bdb5 100644
--- a/tasks/spider.py
+++ b/tasks/spider.py
@@ -15,7 +15,7 @@ logger = get_logger(__name__)
 
 
 @celery_app.task(bind=True)
-def execute_spider(self, id: str):
+def execute_spider(self, id: str, node_id: str):
     task_id = self.request.id
     hostname = self.request.hostname
     spider = db_manager.get('spiders', id=id)
@@ -43,6 +43,7 @@ def execute_spider(self, id: str):
         '_id': task_id,
         'spider_id': ObjectId(id),
         'create_ts': datetime.now(),
+        'node_id': node_id,
         'hostname': hostname,
         'log_file_path': log_file_path,
     })
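
Usage note (illustrative, not part of the patch): with this change, SpiderApi.crawl requires a node_id argument and answers HTTP 400 without one, SpiderApi and NodeApi gain a get_tasks action, and TaskApi.get returns a single task with its Celery status, result, spider name, and log when an id is given. The sketch below shows how a client might exercise the updated endpoints. The host, port, and route paths (/api/spiders/<id>/crawl, /api/spiders/<id>/get_tasks, /api/tasks/<id>) are assumptions; the URL mappings live outside the files touched by this patch.

# Minimal client sketch for the endpoints touched by this patch.
# ASSUMPTIONS: the API listens on localhost:8000 and actions are mounted as
# /api/<collection>/<id>/<action>; neither detail appears in this diff.
import requests

BASE_URL = 'http://localhost:8000/api'  # assumed


def crawl_spider(spider_id, node_id):
    # node_id is now required; the server responds 400 if it is missing.
    resp = requests.post(
        '%s/spiders/%s/crawl' % (BASE_URL, spider_id),  # assumed route
        data={'node_id': node_id},  # sent form-encoded here for illustration
    )
    resp.raise_for_status()
    return resp.json()  # e.g. {'code': 200, ...}


def list_spider_tasks(spider_id):
    # Backed by the new SpiderApi.get_tasks; tasks whose Celery record is
    # missing come back with status 'UNAVAILABLE'.
    resp = requests.get('%s/spiders/%s/get_tasks' % (BASE_URL, spider_id))
    resp.raise_for_status()
    return resp.json()['items']


def get_task_detail(task_id):
    # Backed by the new id branch in TaskApi.get: status, result, spider_name
    # and the log file contents are merged into the returned task.
    resp = requests.get('%s/tasks/%s' % (BASE_URL, task_id))
    resp.raise_for_status()
    return resp.json()

Whether parse_args() picks node_id up from form data, JSON, or the query string depends on how BaseApi defines its parser arguments, which this patch does not show.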