From 0e1208d10d0607edd8d81e1df3af98013f7e6a71 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Thu, 30 May 2019 13:07:03 +0800
Subject: [PATCH] download results

---
Review note: the second crawlab/routes/tasks.py hunk was amended during
review (docstring added; the filename used when the spider has no results
collection no longer embeds the missing collection name). The post-image
blob hash on that file's "index" line therefore no longer matches.

 crawlab/requirements.txt |  4 ++++
 crawlab/routes/tasks.py  | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/crawlab/requirements.txt b/crawlab/requirements.txt
index 0a1b7f13..e9559317 100644
--- a/crawlab/requirements.txt
+++ b/crawlab/requirements.txt
@@ -10,6 +10,7 @@ Babel==2.6.0
 beautifulsoup4==4.7.1
 billiard==3.6.0.0
 bs4==0.0.1
+bson==0.5.8
 cachetools==3.1.0
 celery==4.3.0
 certifi==2019.3.9
@@ -20,9 +21,11 @@ coloredlogs==10.0
 constantly==15.1.0
 cryptography==2.6.1
 cssselect==1.0.3
+csvalidate==1.1.1
 Flask==1.0.2
 Flask-APScheduler==1.11.0
 Flask-Cors==3.0.7
+Flask-CSV==1.2.0
 Flask-RESTful==0.3.7
 flask-restplus==0.12.1
 flower==0.9.3
@@ -42,6 +45,7 @@ jsonschema==3.0.1
 kombu==4.5.0
 lxml==4.3.3
 MarkupSafe==1.1.1
+marshmallow==2.19.2
 mongoengine==0.17.0
 multidict==4.5.2
 parsel==1.5.1
diff --git a/crawlab/routes/tasks.py b/crawlab/routes/tasks.py
index 8f09bbf7..fab2457a 100644
--- a/crawlab/routes/tasks.py
+++ b/crawlab/routes/tasks.py
@@ -1,6 +1,9 @@
 import json
 import os
 import sys
+from time import time
+
+from flask_csv import send_csv
 
 try:
     from _signal import SIGKILL
@@ -213,3 +216,20 @@ class TaskApi(BaseApi):
             'id': id,
             'status': 'ok',
         }
+
+    def download_results(self, id: str):
+        """
+        Send the items stored for a task as a CSV file download.
+        :param id: task id
+        :return: response built by send_csv; an empty CSV when the spider has no `col` set
+        """
+        task = db_manager.get('tasks', id=id)
+        spider = db_manager.get('spiders', id=task['spider_id'])
+        col_name = spider.get('col')
+        timestamp = round(time())
+        if not col_name:
+            # no results collection configured: keep the falsy name out of the filename
+            return send_csv([], f'results_{timestamp}.csv')
+        items = db_manager.list(col_name, {'task_id': id})
+        fields = get_spider_col_fields(col_name)
+        return send_csv(items, filename=f'results_{col_name}_{timestamp}.csv', fields=fields, encoding='utf-8')