Marvin Zhang
2019-04-23 19:33:50 +08:00
parent 4dff16e9ec
commit db1cd7ec9b
2 changed files with 25 additions and 0 deletions


@@ -175,5 +175,9 @@ class DbManager(object):
        col = self.db[col_name]
        return col.aggregate(pipelines, **kwargs)

    def create_index(self, col_name: str, keys: dict, **kwargs):
        col = self.db[col_name]
        col.create_index(keys=keys, **kwargs)

db_manager = DbManager()
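The new create_index method is a thin wrapper around pymongo's Collection.create_index. One caveat: pymongo 3.x (current when this commit landed) expects the key specification as a string or a list of (field, direction) pairs and raises TypeError on a plain dict, while newer pymongo releases also accept a mapping. A minimal, version-safe usage sketch ('results_example' is a hypothetical collection name):

    # hedged sketch: a list-of-pairs key spec works across pymongo versions,
    # even though the wrapper's annotation says dict
    from pymongo import ASCENDING
    db_manager.create_index('results_example', [('task_id', ASCENDING)])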


@@ -1,5 +1,6 @@
import os
from datetime import datetime
from time import sleep
from bson import ObjectId
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER, PYTHON_ENV_PATH
@@ -10,6 +11,17 @@ import subprocess
from utils.log import other as logger


def get_task(id: str):
    i = 0
    while i < 5:
        task = db_manager.get('tasks', id=id)
        if task is not None:
            return task
        i += 1
        sleep(1)
    return None


@celery_app.task(bind=True)
def execute_spider(self, id: str, params: str = None):
    """
@@ -26,6 +38,12 @@ def execute_spider(self, id: str, params: str = None):
    if params is not None:
        command += ' ' + params

    # get task object and return if not found
    task = get_task(task_id)
    if task is None:
        return

    # current working directory
    current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))

    # log info
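task_id is not assigned anywhere in the hunks shown. Since the task is declared with bind=True, the usual Celery idiom is to read the id from the bound request context, so presumably something along these lines appears earlier in execute_spider (an assumption, not part of this diff):

    # assumption: with @celery_app.task(bind=True), the task's own id
    # is available on the request context
    task_id = self.request.id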
@@ -69,6 +87,9 @@ def execute_spider(self, id: str, params: str = None):
    if spider.get('col'):
        env['CRAWLAB_COLLECTION'] = spider.get('col')
        # create index to speed up results data retrieval
        db_manager.create_index(spider.get('col'), {'task_id': 1})

    # start process
    cmd_arr = command.split(' ')
    cmd_arr = list(filter(lambda x: x != '', cmd_arr))
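Taken together, the two changes cover the write and read sides of task results: the spider process learns its target collection from the CRAWLAB_COLLECTION environment variable, its result documents are evidently tagged with a task_id (hence the index), and the new index makes per-task lookups on that collection fast. A hedged sketch of the query path the index accelerates, assuming DbManager exposes its underlying pymongo database as .db (as the first diff suggests):

    # hypothetical read path: fetch all results written by one task
    col_name = spider.get('col')
    results = list(db_manager.db[col_name].find({'task_id': task_id}))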