mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
fix issue #12
This commit is contained in:
@@ -175,5 +175,9 @@ class DbManager(object):
|
||||
col = self.db[col_name]
|
||||
return col.aggregate(pipelines, **kwargs)
|
||||
|
||||
def create_index(self, col_name: str, keys: dict, **kwargs):
|
||||
col = self.db[col_name]
|
||||
col.create_index(keys=keys, **kwargs)
|
||||
|
||||
|
||||
db_manager = DbManager()
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
from datetime import datetime
|
||||
from time import sleep
|
||||
|
||||
from bson import ObjectId
|
||||
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER, PYTHON_ENV_PATH
|
||||
@@ -10,6 +11,17 @@ import subprocess
|
||||
from utils.log import other as logger
|
||||
|
||||
|
||||
def get_task(id: str):
|
||||
i = 0
|
||||
while i < 5:
|
||||
task = db_manager.get('tasks', id=id)
|
||||
if task is not None:
|
||||
return task
|
||||
i += 1
|
||||
sleep(1)
|
||||
return None
|
||||
|
||||
|
||||
@celery_app.task(bind=True)
|
||||
def execute_spider(self, id: str, params: str = None):
|
||||
"""
|
||||
@@ -26,6 +38,12 @@ def execute_spider(self, id: str, params: str = None):
|
||||
if params is not None:
|
||||
command += ' ' + params
|
||||
|
||||
# get task object and return if not found
|
||||
task = get_task(task_id)
|
||||
if task is None:
|
||||
return
|
||||
|
||||
# current working directory
|
||||
current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))
|
||||
|
||||
# log info
|
||||
@@ -69,6 +87,9 @@ def execute_spider(self, id: str, params: str = None):
|
||||
if spider.get('col'):
|
||||
env['CRAWLAB_COLLECTION'] = spider.get('col')
|
||||
|
||||
# create index to speed results data retrieval
|
||||
db_manager.create_index(spider.get('col'), {'task_id': 1})
|
||||
|
||||
# start process
|
||||
cmd_arr = command.split(' ')
|
||||
cmd_arr = list(filter(lambda x: x != '', cmd_arr))
|
||||
|
||||
Reference in New Issue
Block a user