From cee9ef6ba986cc804e3ba333209d21d2c77390b0 Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Fri, 26 Apr 2019 12:36:31 +0800
Subject: [PATCH] added virtualenv to python executable

---
 crawlab/config/config.py       |  2 +-
 crawlab/config/config_local.py |  2 +-
 crawlab/tasks/spider.py        | 44 ++++++++++++++++++++++------------
 crawlab/utils/file.py          | 12 ++++++++--
 4 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/crawlab/config/config.py b/crawlab/config/config.py
index afbcb9bf..08ab113c 100644
--- a/crawlab/config/config.py
+++ b/crawlab/config/config.py
@@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil
 PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
 
 # path of the Python virtual environment
-PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
+PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python'
 
 # spider deployment path
 # PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
diff --git a/crawlab/config/config_local.py b/crawlab/config/config_local.py
index afbcb9bf..08ab113c 100644
--- a/crawlab/config/config_local.py
+++ b/crawlab/config/config_local.py
@@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil
 PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
 
 # path of the Python virtual environment
-PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
+PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python'
 
 # spider deployment path
 # PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
diff --git a/crawlab/tasks/spider.py b/crawlab/tasks/spider.py
index 0d843e22..57dab0c8 100644
--- a/crawlab/tasks/spider.py
+++ b/crawlab/tasks/spider.py
@@ -1,4 +1,5 @@
 import os
+import sys
 from datetime import datetime
 from time import sleep
 
@@ -35,8 +36,16 @@ def execute_spider(self, id: str, params: str = None):
     hostname = self.request.hostname
     spider = db_manager.get('spiders', id=id)
     command = spider.get('cmd')
-    if command.startswith("env"):
-        command = PYTHON_ENV_PATH + command.replace("env", "")
+
+    # if the command starts with python, use sys.executable to run it inside the virtualenv
+    if command.startswith('python '):
+        command = command.replace('python ', sys.executable + ' ')
+
+    # if the command starts with scrapy, use sys.executable to run scrapy as a module inside the virtualenv
+    elif command.startswith('scrapy '):
+        command = command.replace('scrapy ', sys.executable + ' -m scrapy ')
+
+    # pass params to the command
     if params is not None:
         command += ' ' + params
 
@@ -95,21 +104,26 @@ def execute_spider(self, id: str, params: str = None):
     # start process
     cmd_arr = command.split(' ')
     cmd_arr = list(filter(lambda x: x != '', cmd_arr))
-    p = subprocess.Popen(cmd_arr,
-                         stdout=stdout.fileno(),
-                         stderr=stderr.fileno(),
-                         cwd=current_working_directory,
-                         env=env,
-                         bufsize=1)
+    try:
+        p = subprocess.Popen(cmd_arr,
+                             stdout=stdout.fileno(),
+                             stderr=stderr.fileno(),
+                             cwd=current_working_directory,
+                             env=env,
+                             bufsize=1)
 
-    # get output from the process
-    _stdout, _stderr = p.communicate()
+        # get output from the process
+        _stdout, _stderr = p.communicate()
 
-    # get return code
-    code = p.poll()
-    if code == 0:
-        status = TaskStatus.SUCCESS
-    else:
+        # get return code
+        code = p.poll()
+        if code == 0:
+            status = TaskStatus.SUCCESS
+        else:
+            status = TaskStatus.FAILURE
+    except Exception as err:
+        logger.error(err)
+        stderr.write(str(err))
         status = TaskStatus.FAILURE
 
     # save task when the task is finished
diff --git a/crawlab/utils/file.py b/crawlab/utils/file.py
index d549c62d..06163d49 100644
--- a/crawlab/utils/file.py
+++ b/crawlab/utils/file.py
@@ -11,6 +11,7 @@
 SUFFIX_LANG_MAPPING = {
     'sh': 'shell',
     'java': 'java',
     'c': 'c',
+    'go': 'go',
 }
 
@@ -48,11 +49,18 @@ def get_file_suffix_stats(path) -> dict:
     Get suffix stats of given file
     :param path: file path
     """
-    stats = defaultdict(int)
+    _stats = defaultdict(int)
     for file_path in get_file_list(path):
         suffix = get_file_suffix(file_path)
         if suffix is not None:
-            stats[suffix] += 1
+            _stats[suffix] += 1
+
+    # only return suffixes with languages
+    stats = {}
+    for suffix, count in _stats.items():
+        if SUFFIX_LANG_MAPPING.get(suffix) is not None:
+            stats[suffix] = count
+
     return stats
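
Note on the change above: spider commands that begin with 'python' or 'scrapy' are now rewritten to use sys.executable, the interpreter that runs the Crawlab worker, so spiders execute inside the same virtualenv instead of relying on the hard-coded PYTHON_ENV_PATH. The snippet below is a minimal standalone sketch of that substitution; the helper name resolve_executable and the sample command are illustrative only and are not part of the Crawlab codebase.

import sys


def resolve_executable(command: str) -> str:
    # Mirror the substitution done in execute_spider: point 'python ...' and
    # 'scrapy ...' commands at the interpreter running this process, so they
    # execute inside the same virtualenv.
    if command.startswith('python '):
        # e.g. 'python spider.py' -> '<virtualenv>/bin/python spider.py'
        return command.replace('python ', sys.executable + ' ')
    if command.startswith('scrapy '):
        # e.g. 'scrapy crawl quotes' -> '<virtualenv>/bin/python -m scrapy crawl quotes'
        return command.replace('scrapy ', sys.executable + ' -m scrapy ')
    return command


if __name__ == '__main__':
    # Prints the rewritten command, e.g. '/opt/venv/bin/python -m scrapy crawl quotes'
    # when run from a virtualenv at /opt/venv (path shown for illustration only).
    print(resolve_executable('scrapy crawl quotes'))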