Use the virtualenv's Python executable (sys.executable) to run spider commands

This commit is contained in:
Marvin Zhang
2019-04-26 12:36:31 +08:00
parent ae29708d99
commit cee9ef6ba9
4 changed files with 41 additions and 19 deletions

View File

@@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
# 配置python虚拟环境的路径
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python'
# 爬虫部署路径
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'

View File

@@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__fil
PROJECT_SOURCE_FILE_FOLDER = os.path.join(BASE_DIR, "spiders")
# 配置python虚拟环境的路径
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
PYTHON_ENV_PATH = '/Users/yeqing/.pyenv/shims/python'
# 爬虫部署路径
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'

View File

@@ -1,4 +1,5 @@
import os
import sys
from datetime import datetime
from time import sleep
@@ -35,8 +36,16 @@ def execute_spider(self, id: str, params: str = None):
hostname = self.request.hostname
spider = db_manager.get('spiders', id=id)
command = spider.get('cmd')
if command.startswith("env"):
command = PYTHON_ENV_PATH + command.replace("env", "")
# if the command starts with python, use sys.executable so that it runs in the virtualenv
if command.startswith('python '):
command = command.replace('python ', sys.executable + ' ')
# if the command starts with scrapy, use sys.executable to run scrapy as a module in the virtualenv
elif command.startswith('scrapy '):
command = command.replace('scrapy ', sys.executable + ' -m scrapy ')
# pass params to the command
if params is not None:
command += ' ' + params
@@ -95,21 +104,26 @@ def execute_spider(self, id: str, params: str = None):
# start process
cmd_arr = command.split(' ')
cmd_arr = list(filter(lambda x: x != '', cmd_arr))
p = subprocess.Popen(cmd_arr,
stdout=stdout.fileno(),
stderr=stderr.fileno(),
cwd=current_working_directory,
env=env,
bufsize=1)
try:
p = subprocess.Popen(cmd_arr,
stdout=stdout.fileno(),
stderr=stderr.fileno(),
cwd=current_working_directory,
env=env,
bufsize=1)
# get output from the process
_stdout, _stderr = p.communicate()
# get output from the process
_stdout, _stderr = p.communicate()
# get return code
code = p.poll()
if code == 0:
status = TaskStatus.SUCCESS
else:
# get return code
code = p.poll()
if code == 0:
status = TaskStatus.SUCCESS
else:
status = TaskStatus.FAILURE
except Exception as err:
logger.error(err)
stderr.write(str(err))
status = TaskStatus.FAILURE
# save task when the task is finished

View File

@@ -11,6 +11,7 @@ SUFFIX_LANG_MAPPING = {
'sh': 'shell',
'java': 'java',
'c': 'c',
'go': 'go',
}
@@ -48,11 +49,18 @@ def get_file_suffix_stats(path) -> dict:
Get suffix stats of given file
:param path: file path
"""
stats = defaultdict(int)
_stats = defaultdict(int)
for file_path in get_file_list(path):
suffix = get_file_suffix(file_path)
if suffix is not None:
stats[suffix] += 1
_stats[suffix] += 1
# only return suffixes with languages
stats = {}
for suffix, count in _stats.items():
if SUFFIX_LANG_MAPPING.get(suffix) is not None:
stats[suffix] = count
return stats