added tasks

This commit is contained in:
Marvin Zhang
2019-02-18 21:16:40 +08:00
parent 06575c8ae5
commit a4a0fdd67b
6 changed files with 52 additions and 9 deletions

View File

@@ -1 +1,2 @@
PROJECT_FILE_FOLDER = '/var/crawlab'
PROJECT_LOGS_FOLDER = '/Users/yeqing/projects/crawlab/logs/crawlab'

View File

@@ -27,7 +27,7 @@ class SpiderApi(BaseApi):
def crawl(self, id):
job = execute_spider.delay(id)
print('crawl: %s' % id)
# print('crawl: %s' % id)
return {
'code': 200,
'status': 'ok',

View File

@@ -1,6 +1,5 @@
from time import sleep
import requests
for i in range(10):
r = requests.get('http://www.baidu.com')
sleep(0.1)
r = requests.get('http://www.baidu.com')
print(r.content)

View File

@@ -1,9 +1,17 @@
import os
import sys
import threading
from celery import Celery
app = Celery(__name__)
app.config_from_object('config.celery')
import tasks.spider
import tasks.deploy
if __name__ == '__main__':
app.start(argv=['tasks.spider', 'worker', '-P', 'eventlet', '-E', '-l', 'INFO'])
if sys.platform == 'windows':
app.start(argv=['tasks', 'worker', '-P', 'eventlet', '-E', '-l', 'INFO'])
else:
app.start(argv=['tasks', 'worker', '-E', '-l', 'INFO'])

18
tasks/deploy.py Normal file
View File

@@ -0,0 +1,18 @@
import os
import sys
from datetime import datetime
import requests
from celery.utils.log import get_logger
from config import PROJECT_FILE_FOLDER, PROJECT_LOGS_FOLDER
from db.manager import db_manager
from tasks import app
import subprocess
logger = get_logger(__name__)
@app.task
def deploy_spider(id):
pass

View File

@@ -1,10 +1,11 @@
import os
import sys
from datetime import datetime
import requests
from celery.utils.log import get_logger
from config import PROJECT_FILE_FOLDER
from config import PROJECT_FILE_FOLDER, PROJECT_LOGS_FOLDER
from db.manager import db_manager
from tasks import app
import subprocess
@@ -18,14 +19,30 @@ def execute_spider(id: str):
latest_version = db_manager.get_latest_version(spider_id=id)
command = spider.get('cmd')
current_working_directory = os.path.join(PROJECT_FILE_FOLDER, str(spider.get('_id')), str(latest_version))
# log info
logger.info('spider_id: %s' % id)
logger.info('version: %s' % latest_version)
logger.info(command)
# make sure the log folder exists
log_path = os.path.join(PROJECT_LOGS_FOLDER, id, str(latest_version))
if not os.path.exists(log_path):
os.makedirs(log_path)
# execute the command
p = subprocess.Popen(command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
cwd=current_working_directory,
bufsize=1)
for i in iter(p.stdout.readline, 'b'):
yield i
# output the log file
log_file_path = os.path.join(log_path, '%s.txt' % datetime.now().strftime('%Y%m%d%H%M%S'))
with open(log_file_path, 'a') as f:
for line in p.stdout.readlines():
f.write(line.decode('utf-8') + '\n')
@app.task