Mirror of https://github.com/crawlab-team/crawlab.git (synced 2026-01-21 17:21:09 +01:00)
Commit: added tasks
@@ -1 +1,2 @@
 PROJECT_FILE_FOLDER = '/var/crawlab'
+PROJECT_LOGS_FOLDER = '/Users/yeqing/projects/crawlab/logs/crawlab'
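The new PROJECT_LOGS_FOLDER value is hard-coded to one developer's home directory, so it will not exist on other machines or inside a container. A minimal sketch of an environment-variable fallback, using only the standard library; the CRAWLAB_* variable names and the default log path are illustrative, not something this commit defines:

import os

# Hypothetical env-var overrides; the defaults here are illustrative,
# not the values committed above.
PROJECT_FILE_FOLDER = os.environ.get('CRAWLAB_FILE_FOLDER', '/var/crawlab')
PROJECT_LOGS_FOLDER = os.environ.get('CRAWLAB_LOGS_FOLDER', '/var/log/crawlab')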
@@ -27,7 +27,7 @@ class SpiderApi(BaseApi):
 
     def crawl(self, id):
         job = execute_spider.delay(id)
-        print('crawl: %s' % id)
+        # print('crawl: %s' % id)
         return {
             'code': 200,
             'status': 'ok',
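The handler assigns the result of execute_spider.delay(id) to job but never uses it. A sketch of returning the Celery task id so a caller can poll for completion later; the 'task_id' field is an assumption, not part of this commit:

# from tasks.spider import execute_spider

def crawl(self, id):
    job = execute_spider.delay(id)  # AsyncResult handle for the queued task
    return {
        'code': 200,
        'status': 'ok',
        'task_id': job.id,  # hypothetical field; usable with app.AsyncResult(job.id)
    }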
@@ -1,6 +1,5 @@
 from time import sleep
 import requests
 
-for i in range(10):
-    r = requests.get('http://www.baidu.com')
-    sleep(0.1)
+r = requests.get('http://www.baidu.com')
+print(r.content)
@@ -1,9 +1,17 @@
 import os
 import sys
+import threading
 
 from celery import Celery
 
 app = Celery(__name__)
 app.config_from_object('config.celery')
+
+import tasks.spider
+import tasks.deploy
+
 if __name__ == '__main__':
-    app.start(argv=['tasks.spider', 'worker', '-P', 'eventlet', '-E', '-l', 'INFO'])
+    if sys.platform == 'windows':
+        app.start(argv=['tasks', 'worker', '-P', 'eventlet', '-E', '-l', 'INFO'])
+    else:
+        app.start(argv=['tasks', 'worker', '-E', '-l', 'INFO'])
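One caveat with the platform check above: on Windows, sys.platform is 'win32' (even on 64-bit builds), so sys.platform == 'windows' never matches and the eventlet branch is dead code. A sketch of a check that actually fires on Windows:

import sys

if __name__ == '__main__':
    if sys.platform.startswith('win'):  # 'win32' on all modern Windows builds
        app.start(argv=['tasks', 'worker', '-P', 'eventlet', '-E', '-l', 'INFO'])
    else:
        app.start(argv=['tasks', 'worker', '-E', '-l', 'INFO'])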
tasks/deploy.py (new file, 18 lines)
@@ -0,0 +1,18 @@
+import os
+import sys
+from datetime import datetime
+
+import requests
+from celery.utils.log import get_logger
+
+from config import PROJECT_FILE_FOLDER, PROJECT_LOGS_FOLDER
+from db.manager import db_manager
+from tasks import app
+import subprocess
+
+logger = get_logger(__name__)
+
+
+@app.task
+def deploy_spider(id):
+    pass
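deploy_spider is committed as a bare stub. Purely as a sketch of the shape it might take, given the PROJECT_FILE_FOLDER/<spider_id>/<version> layout that tasks/spider.py reads from; the archive argument, zip format, and timestamp version scheme are all assumptions, not part of this commit:

import os
import zipfile
from datetime import datetime

@app.task
def deploy_spider_sketch(id, archive_path):
    # Hypothetical: unpack an uploaded spider archive into a versioned folder.
    version = datetime.now().strftime('%Y%m%d%H%M%S')  # assumed version scheme
    dst = os.path.join(PROJECT_FILE_FOLDER, str(id), version)
    os.makedirs(dst, exist_ok=True)
    with zipfile.ZipFile(archive_path) as z:
        z.extractall(dst)
    return version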
@@ -1,10 +1,11 @@
 import os
 import sys
 from datetime import datetime
 
 import requests
 from celery.utils.log import get_logger
 
-from config import PROJECT_FILE_FOLDER
+from config import PROJECT_FILE_FOLDER, PROJECT_LOGS_FOLDER
 from db.manager import db_manager
 from tasks import app
+import subprocess
@@ -18,14 +19,30 @@ def execute_spider(id: str):
     latest_version = db_manager.get_latest_version(spider_id=id)
     command = spider.get('cmd')
     current_working_directory = os.path.join(PROJECT_FILE_FOLDER, str(spider.get('_id')), str(latest_version))
 
+    # log info
+    logger.info('spider_id: %s' % id)
+    logger.info('version: %s' % latest_version)
+    logger.info(command)
+
+    # make sure the log folder exists
+    log_path = os.path.join(PROJECT_LOGS_FOLDER, id, str(latest_version))
+    if not os.path.exists(log_path):
+        os.makedirs(log_path)
+
     # execute the command
     p = subprocess.Popen(command,
                          shell=True,
                          stdout=subprocess.PIPE,
-                         stderr=subprocess.STDOUT,
+                         stderr=subprocess.PIPE,
                          cwd=current_working_directory,
                          bufsize=1)
-    for i in iter(p.stdout.readline, 'b'):
-        yield i
+
+    # output the log file
+    log_file_path = os.path.join(log_path, '%s.txt' % datetime.now().strftime('%Y%m%d%H%M%S'))
+    with open(log_file_path, 'a') as f:
+        for line in p.stdout.readlines():
+            f.write(line.decode('utf-8') + '\n')
 
+
+@app.task
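Two caveats in the committed execute_spider body: stderr is opened as a pipe but never read, so a spider that writes enough to stderr will block on a full pipe buffer, and p.stdout.readlines() only returns at EOF, so nothing reaches the log file until the process exits. (The removed generator also used the wrong sentinel: iter(p.stdout.readline, 'b') should compare against b''.) A minimal sketch that streams combined output to the log as it is produced, assuming merging stderr into stdout is acceptable; run_and_log and its parameters are illustrative names, not part of the commit:

import os
import subprocess
from datetime import datetime

def run_and_log(command, cwd, log_path):
    os.makedirs(log_path, exist_ok=True)  # replaces the exists()/makedirs() pair
    log_file = os.path.join(log_path, '%s.txt' % datetime.now().strftime('%Y%m%d%H%M%S'))
    p = subprocess.Popen(command,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,  # one pipe, so it cannot fill unread
                         cwd=cwd)
    with open(log_file, 'a') as f:
        for line in iter(p.stdout.readline, b''):  # b'' is the EOF sentinel for bytes
            f.write(line.decode('utf-8'))  # lines already end with '\n'
    return p.wait()  # spider's exit code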