diff --git a/crawlab/config.py b/crawlab/config.py
index d2d69f81..bad08ee2 100644
--- a/crawlab/config.py
+++ b/crawlab/config.py
@@ -5,7 +5,8 @@ PROJECT_SOURCE_FILE_FOLDER = '../spiders'
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
# 爬虫部署路径
-PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
+# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
+PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
# 爬虫日志路径
PROJECT_LOGS_FOLDER = '../deployfile/logs'
diff --git a/crawlab/routes/schedules.py b/crawlab/routes/schedules.py
index 532a4ec5..01db8be1 100644
--- a/crawlab/routes/schedules.py
+++ b/crawlab/routes/schedules.py
@@ -17,7 +17,8 @@ class ScheduleApi(BaseApi):
('name', str),
('description', str),
('cron', str),
- ('spider_id', str)
+ ('spider_id', str),
+ ('params', str)
)
def after_update(self, id: str = None):
diff --git a/crawlab/routes/spiders.py b/crawlab/routes/spiders.py
index f36903e3..ba315ce9 100644
--- a/crawlab/routes/spiders.py
+++ b/crawlab/routes/spiders.py
@@ -193,12 +193,19 @@ class SpiderApi(BaseApi):
:param id: spider_id
:return:
"""
- job = execute_spider.delay(id)
+ args = self.parser.parse_args()
+ params = args.get('params')
+
+ spider = db_manager.get('spiders', id=ObjectId(id))
+
+ job = execute_spider.delay(id, params)
# create a new task
db_manager.save('tasks', {
'_id': job.id,
'spider_id': ObjectId(id),
+ 'cmd': spider.get('cmd'),
+ 'params': params,
'create_ts': datetime.utcnow(),
'status': TaskStatus.PENDING
})
diff --git a/crawlab/tasks/scheduler.py b/crawlab/tasks/scheduler.py
index bf29607f..55e8fc36 100644
--- a/crawlab/tasks/scheduler.py
+++ b/crawlab/tasks/scheduler.py
@@ -22,12 +22,15 @@ class Scheduler(object):
# scheduler instance
scheduler = BackgroundScheduler(jobstores=jobstores)
- def execute_spider(self, id: str):
+ def execute_spider(self, id: str, params: str = None):
+ query = {}
+ if params is not None:
+ query['params'] = params
r = requests.get('http://%s:%s/api/spiders/%s/on_crawl' % (
FLASK_HOST,
FLASK_PORT,
id
- ))
+ ), query)
def update(self):
# remove all existing periodic jobs
@@ -44,9 +47,15 @@ class Scheduler(object):
day = cron_arr[3]
month = cron_arr[4]
day_of_week = cron_arr[5]
- self.scheduler.add_job(func=self.execute_spider, trigger='cron', args=(str(task['spider_id']),),
+ self.scheduler.add_job(func=self.execute_spider,
+ args=(str(task['spider_id']), task.get('params'),),
+ trigger='cron',
jobstore='mongo',
- day_of_week=day_of_week, month=month, day=day, hour=hour, minute=minute,
+ day_of_week=day_of_week,
+ month=month,
+ day=day,
+ hour=hour,
+ minute=minute,
second=second)
def run(self):
diff --git a/crawlab/tasks/spider.py b/crawlab/tasks/spider.py
index 3413a021..c71c3f34 100644
--- a/crawlab/tasks/spider.py
+++ b/crawlab/tasks/spider.py
@@ -11,7 +11,7 @@ from utils.log import other as logger
@celery_app.task(bind=True)
-def execute_spider(self, id: str):
+def execute_spider(self, id: str, params: str = None):
"""
Execute spider task.
:param self:
@@ -23,6 +23,8 @@ def execute_spider(self, id: str):
command = spider.get('cmd')
if command.startswith("env"):
command = PYTHON_ENV_PATH + command.replace("env", "")
+ if params is not None:
+ command += ' ' + params
current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))
@@ -43,7 +45,7 @@ def execute_spider(self, id: str):
stdout = open(log_file_path, 'a')
stderr = open(log_file_path, 'a')
- # create a new task
+ # mark the task as started (records status, start time, and node info)
db_manager.update_one('tasks', id=task_id, values={
'start_ts': datetime.utcnow(),
'node_id': hostname,
@@ -68,7 +70,9 @@ def execute_spider(self, id: str):
env['CRAWLAB_COLLECTION'] = spider.get('col')
# start process
- p = subprocess.Popen(command.split(' '),
+ cmd_arr = command.split(' ')
+ cmd_arr = list(filter(lambda x: x != '', cmd_arr))
+ p = subprocess.Popen(cmd_arr,
stdout=stdout.fileno(),
stderr=stderr.fileno(),
cwd=current_working_directory,
@@ -87,9 +91,6 @@ def execute_spider(self, id: str):
# save task when the task is finished
db_manager.update_one('tasks', id=task_id, values={
- 'node_id': hostname,
- 'hostname': hostname,
- 'log_file_path': log_file_path,
'finish_ts': datetime.utcnow(),
'status': status
})
diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js
index 7e127114..a437dcf1 100644
--- a/frontend/src/i18n/zh.js
+++ b/frontend/src/i18n/zh.js
@@ -111,6 +111,11 @@ export default {
// 部署
'Time': '时间',
+ // 定时任务
+ 'Schedule Name': '定时任务名称',
+ 'Schedule Description': '定时任务描述',
+ 'Parameters': '参数',
+
// 文件
'Choose Folder': '选择文件',
diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue
index 3b1e3307..7bcdcdf0 100644
--- a/frontend/src/views/schedule/ScheduleList.vue
+++ b/frontend/src/views/schedule/ScheduleList.vue
@@ -31,6 +31,15 @@
+
+
+
+
+
+
@@ -130,6 +139,14 @@ export default {
]),
filteredTableData () {
return this.scheduleList
+ },
+ spider () {
+ for (let i = 0; i < this.spiderList.length; i++) {
+ if (this.spiderList[i]._id === this.scheduleForm.spider_id) {
+ return this.spiderList[i]
+ }
+ }
+ return {}
}
},
methods: {