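Added a `params` field to spider schedules so that scheduled and manual runs can pass extra command-line arguments to the spider command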

This commit is contained in:
Marvin Zhang
2019-04-20 20:04:09 +08:00
parent 6ceb00aedc
commit 3407e4e8df
7 changed files with 54 additions and 13 deletions

View File

@@ -5,7 +5,8 @@ PROJECT_SOURCE_FILE_FOLDER = '../spiders'
# Python interpreter path; the spider task substitutes it for a leading "env"
# in a spider's command before launching it.
# NOTE(review): machine-specific absolute path - confirm it is overridden per deployment.
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
# Spider deployment path
PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
# Spider log path
# NOTE(review): still located under '../deployfile' while the deploy folder
# above points to '/var/crawlab' - confirm this split is intended.
PROJECT_LOGS_FOLDER = '../deployfile/logs'

View File

@@ -17,7 +17,8 @@ class ScheduleApi(BaseApi):
('name', str),
('description', str),
('cron', str),
('spider_id', str)
('spider_id', str),
('params', str)
)
def after_update(self, id: str = None):

View File

@@ -193,12 +193,19 @@ class SpiderApi(BaseApi):
:param id: spider_id
:return:
"""
job = execute_spider.delay(id)
args = self.parser.parse_args()
params = args.get('params')
spider = db_manager.get('spiders', id=ObjectId(id))
job = execute_spider.delay(id, params)
# create a new task
db_manager.save('tasks', {
'_id': job.id,
'spider_id': ObjectId(id),
'cmd': spider.get('cmd'),
'params': params,
'create_ts': datetime.utcnow(),
'status': TaskStatus.PENDING
})

View File

@@ -22,12 +22,15 @@ class Scheduler(object):
# scheduler instance
scheduler = BackgroundScheduler(jobstores=jobstores)
def execute_spider(self, id: str):
def execute_spider(self, id: str, params: str = None):
query = {}
if params is not None:
query['params'] = params
r = requests.get('http://%s:%s/api/spiders/%s/on_crawl' % (
FLASK_HOST,
FLASK_PORT,
id
))
), query)
def update(self):
# remove all existing periodic jobs
@@ -44,9 +47,15 @@ class Scheduler(object):
day = cron_arr[3]
month = cron_arr[4]
day_of_week = cron_arr[5]
self.scheduler.add_job(func=self.execute_spider, trigger='cron', args=(str(task['spider_id']),),
self.scheduler.add_job(func=self.execute_spider,
args=(str(task['spider_id']), task.get('params'),),
trigger='cron',
jobstore='mongo',
day_of_week=day_of_week, month=month, day=day, hour=hour, minute=minute,
day_of_week=day_of_week,
month=month,
day=day,
hour=hour,
minute=minute,
second=second)
def run(self):

View File

@@ -11,7 +11,7 @@ from utils.log import other as logger
@celery_app.task(bind=True)
def execute_spider(self, id: str):
def execute_spider(self, id: str, params: str = None):
"""
Execute spider task.
:param self:
@@ -23,6 +23,8 @@ def execute_spider(self, id: str):
command = spider.get('cmd')
if command.startswith("env"):
command = PYTHON_ENV_PATH + command.replace("env", "")
if params is not None:
command += ' ' + params
current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))
@@ -43,7 +45,7 @@ def execute_spider(self, id: str):
stdout = open(log_file_path, 'a')
stderr = open(log_file_path, 'a')
# create a new task
# update task status as started
db_manager.update_one('tasks', id=task_id, values={
'start_ts': datetime.utcnow(),
'node_id': hostname,
@@ -68,7 +70,9 @@ def execute_spider(self, id: str):
env['CRAWLAB_COLLECTION'] = spider.get('col')
# start process
p = subprocess.Popen(command.split(' '),
cmd_arr = command.split(' ')
cmd_arr = list(filter(lambda x: x != '', cmd_arr))
p = subprocess.Popen(cmd_arr,
stdout=stdout.fileno(),
stderr=stderr.fileno(),
cwd=current_working_directory,
@@ -87,9 +91,6 @@ def execute_spider(self, id: str):
# save task when the task is finished
db_manager.update_one('tasks', id=task_id, values={
'node_id': hostname,
'hostname': hostname,
'log_file_path': log_file_path,
'finish_ts': datetime.utcnow(),
'status': status
})

View File

@@ -111,6 +111,11 @@ export default {
// Deploy
'Time': '时间',
// Schedule
'Schedule Name': '定时任务名称',
'Schedule Description': '定时任务描述',
'Parameters': '参数',
// File
// NOTE(review): the value below reads "choose file" while the key says "Choose Folder" - confirm which is intended.
'Choose Folder': '选择文件',

View File

@@ -31,6 +31,15 @@
</template>
<el-input v-model="scheduleForm.cron" :placeholder="$t('Cron')"></el-input>
</el-form-item>
<!-- Read-only preview of the selected spider's command. It deliberately has no
     `prop`: the value comes from `spider.cmd`, not from a `scheduleForm` field,
     and `prop="params"` belongs to the Parameters item only. -->
<el-form-item :label="$t('Execute Command')">
  <el-input v-model="spider.cmd"
            :placeholder="$t('Execute Command')"
            disabled></el-input>
</el-form-item>
<el-form-item :label="$t('Parameters')" prop="params">
<el-input v-model="scheduleForm.params"
:placeholder="$t('Parameters')"></el-input>
</el-form-item>
<el-form-item :label="$t('Schedule Description')" prop="description">
<el-input v-model="scheduleForm.description" type="textarea"
:placeholder="$t('Schedule Description')"></el-input>
@@ -130,6 +139,14 @@ export default {
]),
filteredTableData () {
return this.scheduleList
},
spider () {
for (let i = 0; i < this.spiderList.length; i++) {
if (this.spiderList[i]._id === this.scheduleForm.spider_id) {
return this.spiderList[i]
}
}
return {}
}
},
methods: {