Mirror of https://github.com/crawlab-team/crawlab.git (synced 2026-01-21 17:21:09 +01:00)
added params for spider schedules
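This commit threads an optional params string through the whole scheduling pipeline: the schedule API accepts it as a new field, the scheduler stores it on each cron job and forwards it to the crawl endpoint as a query parameter, the Celery task appends it to the spider command line, and the frontend gains a Parameters input on the schedule form. The default deploy folder also moves from '../deployfile' to '/var/crawlab'.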
@@ -5,7 +5,8 @@ PROJECT_SOURCE_FILE_FOLDER = '../spiders'
 PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
 
 # Spider deployment path
-PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
+# PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
+PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
 
 # Spider log path
 PROJECT_LOGS_FOLDER = '../deployfile/logs'
@@ -17,7 +17,8 @@ class ScheduleApi(BaseApi):
         ('name', str),
         ('description', str),
         ('cron', str),
-        ('spider_id', str)
+        ('spider_id', str),
+        ('params', str)
     )
 
     def after_update(self, id: str = None):
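The new ('params', str) tuple registers one more accepted field on the schedule endpoint. How BaseApi turns these tuples into a parser is not shown in this diff; a minimal sketch of the usual flask_restful pattern, with the parser-building loop assumed rather than taken from the source:

    from flask_restful import reqparse

    # assumed: BaseApi feeds its (name, type) tuples into a RequestParser
    arguments = (
        ('name', str),
        ('description', str),
        ('cron', str),
        ('spider_id', str),
        ('params', str),
    )

    parser = reqparse.RequestParser()
    for name, type_ in arguments:
        parser.add_argument(name, type=type_)

    # inside a request handler, args.get('params') is None when the client
    # omits it, which matches the "if params is not None" checks below
    # args = parser.parse_args()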
@@ -193,12 +193,19 @@ class SpiderApi(BaseApi):
         :param id: spider_id
         :return:
         """
-        job = execute_spider.delay(id)
+        args = self.parser.parse_args()
+        params = args.get('params')
+
+        spider = db_manager.get('spiders', id=ObjectId(id))
+
+        job = execute_spider.delay(id, params)
 
         # create a new task
         db_manager.save('tasks', {
             '_id': job.id,
             'spider_id': ObjectId(id),
+            'cmd': spider.get('cmd'),
+            'params': params,
             'create_ts': datetime.utcnow(),
             'status': TaskStatus.PENDING
         })
@@ -22,12 +22,15 @@ class Scheduler(object):
     # scheduler instance
     scheduler = BackgroundScheduler(jobstores=jobstores)
 
-    def execute_spider(self, id: str):
+    def execute_spider(self, id: str, params: str = None):
+        query = {}
+        if params is not None:
+            query['params'] = params
         r = requests.get('http://%s:%s/api/spiders/%s/on_crawl' % (
             FLASK_HOST,
             FLASK_PORT,
             id
-        ))
+        ), query)
 
     def update(self):
         # remove all existing periodic jobs
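Changing the closing `))` to `), query)` works because params is the second positional argument of requests.get, so the dict is serialized into the query string. A small illustration (host, port, and spider id are made-up values):

    import requests

    query = {'params': '-a page=1'}  # hypothetical spider arguments
    r = requests.get('http://localhost:8000/api/spiders/abc123/on_crawl', query)
    # equivalent to passing params=query; the request URL becomes
    # http://localhost:8000/api/spiders/abc123/on_crawl?params=-a+page%3D1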
@@ -44,9 +47,15 @@ class Scheduler(object):
             day = cron_arr[3]
             month = cron_arr[4]
             day_of_week = cron_arr[5]
-            self.scheduler.add_job(func=self.execute_spider, trigger='cron', args=(str(task['spider_id']),),
+            self.scheduler.add_job(func=self.execute_spider,
+                                   args=(str(task['spider_id']), task.get('params'),),
+                                   trigger='cron',
                                    jobstore='mongo',
-                                   day_of_week=day_of_week, month=month, day=day, hour=hour, minute=minute,
+                                   day_of_week=day_of_week,
+                                   month=month,
+                                   day=day,
+                                   hour=hour,
+                                   minute=minute,
                                    second=second)
 
     def run(self):
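The indices used above (day = cron_arr[3], month = cron_arr[4], day_of_week = cron_arr[5]) imply a six-field cron expression with a leading seconds field: second minute hour day month day_of_week. APScheduler takes each field as a separate keyword argument, so the pattern can be reproduced standalone (the cron string and job args here are made-up examples):

    from apscheduler.schedulers.background import BackgroundScheduler

    def execute_spider(spider_id, params=None):
        print(spider_id, params)

    cron = '0 */5 * * * *'  # hypothetical: every five minutes, at second 0
    second, minute, hour, day, month, day_of_week = cron.split(' ')

    scheduler = BackgroundScheduler()
    scheduler.add_job(func=execute_spider,
                      args=('spider-id', '-a page=1'),  # params travels as a plain positional arg
                      trigger='cron',
                      second=second, minute=minute, hour=hour,
                      day=day, month=month, day_of_week=day_of_week)
    scheduler.start()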
@@ -11,7 +11,7 @@ from utils.log import other as logger
 
 
 @celery_app.task(bind=True)
-def execute_spider(self, id: str):
+def execute_spider(self, id: str, params: str = None):
     """
     Execute spider task.
     :param self:
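With the extra parameter on the task signature, the execute_spider.delay(id, params) call from the SpiderApi hunk above lines up positionally: delay(*args) is Celery shorthand for apply_async(args=...), and bind=True prepends the task instance as self. Illustrated with hypothetical values:

    # these two calls are equivalent (the id string is made up)
    job = execute_spider.delay('5c9a...', '-a page=1')
    job = execute_spider.apply_async(args=('5c9a...', '-a page=1'))
    print(job.id)  # the Celery task id, saved as the task's _id in Mongo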
@@ -23,6 +23,8 @@ def execute_spider(self, id: str):
     command = spider.get('cmd')
     if command.startswith("env"):
         command = PYTHON_ENV_PATH + command.replace("env", "")
+    if params is not None:
+        command += ' ' + params
 
     current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))
 
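Appending params with a single space keeps the change minimal, but the combined string is later split on spaces (see the cmd_arr hunk below), so a quoted argument containing spaces will not survive the round trip. A hedged alternative using shlex, not what this commit does:

    import shlex

    command = 'scrapy crawl myspider'  # hypothetical spider cmd
    params = '-a keyword="foo bar"'    # hypothetical user parameters

    # shlex.split respects shell quoting, unlike str.split(' ')
    cmd_arr = shlex.split(command) + shlex.split(params)
    # ['scrapy', 'crawl', 'myspider', '-a', 'keyword=foo bar']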
@@ -43,7 +45,7 @@ def execute_spider(self, id: str):
     stdout = open(log_file_path, 'a')
     stderr = open(log_file_path, 'a')
 
-    # create a new task
+    # update task status as started
     db_manager.update_one('tasks', id=task_id, values={
         'start_ts': datetime.utcnow(),
         'node_id': hostname,
@@ -68,7 +70,9 @@ def execute_spider(self, id: str):
     env['CRAWLAB_COLLECTION'] = spider.get('col')
 
     # start process
-    p = subprocess.Popen(command.split(' '),
+    cmd_arr = command.split(' ')
+    cmd_arr = list(filter(lambda x: x != '', cmd_arr))
+    p = subprocess.Popen(cmd_arr,
                          stdout=stdout.fileno(),
                          stderr=stderr.fileno(),
                          cwd=current_working_directory,
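The filter guards against empty tokens: since params is glued on by string concatenation, a double or trailing space in the command would otherwise hand subprocess.Popen an empty argv entry.

    cmd = 'scrapy crawl  myspider'            # note the accidental double space
    print(cmd.split(' '))                     # ['scrapy', 'crawl', '', 'myspider']
    print([x for x in cmd.split(' ') if x])   # ['scrapy', 'crawl', 'myspider']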
@@ -87,9 +91,6 @@ def execute_spider(self, id: str):
 
     # save task when the task is finished
     db_manager.update_one('tasks', id=task_id, values={
-        'node_id': hostname,
-        'hostname': hostname,
-        'log_file_path': log_file_path,
         'finish_ts': datetime.utcnow(),
         'status': status
     })
@@ -111,6 +111,11 @@ export default {
   // Deployment
   'Time': '时间',
 
+  // Scheduled tasks
+  'Schedule Name': '定时任务名称',
+  'Schedule Description': '定时任务描述',
+  'Parameters': '参数',
+
   // Files
   'Choose Folder': '选择文件',
@@ -31,6 +31,15 @@
         </template>
         <el-input v-model="scheduleForm.cron" :placeholder="$t('Cron')"></el-input>
       </el-form-item>
+      <el-form-item :label="$t('Execute Command')" prop="params">
+        <el-input v-model="spider.cmd"
+                  :placeholder="$t('Execute Command')"
+                  disabled></el-input>
+      </el-form-item>
+      <el-form-item :label="$t('Parameters')" prop="params">
+        <el-input v-model="scheduleForm.params"
+                  :placeholder="$t('Parameters')"></el-input>
+      </el-form-item>
       <el-form-item :label="$t('Schedule Description')" prop="description">
         <el-input v-model="scheduleForm.description" type="textarea"
                   :placeholder="$t('Schedule Description')"></el-input>
@@ -130,6 +139,14 @@ export default {
     ]),
     filteredTableData () {
       return this.scheduleList
     },
+    spider () {
+      for (let i = 0; i < this.spiderList.length; i++) {
+        if (this.spiderList[i]._id === this.scheduleForm.spider_id) {
+          return this.spiderList[i]
+        }
+      }
+      return {}
+    }
   },
   methods: {