diff --git a/crawlab/routes/base.py b/crawlab/routes/base.py index d78079fb..689b8f6a 100644 --- a/crawlab/routes/base.py +++ b/crawlab/routes/base.py @@ -127,7 +127,8 @@ class BaseApi(Resource): values = {} for k in args.keys(): if k not in DEFAULT_ARGS: - values[k] = args.get(k) + if args.get(k) is not None: + values[k] = args.get(k) item = db_manager.update_one(col_name=self.col_name, id=id, values=values) # execute after_update hook diff --git a/crawlab/routes/spiders.py b/crawlab/routes/spiders.py index 044210b2..b4b3aab1 100644 --- a/crawlab/routes/spiders.py +++ b/crawlab/routes/spiders.py @@ -58,6 +58,9 @@ class SpiderApi(BaseApi): # spider schedule cron enabled ('cron_enabled', int), + + # spider environment variables + ('envs', str), ) def get(self, id=None, action=None): @@ -328,6 +331,11 @@ class SpiderApi(BaseApi): """ scheduler.update() + def update_envs(self, id: str): + args = self.parser.parse_args() + envs = json.loads(args.envs) + db_manager.update_one(col_name='spiders', id=id, values={'envs': envs}) + class SpiderImportApi(Resource): __doc__ = """ diff --git a/crawlab/tasks/spider.py b/crawlab/tasks/spider.py index 589fe9b1..3413a021 100644 --- a/crawlab/tasks/spider.py +++ b/crawlab/tasks/spider.py @@ -52,11 +52,22 @@ def execute_spider(self, id: str): 'status': TaskStatus.STARTED }) - # start the process and pass params as env variables + # pass params as env variables env = os.environ.copy() + + # custom environment variables + if spider.get('envs'): + for _env in spider.get('envs'): + env[_env['name']] = _env['value'] + + # task id environment variable env['CRAWLAB_TASK_ID'] = task_id + + # collection environment variable if spider.get('col'): env['CRAWLAB_COLLECTION'] = spider.get('col') + + # start process p = subprocess.Popen(command.split(' '), stdout=stdout.fileno(), stderr=stderr.fileno(), diff --git a/frontend/src/components/Environment/EnvironmentList.vue b/frontend/src/components/Environment/EnvironmentList.vue new file mode 100644 
index 00000000..9e0b1413 --- /dev/null +++ b/frontend/src/components/Environment/EnvironmentList.vue @@ -0,0 +1,75 @@ + + + + + {{$t('Add Environment Variables')}} + {{$t('Save')}} + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 25176002..f1f0cf6f 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -16,6 +16,7 @@ export default { 'Deployed Spiders': '已部署爬虫', 'Log': '日志', 'Results': '结果', + 'Environment': '环境', // 选择 Spider: '爬虫', @@ -79,6 +80,9 @@ export default { 'Language': '语言', 'Schedule Enabled': '是否开启定时任务', 'Schedule Cron': '定时任务', + 'Variable': '变量', + 'Value': '值', + 'Add Environment Variables': '添加环境变量', // 爬虫列表 'Name': '名称', diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index 77e169e2..dba4876e 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -5,7 +5,7 @@ const state = { spiderList: [], // active spider data - spiderForm: { _id: {} }, + spiderForm: {}, // node to deploy/run activeNode: {}, @@ -77,6 +77,11 @@ const actions = { dispatch('getSpiderList') }) }, + updateSpiderEnvs ({ state }) { + return request.post(`/spiders/${state.spiderForm._id}/update_envs`, { + envs: JSON.stringify(state.spiderForm.envs) + }) + }, getSpiderData ({ state, commit }, id) { return request.get(`/spiders/${id}`) .then(response => { diff --git a/frontend/src/views/spider/SpiderDetail.vue b/frontend/src/views/spider/SpiderDetail.vue index ff6a1333..56cf7982 100644 --- a/frontend/src/views/spider/SpiderDetail.vue +++ b/frontend/src/views/spider/SpiderDetail.vue @@ -16,6 +16,9 @@ + + + @@ -26,10 +29,12 @@ import { } from 'vuex' import FileList from '../../components/FileList/FileList' import SpiderOverview from '../../components/Overview/SpiderOverview' +import EnvironmentList from '../../components/Environment/EnvironmentList' export default { name: 'NodeDetail', components: { + EnvironmentList, 
FileList, SpiderOverview }, diff --git a/spiders/example_juejin/juejin/pipelines.py b/spiders/example_juejin/juejin/pipelines.py index b34aac50..4a497f54 100644 --- a/spiders/example_juejin/juejin/pipelines.py +++ b/spiders/example_juejin/juejin/pipelines.py @@ -8,9 +8,9 @@ import os from pymongo import MongoClient -MONGO_HOST = '127.0.0.1' -MONGO_PORT = 27017 -MONGO_DB = 'crawlab_test' +MONGO_HOST = os.environ['MONGO_HOST'] +MONGO_PORT = int(os.environ['MONGO_PORT']) +MONGO_DB = os.environ['MONGO_DB'] class JuejinPipeline(object): diff --git a/spiders/juejin_node/juejin_spider.js b/spiders/juejin_node/juejin_spider.js index 3cf2bcac..afb0cea8 100644 --- a/spiders/juejin_node/juejin_spider.js +++ b/spiders/juejin_node/juejin_spider.js @@ -52,8 +52,8 @@ const MongoClient = require('mongodb').MongoClient; }); // open database connection - const client = await MongoClient.connect('mongodb://127.0.0.1:27017'); - let db = await client.db('crawlab_test'); + const client = await MongoClient.connect(`mongodb://${process.env.MONGO_HOST}:${process.env.MONGO_PORT}`); + let db = await client.db(process.env.MONGO_DB); const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin'; const taskId = process.env.CRAWLAB_TASK_ID; const col = db.collection(colName);