From 87546f0c88a5c8b581fcbb403bafaa77de215135 Mon Sep 17 00:00:00 2001 From: marvzhang Date: Fri, 29 Nov 2019 13:42:50 +0800 Subject: [PATCH] =?UTF-8?q?=E5=87=86=E5=A4=87=E5=8F=AF=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E7=88=AC=E8=99=AB=E8=87=AA=E5=AE=9A=E4=B9=89=E8=AE=BE=E7=BD=AE?= =?UTF-8?q?=E5=8F=98=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/entity/config_spider.go | 27 +- backend/routes/config_spider.go | 10 +- backend/services/task.go | 6 + backend/template/Spiderfile | 2 + .../template/scrapy/config_spider/settings.py | 8 +- frontend/src/components/Config/ConfigList.vue | 16 + .../TableView/SettingFieldsTableView.vue | 349 ++++++++++++++++++ 7 files changed, 402 insertions(+), 16 deletions(-) create mode 100644 frontend/src/components/TableView/SettingFieldsTableView.vue diff --git a/backend/entity/config_spider.go b/backend/entity/config_spider.go index 7afb7109..3fe28bc9 100644 --- a/backend/entity/config_spider.go +++ b/backend/entity/config_spider.go @@ -1,12 +1,12 @@ package entity -type Field struct { - Name string `yaml:"name" json:"name"` - Css string `yaml:"css" json:"css"` - Xpath string `yaml:"xpath" json:"xpath"` - Attr string `yaml:"attr" json:"attr"` - NextStage string `yaml:"next_stage" json:"next_stage"` - Remark string `yaml:"remark" json:"remark"` +type ConfigSpiderData struct { + Version string `yaml:"version" json:"version"` + Engine string `yaml:"engine" json:"engine"` + StartUrl string `yaml:"start_url" json:"start_url"` + StartStage string `yaml:"start_stage" json:"start_stage"` + Stages map[string]Stage `yaml:"stages" json:"stages"` + Settings map[string]string `yaml:"settings" json:"settings"` } type Stage struct { @@ -20,10 +20,11 @@ type Stage struct { Fields []Field `yaml:"fields" json:"fields"` } -type ConfigSpiderData struct { - Version string `yaml:"version" json:"version"` - Engine string `yaml:"engine" json:"engine"` - StartUrl string `yaml:"start_url" 
json:"start_url"` - StartStage string `yaml:"start_stage" json:"start_stage"` - Stages map[string]Stage `yaml:"stages" json:"stages"` +type Field struct { + Name string `yaml:"name" json:"name"` + Css string `yaml:"css" json:"css"` + Xpath string `yaml:"xpath" json:"xpath"` + Attr string `yaml:"attr" json:"attr"` + NextStage string `yaml:"next_stage" json:"next_stage"` + Remark string `yaml:"remark" json:"remark"` } diff --git a/backend/routes/config_spider.go b/backend/routes/config_spider.go index 982af28d..61067f98 100644 --- a/backend/routes/config_spider.go +++ b/backend/routes/config_spider.go @@ -205,8 +205,8 @@ func PostConfigSpiderSpiderfile(c *gin.Context) { return } - // 根据序列化后的数据处理爬虫文件 - if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { + // Validate configData + if err := services.ValidateSpiderfile(configData); err != nil { HandleError(http.StatusInternalServerError, c, err) return } @@ -217,6 +217,12 @@ func PostConfigSpiderSpiderfile(c *gin.Context) { return } + // Process the spider files based on the serialized config data + if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", diff --git a/backend/services/task.go b/backend/services/task.go index 9336e75d..0cd53d86 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -219,12 +219,18 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e // 环境变量配置 envs := s.Envs if s.Type == constants.Configurable { + // Database configuration envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_HOST", Value: viper.GetString("mongo.host")}) envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PORT", Value: viper.GetString("mongo.port")}) envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_DB", Value: viper.GetString("mongo.db")}) envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_USERNAME", Value: viper.GetString("mongo.username")}) envs = 
append(envs, model.Env{Name: "CRAWLAB_MONGO_PASSWORD", Value: viper.GetString("mongo.password")}) envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_AUTHSOURCE", Value: viper.GetString("mongo.authSource")}) + + // Pass each Spiderfile setting to the spider process as CRAWLAB_SETTING_<name> + for envName, envValue := range s.Config.Settings { + envs = append(envs, model.Env{Name: "CRAWLAB_SETTING_" + envName, Value: envValue}) + } } cmd = SetEnv(cmd, envs, t.Id, s.Col) diff --git a/backend/template/Spiderfile b/backend/template/Spiderfile index 8d0e05cf..d748d5f8 100644 --- a/backend/template/Spiderfile +++ b/backend/template/Spiderfile @@ -23,3 +23,5 @@ stages: fields: - name: "description" css: "#product_description + p" +settings: + ROBOTSTXT_OBEY: true diff --git a/backend/template/scrapy/config_spider/settings.py b/backend/template/scrapy/config_spider/settings.py index a0112373..3dde4c6c 100644 --- a/backend/template/scrapy/config_spider/settings.py +++ b/backend/template/scrapy/config_spider/settings.py @@ -9,7 +9,7 @@ # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html # https://docs.scrapy.org/en/latest/topics/spider-middleware.html -BOT_NAME = 'config_spider' +BOT_NAME = 'Crawlab Configurable Spider' SPIDER_MODULES = ['config_spider.spiders'] NEWSPIDER_MODULE = 'config_spider.spiders' @@ -88,3 +88,9 @@ ITEM_PIPELINES = { #HTTPCACHE_DIR = 'httpcache' #HTTPCACHE_IGNORE_HTTP_CODES = [] #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' + +for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]: # NOTE(review): needs "import os" earlier in settings.py; not visible in this hunk + setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '') + setting_value = os.environ.get(setting_env_name) + locals()[setting_name] = setting_value + diff --git a/frontend/src/components/Config/ConfigList.vue b/frontend/src/components/Config/ConfigList.vue index 15eba993..ac66e8d4 100644 --- a/frontend/src/components/Config/ConfigList.vue +++ b/frontend/src/components/Config/ConfigList.vue @@ -182,6 +182,20 @@ + + + + + + +
@@ -209,10 +223,12 @@ import CrawlConfirmDialog from '../Common/CrawlConfirmDialog' import 'codemirror/lib/codemirror.js' import 'codemirror/mode/yaml/yaml.js' import FileDetail from '../File/FileDetail' +import SettingFieldsTableView from '../TableView/SettingFieldsTableView' export default { name: 'ConfigList', components: { + SettingFieldsTableView, FileDetail, CrawlConfirmDialog, FieldsTableView diff --git a/frontend/src/components/TableView/SettingFieldsTableView.vue b/frontend/src/components/TableView/SettingFieldsTableView.vue new file mode 100644 index 00000000..cfba6d20 --- /dev/null +++ b/frontend/src/components/TableView/SettingFieldsTableView.vue @@ -0,0 +1,349 @@ + + + + +