diff --git a/backend/main.go b/backend/main.go index 72ab2c25..226b3bd5 100644 --- a/backend/main.go +++ b/backend/main.go @@ -140,11 +140,12 @@ func main() { authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据 authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型 // 可配置爬虫 - authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置 - authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置 - authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫 - authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫 - authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫 + authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置 + authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置 + authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫 + authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫 + authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫 + authGroup.POST("/config_spiders/:id/spiderfile", routes.PostConfigSpiderSpiderfile) // 上传可配置爬虫 // 任务 authGroup.GET("/tasks", routes.GetTaskList) // 任务列表 authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情 diff --git a/backend/model/config_spider/scrapy.go b/backend/model/config_spider/scrapy.go index 62c5cd78..5a6730d1 100644 --- a/backend/model/config_spider/scrapy.go +++ b/backend/model/config_spider/scrapy.go @@ -216,7 +216,7 @@ func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string { // 如果为CSS if f.Attr == "" { // 文本 - return fmt.Sprintf(`css('%s::text()')`, f.Css) + return fmt.Sprintf(`css('%s::text')`, f.Css) } else { // 属性 return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr) @@ -242,9 +242,9 @@ func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string { if stage.PageCss != "" { // 如果为CSS - return fmt.Sprintf(`css(%s::attr("%s"))`, stage.PageCss, pageAttr) + return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr) } else { // 如果为XPath - return fmt.Sprintf(`xpath(%s/@%s)`, stage.PageXpath, pageAttr) + return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr) } } diff --git a/backend/routes/config_spider.go b/backend/routes/config_spider.go index 1d10335e..68998737 100644 --- a/backend/routes/config_spider.go +++ b/backend/routes/config_spider.go @@ -158,6 +158,54 @@ func UploadConfigSpider(c *gin.Context) { }) } +func PostConfigSpiderSpiderfile(c *gin.Context) { + type Body struct { + Content string `json:"content"` + } + + id := c.Param("id") + + // 文件内容 + var reqBody Body + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + content := reqBody.Content + + // 获取爬虫 + var spider model.Spider + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id)) + return + } + + // 反序列化 + var configData entity.ConfigSpiderData + if err := yaml.Unmarshal([]byte(content), &configData); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + + // 根据序列化后的数据处理爬虫文件 + if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 写文件 + if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), []byte(content), os.ModePerm); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + func PostConfigSpiderConfig(c *gin.Context) { id := c.Param("id") @@ -166,20 +214,27 @@ func PostConfigSpiderConfig(c *gin.Context) { spider, err := model.GetSpider(bson.ObjectIdHex(id)) if err != nil { HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id)) + return } // 反序列化配置数据 var configData entity.ConfigSpiderData if err := c.ShouldBindJSON(&configData); err != nil { HandleError(http.StatusBadRequest, c, err) + return } // 根据序列化后的数据处理爬虫文件 if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { HandleError(http.StatusInternalServerError, c, err) + return } - // TODO: 替换Spiderfile文件 + // 替换Spiderfile文件 + if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } c.JSON(http.StatusOK, Response{ Status: "ok", diff --git a/backend/services/config_spider.go b/backend/services/config_spider.go index 2d9acaba..d96146f5 100644 --- a/backend/services/config_spider.go +++ b/backend/services/config_spider.go @@ -13,6 +13,7 @@ import ( "github.com/globalsign/mgo/bson" uuid "github.com/satori/go.uuid" "github.com/spf13/viper" + "gopkg.in/yaml.v2" "os" "path/filepath" "strings" @@ -232,3 +233,28 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con return nil } + +func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error { + // Spiderfile 路径 + sfPath := filepath.Join(spider.Src, "Spiderfile") + + // 生成Yaml内容 + sfContentByte, err := yaml.Marshal(configData) + if err != nil { + return err + } + + // 打开文件 + f, err := os.OpenFile(sfPath, os.O_WRONLY|os.O_TRUNC, 0777) + if err != nil { + return err + } + defer f.Close() + + // 写入内容 + if _, err := f.Write(sfContentByte); err != nil { + return err + } + + return nil +} diff --git a/frontend/src/components/Config/ConfigList.vue b/frontend/src/components/Config/ConfigList.vue index e7c7212a..014bfd8d 100644 --- a/frontend/src/components/Config/ConfigList.vue +++ b/frontend/src/components/Config/ConfigList.vue @@ -120,10 +120,10 @@
{{$t('Run')}} - - {{$t('ExtractFields')}} - - {{$t('Preview')}} + + + + {{$t('Save')}}
@@ -181,6 +181,18 @@
+ + + +
+ + + {{$t('Save')}} + +
+ +
+ @@ -194,16 +206,25 @@ import echarts from 'echarts' import FieldsTableView from '../TableView/FieldsTableView' import CrawlConfirmDialog from '../Common/CrawlConfirmDialog' +import 'codemirror/lib/codemirror.js' +import 'codemirror/mode/yaml/yaml.js' +import FileDetail from '../File/FileDetail' + export default { name: 'ConfigList', components: { + FileDetail, CrawlConfirmDialog, FieldsTableView }, watch: { activeTab () { setTimeout(() => { + // 渲染流程图 this.renderProcessChart() + + // 获取Spiderfile + this.getSpiderfile() }, 0) } }, @@ -229,7 +250,15 @@ export default { { name: 'next_stage', label: 'Next Stage' } ], activeTab: 'stages', - processChart: undefined + processChart: undefined, + fileOptions: { + mode: 'text/x-yaml', + theme: 'darcula', + styleActiveLine: true, + lineNumbers: true, + line: true, + matchBrackets: true + } } }, computed: { @@ -318,34 +347,16 @@ export default { onSelectCrawlType (value) { this.spiderForm.crawl_type = value }, - onSave () { + async onSave () { this.$st.sendEv('爬虫详情-配置', '保存') - return new Promise((resolve, reject) => { - this.saveLoading = true - this.$store.dispatch('spider/updateSpiderFields') - .then(() => { - this.$store.dispatch('spider/editSpider') - .then(() => { - this.$message.success(this.$t('Spider info has been saved successfully')) - resolve() - }) - .catch(() => { - this.$message.error(this.$t('Something wrong happened')) - reject(new Error()) - }) - .finally(() => { - this.saveLoading = false - }) - }) - .then(() => { - this.$store.dispatch('spider/updateSpiderDetailFields') - }) - .catch(() => { - this.$message.error(this.$t('Something wrong happened')) - this.saveLoading = false - reject(new Error()) - }) - }) + this.saveLoading = true + try { + await this.$store.dispatch('spider/postConfigSpiderConfig') + this.$message.success(this.$t('Spider info has been saved successfully')) + } catch (e) { + this.$message.error(this.$t('Something wrong happened')) + } + this.saveLoading = false }, onDialogClose () { this.dialogVisible = false @@ -378,17 +389,8 @@ export default { }) }, onCrawl () { - this.$confirm(this.$t('Are you sure to run this spider?'), this.$t('Notification'), { - confirmButtonText: this.$t('Confirm'), - cancelButtonText: this.$t('Cancel') - }) - .then(() => { - this.$store.dispatch('spider/crawlSpider', this.spiderForm._id) - .then(() => { - this.$message.success(this.$t(`Spider task has been scheduled`)) - }) - this.$st.sendEv('爬虫详情-配置', '运行') - }) + this.crawlConfirmDialogVisible = true + this.$st.sendEv('爬虫详情-配置', '点击运行') }, onExtractFields () { this.$refs['form'].validate(res => { @@ -571,39 +573,20 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' -- const totalWidth = Number(getComputedStyle(elStages).width.replace('px', '')) const paddingRight = Number(getComputedStyle(elStages).paddingRight.replace('px', '')) elBar.setAttribute('style', 'width:' + (totalWidth - paddingRight) + 'px') - } - }, - created () { - // fields for list page - if (!this.spiderForm.fields) { - this.spiderForm.fields = [] - for (let i = 0; i < 3; i++) { - this.spiderForm.fields.push({ - name: 'field_' + (i + 1), - type: 'css', - extract_type: 'text' - }) + }, + getSpiderfile () { + this.$store.commit('file/SET_FILE_CONTENT', '') + this.$store.commit('file/SET_CURRENT_PATH', 'Spiderfile') + this.$store.dispatch('file/getFileContent', { path: 'Spiderfile' }) + }, + async onSpiderfileSave () { + try { + await this.$store.dispatch('spider/saveConfigSpiderSpiderfile') + this.$message.success(this.$t('Spiderfile saved successfully')) + } catch (e) { + this.$message.error('Something wrong happened') } } - - // fields for detail page - if (!this.spiderForm.detail_fields) { - this.spiderForm.detail_fields = [] - for (let i = 0; i < 3; i++) { - this.spiderForm.detail_fields.push({ - name: 'field_' + (i + 1), - type: 'css', - extract_type: 'text' - }) - } - } - - if (!this.spiderForm.crawl_type) this.$set(this.spiderForm, 'crawl_type', 'list') - // if (!this.spiderForm.start_url) this.$set(this.spiderForm, 'start_url', 'http://example.com') - if (!this.spiderForm.item_selector_type) this.$set(this.spiderForm, 'item_selector_type', 'css') - if (!this.spiderForm.pagination_selector_type) this.$set(this.spiderForm, 'pagination_selector_type', 'css') - if (this.spiderForm.obey_robots_txt == null) this.$set(this.spiderForm, 'obey_robots_txt', true) - if (this.spiderForm.item_threshold == null) this.$set(this.spiderForm, 'item_threshold', 10) }, mounted () { this.activeNames = Object.keys(this.spiderForm.config.stages) @@ -770,4 +753,13 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' -- width: 100%; height: 480px; } + + .config-list >>> .file-content { + height: calc(100vh - 280px); + } + + .spiderfile-actions { + margin-bottom: 5px; + text-align: right; + } diff --git a/frontend/src/components/File/FileDetail.vue b/frontend/src/components/File/FileDetail.vue index d74f73b3..f5f8a4cc 100644 --- a/frontend/src/components/File/FileDetail.vue +++ b/frontend/src/components/File/FileDetail.vue @@ -18,6 +18,7 @@ import 'codemirror/mode/go/go.js' import 'codemirror/mode/shell/shell.js' import 'codemirror/mode/markdown/markdown.js' import 'codemirror/mode/php/php.js' +import 'codemirror/mode/yaml/yaml.js' export default { name: 'FileDetail', @@ -38,7 +39,7 @@ export default { }, options () { return { - mode: this.lanaguage, + mode: this.language, theme: 'darcula', styleActiveLine: true, lineNumbers: true, @@ -46,8 +47,9 @@ export default { matchBrackets: true } }, - lanaguage () { + language () { const fileName = this.$store.state.file.currentPath + if (!fileName) return '' if (fileName.match(/\.js$/)) { return 'text/javascript' } else if (fileName.match(/\.py$/)) { @@ -60,6 +62,8 @@ export default { return 'text/x-php' } else if (fileName.match(/\.md$/)) { return 'text/x-markdown' + } else if (fileName === 'Spiderfile') { + return 'text/x-yaml' } else { return 'text' } @@ -74,7 +78,7 @@ export default {