diff --git a/backend/main.go b/backend/main.go
index 72ab2c25..226b3bd5 100644
--- a/backend/main.go
+++ b/backend/main.go
@@ -140,11 +140,12 @@ func main() {
authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据
authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型
// 可配置爬虫
- authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
- authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
- authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
- authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
- authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
+ authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
+ authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
+ authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
+ authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
+ authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
+ authGroup.POST("/config_spiders/:id/spiderfile", routes.PostConfigSpiderSpiderfile) // 上传可配置爬虫
// 任务
authGroup.GET("/tasks", routes.GetTaskList) // 任务列表
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情
diff --git a/backend/model/config_spider/scrapy.go b/backend/model/config_spider/scrapy.go
index 62c5cd78..5a6730d1 100644
--- a/backend/model/config_spider/scrapy.go
+++ b/backend/model/config_spider/scrapy.go
@@ -216,7 +216,7 @@ func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string {
// 如果为CSS
if f.Attr == "" {
// 文本
- return fmt.Sprintf(`css('%s::text()')`, f.Css)
+ return fmt.Sprintf(`css('%s::text')`, f.Css)
} else {
// 属性
return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr)
@@ -242,9 +242,9 @@ func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string {
if stage.PageCss != "" {
// 如果为CSS
- return fmt.Sprintf(`css(%s::attr("%s"))`, stage.PageCss, pageAttr)
+ return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr)
} else {
// 如果为XPath
- return fmt.Sprintf(`xpath(%s/@%s)`, stage.PageXpath, pageAttr)
+ return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr)
}
}
diff --git a/backend/routes/config_spider.go b/backend/routes/config_spider.go
index 1d10335e..68998737 100644
--- a/backend/routes/config_spider.go
+++ b/backend/routes/config_spider.go
@@ -158,6 +158,54 @@ func UploadConfigSpider(c *gin.Context) {
})
}
+// PostConfigSpiderSpiderfile handles POST /config_spiders/:id/spiderfile.
+//
+// It takes the Spiderfile text from the "content" field of the JSON request
+// body, parses it as YAML into entity.ConfigSpiderData, hands that data to
+// services.ProcessSpiderFilesFromConfigData and then writes the submitted
+// text to "Spiderfile" inside the spider's source directory (spider.Src).
+//
+// Errors are reported through HandleError / HandleErrorF with
+// http.StatusBadRequest for a bad body, a malformed or unknown spider id, or
+// unparsable YAML, and with http.StatusInternalServerError when processing the
+// spider files or writing the Spiderfile fails.
+func PostConfigSpiderSpiderfile(c *gin.Context) {
+	type Body struct {
+		Content string `json:"content"`
+	}
+
+	id := c.Param("id")
+
+	// File content submitted by the client.
+	var reqBody Body
+	if err := c.ShouldBindJSON(&reqBody); err != nil {
+		HandleError(http.StatusBadRequest, c, err)
+		return
+	}
+	content := reqBody.Content
+
+	// bson.ObjectIdHex panics on anything that is not a valid hex ObjectId,
+	// so a malformed path parameter must be rejected before the lookup.
+	if !bson.IsObjectIdHex(id) {
+		HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("invalid spider id: %s", id))
+		return
+	}
+
+	// Look up the spider.
+	spider, err := model.GetSpider(bson.ObjectIdHex(id))
+	if err != nil {
+		HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
+		return
+	}
+
+	// Deserialize the YAML text into the config structure.
+	var configData entity.ConfigSpiderData
+	if err := yaml.Unmarshal([]byte(content), &configData); err != nil {
+		HandleError(http.StatusBadRequest, c, err)
+		return
+	}
+
+	// Process the spider files from the deserialized data.
+	if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
+		HandleError(http.StatusInternalServerError, c, err)
+		return
+	}
+
+	// Persist the text exactly as submitted, only after processing succeeded.
+	// NOTE(review): os.ModePerm (0777) is very permissive for a YAML file — confirm it is intended.
+	if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), []byte(content), os.ModePerm); err != nil {
+		HandleError(http.StatusInternalServerError, c, err)
+		return
+	}
+
+	c.JSON(http.StatusOK, Response{
+		Status:  "ok",
+		Message: "success",
+	})
+}
+
func PostConfigSpiderConfig(c *gin.Context) {
id := c.Param("id")
@@ -166,20 +214,27 @@ func PostConfigSpiderConfig(c *gin.Context) {
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
+ return
}
// 反序列化配置数据
var configData entity.ConfigSpiderData
if err := c.ShouldBindJSON(&configData); err != nil {
HandleError(http.StatusBadRequest, c, err)
+ return
}
// 根据序列化后的数据处理爬虫文件
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
HandleError(http.StatusInternalServerError, c, err)
+ return
}
- // TODO: 替换Spiderfile文件
+ // 替换Spiderfile文件
+ if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil {
+ HandleError(http.StatusInternalServerError, c, err)
+ return
+ }
c.JSON(http.StatusOK, Response{
Status: "ok",
diff --git a/backend/services/config_spider.go b/backend/services/config_spider.go
index 2d9acaba..d96146f5 100644
--- a/backend/services/config_spider.go
+++ b/backend/services/config_spider.go
@@ -13,6 +13,7 @@ import (
"github.com/globalsign/mgo/bson"
uuid "github.com/satori/go.uuid"
"github.com/spf13/viper"
+ "gopkg.in/yaml.v2"
"os"
"path/filepath"
"strings"
@@ -232,3 +233,28 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con
return nil
}
+
+// GenerateSpiderfileFromConfigData serializes configData to YAML and stores it
+// as "Spiderfile" in the spider's source directory (spider.Src), replacing any
+// previous content. The file is created when it does not exist yet.
+//
+// It returns the first error from marshalling, opening, writing or closing
+// the file. Marshalling happens before the file is opened, so a marshalling
+// failure leaves an existing Spiderfile untouched.
+func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
+	// Spiderfile path.
+	sfPath := filepath.Join(spider.Src, "Spiderfile")
+
+	// Build the YAML content first so a failure here cannot truncate the file.
+	sfContentByte, err := yaml.Marshal(configData)
+	if err != nil {
+		return err
+	}
+
+	// O_CREATE lets a spider without a Spiderfile get one instead of failing.
+	f, err := os.OpenFile(sfPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm)
+	if err != nil {
+		return err
+	}
+
+	// Write the content.
+	if _, err := f.Write(sfContentByte); err != nil {
+		_ = f.Close() // the write error is the one worth reporting
+		return err
+	}
+
+	// Close can surface a delayed write failure, so its error is returned too.
+	return f.Close()
+}
diff --git a/frontend/src/components/Config/ConfigList.vue b/frontend/src/components/Config/ConfigList.vue
index e7c7212a..014bfd8d 100644
--- a/frontend/src/components/Config/ConfigList.vue
+++ b/frontend/src/components/Config/ConfigList.vue
@@ -120,10 +120,10 @@
@@ -181,6 +181,18 @@
+
+
+
+
+
+
+ {{$t('Save')}}
+
+
+
+
+
@@ -194,16 +206,25 @@ import echarts from 'echarts'
import FieldsTableView from '../TableView/FieldsTableView'
import CrawlConfirmDialog from '../Common/CrawlConfirmDialog'
+import 'codemirror/lib/codemirror.js'
+import 'codemirror/mode/yaml/yaml.js'
+import FileDetail from '../File/FileDetail'
+
export default {
name: 'ConfigList',
components: {
+ FileDetail,
CrawlConfirmDialog,
FieldsTableView
},
watch: {
activeTab () {
setTimeout(() => {
+ // 渲染流程图
this.renderProcessChart()
+
+ // 获取Spiderfile
+ this.getSpiderfile()
}, 0)
}
},
@@ -229,7 +250,15 @@ export default {
{ name: 'next_stage', label: 'Next Stage' }
],
activeTab: 'stages',
- processChart: undefined
+ processChart: undefined,
+ fileOptions: {
+ mode: 'text/x-yaml',
+ theme: 'darcula',
+ styleActiveLine: true,
+ lineNumbers: true,
+ line: true,
+ matchBrackets: true
+ }
}
},
computed: {
@@ -318,34 +347,16 @@ export default {
onSelectCrawlType (value) {
this.spiderForm.crawl_type = value
},
- onSave () {
+ async onSave () {
this.$st.sendEv('爬虫详情-配置', '保存')
- return new Promise((resolve, reject) => {
- this.saveLoading = true
- this.$store.dispatch('spider/updateSpiderFields')
- .then(() => {
- this.$store.dispatch('spider/editSpider')
- .then(() => {
- this.$message.success(this.$t('Spider info has been saved successfully'))
- resolve()
- })
- .catch(() => {
- this.$message.error(this.$t('Something wrong happened'))
- reject(new Error())
- })
- .finally(() => {
- this.saveLoading = false
- })
- })
- .then(() => {
- this.$store.dispatch('spider/updateSpiderDetailFields')
- })
- .catch(() => {
- this.$message.error(this.$t('Something wrong happened'))
- this.saveLoading = false
- reject(new Error())
- })
- })
+ this.saveLoading = true
+ try {
+ await this.$store.dispatch('spider/postConfigSpiderConfig')
+ this.$message.success(this.$t('Spider info has been saved successfully'))
+ } catch (e) {
+ this.$message.error(this.$t('Something wrong happened'))
+ }
+ this.saveLoading = false
},
onDialogClose () {
this.dialogVisible = false
@@ -378,17 +389,8 @@ export default {
})
},
onCrawl () {
- this.$confirm(this.$t('Are you sure to run this spider?'), this.$t('Notification'), {
- confirmButtonText: this.$t('Confirm'),
- cancelButtonText: this.$t('Cancel')
- })
- .then(() => {
- this.$store.dispatch('spider/crawlSpider', this.spiderForm._id)
- .then(() => {
- this.$message.success(this.$t(`Spider task has been scheduled`))
- })
- this.$st.sendEv('爬虫详情-配置', '运行')
- })
+ this.crawlConfirmDialogVisible = true
+ this.$st.sendEv('爬虫详情-配置', '点击运行')
},
onExtractFields () {
this.$refs['form'].validate(res => {
@@ -571,39 +573,20 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
const totalWidth = Number(getComputedStyle(elStages).width.replace('px', ''))
const paddingRight = Number(getComputedStyle(elStages).paddingRight.replace('px', ''))
elBar.setAttribute('style', 'width:' + (totalWidth - paddingRight) + 'px')
- }
- },
- created () {
- // fields for list page
- if (!this.spiderForm.fields) {
- this.spiderForm.fields = []
- for (let i = 0; i < 3; i++) {
- this.spiderForm.fields.push({
- name: 'field_' + (i + 1),
- type: 'css',
- extract_type: 'text'
- })
+ },
+ getSpiderfile () {
+ this.$store.commit('file/SET_FILE_CONTENT', '')
+ this.$store.commit('file/SET_CURRENT_PATH', 'Spiderfile')
+ this.$store.dispatch('file/getFileContent', { path: 'Spiderfile' })
+ },
+ async onSpiderfileSave () {
+ try {
+ await this.$store.dispatch('spider/saveConfigSpiderSpiderfile')
+ this.$message.success(this.$t('Spiderfile saved successfully'))
+ } catch (e) {
+ this.$message.error('Something wrong happened')
}
}
-
- // fields for detail page
- if (!this.spiderForm.detail_fields) {
- this.spiderForm.detail_fields = []
- for (let i = 0; i < 3; i++) {
- this.spiderForm.detail_fields.push({
- name: 'field_' + (i + 1),
- type: 'css',
- extract_type: 'text'
- })
- }
- }
-
- if (!this.spiderForm.crawl_type) this.$set(this.spiderForm, 'crawl_type', 'list')
- // if (!this.spiderForm.start_url) this.$set(this.spiderForm, 'start_url', 'http://example.com')
- if (!this.spiderForm.item_selector_type) this.$set(this.spiderForm, 'item_selector_type', 'css')
- if (!this.spiderForm.pagination_selector_type) this.$set(this.spiderForm, 'pagination_selector_type', 'css')
- if (this.spiderForm.obey_robots_txt == null) this.$set(this.spiderForm, 'obey_robots_txt', true)
- if (this.spiderForm.item_threshold == null) this.$set(this.spiderForm, 'item_threshold', 10)
},
mounted () {
this.activeNames = Object.keys(this.spiderForm.config.stages)
@@ -770,4 +753,13 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
width: 100%;
height: 480px;
}
+
+ .config-list >>> .file-content {
+ height: calc(100vh - 280px);
+ }
+
+ .spiderfile-actions {
+ margin-bottom: 5px;
+ text-align: right;
+ }
diff --git a/frontend/src/components/File/FileDetail.vue b/frontend/src/components/File/FileDetail.vue
index d74f73b3..f5f8a4cc 100644
--- a/frontend/src/components/File/FileDetail.vue
+++ b/frontend/src/components/File/FileDetail.vue
@@ -18,6 +18,7 @@ import 'codemirror/mode/go/go.js'
import 'codemirror/mode/shell/shell.js'
import 'codemirror/mode/markdown/markdown.js'
import 'codemirror/mode/php/php.js'
+import 'codemirror/mode/yaml/yaml.js'
export default {
name: 'FileDetail',
@@ -38,7 +39,7 @@ export default {
},
options () {
return {
- mode: this.lanaguage,
+ mode: this.language,
theme: 'darcula',
styleActiveLine: true,
lineNumbers: true,
@@ -46,8 +47,9 @@ export default {
matchBrackets: true
}
},
- lanaguage () {
+ language () {
const fileName = this.$store.state.file.currentPath
+ if (!fileName) return ''
if (fileName.match(/\.js$/)) {
return 'text/javascript'
} else if (fileName.match(/\.py$/)) {
@@ -60,6 +62,8 @@ export default {
return 'text/x-php'
} else if (fileName.match(/\.md$/)) {
return 'text/x-markdown'
+ } else if (fileName === 'Spiderfile') {
+ return 'text/x-yaml'
} else {
return 'text'
}
@@ -74,7 +78,7 @@ export default {