From d931f9af8e2613f0e040b9d082c38208255e9b52 Mon Sep 17 00:00:00 2001 From: marvzhang Date: Fri, 21 Feb 2020 15:07:54 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8A=A0=E5=85=A5scrapy=20items.py=E6=94=AF?= =?UTF-8?q?=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/entity/spider.go | 11 +- backend/main.go | 2 + backend/routes/spider.go | 65 ++- backend/services/scrapy.go | 70 ++- .../src/components/Scrapy/SpiderScrapy.vue | 522 ++++++++++++------ frontend/src/i18n/zh.js | 2 + frontend/src/store/modules/spider.js | 13 + frontend/src/views/spider/SpiderDetail.vue | 20 +- 8 files changed, 525 insertions(+), 180 deletions(-) diff --git a/backend/entity/spider.go b/backend/entity/spider.go index 6f8fbee1..616d3bbf 100644 --- a/backend/entity/spider.go +++ b/backend/entity/spider.go @@ -6,7 +6,12 @@ type SpiderType struct { } type ScrapySettingParam struct { - Key string - Value interface{} - Type string + Key string `json:"key"` + Value interface{} `json:"value"` + Type string `json:"type"` +} + +type ScrapyItem struct { + Name string `json:"name"` + Fields []string `json:"fields"` } diff --git a/backend/main.go b/backend/main.go index 6c00c797..63c19229 100644 --- a/backend/main.go +++ b/backend/main.go @@ -178,6 +178,8 @@ func main() { authGroup.PUT("/spiders/:id/scrapy/spiders", routes.PutSpiderScrapySpiders) // Scrapy 爬虫创建爬虫 authGroup.GET("/spiders/:id/scrapy/settings", routes.GetSpiderScrapySettings) // Scrapy 爬虫设置 authGroup.POST("/spiders/:id/scrapy/settings", routes.PostSpiderScrapySettings) // Scrapy 爬虫修改设置 + authGroup.GET("/spiders/:id/scrapy/items", routes.GetSpiderScrapyItems) // Scrapy 爬虫 items + authGroup.POST("/spiders/:id/scrapy/items", routes.PostSpiderScrapyItems) // Scrapy 爬虫修改 items authGroup.POST("/spiders/:id/git/sync", routes.PostSpiderSyncGit) // 爬虫 Git 同步 authGroup.POST("/spiders/:id/git/reset", routes.PostSpiderResetGit) // 爬虫 Git 重置 } diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 4d9b824a..7b86469d 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -974,8 +974,9 @@ func GetSpiderScrapySpiders(c *gin.Context) { func PutSpiderScrapySpiders(c *gin.Context) { type ReqBody struct { - Name string `json:"name"` - Domain string `json:"domain"` + Name string `json:"name"` + Domain string `json:"domain"` + Template string `json:"template"` } id := c.Param("id") @@ -997,7 +998,7 @@ func PutSpiderScrapySpiders(c *gin.Context) { return } - if err := services.CreateScrapySpider(spider, reqBody.Name, reqBody.Domain); err != nil { + if err := services.CreateScrapySpider(spider, reqBody.Name, reqBody.Domain, reqBody.Template); err != nil { HandleError(http.StatusInternalServerError, c, err) return } @@ -1066,6 +1067,64 @@ func PostSpiderScrapySettings(c *gin.Context) { }) } +func GetSpiderScrapyItems(c *gin.Context) { + id := c.Param("id") + + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") + return + } + + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + data, err := services.GetScrapyItems(spider) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: data, + }) +} + +func PostSpiderScrapyItems(c *gin.Context) { + id := c.Param("id") + + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") + return + } + + var reqData []entity.ScrapyItem + if err := c.ShouldBindJSON(&reqData); err != nil { + HandleErrorF(http.StatusBadRequest, c, "invalid request") + return + } + + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + if err := services.SaveScrapyItems(spider, reqData); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + func PostSpiderSyncGit(c *gin.Context) { id := c.Param("id") diff --git a/backend/services/scrapy.go b/backend/services/scrapy.go index 52c316c3..5a7c4d4e 100644 --- a/backend/services/scrapy.go +++ b/backend/services/scrapy.go @@ -135,11 +135,77 @@ func SaveScrapySettings(s model.Spider, settingsData []entity.ScrapySettingParam return } -func CreateScrapySpider(s model.Spider, name string, domain string) (err error) { +func GetScrapyItems(s model.Spider) (res []map[string]interface{}, err error) { var stdout bytes.Buffer var stderr bytes.Buffer - cmd := exec.Command("scrapy", "genspider", name, domain) + cmd := exec.Command("crawlab", "items") + cmd.Dir = s.Src + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + log.Errorf(err.Error()) + log.Errorf(stderr.String()) + debug.PrintStack() + return res, err + } + + if err := json.Unmarshal([]byte(stdout.String()), &res); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return res, err + } + + return res, nil +} + +func SaveScrapyItems(s model.Spider, itemsData []entity.ScrapyItem) (err error) { + // 读取 scrapy.cfg + cfg, err := goconfig.LoadConfigFile(path.Join(s.Src, "scrapy.cfg")) + if err != nil { + return + } + modName, err := cfg.GetValue("settings", "default") + if err != nil { + return + } + + // 定位到 settings.py 文件 + arr := strings.Split(modName, ".") + dirName := arr[0] + fileName := "items" + filePath := fmt.Sprintf("%s/%s/%s.py", s.Src, dirName, fileName) + + // 生成文件内容 + content := "" + content += "import scrapy\n" + content += "\n\n" + for _, item := range itemsData { + content += fmt.Sprintf("class %s(scrapy.Item):\n", item.Name) + for _, field := range item.Fields { + content += fmt.Sprintf(" %s = scrapy.Field()\n", field) + } + content += "\n\n" + } + + // 写到 settings.py + if err := ioutil.WriteFile(filePath, []byte(content), os.ModePerm); err != nil { + return err + } + + // 同步到GridFS + if err := UploadSpiderToGridFsFromMaster(s); err != nil { + return err + } + + return +} + +func CreateScrapySpider(s model.Spider, name string, domain string, template string) (err error) { + var stdout bytes.Buffer + var stderr bytes.Buffer + + cmd := exec.Command("scrapy", "genspider", name, domain, "-t", template) cmd.Dir = s.Src cmd.Stdout = &stdout cmd.Stderr = &stderr diff --git a/frontend/src/components/Scrapy/SpiderScrapy.vue b/frontend/src/components/Scrapy/SpiderScrapy.vue index 38ff3e2b..85e1d645 100644 --- a/frontend/src/components/Scrapy/SpiderScrapy.vue +++ b/frontend/src/components/Scrapy/SpiderScrapy.vue @@ -13,7 +13,7 @@ type="primary" size="small" icon="el-icon-plus" - @click="onActiveParamAdd" + @click="onSettingsActiveParamAdd" > {{$t('Add')}} @@ -58,14 +58,14 @@ size="mini" icon="el-icon-delete" circle - @click="onActiveParamRemove(scope.$index)" + @click="onSettingsActiveParamRemove(scope.$index)" /> @@ -114,143 +114,235 @@ - -
-

{{$t('Scrapy Spiders')}}

-
- - {{$t('Add Spider')}} - -
- -
- - - -
-

{{$t('Settings')}}

-
- - {{$t('Add')}} - - - {{$t('Save')}} - -
- - - - - - - - - - - - - - -
- + type="primary" + size="small" + icon="el-icon-plus" + @click="onSettingsAdd" + > + {{$t('Add')}} + + + {{$t('Save')}} + + + + + + + + + + + + + + + + + + + + + + +
+
+ + {{$t('Add Spider')}} + +
+
    +
  • + + {{s}} +
  • +
+
+
+ + + + +
+
+ + {{$t('Add Item')}} + + + {{$t('Save')}} + +
+ + + + + + +
+
+ + + + + + + + @@ -264,7 +356,8 @@ export default { computed: { ...mapState('spider', [ 'spiderForm', - 'spiderScrapySettings' + 'spiderScrapySettings', + 'spiderScrapyItems' ]), activeParamData () { if (this.activeParam.type === 'array') { @@ -280,6 +373,24 @@ export default { }) } return [] + }, + spiderScrapyItemsConverted () { + let id = 0 + return this.spiderScrapyItems.map(d => { + d.id = id++ + d.label = d.name + d.level = 1 + d.isEdit = false + d.children = d.fields.map(f => { + return { + id: id++, + label: f, + level: 2, + isEdit: false + } + }) + return d + }) } }, data () { @@ -293,7 +404,8 @@ export default { domain: '', template: 'basic' }, - isAddSpiderLoading: false + isAddSpiderLoading: false, + activeTabName: 'settings' } }, methods: { @@ -303,7 +415,7 @@ export default { onCloseDialog () { this.dialogVisible = false }, - onConfirm () { + onSettingsConfirm () { if (this.activeParam.type === 'array') { this.activeParam.value = this.activeParamData.map(d => d.value) } else if (this.activeParam.type === 'object') { @@ -317,20 +429,20 @@ export default { this.dialogVisible = false this.$st('爬虫详情', 'Scrapy 设置', '确认编辑参数') }, - onEditParam (row, index) { + onSettingsEditParam (row, index) { this.activeParam = JSON.parse(JSON.stringify(row)) this.activeParamIndex = index this.onOpenDialog() this.$st('爬虫详情', 'Scrapy 设置', '点击编辑参数') }, - async onSave () { + async onSettingsSave () { const res = await this.$store.dispatch('spider/saveSpiderScrapySettings', this.$route.params.id) if (!res.data.error) { this.$message.success(this.$t('Saved successfully')) } this.$st('爬虫详情', 'Scrapy 设置', '保存设置') }, - onAdd () { + onSettingsAdd () { const data = JSON.parse(JSON.stringify(this.spiderScrapySettings)) data.push({ key: '', @@ -340,13 +452,13 @@ export default { this.$store.commit('spider/SET_SPIDER_SCRAPY_SETTINGS', data) this.$st('爬虫详情', 'Scrapy 设置', '添加参数') }, - onRemove (index) { + onSettingsRemove (index) { const data = JSON.parse(JSON.stringify(this.spiderScrapySettings)) data.splice(index, 1) this.$store.commit('spider/SET_SPIDER_SCRAPY_SETTINGS', data) this.$st('爬虫详情', 'Scrapy 设置', '删除参数') }, - onActiveParamAdd () { + onSettingsActiveParamAdd () { if (this.activeParam.type === 'array') { this.activeParam.value.push('') } else if (this.activeParam.type === 'object') { @@ -357,7 +469,7 @@ export default { } this.$st('爬虫详情', 'Scrapy 设置', '添加参数中参数') }, - onActiveParamRemove (index) { + onSettingsActiveParamRemove (index) { if (this.activeParam.type === 'array') { this.activeParam.value.splice(index, 1) } else if (this.activeParam.type === 'object') { @@ -385,7 +497,7 @@ export default { }) cb(data) }, - onParamTypeChange (row) { + onSettingsParamTypeChange (row) { if (row.type === 'number') { row.value = Number(row.value) } @@ -415,6 +527,67 @@ export default { } this.isAddSpiderVisible = true this.$st('爬虫详情', 'Scrapy 设置', '添加爬虫') + }, + onAddItem () { + this.spiderScrapyItems.push({ + name: `Item_${+new Date()}`, + fields: [ + `field_${+new Date()}` + ] + }) + this.$st('爬虫详情', 'Scrapy 设置', '添加Item') + }, + removeItem (data, ev) { + ev.stopPropagation() + for (let i = 0; i < this.spiderScrapyItems.length; i++) { + const item = this.spiderScrapyItems[i] + if (item.name === data.label) { + this.spiderScrapyItems.splice(i, 1) + break + } + } + this.$st('爬虫详情', 'Scrapy 设置', '删除Item') + }, + onAddItemField (data, ev) { + ev.stopPropagation() + for (let i = 0; i < this.spiderScrapyItems.length; i++) { + const item = this.spiderScrapyItems[i] + if (item.name === data.label) { + item.fields.push(`field_${+new Date()}`) + break + } + } + this.$st('爬虫详情', 'Scrapy 设置', '添加Items字段') + }, + onRemoveItemField (node, data, ev) { + ev.stopPropagation() + for (let i = 0; i < this.spiderScrapyItems.length; i++) { + const item = this.spiderScrapyItems[i] + if (item.name === node.parent.label) { + for (let j = 0; j < item.fields.length; j++) { + const field = item.fields[j] + if (field === data.label) { + item.fields.splice(j, 1) + break + } + } + } + } + this.$st('爬虫详情', 'Scrapy 设置', '删除Items字段') + }, + onItemLabelEdit (node, data, ev) { + ev.stopPropagation() + this.$set(node, 'isEdit', true) + setTimeout(() => { + this.$refs[`el-input-${data.id}`].focus() + }, 0) + }, + async onItemsSave () { + const res = await this.$store.dispatch('spider/saveSpiderScrapyItems', this.$route.params.id) + if (!res.data.error) { + this.$message.success(this.$t('Saved successfully')) + } + this.$st('爬虫详情', 'Scrapy 设置', '保存Items') } } } @@ -427,13 +600,8 @@ export default { } .spiders { - float: left; - display: inline-block; - width: 240px; + width: 100%; height: 100%; - border: 1px solid #DCDFE6; - border-radius: 3px; - padding: 0 10px; } .spiders .title { @@ -462,13 +630,8 @@ export default { } .settings { - margin-left: 20px; - border: 1px solid #DCDFE6; - float: left; - width: calc(100% - 240px - 20px); + width: 100%; height: 100%; - border-radius: 3px; - padding: 0 20px; } .settings .title { @@ -504,4 +667,41 @@ export default { .settings >>> .top-action-wrapper .el-button { margin-left: 10px; } + + .items { + width: 100%; + height: 100%; + } + + .items >>> .action-wrapper { + text-align: right; + padding-bottom: 10px; + border-bottom: 1px solid #DCDFE6; + } + + .items >>> .custom-tree-node { + flex: 1; + display: flex; + align-items: center; + justify-content: space-between; + font-size: 14px; + padding-right: 8px; + min-height: 36px; + } + + .items >>> .el-tree-node__content { + height: auto; + } + + .items >>> .custom-tree-node .label i.el-icon-edit { + visibility: hidden; + } + + .items >>> .custom-tree-node:hover .label i.el-icon-edit { + visibility: visible; + } + + .items >>> .custom-tree-node .el-input { + width: 240px; + } diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 5c7e03e2..4d33bb18 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -217,6 +217,8 @@ export default { 'Long Task': '长任务', 'Running Task Count': '运行中的任务数', 'Running Tasks': '运行中的任务', + 'Item Name': 'Item 名称', + 'Add Item': '添加 Item', // 爬虫列表 'Name': '名称', diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index c5a84ef1..9fbef204 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -13,6 +13,9 @@ const state = { // spider scrapy settings spiderScrapySettings: [], + // spider scrapy items + spiderScrapyItems: [], + // node to deploy/run activeNode: {}, @@ -98,6 +101,9 @@ const mutations = { }, SET_SPIDER_SCRAPY_SETTINGS (state, value) { state.spiderScrapySettings = value + }, + SET_SPIDER_SCRAPY_ITEMS (state, value) { + state.spiderScrapyItems = value } } @@ -150,6 +156,13 @@ const actions = { async saveSpiderScrapySettings ({ state }, id) { return request.post(`/spiders/${id}/scrapy/settings`, state.spiderScrapySettings) }, + async getSpiderScrapyItems ({ state, commit }, id) { + const res = await request.get(`/spiders/${id}/scrapy/items`) + commit('SET_SPIDER_SCRAPY_ITEMS', res.data.data) + }, + async saveSpiderScrapyItems ({ state }, id) { + return request.post(`/spiders/${id}/scrapy/items`, state.spiderScrapyItems) + }, addSpiderScrapySpider ({ state }, payload) { const { id, form } = payload return request.put(`/spiders/${id}/scrapy/spiders`, form) diff --git a/frontend/src/views/spider/SpiderDetail.vue b/frontend/src/views/spider/SpiderDetail.vue index a55a5212..70e4128b 100644 --- a/frontend/src/views/spider/SpiderDetail.vue +++ b/frontend/src/views/spider/SpiderDetail.vue @@ -190,7 +190,7 @@ export default { } }, methods: { - onTabClick (tab) { + async onTabClick (tab) { if (this.activeTabName === 'analytics') { setTimeout(() => { this.$refs['spider-stats'].update() @@ -207,12 +207,11 @@ export default { }, 100) } } else if (this.activeTabName === 'scrapy-settings') { - this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id) - this.$store.dispatch('spider/getSpiderScrapySettings', this.$route.params.id) + await this.getScrapyData() } else if (this.activeTabName === 'files') { - this.$store.dispatch('spider/getFileTree') + await this.$store.dispatch('spider/getFileTree') if (this.currentPath) { - this.$store.dispatch('file/getFileContent', { path: this.currentPath }) + await this.$store.dispatch('file/getFileContent', { path: this.currentPath }) } } this.$st.sendEv('爬虫详情', '切换标签', tab.name) @@ -220,6 +219,11 @@ export default { onSpiderChange (id) { this.$router.push(`/spiders/${id}`) this.$st.sendEv('爬虫详情', '切换爬虫') + }, + async getScrapyData () { + await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id) + await this.$store.dispatch('spider/getSpiderScrapySettings', this.$route.params.id) + await this.$store.dispatch('spider/getSpiderScrapyItems', this.$route.params.id) } }, async created () { @@ -237,12 +241,6 @@ export default { // get spider list await this.$store.dispatch('spider/getSpiderList') - - // get scrapy spider names - if (this.spiderForm.is_scrapy) { - await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id) - await this.$store.dispatch('spider/getSpiderScrapySettings', this.$route.params.id) - } }, mounted () { if (!this.$utils.tour.isFinishedTour('spider-detail')) {