diff --git a/backend/entity/spider.go b/backend/entity/spider.go
index 6f8fbee1..616d3bbf 100644
--- a/backend/entity/spider.go
+++ b/backend/entity/spider.go
@@ -6,7 +6,12 @@ type SpiderType struct {
}
type ScrapySettingParam struct {
- Key string
- Value interface{}
- Type string
+ Key string `json:"key"`
+ Value interface{} `json:"value"`
+ Type string `json:"type"`
+}
+
+type ScrapyItem struct {
+ Name string `json:"name"`
+ Fields []string `json:"fields"`
}
diff --git a/backend/main.go b/backend/main.go
index 6c00c797..63c19229 100644
--- a/backend/main.go
+++ b/backend/main.go
@@ -178,6 +178,8 @@ func main() {
authGroup.PUT("/spiders/:id/scrapy/spiders", routes.PutSpiderScrapySpiders) // Scrapy 爬虫创建爬虫
authGroup.GET("/spiders/:id/scrapy/settings", routes.GetSpiderScrapySettings) // Scrapy 爬虫设置
authGroup.POST("/spiders/:id/scrapy/settings", routes.PostSpiderScrapySettings) // Scrapy 爬虫修改设置
+ authGroup.GET("/spiders/:id/scrapy/items", routes.GetSpiderScrapyItems) // Scrapy 爬虫 items
+ authGroup.POST("/spiders/:id/scrapy/items", routes.PostSpiderScrapyItems) // Scrapy 爬虫修改 items
authGroup.POST("/spiders/:id/git/sync", routes.PostSpiderSyncGit) // 爬虫 Git 同步
authGroup.POST("/spiders/:id/git/reset", routes.PostSpiderResetGit) // 爬虫 Git 重置
}
diff --git a/backend/routes/spider.go b/backend/routes/spider.go
index 4d9b824a..7b86469d 100644
--- a/backend/routes/spider.go
+++ b/backend/routes/spider.go
@@ -974,8 +974,9 @@ func GetSpiderScrapySpiders(c *gin.Context) {
func PutSpiderScrapySpiders(c *gin.Context) {
type ReqBody struct {
- Name string `json:"name"`
- Domain string `json:"domain"`
+ Name string `json:"name"`
+ Domain string `json:"domain"`
+ Template string `json:"template"`
}
id := c.Param("id")
@@ -997,7 +998,7 @@ func PutSpiderScrapySpiders(c *gin.Context) {
return
}
- if err := services.CreateScrapySpider(spider, reqBody.Name, reqBody.Domain); err != nil {
+ if err := services.CreateScrapySpider(spider, reqBody.Name, reqBody.Domain, reqBody.Template); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
@@ -1066,6 +1067,64 @@ func PostSpiderScrapySettings(c *gin.Context) {
})
}
+func GetSpiderScrapyItems(c *gin.Context) {
+ id := c.Param("id")
+
+ if !bson.IsObjectIdHex(id) {
+ HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid")
+ return
+ }
+
+ spider, err := model.GetSpider(bson.ObjectIdHex(id))
+ if err != nil {
+ HandleError(http.StatusInternalServerError, c, err)
+ return
+ }
+
+ data, err := services.GetScrapyItems(spider)
+ if err != nil {
+ HandleError(http.StatusInternalServerError, c, err)
+ return
+ }
+
+ c.JSON(http.StatusOK, Response{
+ Status: "ok",
+ Message: "success",
+ Data: data,
+ })
+}
+
+func PostSpiderScrapyItems(c *gin.Context) {
+ id := c.Param("id")
+
+ if !bson.IsObjectIdHex(id) {
+ HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid")
+ return
+ }
+
+ var reqData []entity.ScrapyItem
+ if err := c.ShouldBindJSON(&reqData); err != nil {
+ HandleErrorF(http.StatusBadRequest, c, "invalid request")
+ return
+ }
+
+ spider, err := model.GetSpider(bson.ObjectIdHex(id))
+ if err != nil {
+ HandleError(http.StatusInternalServerError, c, err)
+ return
+ }
+
+ if err := services.SaveScrapyItems(spider, reqData); err != nil {
+ HandleError(http.StatusInternalServerError, c, err)
+ return
+ }
+
+ c.JSON(http.StatusOK, Response{
+ Status: "ok",
+ Message: "success",
+ })
+}
+
func PostSpiderSyncGit(c *gin.Context) {
id := c.Param("id")
diff --git a/backend/services/scrapy.go b/backend/services/scrapy.go
index 52c316c3..5a7c4d4e 100644
--- a/backend/services/scrapy.go
+++ b/backend/services/scrapy.go
@@ -135,11 +135,77 @@ func SaveScrapySettings(s model.Spider, settingsData []entity.ScrapySettingParam
return
}
-func CreateScrapySpider(s model.Spider, name string, domain string) (err error) {
+func GetScrapyItems(s model.Spider) (res []map[string]interface{}, err error) {
var stdout bytes.Buffer
var stderr bytes.Buffer
- cmd := exec.Command("scrapy", "genspider", name, domain)
+ cmd := exec.Command("crawlab", "items")
+ cmd.Dir = s.Src
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+ if err := cmd.Run(); err != nil {
+ log.Errorf(err.Error())
+ log.Errorf(stderr.String())
+ debug.PrintStack()
+ return res, err
+ }
+
+ if err := json.Unmarshal(stdout.Bytes(), &res); err != nil {
+ log.Errorf(err.Error())
+ debug.PrintStack()
+ return res, err
+ }
+
+ return res, nil
+}
+
+func SaveScrapyItems(s model.Spider, itemsData []entity.ScrapyItem) (err error) {
+ // Read scrapy.cfg to get the project's default settings module name
+ cfg, err := goconfig.LoadConfigFile(path.Join(s.Src, "scrapy.cfg"))
+ if err != nil {
+ return
+ }
+ modName, err := cfg.GetValue("settings", "default")
+ if err != nil {
+ return
+ }
+
+ // Build the path to the project's items.py file
+ arr := strings.Split(modName, ".")
+ dirName := arr[0]
+ fileName := "items"
+ filePath := fmt.Sprintf("%s/%s/%s.py", s.Src, dirName, fileName)
+
+ // Generate Python source defining one scrapy.Item class per item
+ content := ""
+ content += "import scrapy\n"
+ content += "\n\n"
+ for _, item := range itemsData {
+ content += fmt.Sprintf("class %s(scrapy.Item):\n", item.Name)
+ for _, field := range item.Fields {
+ content += fmt.Sprintf(" %s = scrapy.Field()\n", field)
+ }
+ content += "\n\n"
+ }
+
+ // Write to items.py (NOTE(review): os.ModePerm = 0777 is broad for a source file; 0644 would suffice)
+ if err := ioutil.WriteFile(filePath, []byte(content), os.ModePerm); err != nil {
+ return err
+ }
+
+ // Sync the updated spider files to GridFS from the master node
+ if err := UploadSpiderToGridFsFromMaster(s); err != nil {
+ return err
+ }
+
+ return
+}
+
+func CreateScrapySpider(s model.Spider, name string, domain string, template string) (err error) {
+ var stdout bytes.Buffer
+ var stderr bytes.Buffer
+
+ cmd := exec.Command("scrapy", "genspider", name, domain, "-t", template)
cmd.Dir = s.Src
cmd.Stdout = &stdout
cmd.Stderr = &stderr
diff --git a/frontend/src/components/Scrapy/SpiderScrapy.vue b/frontend/src/components/Scrapy/SpiderScrapy.vue
index 38ff3e2b..85e1d645 100644
--- a/frontend/src/components/Scrapy/SpiderScrapy.vue
+++ b/frontend/src/components/Scrapy/SpiderScrapy.vue
@@ -13,7 +13,7 @@
type="primary"
size="small"
icon="el-icon-plus"
- @click="onActiveParamAdd"
+ @click="onSettingsActiveParamAdd"
>
{{$t('Add')}}
@@ -58,14 +58,14 @@
size="mini"
icon="el-icon-delete"
circle
- @click="onActiveParamRemove(scope.$index)"
+ @click="onSettingsActiveParamRemove(scope.$index)"
/>
{{$t('Cancel')}}
-
+
{{$t('Confirm')}}
@@ -114,143 +114,235 @@
-
-
-
{{$t('Scrapy Spiders')}}
-
-
- {{$t('Add Spider')}}
-
-
-
-
-
-
-
-
-
{{$t('Settings')}}
-
-
- {{$t('Add')}}
-
-
- {{$t('Save')}}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- {{JSON.stringify(scope.row.value)}}
-
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
+ type="primary"
+ size="small"
+ icon="el-icon-plus"
+ @click="onSettingsAdd"
+ >
+ {{$t('Add')}}
+
+
+ {{$t('Save')}}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {{JSON.stringify(scope.row.value)}}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {{$t('Add Spider')}}
+
+
+
+
+
+
+
+
+
+
+
+
+ {{$t('Add Item')}}
+
+
+ {{$t('Save')}}
+
+
+
+
+
+
+ {{ node.label }}
+
+
+
+
+
+ {{$t('Add Field')}}
+
+
+ {{$t('Remove')}}
+
+
+
+
+
+ {{ node.label }}
+
+
+
+
+
+ {{$t('Remove')}}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -264,7 +356,8 @@ export default {
computed: {
...mapState('spider', [
'spiderForm',
- 'spiderScrapySettings'
+ 'spiderScrapySettings',
+ 'spiderScrapyItems'
]),
activeParamData () {
if (this.activeParam.type === 'array') {
@@ -280,6 +373,24 @@ export default {
})
}
return []
+ },
+ spiderScrapyItemsConverted () {
+ let id = 0
+ return this.spiderScrapyItems.map(d => {
+ d.id = id++
+ d.label = d.name
+ d.level = 1
+ d.isEdit = false
+ d.children = d.fields.map(f => {
+ return {
+ id: id++,
+ label: f,
+ level: 2,
+ isEdit: false
+ }
+ })
+ return d
+ })
}
},
data () {
@@ -293,7 +404,8 @@ export default {
domain: '',
template: 'basic'
},
- isAddSpiderLoading: false
+ isAddSpiderLoading: false,
+ activeTabName: 'settings'
}
},
methods: {
@@ -303,7 +415,7 @@ export default {
onCloseDialog () {
this.dialogVisible = false
},
- onConfirm () {
+ onSettingsConfirm () {
if (this.activeParam.type === 'array') {
this.activeParam.value = this.activeParamData.map(d => d.value)
} else if (this.activeParam.type === 'object') {
@@ -317,20 +429,20 @@ export default {
this.dialogVisible = false
this.$st('爬虫详情', 'Scrapy 设置', '确认编辑参数')
},
- onEditParam (row, index) {
+ onSettingsEditParam (row, index) {
this.activeParam = JSON.parse(JSON.stringify(row))
this.activeParamIndex = index
this.onOpenDialog()
this.$st('爬虫详情', 'Scrapy 设置', '点击编辑参数')
},
- async onSave () {
+ async onSettingsSave () {
const res = await this.$store.dispatch('spider/saveSpiderScrapySettings', this.$route.params.id)
if (!res.data.error) {
this.$message.success(this.$t('Saved successfully'))
}
this.$st('爬虫详情', 'Scrapy 设置', '保存设置')
},
- onAdd () {
+ onSettingsAdd () {
const data = JSON.parse(JSON.stringify(this.spiderScrapySettings))
data.push({
key: '',
@@ -340,13 +452,13 @@ export default {
this.$store.commit('spider/SET_SPIDER_SCRAPY_SETTINGS', data)
this.$st('爬虫详情', 'Scrapy 设置', '添加参数')
},
- onRemove (index) {
+ onSettingsRemove (index) {
const data = JSON.parse(JSON.stringify(this.spiderScrapySettings))
data.splice(index, 1)
this.$store.commit('spider/SET_SPIDER_SCRAPY_SETTINGS', data)
this.$st('爬虫详情', 'Scrapy 设置', '删除参数')
},
- onActiveParamAdd () {
+ onSettingsActiveParamAdd () {
if (this.activeParam.type === 'array') {
this.activeParam.value.push('')
} else if (this.activeParam.type === 'object') {
@@ -357,7 +469,7 @@ export default {
}
this.$st('爬虫详情', 'Scrapy 设置', '添加参数中参数')
},
- onActiveParamRemove (index) {
+ onSettingsActiveParamRemove (index) {
if (this.activeParam.type === 'array') {
this.activeParam.value.splice(index, 1)
} else if (this.activeParam.type === 'object') {
@@ -385,7 +497,7 @@ export default {
})
cb(data)
},
- onParamTypeChange (row) {
+ onSettingsParamTypeChange (row) {
if (row.type === 'number') {
row.value = Number(row.value)
}
@@ -415,6 +527,67 @@ export default {
}
this.isAddSpiderVisible = true
this.$st('爬虫详情', 'Scrapy 设置', '添加爬虫')
+ },
+ onAddItem () {
+ this.spiderScrapyItems.push({
+ name: `Item_${+new Date()}`,
+ fields: [
+ `field_${+new Date()}`
+ ]
+ })
+ this.$st('爬虫详情', 'Scrapy 设置', '添加Item')
+ },
+ removeItem (data, ev) {
+ ev.stopPropagation()
+ for (let i = 0; i < this.spiderScrapyItems.length; i++) {
+ const item = this.spiderScrapyItems[i]
+ if (item.name === data.label) {
+ this.spiderScrapyItems.splice(i, 1)
+ break
+ }
+ }
+ this.$st('爬虫详情', 'Scrapy 设置', '删除Item')
+ },
+ onAddItemField (data, ev) {
+ ev.stopPropagation()
+ for (let i = 0; i < this.spiderScrapyItems.length; i++) {
+ const item = this.spiderScrapyItems[i]
+ if (item.name === data.label) {
+ item.fields.push(`field_${+new Date()}`)
+ break
+ }
+ }
+ this.$st('爬虫详情', 'Scrapy 设置', '添加Items字段')
+ },
+ onRemoveItemField (node, data, ev) {
+ ev.stopPropagation()
+ for (let i = 0; i < this.spiderScrapyItems.length; i++) {
+ const item = this.spiderScrapyItems[i]
+ if (item.name === node.parent.label) {
+ for (let j = 0; j < item.fields.length; j++) {
+ const field = item.fields[j]
+ if (field === data.label) {
+ item.fields.splice(j, 1)
+ break
+ }
+ }
+ }
+ }
+ this.$st('爬虫详情', 'Scrapy 设置', '删除Items字段')
+ },
+ onItemLabelEdit (node, data, ev) {
+ ev.stopPropagation()
+ this.$set(node, 'isEdit', true)
+ setTimeout(() => {
+ this.$refs[`el-input-${data.id}`].focus()
+ }, 0)
+ },
+ async onItemsSave () {
+ const res = await this.$store.dispatch('spider/saveSpiderScrapyItems', this.$route.params.id)
+ if (!res.data.error) {
+ this.$message.success(this.$t('Saved successfully'))
+ }
+ this.$st('爬虫详情', 'Scrapy 设置', '保存Items')
}
}
}
@@ -427,13 +600,8 @@ export default {
}
.spiders {
- float: left;
- display: inline-block;
- width: 240px;
+ width: 100%;
height: 100%;
- border: 1px solid #DCDFE6;
- border-radius: 3px;
- padding: 0 10px;
}
.spiders .title {
@@ -462,13 +630,8 @@ export default {
}
.settings {
- margin-left: 20px;
- border: 1px solid #DCDFE6;
- float: left;
- width: calc(100% - 240px - 20px);
+ width: 100%;
height: 100%;
- border-radius: 3px;
- padding: 0 20px;
}
.settings .title {
@@ -504,4 +667,41 @@ export default {
.settings >>> .top-action-wrapper .el-button {
margin-left: 10px;
}
+
+ .items {
+ width: 100%;
+ height: 100%;
+ }
+
+ .items >>> .action-wrapper {
+ text-align: right;
+ padding-bottom: 10px;
+ border-bottom: 1px solid #DCDFE6;
+ }
+
+ .items >>> .custom-tree-node {
+ flex: 1;
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ font-size: 14px;
+ padding-right: 8px;
+ min-height: 36px;
+ }
+
+ .items >>> .el-tree-node__content {
+ height: auto;
+ }
+
+ .items >>> .custom-tree-node .label i.el-icon-edit {
+ visibility: hidden;
+ }
+
+ .items >>> .custom-tree-node:hover .label i.el-icon-edit {
+ visibility: visible;
+ }
+
+ .items >>> .custom-tree-node .el-input {
+ width: 240px;
+ }
diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js
index 5c7e03e2..4d33bb18 100644
--- a/frontend/src/i18n/zh.js
+++ b/frontend/src/i18n/zh.js
@@ -217,6 +217,8 @@ export default {
'Long Task': '长任务',
'Running Task Count': '运行中的任务数',
'Running Tasks': '运行中的任务',
+ 'Item Name': 'Item 名称',
+ 'Add Item': '添加 Item',
// 爬虫列表
'Name': '名称',
diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js
index c5a84ef1..9fbef204 100644
--- a/frontend/src/store/modules/spider.js
+++ b/frontend/src/store/modules/spider.js
@@ -13,6 +13,9 @@ const state = {
// spider scrapy settings
spiderScrapySettings: [],
+ // spider scrapy items
+ spiderScrapyItems: [],
+
// node to deploy/run
activeNode: {},
@@ -98,6 +101,9 @@ const mutations = {
},
SET_SPIDER_SCRAPY_SETTINGS (state, value) {
state.spiderScrapySettings = value
+ },
+ SET_SPIDER_SCRAPY_ITEMS (state, value) {
+ state.spiderScrapyItems = value
}
}
@@ -150,6 +156,13 @@ const actions = {
async saveSpiderScrapySettings ({ state }, id) {
return request.post(`/spiders/${id}/scrapy/settings`, state.spiderScrapySettings)
},
+ async getSpiderScrapyItems ({ state, commit }, id) {
+ const res = await request.get(`/spiders/${id}/scrapy/items`)
+ commit('SET_SPIDER_SCRAPY_ITEMS', res.data.data)
+ },
+ async saveSpiderScrapyItems ({ state }, id) {
+ return request.post(`/spiders/${id}/scrapy/items`, state.spiderScrapyItems)
+ },
addSpiderScrapySpider ({ state }, payload) {
const { id, form } = payload
return request.put(`/spiders/${id}/scrapy/spiders`, form)
diff --git a/frontend/src/views/spider/SpiderDetail.vue b/frontend/src/views/spider/SpiderDetail.vue
index a55a5212..70e4128b 100644
--- a/frontend/src/views/spider/SpiderDetail.vue
+++ b/frontend/src/views/spider/SpiderDetail.vue
@@ -190,7 +190,7 @@ export default {
}
},
methods: {
- onTabClick (tab) {
+ async onTabClick (tab) {
if (this.activeTabName === 'analytics') {
setTimeout(() => {
this.$refs['spider-stats'].update()
@@ -207,12 +207,11 @@ export default {
}, 100)
}
} else if (this.activeTabName === 'scrapy-settings') {
- this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id)
- this.$store.dispatch('spider/getSpiderScrapySettings', this.$route.params.id)
+ await this.getScrapyData()
} else if (this.activeTabName === 'files') {
- this.$store.dispatch('spider/getFileTree')
+ await this.$store.dispatch('spider/getFileTree')
if (this.currentPath) {
- this.$store.dispatch('file/getFileContent', { path: this.currentPath })
+ await this.$store.dispatch('file/getFileContent', { path: this.currentPath })
}
}
this.$st.sendEv('爬虫详情', '切换标签', tab.name)
@@ -220,6 +219,11 @@ export default {
onSpiderChange (id) {
this.$router.push(`/spiders/${id}`)
this.$st.sendEv('爬虫详情', '切换爬虫')
+ },
+ async getScrapyData () {
+ await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id)
+ await this.$store.dispatch('spider/getSpiderScrapySettings', this.$route.params.id)
+ await this.$store.dispatch('spider/getSpiderScrapyItems', this.$route.params.id)
}
},
async created () {
@@ -237,12 +241,6 @@ export default {
// get spider list
await this.$store.dispatch('spider/getSpiderList')
-
- // get scrapy spider names
- if (this.spiderForm.is_scrapy) {
- await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id)
- await this.$store.dispatch('spider/getSpiderScrapySettings', this.$route.params.id)
- }
},
mounted () {
if (!this.$utils.tour.isFinishedTour('spider-detail')) {