From d46f43b05b517c7bab9af04832be484bcb606ca1 Mon Sep 17 00:00:00 2001
From: marvzhang
Date: Sat, 15 Feb 2020 18:28:37 +0800
Subject: [PATCH 01/17] Add Scrapy spider support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/model/spider.go                      |  4 +
 backend/routes/spider.go                     | 32 ++++++-
 backend/services/scrapy.go                   | 32 +++++++
 backend/services/task.go                     |  1 -
 .../components/Common/CrawlConfirmDialog.vue | 83 ++++++++++++++-----
 .../components/InfoView/SpiderInfoView.vue   | 51 +++++++-----
 frontend/src/components/Status/StatusTag.vue |  4 +
 frontend/src/i18n/zh.js                      |  2 +
 frontend/src/store/modules/spider.js         |  5 ++
 frontend/src/views/schedule/ScheduleList.vue | 34 +++++++-
 frontend/src/views/spider/SpiderDetail.vue   |  5 ++
 frontend/src/views/spider/SpiderList.vue     | 18 ++++
 12 files changed, 222 insertions(+), 49 deletions(-)
 create mode 100644 backend/services/scrapy.go

diff --git a/backend/model/spider.go b/backend/model/spider.go
index c1544259..95a1c358 100644
--- a/backend/model/spider.go
+++ b/backend/model/spider.go
@@ -37,6 +37,10 @@ type Spider struct {
 	// 自定义爬虫
 	Cmd string `json:"cmd" bson:"cmd"` // 执行命令
 
+	// Scrapy 爬虫(属于自定义爬虫)
+	IsScrapy    bool     `json:"is_scrapy" bson:"is_scrapy"`       // 是否为 Scrapy 爬虫
+	SpiderNames []string `json:"spider_names" bson:"spider_names"` // 爬虫名称列表
+
 	// 可配置爬虫
 	Template string `json:"template" bson:"template"` // Spiderfile模版
 
diff --git a/backend/routes/spider.go b/backend/routes/spider.go
index 1abb6463..18ed5071 100644
--- a/backend/routes/spider.go
+++ b/backend/routes/spider.go
@@ -88,15 +88,16 @@ func GetSpider(c *gin.Context) {
 		HandleErrorF(http.StatusBadRequest, c, "invalid id")
 	}
 
-	result, err := model.GetSpider(bson.ObjectIdHex(id))
+	spider, err := model.GetSpider(bson.ObjectIdHex(id))
 	if err != nil {
 		HandleError(http.StatusInternalServerError, c, err)
 		return
 	}
+
 	c.JSON(http.StatusOK, Response{
 		Status:  "ok",
 		Message: "success",
-		Data:    result,
+		Data:    spider,
 	})
 }
@@ -901,3 +902,30 @@ func GetSpiderSchedules(c *gin.Context) {
 		Data:    list,
 	})
 }
+
+func GetSpiderScrapySpiders(c *gin.Context) {
+	id := c.Param("id")
+
+	if !bson.IsObjectIdHex(id) {
+		HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid")
+		return
+	}
+
+	spider, err := model.GetSpider(bson.ObjectIdHex(id))
+	if err != nil {
+		HandleError(http.StatusInternalServerError, c, err)
+		return
+	}
+
+	spiderNames, err := services.GetScrapySpiderNames(spider)
+	if err != nil {
+		HandleError(http.StatusInternalServerError, c, err)
+		return
+	}
+
+	c.JSON(http.StatusOK, Response{
+		Status:  "ok",
+		Message: "success",
+		Data:    spiderNames,
+	})
+}
diff --git a/backend/services/scrapy.go b/backend/services/scrapy.go
new file mode 100644
index 00000000..2e984e6d
--- /dev/null
+++ b/backend/services/scrapy.go
@@ -0,0 +1,32 @@
+package services
+
+import (
+	"bytes"
+	"crawlab/model"
+	"os/exec"
+	"strings"
+)
+
+func GetScrapySpiderNames(s model.Spider) ([]string, error) {
+	var stdout bytes.Buffer
+	var stderr bytes.Buffer
+
+	cmd := exec.Command("scrapy", "list")
+	cmd.Dir = s.Src
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	if err := cmd.Run(); err != nil {
+		return []string{}, err
+	}
+
+	spiderNames := strings.Split(stdout.String(), "\n")
+
+	var res []string
+	for _, sn := range spiderNames {
+		if sn != "" {
+			res = append(res, sn)
+		}
+	}
+
+	return res, nil
+}
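
Note (illustrative, not part of the patch): the new GetSpiderScrapySpiders handler above is registered as GET /spiders/:id/scrapy/spiders in backend/main.go in the next patch of this series. A minimal Go client sketch follows; the host and port, the token-based Authorization header, and the spider id are assumptions or placeholders, and the response is assumed to use the standard {status, message, data} envelope shown in the handler.

// Sketch only: fetch the Scrapy spider names for one spider via the new route.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

type scrapySpidersResponse struct {
	Status  string   `json:"status"`
	Message string   `json:"message"`
	Data    []string `json:"data"` // spider names reported by "scrapy list"
}

func main() {
	// Hypothetical base URL and spider id; adjust to your deployment.
	url := "http://localhost:8000/spiders/5d429e6c19f7abede924fee2/scrapy/spiders"

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "YOUR_TOKEN") // assumption: same auth header as the rest of the API

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var body scrapySpidersResponse
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		panic(err)
	}
	fmt.Println(body.Data) // e.g. [quotes authors]
}
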
diff --git a/backend/services/task.go b/backend/services/task.go
index c71d344f..e940b325 100644
--- a/backend/services/task.go
+++ b/backend/services/task.go
@@ -243,7 +243,6 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
 	if runtime.GOOS == constants.Windows {
 		cmd = exec.Command("cmd", "/C", cmdStr)
 	} else {
-		cmd = exec.Command("")
 		cmd = exec.Command("sh", "-c", cmdStr)
 	}
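
Note (illustrative, not part of the patch): ExecuteShellCmd runs the assembled command string through a shell in the spider's working directory, which is how the "scrapy crawl ..." strings produced elsewhere in this series eventually execute. Below is a simplified, self-contained sketch of that pattern; the command string and working directory are hypothetical, and the real Crawlab implementation also wires in task logging and environment variables.

// Sketch only: run a spider command string through sh -c (or cmd /C on Windows).
package main

import (
	"log"
	"os"
	"os/exec"
	"runtime"
)

func runShellCmd(cmdStr, cwd string) error {
	var cmd *exec.Cmd
	if runtime.GOOS == "windows" {
		cmd = exec.Command("cmd", "/C", cmdStr)
	} else {
		cmd = exec.Command("sh", "-c", cmdStr)
	}
	cmd.Dir = cwd // run inside the spider's source directory
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}

func main() {
	// Hypothetical example of what a Scrapy task command could look like.
	if err := runShellCmd("scrapy crawl quotes --loglevel=INFO", "/path/to/scrapy/project"); err != nil {
		log.Fatal(err)
	}
}
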
diff --git a/frontend/src/components/Common/CrawlConfirmDialog.vue b/frontend/src/components/Common/CrawlConfirmDialog.vue
index 6739ae9e..de067295 100644
--- a/frontend/src/components/Common/CrawlConfirmDialog.vue
+++ b/frontend/src/components/Common/CrawlConfirmDialog.vue
@@ -3,11 +3,11 @@
     :title="$t('Notification')"
     :visible="visible"
     class="crawl-confirm-dialog"
-    width="480px"
+    width="540px"
     :before-close="beforeClose"
   >
     {{$t('Are you sure to run this spider?')}}
     [template markup not recoverable from this copy of the patch]
@@ -26,6 +26,16 @@
     />
     [added template markup not recoverable from this copy of the patch]
@@ -44,14 +54,17 @@
 export default {
   name: 'CrawlConfirmDialog',
   props: {
     visible: {
       type: Boolean,
       default: false
+    },
+    spiderId: {
+      type: String,
+      default: () => ''
     }
   },
   data () {
     return {
+      // 表单数据
       form: {
         runType: 'random',
         nodeIds: undefined,
+        spider: undefined,
         param: '',
         nodeList: []
       },
@@ -64,13 +77,23 @@ export default {
       }
     }
   },
+  computed: {
+    spiderForm () {
+      return this.$store.state.spider.spiderForm
+    }
+  },
   watch: {
     visible () {
       this.form.runType = 'random'
      this.form.nodeIds = []
+      if (this.spiderId) {
+        this.$store.dispatch('spider/getSpiderScrapySpiders', this.spiderId)
+      }
     }
   },
   methods: {
diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue
index 801c0fce..cd9e9f95 100644
--- a/frontend/src/components/InfoView/SpiderInfoView.vue
+++ b/frontend/src/components/InfoView/SpiderInfoView.vue
@@ -36,24 +36,19 @@
     [template markup not recoverable from this copy of the patch]
@@ -63,6 +58,13 @@
     [added template markup not recoverable from this copy of the patch]
@@ -159,16 +161,14 @@ export default {
       this.$st.sendEv('爬虫详情', '概览', '点击运行')
     },
     onSave () {
-      this.$refs['spiderForm'].validate(res => {
-        if (res) {
-          this.$store.dispatch('spider/editSpider')
-            .then(() => {
-              this.$message.success(this.$t('Spider info has been saved successfully'))
-            })
-            .catch(error => {
-              this.$message.error(error)
-            })
+      this.$refs['spiderForm'].validate(async valid => {
+        if (!valid) return
+        const res = await this.$store.dispatch('spider/editSpider')
+        if (!res.data.error) {
+          this.$message.success(this.$t('Spider info has been saved successfully'))
         }
+        await this.$store.dispatch('spider/getSpiderData', this.$route.params.id)
+        await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id)
       })
       this.$st.sendEv('爬虫详情', '概览', '保存')
     },
@@ -197,6 +197,11 @@ export default {
     },
     onUploadError () {
       this.uploadLoading = false
+    },
+    onIsScrapyChange (value) {
+      if (value) {
+        this.spiderForm.cmd = 'scrapy crawl'
+      }
     }
   },
   async created () {
diff --git a/frontend/src/components/Status/StatusTag.vue b/frontend/src/components/Status/StatusTag.vue
index befe2ab3..29f53fcd 100644
--- a/frontend/src/components/Status/StatusTag.vue
+++ b/frontend/src/components/Status/StatusTag.vue
@@ -47,6 +47,10 @@ export default {
         return 'el-icon-loading'
       } else if (this.status === 'error') {
         return 'el-icon-error'
+      } else if (this.status === 'cancelled') {
+        return 'el-icon-video-pause'
+      } else if (this.status === 'abnormal') {
+        return 'el-icon-question'
       }
       return ''
     }
diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js
index 419fcd9d..67a5437f 100644
--- a/frontend/src/i18n/zh.js
+++ b/frontend/src/i18n/zh.js
@@ -182,6 +182,8 @@ export default {
   'Settings': '设置',
   'Display Name': '显示名称',
   'Template': '模版',
+  'Is Scrapy': '是否为 Scrapy',
+  'Scrapy Spider': 'Scrapy 爬虫',
 
   // 爬虫列表
   'Name': '名称',
diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js
index b6dcf223..dbb54514 100644
--- a/frontend/src/store/modules/spider.js
+++ b/frontend/src/store/modules/spider.js
@@ -116,6 +116,11 @@ const actions = {
         commit('SET_SPIDER_FORM', data)
       })
   },
+  async getSpiderScrapySpiders ({ state, commit }, id) {
+    const res = await request.get(`/spiders/${id}/scrapy/spiders`)
+    state.spiderForm.spider_names = res.data.data
+    commit('SET_SPIDER_FORM', state.spiderForm)
+  },
   crawlSpider ({ state, dispatch }, payload) {
     const { spiderId, runType, nodeIds, param } = payload
     return request.put(`/tasks`, {
diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue
index 3b0d85da..f987386d 100644
--- a/frontend/src/views/schedule/ScheduleList.vue
+++ b/frontend/src/views/schedule/ScheduleList.vue
@@ -83,6 +83,17 @@
     [added template markup not recoverable from this copy of the patch]
[remaining hunks of this patch, including frontend/src/views/spider/SpiderDetail.vue and frontend/src/views/spider/SpiderList.vue, are not recoverable from this copy of the patch]

From: marvzhang
Date: Sat, 15 Feb 2020 21:25:21 +0800
Subject: [PATCH 02/17] Add Scrapy log selection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/main.go                              | 39 +++++++++---------
 backend/model/schedule.go                    | 25 +++++------
 backend/routes/task.go                       |  6 +--
 backend/services/schedule.go                 | 28 +++++++++++--
 .../components/Common/CrawlConfirmDialog.vue | 41 ++++++++++++++++---
 frontend/src/i18n/zh.js                      |  1 +
 frontend/src/views/schedule/ScheduleList.vue | 19 ++++++++-
 7 files changed, 115 insertions(+), 44 deletions(-)

diff --git a/backend/main.go b/backend/main.go
index ab0d0e7b..19c111d7 100644
--- a/backend/main.go
+++ b/backend/main.go
@@ -154,25 +154,26 @@ func main() {
 		}
 		// 爬虫
 		{
-			authGroup.GET("/spiders", routes.GetSpiderList)                     // 爬虫列表
-			authGroup.GET("/spiders/:id", routes.GetSpider)                     // 爬虫详情
-			authGroup.PUT("/spiders", routes.PutSpider)                         // 添加爬虫
-			authGroup.POST("/spiders", routes.UploadSpider)                     // 上传爬虫
-			authGroup.POST("/spiders/:id", routes.PostSpider)                   // 修改爬虫
-			authGroup.POST("/spiders/:id/publish", routes.PublishSpider)        // 发布爬虫
-			authGroup.POST("/spiders/:id/upload", routes.UploadSpiderFromId)    // 上传爬虫(ID)
-			authGroup.DELETE("/spiders/:id", routes.DeleteSpider)               // 删除爬虫
-			authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks)          // 爬虫任务列表
-			authGroup.GET("/spiders/:id/file/tree", routes.GetSpiderFileTree)   // 爬虫文件目录树读取
-			authGroup.GET("/spiders/:id/file", routes.GetSpiderFile)            // 爬虫文件读取
-			authGroup.POST("/spiders/:id/file", routes.PostSpiderFile)          // 爬虫文件更改
-			authGroup.PUT("/spiders/:id/file", routes.PutSpiderFile)            // 爬虫文件创建
-			authGroup.PUT("/spiders/:id/dir", routes.PutSpiderDir)              // 爬虫目录创建
-			authGroup.DELETE("/spiders/:id/file", routes.DeleteSpiderFile)      // 爬虫文件删除
-			authGroup.POST("/spiders/:id/file/rename", routes.RenameSpiderFile) // 爬虫文件重命名
-			authGroup.GET("/spiders/:id/dir", routes.GetSpiderDir)              // 爬虫目录
-			authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats)          // 爬虫统计数据
-			authGroup.GET("/spiders/:id/schedules", routes.GetSpiderSchedules)  // 爬虫定时任务
+			authGroup.GET("/spiders", routes.GetSpiderList)                             // 爬虫列表
+			authGroup.GET("/spiders/:id", routes.GetSpider)                             // 爬虫详情
+			authGroup.PUT("/spiders", routes.PutSpider)                                 // 添加爬虫
+			authGroup.POST("/spiders", routes.UploadSpider)                             // 上传爬虫
+			authGroup.POST("/spiders/:id", routes.PostSpider)                           // 修改爬虫
+			authGroup.POST("/spiders/:id/publish", routes.PublishSpider)                // 发布爬虫
+			authGroup.POST("/spiders/:id/upload", routes.UploadSpiderFromId)            // 上传爬虫(ID)
+			authGroup.DELETE("/spiders/:id", routes.DeleteSpider)                       // 删除爬虫
+			authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks)                  // 爬虫任务列表
+			authGroup.GET("/spiders/:id/file/tree", routes.GetSpiderFileTree)           // 爬虫文件目录树读取
+			authGroup.GET("/spiders/:id/file", routes.GetSpiderFile)                    // 爬虫文件读取
+			authGroup.POST("/spiders/:id/file", routes.PostSpiderFile)                  // 爬虫文件更改
+			authGroup.PUT("/spiders/:id/file", routes.PutSpiderFile)                    // 爬虫文件创建
+			authGroup.PUT("/spiders/:id/dir", routes.PutSpiderDir)                      // 爬虫目录创建
+			authGroup.DELETE("/spiders/:id/file", routes.DeleteSpiderFile)              // 爬虫文件删除
+			authGroup.POST("/spiders/:id/file/rename", routes.RenameSpiderFile)         // 爬虫文件重命名
+			authGroup.GET("/spiders/:id/dir", routes.GetSpiderDir)                      // 爬虫目录
+			authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats)                  // 爬虫统计数据
+			authGroup.GET("/spiders/:id/schedules", routes.GetSpiderSchedules)          // 爬虫定时任务
+			authGroup.GET("/spiders/:id/scrapy/spiders", routes.GetSpiderScrapySpiders) // Scrapy 爬虫名称列表
 		}
 		// 可配置爬虫
 		{
diff --git a/backend/model/schedule.go b/backend/model/schedule.go
index d98dabf6..1b988f2a 100644
--- a/backend/model/schedule.go
+++ b/backend/model/schedule.go
@@ -12,18 +12,19 @@ import (
 )
 
 type Schedule struct {
-	Id          bson.ObjectId   `json:"_id" bson:"_id"`
-	Name        string          `json:"name" bson:"name"`
-	Description string          `json:"description" bson:"description"`
-	SpiderId    bson.ObjectId   `json:"spider_id" bson:"spider_id"`
-	Cron        string          `json:"cron" bson:"cron"`
-	EntryId     cron.EntryID    `json:"entry_id" bson:"entry_id"`
-	Param       string          `json:"param" bson:"param"`
-	RunType     string          `json:"run_type" bson:"run_type"`
-	NodeIds     []bson.ObjectId `json:"node_ids" bson:"node_ids"`
-	Status      string          `json:"status" bson:"status"`
-	Enabled     bool            `json:"enabled" bson:"enabled"`
-	UserId      bson.ObjectId   `json:"user_id" bson:"user_id"`
+	Id           bson.ObjectId   `json:"_id" bson:"_id"`
+	Name         string          `json:"name" bson:"name"`
+	Description  string          `json:"description" bson:"description"`
+	SpiderId     bson.ObjectId   `json:"spider_id" bson:"spider_id"`
+	Cron         string          `json:"cron" bson:"cron"`
+	EntryId      cron.EntryID    `json:"entry_id" bson:"entry_id"`
+	Param        string          `json:"param" bson:"param"`
+	RunType      string          `json:"run_type" bson:"run_type"`
+	NodeIds      []bson.ObjectId `json:"node_ids" bson:"node_ids"`
+	Status       string          `json:"status" bson:"status"`
+	Enabled      bool            `json:"enabled" bson:"enabled"`
+	UserId       bson.ObjectId   `json:"user_id" bson:"user_id"`
+	ScrapySpider string          `json:"scrapy_spider" bson:"scrapy_spider"`
 
 	// 前端展示
 	SpiderName string `json:"spider_name" bson:"spider_name"`
diff --git a/backend/routes/task.go b/backend/routes/task.go
index d1071881..ae1c431c 100644
--- a/backend/routes/task.go
+++ b/backend/routes/task.go
@@ -118,7 +118,7 @@ func PutTask(c *gin.Context) {
 				UserId:   services.GetCurrentUser(c).Id,
 			}
 
-			id, err := services.AddTask(t);
+			id, err := services.AddTask(t)
 			if err != nil {
 				HandleError(http.StatusInternalServerError, c, err)
 				return
@@ -133,7 +133,7 @@ func PutTask(c *gin.Context) {
 				Param:    reqBody.Param,
 				UserId:   services.GetCurrentUser(c).Id,
 			}
-			id, err := services.AddTask(t);
+			id, err := services.AddTask(t)
 			if err != nil {
 				HandleError(http.StatusInternalServerError, c, err)
 				return
@@ -149,7 +149,7 @@ func PutTask(c *gin.Context) {
 				UserId:   services.GetCurrentUser(c).Id,
 			}
 
-			id, err := services.AddTask(t);
+			id, err := services.AddTask(t)
 			if err != nil {
 				HandleError(http.StatusInternalServerError, c, err)
 				return
diff --git a/backend/services/schedule.go b/backend/services/schedule.go
index 1bf70e8a..ab47d7e3 100644
--- a/backend/services/schedule.go
+++ b/backend/services/schedule.go
@@ -22,6 +22,28 @@ func AddScheduleTask(s model.Schedule) func() {
 		// 生成任务ID
 		id := uuid.NewV4()
 
+		// 参数
+		var param string
+
+		// 爬虫
+		spider, err := model.GetSpider(s.SpiderId)
+		if err != nil {
+			return
+		}
+
+		// scrapy 爬虫
+		if spider.IsScrapy {
+			if s.ScrapySpider == "" {
+				log.Errorf("scrapy spider is not set")
+				debug.PrintStack()
+				return
+			}
+
+			param = s.ScrapySpider + " " + s.Param
+		} else {
+			param = s.Param
+		}
+
 		if s.RunType == constants.RunTypeAllNodes {
 			// 所有节点
 			nodes, err := model.GetNodeList(nil)
@@ -33,7 +55,7 @@ func AddScheduleTask(s model.Schedule) func() {
 					Id:       id.String(),
 					SpiderId: s.SpiderId,
 					NodeId:   node.Id,
-					Param:    s.Param,
+					Param:    param,
 					UserId:   s.UserId,
 				}
 
@@ -46,7 +68,7 @@ func AddScheduleTask(s model.Schedule) func() {
 			t := model.Task{
 				Id:       id.String(),
 				SpiderId: s.SpiderId,
-				Param:    s.Param,
+				Param:    param,
 				UserId:   s.UserId,
 			}
 			if _, err := AddTask(t); err != nil {
@@ -61,7 +83,7 @@ func AddScheduleTask(s model.Schedule) func() {
 					Id:       id.String(),
 					SpiderId: s.SpiderId,
 					NodeId:   nodeId,
-					Param:    s.Param,
+					Param:    param,
 					UserId:   s.UserId,
 				}
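
Note (illustrative, not part of the patch): for a Scrapy spider the scheduler change above prepends the selected Scrapy spider name to the task parameters. A small Go sketch of that composition with hypothetical values follows; the assumption that the task executor then prefixes the spider's Cmd ("scrapy crawl") to the task's Param is inferred from the rest of this series rather than shown in this hunk.

// Sketch only: mirrors param = s.ScrapySpider + " " + s.Param for Scrapy spiders.
package main

import "fmt"

func buildScheduleParam(isScrapy bool, scrapySpider, param string) string {
	if isScrapy {
		return scrapySpider + " " + param
	}
	return param
}

func main() {
	param := buildScheduleParam(true, "quotes", "-a category=books")
	fmt.Println(param) // "quotes -a category=books"

	// Assumption: the executor joins the spider's Cmd with the task Param,
	// so the shell command ends up roughly as:
	fmt.Println("scrapy crawl " + param) // "scrapy crawl quotes -a category=books"
}
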
diff --git a/frontend/src/components/Common/CrawlConfirmDialog.vue b/frontend/src/components/Common/CrawlConfirmDialog.vue
index de067295..a10442f3 100644
--- a/frontend/src/components/Common/CrawlConfirmDialog.vue
+++ b/frontend/src/components/Common/CrawlConfirmDialog.vue
@@ -3,11 +3,11 @@
     :title="$t('Notification')"
     :visible="visible"
     class="crawl-confirm-dialog"
-    width="540px"
+    width="580px"
     :before-close="beforeClose"
   >
     {{$t('Are you sure to run this spider?')}}
     [template markup not recoverable from this copy of the patch]
@@ -36,8 +36,23 @@
     />
     [template markup not recoverable from this copy of the patch]
@@ -84,6 +99,7 @@ export default {
         runType: 'random',
         nodeIds: undefined,
         spider: undefined,
+        scrapy_log_level: 'INFO',
         param: '',
         nodeList: []
       },
@@ -120,7 +136,7 @@ export default {
       const res = await this.$store.dispatch('spider/crawlSpider', {
         spiderId: this.spiderId,
         nodeIds: this.form.nodeIds,
-        param: this.form.param + ' ' + this.form.spider,
+        param: `${this.form.spider} --loglevel=${this.form.scrapy_log_level} ${this.form.param}`,
         runType: this.form.runType
       })
 
@@ -177,4 +193,19 @@
   .crawl-confirm-dialog >>> .disclaimer-wrapper a {
     color: #409eff;
   }
+
+  .crawl-confirm-dialog >>> .param-input {
+    width: calc(100% - 56px);
+  }
+  .crawl-confirm-dialog >>> .param-input .el-input__inner {
+    border-top-right-radius: 0;
+    border-bottom-right-radius: 0;
+    border-right: none;
+  }
+
+  .crawl-confirm-dialog >>> .param-btn {
+    width: 56px;
+    border-top-left-radius: 0;
+    border-bottom-left-radius: 0;
+  }
diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js
index 67a5437f..c6a8afa7 100644
--- a/frontend/src/i18n/zh.js
+++ b/frontend/src/i18n/zh.js
@@ -184,6 +184,7 @@ export default {
   'Template': '模版',
   'Is Scrapy': '是否为 Scrapy',
   'Scrapy Spider': 'Scrapy 爬虫',
+  'Scrapy Log Level': 'Scrapy 日志等级',
 
   // 爬虫列表
   'Name': '名称',
diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue
index f987386d..3646f403 100644
--- a/frontend/src/views/schedule/ScheduleList.vue
+++ b/frontend/src/views/schedule/ScheduleList.vue
@@ -94,6 +94,15 @@
     />
     [added template markup not recoverable from this copy of the patch]