diff --git a/backend/main.go b/backend/main.go index 5edf4e6c..7ddfdbf8 100644 --- a/backend/main.go +++ b/backend/main.go @@ -162,6 +162,7 @@ func main() { authGroup.POST("/spiders/:id", routes.PostSpider) // 修改爬虫 authGroup.POST("/spiders/:id/publish", routes.PublishSpider) // 发布爬虫 authGroup.POST("/spiders/:id/upload", routes.UploadSpiderFromId) // 上传爬虫(ID) + authGroup.DELETE("/spiders", routes.DeleteSelectedSpider) // 删除选择的爬虫 authGroup.DELETE("/spiders/:id", routes.DeleteSpider) // 删除爬虫 authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks) // 爬虫任务列表 authGroup.GET("/spiders/:id/file/tree", routes.GetSpiderFileTree) // 爬虫文件目录树读取 @@ -184,6 +185,8 @@ func main() { authGroup.GET("/spiders/:id/scrapy/spider/filepath", routes.GetSpiderScrapySpiderFilepath) // Scrapy 爬虫 pipelines authGroup.POST("/spiders/:id/git/sync", routes.PostSpiderSyncGit) // 爬虫 Git 同步 authGroup.POST("/spiders/:id/git/reset", routes.PostSpiderResetGit) // 爬虫 Git 重置 + authGroup.POST("/spiders-cancel", routes.CancelSelectedSpider) // 停止所选爬虫任务 + authGroup.POST("/spiders-run", routes.RunSelectedSpider) // 运行所选爬虫 } // 可配置爬虫 { @@ -201,8 +204,8 @@ func main() { authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情 authGroup.PUT("/tasks", routes.PutTask) // 派发任务 authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务 - authGroup.DELETE("/tasks_multiple", routes.DeleteMultipleTask) // 删除多个任务 - authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) //删除指定状态的任务 + authGroup.DELETE("/tasks", routes.DeleteSelectedTask) // 删除多个任务 + authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) // 删除指定状态的任务 authGroup.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务 authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志 authGroup.GET("/tasks/:id/results", routes.GetTaskResults) // 任务结果 diff --git a/backend/model/spider.go b/backend/model/spider.go index a741fc89..49f735a4 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -279,6 +279,8 @@ func RemoveSpider(id bson.ObjectId) error { var result Spider if err := c.FindId(id).One(&result); err != nil { + log.Errorf("find spider error: %s, id:%s", err.Error(), id.Hex()) + debug.PrintStack() return err } @@ -291,12 +293,10 @@ func RemoveSpider(id bson.ObjectId) error { // gf上的文件 s, gf := database.GetGridFs("files") defer s.Close() - if result.FileId.Hex() != constants.ObjectIdNull { if err := gf.RemoveId(result.FileId); err != nil { log.Error("remove file error, id:" + result.FileId.Hex()) debug.PrintStack() - return err } } diff --git a/backend/routes/spider.go b/backend/routes/spider.go index d53d9715..9ad77948 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -482,6 +482,149 @@ func DeleteSpider(c *gin.Context) { }) } +func DeleteSelectedSpider(c *gin.Context) { + type ReqBody struct { + SpiderIds []string `json:"spider_ids"` + } + + var reqBody ReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleErrorF(http.StatusBadRequest, c, "invalid request") + return + } + + for _, spiderId := range reqBody.SpiderIds { + if err := services.RemoveSpider(spiderId); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + } + + // 更新 GitCron + if err := services.GitCron.Update(); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func CancelSelectedSpider(c *gin.Context) { + type ReqBody struct { + SpiderIds []string `json:"spider_ids"` + } + + var reqBody ReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleErrorF(http.StatusBadRequest, c, "invalid request") + return + } + + for _, spiderId := range reqBody.SpiderIds { + if err := services.CancelSpider(spiderId); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + } + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func RunSelectedSpider(c *gin.Context) { + type TaskParam struct { + SpiderId bson.ObjectId `json:"spider_id"` + Param string `json:"param"` + } + type ReqBody struct { + RunType string `json:"run_type"` + NodeIds []bson.ObjectId `json:"node_ids"` + TaskParams []TaskParam `json:"task_params"` + } + + var reqBody ReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleErrorF(http.StatusBadRequest, c, "invalid request") + return + } + + // 任务ID + var taskIds []string + + // 遍历爬虫 + // TODO: 优化此部分代码,与 routes.PutTask 有重合部分 + for _, taskParam := range reqBody.TaskParams { + if reqBody.RunType == constants.RunTypeAllNodes { + // 所有节点 + nodes, err := model.GetNodeList(nil) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + for _, node := range nodes { + t := model.Task{ + SpiderId: taskParam.SpiderId, + NodeId: node.Id, + Param: taskParam.Param, + UserId: services.GetCurrentUser(c).Id, + } + + id, err := services.AddTask(t) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + taskIds = append(taskIds, id) + } + } else if reqBody.RunType == constants.RunTypeRandom { + // 随机 + t := model.Task{ + SpiderId: taskParam.SpiderId, + Param: taskParam.Param, + UserId: services.GetCurrentUser(c).Id, + } + id, err := services.AddTask(t) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + taskIds = append(taskIds, id) + } else if reqBody.RunType == constants.RunTypeSelectedNodes { + // 指定节点 + for _, nodeId := range reqBody.NodeIds { + t := model.Task{ + SpiderId: taskParam.SpiderId, + NodeId: nodeId, + Param: taskParam.Param, + UserId: services.GetCurrentUser(c).Id, + } + + id, err := services.AddTask(t) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + taskIds = append(taskIds, id) + } + } else { + HandleErrorF(http.StatusInternalServerError, c, "invalid run_type") + return + } + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: taskIds, + }) +} + func GetSpiderTasks(c *gin.Context) { id := c.Param("id") diff --git a/backend/routes/task.go b/backend/routes/task.go index ae1c431c..2880abb9 100644 --- a/backend/routes/task.go +++ b/backend/routes/task.go @@ -183,7 +183,7 @@ func DeleteTaskByStatus(c *gin.Context) { } // 删除多个任务 -func DeleteMultipleTask(c *gin.Context) { +func DeleteSelectedTask(c *gin.Context) { ids := make(map[string][]string) if err := c.ShouldBindJSON(&ids); err != nil { HandleError(http.StatusInternalServerError, c, err) diff --git a/backend/routes/version.go b/backend/routes/version.go index ec3b80c7..f62d1387 100644 --- a/backend/routes/version.go +++ b/backend/routes/version.go @@ -2,15 +2,17 @@ package routes import ( "crawlab/services" + "github.com/apex/log" "github.com/gin-gonic/gin" "net/http" + "runtime/debug" ) func GetLatestRelease(c *gin.Context) { latestRelease, err := services.GetLatestRelease() if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return + log.Errorf(err.Error()) + debug.PrintStack() } c.JSON(http.StatusOK, Response{ Status: "ok", diff --git a/backend/services/spider.go b/backend/services/spider.go index 5b50bbf0..6d450ef1 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -261,6 +261,38 @@ func RemoveSpider(id string) error { return nil } +func CancelSpider(id string) error { + // 获取该爬虫 + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + return err + } + + // 获取该爬虫待定或运行中的任务列表 + query := bson.M{ + "spider_id": spider.Id, + "status": bson.M{ + "$in": []string{ + constants.StatusPending, + constants.StatusRunning, + }, + }, + } + tasks, err := model.GetTaskList(query, 0, constants.Infinite, "-create_ts") + if err != nil { + return err + } + + // 遍历任务列表,依次停止 + for _, task := range tasks { + if err := CancelTask(task.Id); err != nil { + return err + } + } + + return nil +} + // 启动爬虫服务 func InitSpiderService() error { // 构造定时任务执行器 diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index 9881b257..ab1d5774 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -46,11 +46,12 @@ func (s *SpiderSync) CheckIsScrapy() { return } s.Spider.IsScrapy = utils.Exists(path.Join(s.Spider.Src, "scrapy.cfg")) - if err := s.Spider.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } + // TODO: 暂时停用自动检测Scrapy项目功能 + //if err := s.Spider.Save(); err != nil { + // log.Errorf(err.Error()) + // debug.PrintStack() + // return + //} } func (s *SpiderSync) AfterRemoveDownCreate() { diff --git a/frontend/src/components/Common/CrawlConfirmDialog.vue b/frontend/src/components/Common/CrawlConfirmDialog.vue index 1d5e2d7a..a1d6b1ad 100644 --- a/frontend/src/components/Common/CrawlConfirmDialog.vue +++ b/frontend/src/components/Common/CrawlConfirmDialog.vue @@ -44,8 +44,13 @@ /> - + @@ -68,7 +73,7 @@ 我已阅读并同意 《免责声明》 所有内容 -
+
跳转到任务详情页
@@ -100,9 +105,19 @@ export default { type: String, default: '' }, + spiders: { + type: Array, + default () { + return [] + } + }, visible: { type: Boolean, default: false + }, + multiple: { + type: Boolean, + default: false } }, data () { @@ -118,7 +133,8 @@ export default { isAllowDisclaimer: true, isRedirect: true, isLoading: false, - isParametersVisible: false + isParametersVisible: false, + scrapySpidersNamesDict: {} } }, computed: { @@ -129,6 +145,9 @@ export default { if (this.isLoading) return true if (!this.isAllowDisclaimer) return true return false + }, + scrapySpiders () { + return this.spiders.filter(d => d.type === 'customized' && d.is_scrapy) } }, watch: { @@ -145,30 +164,77 @@ export default { beforeParameterClose () { this.isParametersVisible = false }, + async fetchScrapySpiderName (id) { + const res = await this.$request.get(`/spiders/${id}/scrapy/spiders`) + this.scrapySpidersNamesDict[id] = res.data.data + }, onConfirm () { this.$refs['form'].validate(async valid => { if (!valid) return - let param = this.form.param - if (this.spiderForm.type === 'customized' && this.spiderForm.is_scrapy) { - param = `${this.form.spider} --loglevel=${this.form.scrapy_log_level} ${this.form.param}` + // 请求响应 + let res + + if (!this.multiple) { + // 运行单个爬虫 + + // 参数 + let param = this.form.param + + // Scrapy爬虫特殊处理 + if (this.spiderForm.type === 'customized' && this.spiderForm.is_scrapy) { + param = `${this.form.spider} --loglevel=${this.form.scrapy_log_level} ${this.form.param}` + } + + // 发起请求 + res = await this.$store.dispatch('spider/crawlSpider', { + spiderId: this.spiderId, + nodeIds: this.form.nodeIds, + param, + runType: this.form.runType + }) + } else { + // 运行多个爬虫 + + // 发起请求 + res = await this.$store.dispatch('spider/crawlSelectedSpiders', { + nodeIds: this.form.nodeIds, + runType: this.form.runType, + taskParams: this.spiders.map(d => { + // 参数 + let param = this.form.param + + // Scrapy爬虫特殊处理 + if (d.type === 'customized' && d.is_scrapy) { + param = `${this.scrapySpidersNamesDict[d._id] ? this.scrapySpidersNamesDict[d._id][0] : ''} --loglevel=${this.form.scrapy_log_level} ${this.form.param}` + } + + return { + spider_id: d._id, + param + } + }) + }) } - const res = await this.$store.dispatch('spider/crawlSpider', { - spiderId: this.spiderId, - nodeIds: this.form.nodeIds, - param, - runType: this.form.runType - }) - - const id = res.data.data[0] - + // 消息提示 this.$message.success(this.$t('A task has been scheduled successfully')) this.$emit('close') - this.$st.sendEv('爬虫确认', '确认运行', this.form.runType) + if (this.multiple) { + this.$st.sendEv('爬虫确认', '确认批量运行', this.form.runType) + } else { + this.$st.sendEv('爬虫确认', '确认运行', this.form.runType) + } - if (this.isRedirect && !this.spiderForm.is_long_task) { + // 是否重定向 + if ( + this.isRedirect && + !this.spiderForm.is_long_task && + !this.multiple + ) { + // 返回任务id + const id = res.data.data[0] this.$router.push('/tasks/' + id) this.$st.sendEv('爬虫确认', '跳转到任务详情') } @@ -194,15 +260,32 @@ export default { }) // 爬虫列表 - this.isLoading = true - await this.$store.dispatch('spider/getSpiderData', this.spiderId) - if (this.spiderForm.is_scrapy) { - await this.$store.dispatch('spider/getSpiderScrapySpiders', this.spiderId) - if (this.spiderForm.spider_names && this.spiderForm.spider_names.length > 0) { - this.$set(this.form, 'spider', this.spiderForm.spider_names[0]) + if (!this.multiple) { + // 单个爬虫 + this.isLoading = true + try { + await this.$store.dispatch('spider/getSpiderData', this.spiderId) + if (this.spiderForm.is_scrapy) { + await this.$store.dispatch('spider/getSpiderScrapySpiders', this.spiderId) + if (this.spiderForm.spider_names && this.spiderForm.spider_names.length > 0) { + this.$set(this.form, 'spider', this.spiderForm.spider_names[0]) + } + } + } finally { + this.isLoading = false + } + } else { + // 多个爬虫 + this.isLoading = true + try { + // 遍历 Scrapy 爬虫列表 + await Promise.all(this.scrapySpiders.map(async d => { + return this.fetchScrapySpiderName(d._id) + })) + } finally { + this.isLoading = false } } - this.isLoading = false }, onOpenParameters () { this.isParametersVisible = true diff --git a/frontend/src/components/File/FileList.vue b/frontend/src/components/File/FileList.vue index 935dc0fe..002e26bf 100644 --- a/frontend/src/components/File/FileList.vue +++ b/frontend/src/components/File/FileList.vue @@ -25,8 +25,8 @@ - {{$t('Cancel')}} - {{$t('Confirm')}} + {{$t('Cancel')}} + {{$t('Confirm')}} @@ -415,7 +415,9 @@ export default { const data = node.data this.onFileClick(data) node.parent.expanded = true + this.$set(this.nodeExpandedDict, node.parent.data.path, true) node.parent.parent.expanded = true + this.$set(this.nodeExpandedDict, node.parent.parent.data.path, true) }, clickPipeline () { const filename = 'pipelines.py' @@ -428,6 +430,7 @@ export default { if (dataLv2.path.match(filename)) { this.onFileClick(dataLv2) nodeLv1.expanded = true + this.$set(this.nodeExpandedDict, dataLv1.path, true) return } } diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue index 02a22818..7a95fede 100644 --- a/frontend/src/components/InfoView/SpiderInfoView.vue +++ b/frontend/src/components/InfoView/SpiderInfoView.vue @@ -45,7 +45,7 @@ /> - + diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 01759c8a..5f68aa66 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -558,6 +558,10 @@ docker run -d --restart always --name crawlab_worker \\ 'Git has been reset successfully': 'Git 已经成功重置', 'This would delete all files of the spider. Are you sure to continue?': '重置将删除该爬虫所有文件,您希望继续吗?', 'SSH Public Key is copied to the clipboard': 'SSH 公钥已粘贴到剪切板', + 'Removed successfully': '已成功删除', + 'Are you sure to delete selected items?': '您是否确认删除所选项?', + 'Are you sure to stop selected items?': '您是否确认停止所选项?', + 'Sent signals to cancel selected tasks': '已经向所选任务发送取消任务信号', // 其他 'Star crawlab-team/crawlab on GitHub': '在 GitHub 上为 Crawlab 加星吧' diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index f4e5dd02..fe982ded 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -212,6 +212,14 @@ const actions = { param: param }) }, + crawlSelectedSpiders ({ state, dispatch }, payload) { + const { taskParams, runType, nodeIds } = payload + return request.post(`/spiders-run`, { + task_params: taskParams, + run_type: runType, + node_ids: nodeIds + }) + }, getTaskList ({ state, commit }, id) { return request.get(`/spiders/${id}/tasks`) .then(response => { diff --git a/frontend/src/store/modules/task.js b/frontend/src/store/modules/task.js index 85270729..67f6a153 100644 --- a/frontend/src/store/modules/task.js +++ b/frontend/src/store/modules/task.js @@ -136,7 +136,7 @@ const actions = { }) }, deleteTaskMultiple ({ state }, ids) { - return request.delete(`/tasks_multiple`, { + return request.delete(`/tasks`, { ids: ids }) }, diff --git a/frontend/src/views/spider/SpiderList.vue b/frontend/src/views/spider/SpiderList.vue index 049a9b26..4783aaab 100644 --- a/frontend/src/views/spider/SpiderList.vue +++ b/frontend/src/views/spider/SpiderList.vue @@ -52,7 +52,7 @@ :disabled="spiderForm.is_scrapy" /> - + @@ -283,7 +283,9 @@ @@ -336,8 +338,38 @@
- - {{$t('Import Spiders')}} + + {{$t('Run')}} + + + {{$t('Stop')}} + + + {{$t('Remove')}} {{$t('Add Spider')}} -
@@ -371,11 +402,20 @@ +