diff --git a/backend/main.go b/backend/main.go index e295440a..14f7ded4 100644 --- a/backend/main.go +++ b/backend/main.go @@ -174,6 +174,7 @@ func main() { authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据 authGroup.GET("/spiders/:id/schedules", routes.GetSpiderSchedules) // 爬虫定时任务 authGroup.GET("/spiders/:id/scrapy/spiders", routes.GetSpiderScrapySpiders) // Scrapy 爬虫名称列表 + authGroup.PUT("/spiders/:id/scrapy/spiders", routes.PutSpiderScrapySpiders) // Scrapy 爬虫创建爬虫 authGroup.GET("/spiders/:id/scrapy/settings", routes.GetSpiderScrapySettings) // Scrapy 爬虫设置 authGroup.POST("/spiders/:id/scrapy/settings", routes.PostSpiderScrapySettings) // Scrapy 爬虫修改设置 } diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 2748a02e..752d30b7 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -930,6 +930,42 @@ func GetSpiderScrapySpiders(c *gin.Context) { }) } +func PutSpiderScrapySpiders(c *gin.Context) { + type ReqBody struct { + Name string `json:"name"` + Domain string `json:"domain"` + } + + id := c.Param("id") + + var reqBody ReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleErrorF(http.StatusBadRequest, c, "invalid request") + return + } + + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") + return + } + + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + if err := services.CreateScrapySpider(spider, reqBody.Name, reqBody.Domain); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + func GetSpiderScrapySettings(c *gin.Context) { id := c.Param("id") diff --git a/backend/services/scrapy.go b/backend/services/scrapy.go index c1e5158f..374df4ee 100644 --- a/backend/services/scrapy.go +++ b/backend/services/scrapy.go @@ -134,3 +134,20 @@ func 
SaveScrapySettings(s model.Spider, settingsData []entity.ScrapySettingParam return } + +func CreateScrapySpider(s model.Spider, name string, domain string) (err error) { + var stdout bytes.Buffer + var stderr bytes.Buffer + + cmd := exec.Command("scrapy", "genspider", name, domain) + cmd.Dir = s.Src + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + log.Error(err.Error()) + debug.PrintStack() + return err + } + + return +} diff --git a/backend/services/spider.go b/backend/services/spider.go index fb785d85..a3ef2426 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -83,6 +83,9 @@ func UploadSpiderToGridFsFromMaster(spider model.Spider) error { // 生成MD5 spiderSync.CreateMd5File(gfFile2.Md5) + // 检查是否为 Scrapy 爬虫 + spiderSync.CheckIsScrapy() + return nil } @@ -200,6 +203,7 @@ func PublishSpider(spider model.Spider) { log.Infof("path not found: %s", path) spiderSync.Download() spiderSync.CreateMd5File(gfFile.Md5) + spiderSync.CheckIsScrapy() return } // md5文件不存在,则下载 diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index ddc94b57..9881b257 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -1,6 +1,7 @@ package spider_handler import ( + "crawlab/constants" "crawlab/database" "crawlab/model" "crawlab/utils" @@ -12,6 +13,7 @@ import ( "io" "os" "os/exec" + "path" "path/filepath" "runtime/debug" ) @@ -39,10 +41,29 @@ func (s *SpiderSync) CreateMd5File(md5 string) { } } +func (s *SpiderSync) CheckIsScrapy() { + if s.Spider.Type == constants.Configurable { + return + } + s.Spider.IsScrapy = utils.Exists(path.Join(s.Spider.Src, "scrapy.cfg")) + if err := s.Spider.Save(); err != nil { + log.Error(err.Error()) + debug.PrintStack() + return + } +} + +func (s *SpiderSync) AfterRemoveDownCreate() { + if model.IsMaster() { + s.CheckIsScrapy() + } +} + func (s *SpiderSync) RemoveDownCreate(md5 string) { 
s.RemoveSpiderFile() s.Download() s.CreateMd5File(md5) + s.AfterRemoveDownCreate() } // 获得下载锁的key diff --git a/frontend/src/components/Scrapy/SpiderScrapy.vue b/frontend/src/components/Scrapy/SpiderScrapy.vue index 8a752fc9..823cde6c 100644 --- a/frontend/src/components/Scrapy/SpiderScrapy.vue +++ b/frontend/src/components/Scrapy/SpiderScrapy.vue @@ -33,11 +33,17 @@ > + + + + + + + + + + + + +

{{$t('Scrapy Spiders')}}

+
+ + {{$t('Add Spider')}} + +
{ + if (!valid) return + this.isAddSpiderLoading = true + const res = await this.$store.dispatch('spider/addSpiderScrapySpider', { + id: this.$route.params.id, + form: this.addSpiderForm + }) + console.log(res) + if (!res.data.error) { + this.$message.success('Saved successfully') + } + this.isAddSpiderVisible = false + this.isAddSpiderLoading = false + }) } } } @@ -334,6 +406,11 @@ export default { padding-bottom: 15px; } + .spiders .action-wrapper { + margin-bottom: 10px; + text-align: right; + } + .spiders .spider-list { list-style: none; padding: 0; diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 821b24e7..4a64d2cc 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -195,6 +195,8 @@ export default { 'Variable Name': '变量名', 'Variable Type': '变量类型', 'Variable Value': '变量值', + 'Parameter Edit': '参数编辑', + 'Add Scrapy Spider': '添加 Scrapy 爬虫', // 爬虫列表 'Name': '名称', diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index d93679d6..c5a84ef1 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -150,6 +150,10 @@ const actions = { async saveSpiderScrapySettings ({ state }, id) { return request.post(`/spiders/${id}/scrapy/settings`, state.spiderScrapySettings) }, + addSpiderScrapySpider ({ state }, payload) { + const { id, form } = payload + return request.put(`/spiders/${id}/scrapy/spiders`, form) + }, crawlSpider ({ state, dispatch }, payload) { const { spiderId, runType, nodeIds, param } = payload return request.put(`/tasks`, {