From 88118cf894b0f8a0da8d7125cd32f160522a6a97 Mon Sep 17 00:00:00 2001
From: marvzhang
Date: Wed, 27 Nov 2019 10:40:37 +0800
Subject: [PATCH] Prepare configurable spider data for the frontend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/main.go                 |  8 ++---
 backend/model/spider.go         | 56 ++++++++++++++++++++++++++++-----
 backend/routes/config_spider.go | 27 +---------------
 backend/routes/spider.go        |  2 +-
 4 files changed, 55 insertions(+), 38 deletions(-)

diff --git a/backend/main.go b/backend/main.go
index 8b91e950..436c537e 100644
--- a/backend/main.go
+++ b/backend/main.go
@@ -140,10 +140,10 @@ func main() {
 		authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats)                 // spider statistics
 		authGroup.GET("/spider/types", routes.GetSpiderTypes)                      // spider types
 		// configurable spiders
-		authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // configurable spider config
-		authGroup.PUT("/config_spiders", routes.PutConfigSpider)                  // add configurable spider
-		authGroup.POST("/config_spiders/:id", routes.PostConfigSpider)            // update configurable spider
-		authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider)   // upload configurable spider
+		authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig)  // configurable spider config
+		authGroup.PUT("/config_spiders", routes.PutConfigSpider)                   // add configurable spider
+		authGroup.POST("/config_spiders/:id", routes.PostConfigSpider)             // update configurable spider
+		authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider)    // upload configurable spider
 		// tasks
 		authGroup.GET("/tasks", routes.GetTaskList)                                // task list
 		authGroup.GET("/tasks/:id", routes.GetTask)                                // task detail
diff --git a/backend/model/spider.go b/backend/model/spider.go
index 53c5ab1f..559234fc 100644
--- a/backend/model/spider.go
+++ b/backend/model/spider.go
@@ -1,11 +1,16 @@
 package model
 
 import (
+	"crawlab/constants"
 	"crawlab/database"
 	"crawlab/entity"
+	"errors"
 	"github.com/apex/log"
 	"github.com/globalsign/mgo"
 	"github.com/globalsign/mgo/bson"
+	"gopkg.in/yaml.v2"
+	"io/ioutil"
+	"path/filepath"
 	"runtime/debug"
 	"time"
 )
@@ -25,14 +30,15 @@ type Spider struct {
 	Site   string `json:"site" bson:"site"`     // spider website
 	Envs   []Env  `json:"envs" bson:"envs"`     // environment variables
 	Remark string `json:"remark" bson:"remark"` // remark
+	Src    string `json:"src" bson:"src"`       // source code location
 
 	// custom spider
-	Src string `json:"src" bson:"src"` // source code location
 	Cmd string `json:"cmd" bson:"cmd"` // execute command
 
 	// frontend display
-	LastRunTs  time.Time `json:"last_run_ts"` // last run time
-	LastStatus string    `json:"last_status"` // last run status
+	LastRunTs  time.Time               `json:"last_run_ts"` // last run time
+	LastStatus string                  `json:"last_status"` // last run status
+	Config     entity.ConfigSpiderData `json:"config"`      // configurable spider config
 
 	// timestamps
 	CreateTs time.Time `json:"create_ts" bson:"create_ts"`
@@ -161,15 +167,25 @@ func GetSpider(id bson.ObjectId) (Spider, error) {
 	s, c := database.GetCol("spiders")
 	defer s.Close()
 
-	var result Spider
-	if err := c.FindId(id).One(&result); err != nil {
+	// get the spider
+	var spider Spider
+	if err := c.FindId(id).One(&spider); err != nil {
 		if err != mgo.ErrNotFound {
 			log.Errorf("get spider error: %s, id: %id", err.Error(), id.Hex())
 			debug.PrintStack()
 		}
-		return result, err
+		return spider, err
 	}
-	return result, nil
+
+	// if it is a configurable spider, fetch its config
+	if spider.Type == constants.Configurable {
+		config, err := GetConfigSpiderData(spider)
+		if err != nil {
+			return spider, err
+		}
+		spider.Config = config
+	}
+	return spider, nil
 }
 
 // update spider
@@ -269,3 +285,29 @@ func GetSpiderTypes() ([]*entity.SpiderType, error) {
 
 	return types, nil
 }
+
+func GetConfigSpiderData(spider Spider) (entity.ConfigSpiderData, error) {
+	// construct config data
+	configData := entity.ConfigSpiderData{}
+
+	// validate spider type
+	if spider.Type != constants.Configurable {
+		return configData, errors.New("not a configurable spider")
+	}
+
+	// Spiderfile path
+	sfPath := filepath.Join(spider.Src, "Spiderfile")
+
+	// read the YAML file
+	yamlFile, err := ioutil.ReadFile(sfPath)
+	if err != nil {
+		return configData, err
+	}
+
+	// deserialize
+	if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
+		return configData, err
+	}
+
+	return configData, nil
+}
diff --git a/backend/routes/config_spider.go b/backend/routes/config_spider.go
index 6261e2e5..085f5541 100644
--- a/backend/routes/config_spider.go
+++ b/backend/routes/config_spider.go
@@ -252,34 +252,9 @@ func GetConfigSpiderConfig(c *gin.Context) {
 		return
 	}
 
-	// validate spider type
-	if spider.Type != constants.Configurable {
-		HandleErrorF(http.StatusBadRequest, c, "not a configurable spider")
-		return
-	}
-
-	// Spiderfile path
-	sfPath := filepath.Join(spider.Src, "Spiderfile")
-
-	// construct config data
-	configData := entity.ConfigSpiderData{}
-
-	// read the YAML file
-	yamlFile, err := ioutil.ReadFile(sfPath)
-	if err != nil {
-		HandleError(http.StatusInternalServerError, c, err)
-		return
-	}
-
-	// deserialize
-	if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
-		HandleError(http.StatusInternalServerError, c, err)
-		return
-	}
-
 	c.JSON(http.StatusOK, Response{
 		Status:  "ok",
 		Message: "success",
-		Data:    configData,
+		Data:    spider.Config,
 	})
 }
diff --git a/backend/routes/spider.go b/backend/routes/spider.go
index d351f1bb..588811e3 100644
--- a/backend/routes/spider.go
+++ b/backend/routes/spider.go
@@ -34,7 +34,7 @@ func GetSpiderList(c *gin.Context) {
 		"name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}},
 	}
 
-	if t != "" {
+	if t != "" && t != "all" {
 		filter["type"] = t
 	}
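
Note on the new GetConfigSpiderData helper added above: it joins the spider's source directory (spider.Src) with "Spiderfile", reads that file, and unmarshals the YAML into entity.ConfigSpiderData, which GetSpider then attaches to spider.Config so the /config_spiders/:id/config route can return it directly. The standalone sketch below mirrors that load-and-unmarshal step outside of Crawlab; the ConfigSpiderData struct, its fields (Name, StartUrl, Fields), and the sample path are illustrative assumptions only and are not taken from the real entity package.

    // Minimal sketch of the Spiderfile loading flow, assuming a hypothetical
    // config struct; the real entity.ConfigSpiderData definition may differ.
    package main

    import (
    	"errors"
    	"fmt"
    	"io/ioutil"
    	"path/filepath"

    	"gopkg.in/yaml.v2"
    )

    // ConfigSpiderData is a stand-in for entity.ConfigSpiderData; the field
    // names below are illustrative assumptions, not the actual Crawlab schema.
    type ConfigSpiderData struct {
    	Name     string   `yaml:"name"`
    	StartUrl string   `yaml:"start_url"`
    	Fields   []string `yaml:"fields"`
    }

    // loadSpiderfile mirrors GetConfigSpiderData: join the spider source
    // directory with "Spiderfile", read the file, and unmarshal the YAML.
    func loadSpiderfile(src string) (ConfigSpiderData, error) {
    	configData := ConfigSpiderData{}
    	if src == "" {
    		return configData, errors.New("empty spider source directory")
    	}
    	sfPath := filepath.Join(src, "Spiderfile")
    	yamlFile, err := ioutil.ReadFile(sfPath)
    	if err != nil {
    		return configData, err
    	}
    	if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
    		return configData, err
    	}
    	return configData, nil
    }

    func main() {
    	// "/path/to/spider" is a placeholder; point it at a directory that
    	// contains a Spiderfile to try the sketch.
    	config, err := loadSpiderfile("/path/to/spider")
    	if err != nil {
    		fmt.Println("failed to load Spiderfile:", err)
    		return
    	}
    	fmt.Printf("loaded config: %+v\n", config)
    }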