优化可配置爬虫前端界面

This commit is contained in:
marvzhang
2019-11-28 21:36:17 +08:00
parent bc81462a80
commit ba4f5fd20c
11 changed files with 186 additions and 93 deletions

View File

@@ -140,11 +140,12 @@ func main() {
authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据
authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型
// 可配置爬虫
authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
authGroup.POST("/config_spiders/:id/spiderfile", routes.PostConfigSpiderSpiderfile) // 上传可配置爬虫
// 任务
authGroup.GET("/tasks", routes.GetTaskList) // 任务列表
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情

View File

@@ -216,7 +216,7 @@ func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string {
// 如果为CSS
if f.Attr == "" {
// 文本
return fmt.Sprintf(`css('%s::text()')`, f.Css)
return fmt.Sprintf(`css('%s::text')`, f.Css)
} else {
// 属性
return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr)
@@ -242,9 +242,9 @@ func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string {
if stage.PageCss != "" {
// 如果为CSS
return fmt.Sprintf(`css(%s::attr("%s"))`, stage.PageCss, pageAttr)
return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr)
} else {
// 如果为XPath
return fmt.Sprintf(`xpath(%s/@%s)`, stage.PageXpath, pageAttr)
return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr)
}
}

View File

@@ -158,6 +158,54 @@ func UploadConfigSpider(c *gin.Context) {
})
}
// PostConfigSpiderSpiderfile handles POST /config_spiders/:id/spiderfile.
//
// It takes the raw Spiderfile text from the "content" field of the JSON
// request body, parses it as YAML into entity.ConfigSpiderData, hands the
// parsed data to services.ProcessSpiderFilesFromConfigData, and finally
// writes the submitted text verbatim to "Spiderfile" under spider.Src.
//
// Failures are reported through HandleError / HandleErrorF: StatusBadRequest
// for an unparsable body, an invalid or unknown spider id, or invalid YAML;
// StatusInternalServerError when processing or writing the file fails. On
// success it replies with StatusOK and status "ok".
func PostConfigSpiderSpiderfile(c *gin.Context) {
	type Body struct {
		Content string `json:"content"`
	}

	id := c.Param("id")

	// Read the file content from the request body.
	var reqBody Body
	if err := c.ShouldBindJSON(&reqBody); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}
	content := reqBody.Content

	// bson.ObjectIdHex panics on anything that is not a valid hex ObjectId,
	// so reject malformed ids explicitly before converting.
	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("invalid spider id: %s", id))
		return
	}

	// Look up the spider.
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
		return
	}

	// Parse the submitted Spiderfile (YAML) into config data.
	var configData entity.ConfigSpiderData
	if err := yaml.Unmarshal([]byte(content), &configData); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}

	// Process the spider's files from the parsed config data.
	if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// Persist the Spiderfile exactly as submitted; this happens only after
	// the config data has been processed successfully.
	if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), []byte(content), os.ModePerm); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}
func PostConfigSpiderConfig(c *gin.Context) {
id := c.Param("id")
@@ -166,20 +214,27 @@ func PostConfigSpiderConfig(c *gin.Context) {
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
return
}
// 反序列化配置数据
var configData entity.ConfigSpiderData
if err := c.ShouldBindJSON(&configData); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
// 根据序列化后的数据处理爬虫文件
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// TODO: 替换Spiderfile文件
// 替换Spiderfile文件
if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",

View File

@@ -13,6 +13,7 @@ import (
"github.com/globalsign/mgo/bson"
uuid "github.com/satori/go.uuid"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
"os"
"path/filepath"
"strings"
@@ -232,3 +233,28 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con
return nil
}
// GenerateSpiderfileFromConfigData serializes configData to YAML and writes
// it to the "Spiderfile" located under spider.Src, replacing any previous
// content. The file is created if it does not exist yet.
//
// It returns the first error encountered while marshalling, opening,
// writing, or closing the file.
func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
	// Path of the Spiderfile.
	sfPath := filepath.Join(spider.Src, "Spiderfile")

	// Render the YAML content.
	sfContentByte, err := yaml.Marshal(configData)
	if err != nil {
		return err
	}

	// Open for writing, truncating old content. O_CREATE makes generation
	// work even when the spider directory has no Spiderfile yet.
	f, err := os.OpenFile(sfPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)
	if err != nil {
		return err
	}

	// Write the content.
	if _, err := f.Write(sfContentByte); err != nil {
		// The write error is the one worth reporting; the close error is
		// deliberately dropped on this path.
		_ = f.Close()
		return err
	}

	// A failed Close on a file that was just written can mean lost data,
	// so surface it to the caller instead of discarding it in a defer.
	return f.Close()
}