Mirror of https://github.com/crawlab-team/crawlab.git

Commit: 优化可配置爬虫前端界面 (optimize the configurable-spider frontend UI)
@@ -140,11 +140,12 @@ func main() {
     authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // spider stats
     authGroup.GET("/spider/types", routes.GetSpiderTypes)      // spider types
     // configurable spiders
-    authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // get configurable spider config
-    authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // update configurable spider config
-    authGroup.PUT("/config_spiders", routes.PutConfigSpider) // add configurable spider
-    authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // modify configurable spider
-    authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // upload configurable spider
+    authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig)           // get configurable spider config
+    authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig)         // update configurable spider config
+    authGroup.PUT("/config_spiders", routes.PutConfigSpider)                            // add configurable spider
+    authGroup.POST("/config_spiders/:id", routes.PostConfigSpider)                      // modify configurable spider
+    authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider)             // upload configurable spider
+    authGroup.POST("/config_spiders/:id/spiderfile", routes.PostConfigSpiderSpiderfile) // upload configurable spider Spiderfile
     // tasks
     authGroup.GET("/tasks", routes.GetTaskList) // task list
     authGroup.GET("/tasks/:id", routes.GetTask) // task detail
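For context, the new /config_spiders/:id/spiderfile route accepts a JSON body whose content field holds the raw Spiderfile YAML (see the handler later in this commit). Below is a minimal client sketch; the host, port, spider id placeholder, and Authorization header are illustrative assumptions, not part of this commit.

// sketch: posting a Spiderfile to the new route (host, id and auth header are assumed)
package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "net/http"
)

func main() {
    body, _ := json.Marshal(map[string]string{
        "content": "name: example\nstart_url: http://example.com\n", // raw Spiderfile YAML
    })
    req, _ := http.NewRequest(http.MethodPost,
        "http://localhost:8000/config_spiders/<spider-id>/spiderfile", // assumed host and port
        bytes.NewReader(body))
    req.Header.Set("Content-Type", "application/json")
    req.Header.Set("Authorization", "<token>") // assumed auth header for authGroup
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    fmt.Println(resp.Status)
}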
@@ -216,7 +216,7 @@ func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string {
     // if CSS
     if f.Attr == "" {
         // text
-        return fmt.Sprintf(`css('%s::text()')`, f.Css)
+        return fmt.Sprintf(`css('%s::text')`, f.Css)
     } else {
         // attribute
         return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr)
@@ -242,9 +242,9 @@ func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string {

     if stage.PageCss != "" {
         // if CSS
-        return fmt.Sprintf(`css(%s::attr("%s"))`, stage.PageCss, pageAttr)
+        return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr)
     } else {
         // if XPath
-        return fmt.Sprintf(`xpath(%s/@%s)`, stage.PageXpath, pageAttr)
+        return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr)
     }
 }
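The two generator fixes above matter because the emitted string is spliced into generated Scrapy (Python) code: Scrapy's CSS pseudo-element for text is ::text with no parentheses, and the selector itself has to be quoted so the emitted call is valid Python. A small standalone sketch of the corrected output; buildCssExtract is an illustrative stand-in for the actual GetExtractStringFromField, not the repo's code.

// sketch: what the corrected generator emits for a CSS field
package main

import "fmt"

// buildCssExtract mirrors the fixed GetExtractStringFromField logic (illustrative only)
func buildCssExtract(css, attr string) string {
    if attr == "" {
        // text: Scrapy uses the ::text pseudo-element, with no parentheses
        return fmt.Sprintf(`css('%s::text')`, css)
    }
    // attribute: ::attr("name"), selector wrapped in quotes so the emitted Python is valid
    return fmt.Sprintf(`css('%s::attr("%s")')`, css, attr)
}

func main() {
    fmt.Println(buildCssExtract("div.title > a", ""))     // css('div.title > a::text')
    fmt.Println(buildCssExtract("div.title > a", "href")) // css('div.title > a::attr("href")')
}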
@@ -158,6 +158,54 @@ func UploadConfigSpider(c *gin.Context) {
     })
 }

+func PostConfigSpiderSpiderfile(c *gin.Context) {
+    type Body struct {
+        Content string `json:"content"`
+    }
+
+    id := c.Param("id")
+
+    // file content
+    var reqBody Body
+    if err := c.ShouldBindJSON(&reqBody); err != nil {
+        HandleError(http.StatusBadRequest, c, err)
+        return
+    }
+    content := reqBody.Content
+
+    // get the spider
+    var spider model.Spider
+    spider, err := model.GetSpider(bson.ObjectIdHex(id))
+    if err != nil {
+        HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
+        return
+    }
+
+    // deserialize
+    var configData entity.ConfigSpiderData
+    if err := yaml.Unmarshal([]byte(content), &configData); err != nil {
+        HandleError(http.StatusBadRequest, c, err)
+        return
+    }
+
+    // process the spider files based on the deserialized data
+    if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
+        HandleError(http.StatusInternalServerError, c, err)
+        return
+    }
+
+    // write the file
+    if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), []byte(content), os.ModePerm); err != nil {
+        HandleError(http.StatusInternalServerError, c, err)
+        return
+    }
+
+    c.JSON(http.StatusOK, Response{
+        Status:  "ok",
+        Message: "success",
+    })
+}
+
 func PostConfigSpiderConfig(c *gin.Context) {
     id := c.Param("id")

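The new PostConfigSpiderSpiderfile handler takes the raw YAML, unmarshals it into entity.ConfigSpiderData, regenerates the spider files, and then writes the original text back to the spider's Spiderfile. A rough sketch of the parse step follows; the struct fields here are hypothetical stand-ins, since the definition of entity.ConfigSpiderData is not part of this diff.

// sketch: the YAML parse step, with a hypothetical stand-in for entity.ConfigSpiderData
package main

import (
    "fmt"

    "gopkg.in/yaml.v2"
)

type configSpiderData struct { // hypothetical subset of fields
    Name     string `yaml:"name"`
    StartUrl string `yaml:"start_url"`
}

func main() {
    content := "name: example_spider\nstart_url: http://example.com\n"

    // deserialize, as the handler does with yaml.Unmarshal
    var data configSpiderData
    if err := yaml.Unmarshal([]byte(content), &data); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", data)
    // the handler then calls services.ProcessSpiderFilesFromConfigData
    // and writes the original content to <spider.Src>/Spiderfile
}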
@@ -166,20 +214,27 @@ func PostConfigSpiderConfig(c *gin.Context) {
     spider, err := model.GetSpider(bson.ObjectIdHex(id))
     if err != nil {
         HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
         return
     }

     // deserialize the config data
     var configData entity.ConfigSpiderData
     if err := c.ShouldBindJSON(&configData); err != nil {
         HandleError(http.StatusBadRequest, c, err)
         return
     }

     // process the spider files based on the deserialized data
     if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
         HandleError(http.StatusInternalServerError, c, err)
         return
     }

-    // TODO: replace the Spiderfile
+    // replace the Spiderfile
+    if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil {
+        HandleError(http.StatusInternalServerError, c, err)
+        return
+    }
+
     c.JSON(http.StatusOK, Response{
         Status: "ok",
@@ -13,6 +13,7 @@ import (
     "github.com/globalsign/mgo/bson"
     uuid "github.com/satori/go.uuid"
     "github.com/spf13/viper"
+    "gopkg.in/yaml.v2"
     "os"
     "path/filepath"
     "strings"
@@ -232,3 +233,28 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {

     return nil
 }
+
+func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
+    // Spiderfile path
+    sfPath := filepath.Join(spider.Src, "Spiderfile")
+
+    // generate the YAML content
+    sfContentByte, err := yaml.Marshal(configData)
+    if err != nil {
+        return err
+    }
+
+    // open the file
+    f, err := os.OpenFile(sfPath, os.O_WRONLY|os.O_TRUNC, 0777)
+    if err != nil {
+        return err
+    }
+    defer f.Close()
+
+    // write the content
+    if _, err := f.Write(sfContentByte); err != nil {
+        return err
+    }
+
+    return nil
+}
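One note on the write path above: os.OpenFile is called with O_WRONLY|O_TRUNC but without O_CREATE, so it returns an error if the Spiderfile does not exist yet; that is presumably fine here because configurable spiders start from an uploaded Spiderfile. A small alternative sketch that also creates the file, in the spirit of the ioutil.WriteFile call used by PostConfigSpiderSpiderfile; the package and function names are illustrative, not the repo's.

// sketch: an alternative writer that also creates the Spiderfile when it is missing
package sketch

import (
    "io/ioutil"
    "os"
    "path/filepath"

    "gopkg.in/yaml.v2"
)

// writeSpiderfile marshals cfg to YAML and writes it to <src>/Spiderfile,
// creating the file if it does not exist (unlike O_WRONLY|O_TRUNC alone).
func writeSpiderfile(src string, cfg interface{}) error {
    out, err := yaml.Marshal(cfg)
    if err != nil {
        return err
    }
    return ioutil.WriteFile(filepath.Join(src, "Spiderfile"), out, os.ModePerm)
}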