mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-28 17:50:56 +01:00
加入scrapy items.py支持
This commit is contained in:
@@ -6,7 +6,12 @@ type SpiderType struct {
|
||||
}
|
||||
|
||||
// ScrapySettingParam is a single Scrapy setting entry as exchanged in JSON.
//
// Each field is serialized under its lowercase JSON key ("key", "value",
// "type"). Every field is declared exactly once, with its JSON tag; the
// untagged declarations were the superseded side of the change.
type ScrapySettingParam struct {
	// Key is serialized as "key".
	Key string `json:"key"`
	// Value is serialized as "value"; it is untyped so any JSON value fits.
	Value interface{} `json:"value"`
	// Type is serialized as "type".
	// NOTE(review): presumably names the kind of Value — confirm against callers.
	Type string `json:"type"`
}
|
||||
|
||||
// ScrapyItem describes one Scrapy item class as exchanged in JSON.
type ScrapyItem struct {
	// Name is the item class name, serialized as "name".
	Name string `json:"name"`
	// Fields lists the item's field names, serialized as "fields".
	Fields []string `json:"fields"`
}
|
||||
|
||||
@@ -178,6 +178,8 @@ func main() {
|
||||
authGroup.PUT("/spiders/:id/scrapy/spiders", routes.PutSpiderScrapySpiders) // Scrapy 爬虫创建爬虫
|
||||
authGroup.GET("/spiders/:id/scrapy/settings", routes.GetSpiderScrapySettings) // Scrapy 爬虫设置
|
||||
authGroup.POST("/spiders/:id/scrapy/settings", routes.PostSpiderScrapySettings) // Scrapy 爬虫修改设置
|
||||
authGroup.GET("/spiders/:id/scrapy/items", routes.GetSpiderScrapyItems) // Scrapy 爬虫 items
|
||||
authGroup.POST("/spiders/:id/scrapy/items", routes.PostSpiderScrapyItems) // Scrapy 爬虫修改 items
|
||||
authGroup.POST("/spiders/:id/git/sync", routes.PostSpiderSyncGit) // 爬虫 Git 同步
|
||||
authGroup.POST("/spiders/:id/git/reset", routes.PostSpiderResetGit) // 爬虫 Git 重置
|
||||
}
|
||||
|
||||
@@ -974,8 +974,9 @@ func GetSpiderScrapySpiders(c *gin.Context) {
|
||||
|
||||
func PutSpiderScrapySpiders(c *gin.Context) {
|
||||
type ReqBody struct {
|
||||
Name string `json:"name"`
|
||||
Domain string `json:"domain"`
|
||||
Name string `json:"name"`
|
||||
Domain string `json:"domain"`
|
||||
Template string `json:"template"`
|
||||
}
|
||||
|
||||
id := c.Param("id")
|
||||
@@ -997,7 +998,7 @@ func PutSpiderScrapySpiders(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if err := services.CreateScrapySpider(spider, reqBody.Name, reqBody.Domain); err != nil {
|
||||
if err := services.CreateScrapySpider(spider, reqBody.Name, reqBody.Domain, reqBody.Template); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
@@ -1066,6 +1067,64 @@ func PostSpiderScrapySettings(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
func GetSpiderScrapyItems(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
if !bson.IsObjectIdHex(id) {
|
||||
HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid")
|
||||
return
|
||||
}
|
||||
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
data, err := services.GetScrapyItems(spider)
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: data,
|
||||
})
|
||||
}
|
||||
|
||||
func PostSpiderScrapyItems(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
if !bson.IsObjectIdHex(id) {
|
||||
HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid")
|
||||
return
|
||||
}
|
||||
|
||||
var reqData []entity.ScrapyItem
|
||||
if err := c.ShouldBindJSON(&reqData); err != nil {
|
||||
HandleErrorF(http.StatusBadRequest, c, "invalid request")
|
||||
return
|
||||
}
|
||||
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := services.SaveScrapyItems(spider, reqData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func PostSpiderSyncGit(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
|
||||
@@ -135,11 +135,77 @@ func SaveScrapySettings(s model.Spider, settingsData []entity.ScrapySettingParam
|
||||
return
|
||||
}
|
||||
|
||||
func CreateScrapySpider(s model.Spider, name string, domain string) (err error) {
|
||||
func GetScrapyItems(s model.Spider) (res []map[string]interface{}, err error) {
|
||||
var stdout bytes.Buffer
|
||||
var stderr bytes.Buffer
|
||||
|
||||
cmd := exec.Command("scrapy", "genspider", name, domain)
|
||||
cmd := exec.Command("crawlab", "items")
|
||||
cmd.Dir = s.Src
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
if err := cmd.Run(); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
log.Errorf(stderr.String())
|
||||
debug.PrintStack()
|
||||
return res, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal([]byte(stdout.String()), &res); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return res, err
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func SaveScrapyItems(s model.Spider, itemsData []entity.ScrapyItem) (err error) {
|
||||
// 读取 scrapy.cfg
|
||||
cfg, err := goconfig.LoadConfigFile(path.Join(s.Src, "scrapy.cfg"))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
modName, err := cfg.GetValue("settings", "default")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// 定位到 settings.py 文件
|
||||
arr := strings.Split(modName, ".")
|
||||
dirName := arr[0]
|
||||
fileName := "items"
|
||||
filePath := fmt.Sprintf("%s/%s/%s.py", s.Src, dirName, fileName)
|
||||
|
||||
// 生成文件内容
|
||||
content := ""
|
||||
content += "import scrapy\n"
|
||||
content += "\n\n"
|
||||
for _, item := range itemsData {
|
||||
content += fmt.Sprintf("class %s(scrapy.Item):\n", item.Name)
|
||||
for _, field := range item.Fields {
|
||||
content += fmt.Sprintf(" %s = scrapy.Field()\n", field)
|
||||
}
|
||||
content += "\n\n"
|
||||
}
|
||||
|
||||
// 写到 settings.py
|
||||
if err := ioutil.WriteFile(filePath, []byte(content), os.ModePerm); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 同步到GridFS
|
||||
if err := UploadSpiderToGridFsFromMaster(s); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func CreateScrapySpider(s model.Spider, name string, domain string, template string) (err error) {
|
||||
var stdout bytes.Buffer
|
||||
var stderr bytes.Buffer
|
||||
|
||||
cmd := exec.Command("scrapy", "genspider", name, domain, "-t", template)
|
||||
cmd.Dir = s.Src
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
Reference in New Issue
Block a user