mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
优化添加爬虫逻辑
This commit is contained in:
@@ -131,9 +131,11 @@ func main() {
|
||||
// 爬虫
|
||||
authGroup.GET("/spiders", routes.GetSpiderList) // 爬虫列表
|
||||
authGroup.GET("/spiders/:id", routes.GetSpider) // 爬虫详情
|
||||
authGroup.POST("/spiders", routes.PutSpider) // 上传爬虫 TODO: 名称不对
|
||||
authGroup.PUT("/spiders", routes.PutSpider) // 添加爬虫
|
||||
authGroup.POST("/spiders", routes.UploadSpider) // 上传爬虫
|
||||
authGroup.POST("/spiders/:id", routes.PostSpider) // 修改爬虫
|
||||
authGroup.POST("/spiders/:id/publish", routes.PublishSpider) // 发布爬虫
|
||||
authGroup.POST("/spiders/:id/upload", routes.UploadSpiderFromId) // 上传爬虫(ID)
|
||||
authGroup.DELETE("/spiders/:id", routes.DeleteSpider) // 删除爬虫
|
||||
authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks) // 爬虫任务列表
|
||||
authGroup.GET("/spiders/:id/file", routes.GetSpiderFile) // 爬虫文件读取
|
||||
|
||||
@@ -119,6 +119,58 @@ func PublishSpider(c *gin.Context) {
|
||||
}
|
||||
|
||||
func PutSpider(c *gin.Context) {
|
||||
var spider model.Spider
|
||||
if err := c.ShouldBindJSON(&spider); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 爬虫名称不能为空
|
||||
if spider.Name == "" {
|
||||
HandleErrorF(http.StatusBadRequest, c, "spider name should not be empty")
|
||||
return
|
||||
}
|
||||
|
||||
// 判断爬虫是否存在
|
||||
if spider := model.GetSpiderByName(spider.Name); spider != nil {
|
||||
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("spider for '%s' already exists", spider.Name))
|
||||
return
|
||||
}
|
||||
|
||||
// 设置爬虫类别
|
||||
spider.Type = constants.Customized
|
||||
|
||||
// 将FileId置空
|
||||
spider.FileId = bson.ObjectIdHex(constants.ObjectIdNull)
|
||||
|
||||
// 创建爬虫目录
|
||||
spiderDir := filepath.Join(viper.GetString("spider.path"), spider.Name)
|
||||
if utils.Exists(spiderDir) {
|
||||
if err := os.RemoveAll(spiderDir); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
if err := os.MkdirAll(spiderDir, 0777); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
spider.Src = spiderDir
|
||||
|
||||
// 添加爬虫到数据库
|
||||
if err := spider.Add(); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: spider,
|
||||
})
|
||||
}
|
||||
|
||||
func UploadSpider(c *gin.Context) {
|
||||
// 从body中获取文件
|
||||
uploadFile, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
@@ -203,6 +255,86 @@ func PutSpider(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
func UploadSpiderFromId(c *gin.Context) {
|
||||
// TODO: 与 UploadSpider 部分逻辑重复,需要优化代码
|
||||
// 爬虫ID
|
||||
spiderId := c.Param("id")
|
||||
|
||||
// 获取爬虫
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(spiderId))
|
||||
if err != nil {
|
||||
if err == mgo.ErrNotFound {
|
||||
HandleErrorF(http.StatusNotFound, c, "cannot find spider")
|
||||
} else {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// 从body中获取文件
|
||||
uploadFile, err := c.FormFile("file")
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 如果不为zip文件,返回错误
|
||||
if !strings.HasSuffix(uploadFile.Filename, ".zip") {
|
||||
debug.PrintStack()
|
||||
HandleError(http.StatusBadRequest, c, errors.New("Not a valid zip file"))
|
||||
return
|
||||
}
|
||||
|
||||
// 以防tmp目录不存在
|
||||
tmpPath := viper.GetString("other.tmppath")
|
||||
if !utils.Exists(tmpPath) {
|
||||
if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil {
|
||||
log.Error("mkdir other.tmppath dir error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
HandleError(http.StatusBadRequest, c, errors.New("Mkdir other.tmppath dir error"))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 保存到本地临时文件
|
||||
randomId := uuid.NewV4()
|
||||
tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip")
|
||||
if err := c.SaveUploadedFile(uploadFile, tmpFilePath); err != nil {
|
||||
log.Error("save upload file error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 获取 GridFS 实例
|
||||
s, gf := database.GetGridFs("files")
|
||||
defer s.Close()
|
||||
|
||||
// 判断文件是否已经存在
|
||||
var gfFile model.GridFs
|
||||
if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil {
|
||||
// 已经存在文件,则删除
|
||||
_ = gf.RemoveId(gfFile.Id)
|
||||
}
|
||||
|
||||
// 上传到GridFs
|
||||
fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath)
|
||||
if err != nil {
|
||||
log.Errorf("upload to grid fs error: %s", err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
|
||||
// 更新file_id
|
||||
spider.FileId = fid
|
||||
_ = spider.Save()
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func DeleteSpider(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user