diff --git a/backend/routes/config_spider.go b/backend/routes/config_spider.go index 4c65964e..52b61ff5 100644 --- a/backend/routes/config_spider.go +++ b/backend/routes/config_spider.go @@ -181,6 +181,7 @@ func UploadConfigSpider(c *gin.Context) { // 根据序列化后的数据处理爬虫文件 if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { HandleError(http.StatusInternalServerError, c, err) + return } c.JSON(http.StatusOK, Response{ diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 6f4d88b3..53990308 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -363,7 +363,12 @@ func UploadSpider(c *gin.Context) { var gfFile model.GridFs if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil { // 已经存在文件,则删除 - _ = gf.RemoveId(gfFile.Id) + if err := gf.RemoveId(gfFile.Id); err != nil { + log.Errorf("remove grid fs error: %s", err.Error()) + debug.PrintStack() + HandleError(http.StatusInternalServerError, c, err) + return + } } // 上传到GridFs @@ -506,22 +511,32 @@ func UploadSpiderFromId(c *gin.Context) { // 判断文件是否已经存在 var gfFile model.GridFs - if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil { + if err := gf.Find(bson.M{"filename": spider.Name}).One(&gfFile); err == nil { // 已经存在文件,则删除 - _ = gf.RemoveId(gfFile.Id) + if err := gf.RemoveId(gfFile.Id); err != nil { + log.Errorf("remove grid fs error: " + err.Error()) + debug.PrintStack() + HandleError(http.StatusInternalServerError, c, err) + return + } } // 上传到GridFs - fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) + fid, err := services.UploadToGridFs(spider.Name, tmpFilePath) if err != nil { log.Errorf("upload to grid fs error: %s", err.Error()) debug.PrintStack() + HandleError(http.StatusInternalServerError, c, err) return } // 更新file_id spider.FileId = fid - _ = spider.Save() + if err := spider.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } // 发起同步 services.PublishSpider(spider) diff --git a/backend/services/config_spider.go b/backend/services/config_spider.go index 29e1c2ca..68c170df 100644 --- a/backend/services/config_spider.go +++ b/backend/services/config_spider.go @@ -17,6 +17,7 @@ import ( "gopkg.in/yaml.v2" "os" "path/filepath" + "runtime/debug" "strings" ) @@ -214,7 +215,11 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con var gfFile model.GridFs if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil { // 已经存在文件,则删除 - _ = gf.RemoveId(gfFile.Id) + if err := gf.RemoveId(gfFile.Id); err != nil { + log.Errorf("remove grid fs error: %s", err.Error()) + debug.PrintStack() + return err + } } // 上传到GridFs diff --git a/backend/services/spider.go b/backend/services/spider.go index f9623316..4c3f6d45 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -60,7 +60,12 @@ func UploadSpiderToGridFsFromMaster(spider model.Spider) error { var gfFile model.GridFs if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil { // 已经存在文件,则删除 - _ = gf.RemoveId(gfFile.Id) + log.Errorf(gfFile.Id.Hex() + " already exists. removing...") + if err := gf.RemoveId(gfFile.Id); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } } // 上传到GridFs @@ -72,7 +77,9 @@ func UploadSpiderToGridFsFromMaster(spider model.Spider) error { // 保存爬虫 FileId spider.FileId = fid - _ = spider.Save() + if err := spider.Save(); err != nil { + return err + } // 获取爬虫同步实例 spiderSync := spider_handler.SpiderSync{ @@ -102,27 +109,33 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err er // 创建一个新GridFS文件 f, err := gf.Create(fileName) if err != nil { + log.Errorf("create file error: " + err.Error()) debug.PrintStack() return } - //分片读取爬虫zip文件 + // 分片读取爬虫zip文件 err = ReadFileByStep(filePath, WriteToGridFS, f) if err != nil { + log.Errorf("read file by step error: " + err.Error()) debug.PrintStack() return "", err } // 删除zip文件 if err = os.Remove(filePath); err != nil { + log.Errorf("remove file error: " + err.Error()) debug.PrintStack() return } + // 关闭文件,提交写入 if err = f.Close(); err != nil { + log.Errorf("close file error: " + err.Error()) debug.PrintStack() return "", err } + // 文件ID fid = f.Id().(bson.ObjectId) @@ -183,8 +196,14 @@ func PublishSpider(spider model.Spider) { // 查询gf file,不存在则标记为爬虫文件不存在 gfFile = model.GetGridFs(spider.FileId) if gfFile == nil { - spider.FileId = constants.ObjectIdNull - _ = spider.Save() + log.Errorf("get grid fs file error: cannot find grid fs file") + log.Errorf("grid fs file_id: " + spider.FileId.Hex()) + log.Errorf("spider_name: " + spider.Name) + debug.PrintStack() + //spider.FileId = constants.ObjectIdNull + //if err := spider.Save(); err != nil { + // return + //} return } } @@ -208,6 +227,7 @@ func PublishSpider(spider model.Spider) { spiderSync.CheckIsScrapy() return } + // md5文件不存在,则下载 md5 := filepath.Join(path, spider_handler.Md5File) if !utils.Exists(md5) { @@ -215,6 +235,7 @@ func PublishSpider(spider model.Spider) { spiderSync.RemoveDownCreate(gfFile.Md5) return } + // md5值不一样,则下载 md5Str := utils.GetSpiderMd5Str(md5) if gfFile.Md5 != md5Str { @@ -412,7 +433,7 @@ func CopySpider(spider model.Spider, newName string) error { return nil } -func InitDemoSpiders () { +func InitDemoSpiders() { // 添加Demo爬虫 templateSpidersDir := "./template/spiders" for _, info := range utils.ListDir(templateSpidersDir) { diff --git a/backend/services/task.go b/backend/services/task.go index 469fa8da..c1e1fa33 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -455,7 +455,7 @@ func ExecuteTask(id int) { } // 开始执行任务 - log.Infof(GetWorkerPrefix(id) + "开始执行任务(ID:" + t.Id + ")") + log.Infof(GetWorkerPrefix(id) + "start task (id:" + t.Id + ")") // 储存任务 _ = t.Save() @@ -529,7 +529,7 @@ func ExecuteTask(id int) { // 统计时长 duration := toc.Sub(tic).Seconds() durationStr := strconv.FormatFloat(duration, 'f', 6, 64) - log.Infof(GetWorkerPrefix(id) + "任务(ID:" + t.Id + ")" + "执行完毕. 消耗时间:" + durationStr + "秒") + log.Infof(GetWorkerPrefix(id) + "task (id:" + t.Id + ")" + " finished. elapsed:" + durationStr + " sec") } func SpiderFileCheck(t model.Task, spider model.Spider) error {