From 5416b7ac60de5eb2ac3ea0d7606cb072daf72698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 19:44:12 +0800 Subject: [PATCH] =?UTF-8?q?fix=20=E4=B8=8A=E4=BC=A0=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/spider.go | 2 -- backend/routes/spider.go | 7 +---- backend/services/spider.go | 38 +++++++++++++---------- backend/services/spider_handler/spider.go | 35 +++++++++++++-------- 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/backend/model/spider.go b/backend/model/spider.go index c498287d..dd7d505c 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -24,8 +24,6 @@ type Spider struct { Site string `json:"site"` // 爬虫网站 Envs []Env `json:"envs" bson:"envs"` // 环境变量 Remark string `json:"remark"` // 备注 - Md5 string `json:"md_5" bson:"md5"` // ZIP文件的MD5 - OldMd5 string `json:"old_md_5" bson:"old_md5"` //上一次的MD5值 // 自定义爬虫 Src string `json:"src" bson:"src"` // 源码位置 Cmd string `json:"cmd" bson:"cmd"` // 执行命令 diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 9b8bd50d..76f89bf7 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -148,7 +148,7 @@ func PutSpider(c *gin.Context) { } // 上传到GridFs - fid, md5, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) + fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) if err != nil { log.Errorf("upload to grid fs error: %s", err.Error()) debug.PrintStack() @@ -166,13 +166,8 @@ func PutSpider(c *gin.Context) { Type: constants.Customized, Src: filepath.Join(srcPath, spiderName), FileId: fid, - Md5: md5, } _ = spider.Add() - } else { - spider.OldMd5 = spider.Md5 - spider.Md5 = md5 - _ = spider.Save() } c.JSON(http.StatusOK, Response{ diff --git a/backend/services/spider.go b/backend/services/spider.go index 34693bf5..90925346 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -29,7 +29,7 @@ type SpiderUploadMessage struct { } // 上传zip文件到GridFS -func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 string, err error) { +func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) { fid = "" // 获取MongoDB GridFS连接 @@ -47,7 +47,7 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 st err = ReadFileByStep(filePath, WriteToGridFS, f) if err != nil { debug.PrintStack() - return "", "", err + return "", err } // 删除zip文件 @@ -57,12 +57,12 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 st } // 关闭文件,提交写入 if err = f.Close(); err != nil { - return "", "", err + return "", err } // 文件ID fid = f.Id().(bson.ObjectId) - return fid, f.MD5(), nil + return fid, nil } func WriteToGridFS(content []byte, f *mgo.GridFile) { @@ -105,9 +105,9 @@ func PublishAllSpiders() { // 遍历爬虫列表 for _, spider := range spiders { // 异步发布爬虫 - go func() { - PublishSpider(spider) - }() + go func(s model.Spider) { + PublishSpider(s) + }(spider) } } @@ -119,30 +119,34 @@ func PublishSpider(spider model.Spider) { _ = model.RemoveSpider(spider.FileId) return } - spiderSync := spider_handler.SpiderSync{} - defer spiderSync.CreateMd5File(gfFile.Md5, spider.Name) + spiderSync := spider_handler.SpiderSync{ + Spider: spider, + } //目录不存在,则直接下载 path := filepath.Join(viper.GetString("spider.path"), spider.Name) if !utils.Exists(path) { log.Infof("path not found: %s", path) - spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) return } // md5文件不存在,则下载 md5 := filepath.Join(path, spider_handler.Md5File) if !utils.Exists(md5) { - log.Infof("md5.txt file not found: %s", md5) - spiderSync.RemoveSpiderFile(spider.Name) - spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) + log.Infof("md5 file not found: %s", md5) + spiderSync.RemoveSpiderFile() + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) return } // md5值不一样,则下载 md5Str := utils.ReadFile(md5) - if spider.Md5 != md5Str { - log.Infof("md5 is different: %s:%s ", md5Str, md5) - spiderSync.RemoveSpiderFile(spider.Name) - spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) + if gfFile.Md5 != md5Str { + log.Infof("md5 is different, fileName=%s, file-md5=%s , gf-file-md5=%s ", spider.Name, md5Str, gfFile.Md5) + spiderSync.RemoveSpiderFile() + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) return } } diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index c78b3d5b..87dedeb0 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -19,17 +19,18 @@ const ( ) type SpiderSync struct { + Spider model.Spider } -func (s *SpiderSync) CreateMd5File(md5 string, spiderName string) { - path := filepath.Join(viper.GetString("spider.path"), spiderName) +func (s *SpiderSync) CreateMd5File(md5 string) { + path := filepath.Join(viper.GetString("spider.path"), s.Spider.Name) utils.CreateFilePath(path) fileName := filepath.Join(path, Md5File) file := utils.OpenFile(fileName) defer file.Close() if file != nil { - if _, err := file.WriteString(md5); err != nil { + if _, err := file.WriteString(md5 + "\n"); err != nil { log.Errorf("file write string error: %s", err.Error()) debug.PrintStack() } @@ -43,11 +44,11 @@ func (s *SpiderSync) GetLockDownloadKey(spiderId string) string { } // 删除本地文件 -func (s *SpiderSync) RemoveSpiderFile(spiderName string) { +func (s *SpiderSync) RemoveSpiderFile() { //爬虫文件有变化,先删除本地文件 _ = os.Remove(filepath.Join( viper.GetString("spider.path"), - spiderName, + s.Spider.Name, )) } @@ -62,7 +63,16 @@ func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, str } // 下载爬虫 -func (s *SpiderSync) Download(spiderId string, fileId string) { +func (s *SpiderSync) Download() { + spiderId := s.Spider.Id.Hex() + fileId := s.Spider.FileId.Hex() + isDownloading, key := s.CheckDownLoading(spiderId, fileId) + if isDownloading { + log.Infof("spider is downloading, spider_id: %s", spiderId) + return + } else { + _ = database.RedisClient.HSet("spider", key, key) + } session, gf := database.GetGridFs("files") defer session.Close() @@ -85,18 +95,14 @@ func (s *SpiderSync) Download(spiderId string, fileId string) { } } // 创建临时文件 + tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - tmpFile, err := os.OpenFile(tmpFilePath, os.O_CREATE|os.O_WRONLY, os.ModePerm) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } + tmpFile := utils.OpenFile(tmpFilePath) defer tmpFile.Close() // 将该文件写入临时文件 if _, err := io.Copy(tmpFile, f); err != nil { - log.Errorf(err.Error()) + log.Errorf("copy file error: %s, file_id: %s", err.Error(), f.Id()) debug.PrintStack() return } @@ -124,4 +130,7 @@ func (s *SpiderSync) Download(spiderId string, fileId string) { debug.PrintStack() return } + + log.Infof("del key : %s", key) + _ = database.RedisClient.HDel("spider", key) }