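fix 上传的问题 (fix spider upload/sync: drop the stored Spider.Md5/OldMd5 fields and compare the local md5 file against the GridFS file's MD5 instead)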

This commit is contained in:
陈景阳
2019-09-26 19:44:12 +08:00
parent 79ee09aacb
commit 5416b7ac60
4 changed files with 44 additions and 38 deletions

View File

@@ -24,8 +24,6 @@ type Spider struct {
Site string `json:"site"` // 爬虫网站
Envs []Env `json:"envs" bson:"envs"` // 环境变量
Remark string `json:"remark"` // 备注
Md5 string `json:"md_5" bson:"md5"` // ZIP文件的MD5
OldMd5 string `json:"old_md_5" bson:"old_md5"` //上一次的MD5值
// 自定义爬虫
Src string `json:"src" bson:"src"` // 源码位置
Cmd string `json:"cmd" bson:"cmd"` // 执行命令

View File

@@ -148,7 +148,7 @@ func PutSpider(c *gin.Context) {
}
// 上传到GridFs
fid, md5, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath)
fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath)
if err != nil {
log.Errorf("upload to grid fs error: %s", err.Error())
debug.PrintStack()
@@ -166,13 +166,8 @@ func PutSpider(c *gin.Context) {
Type: constants.Customized,
Src: filepath.Join(srcPath, spiderName),
FileId: fid,
Md5: md5,
}
_ = spider.Add()
} else {
spider.OldMd5 = spider.Md5
spider.Md5 = md5
_ = spider.Save()
}
c.JSON(http.StatusOK, Response{

View File

@@ -29,7 +29,7 @@ type SpiderUploadMessage struct {
}
// 上传zip文件到GridFS
func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 string, err error) {
func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) {
fid = ""
// 获取MongoDB GridFS连接
@@ -47,7 +47,7 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 st
err = ReadFileByStep(filePath, WriteToGridFS, f)
if err != nil {
debug.PrintStack()
return "", "", err
return "", err
}
// 删除zip文件
@@ -57,12 +57,12 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 st
}
// 关闭文件,提交写入
if err = f.Close(); err != nil {
return "", "", err
return "", err
}
// 文件ID
fid = f.Id().(bson.ObjectId)
return fid, f.MD5(), nil
return fid, nil
}
func WriteToGridFS(content []byte, f *mgo.GridFile) {
@@ -105,9 +105,9 @@ func PublishAllSpiders() {
// 遍历爬虫列表
for _, spider := range spiders {
// 异步发布爬虫
go func() {
PublishSpider(spider)
}()
go func(s model.Spider) {
PublishSpider(s)
}(spider)
}
}
@@ -119,30 +119,34 @@ func PublishSpider(spider model.Spider) {
_ = model.RemoveSpider(spider.FileId)
return
}
spiderSync := spider_handler.SpiderSync{}
defer spiderSync.CreateMd5File(gfFile.Md5, spider.Name)
spiderSync := spider_handler.SpiderSync{
Spider: spider,
}
//目录不存在,则直接下载
path := filepath.Join(viper.GetString("spider.path"), spider.Name)
if !utils.Exists(path) {
log.Infof("path not found: %s", path)
spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex())
spiderSync.Download()
spiderSync.CreateMd5File(gfFile.Md5)
return
}
// md5文件不存在则下载
md5 := filepath.Join(path, spider_handler.Md5File)
if !utils.Exists(md5) {
log.Infof("md5.txt file not found: %s", md5)
spiderSync.RemoveSpiderFile(spider.Name)
spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex())
log.Infof("md5 file not found: %s", md5)
spiderSync.RemoveSpiderFile()
spiderSync.Download()
spiderSync.CreateMd5File(gfFile.Md5)
return
}
// md5值不一样则下载
md5Str := utils.ReadFile(md5)
if spider.Md5 != md5Str {
log.Infof("md5 is different: %s:%s ", md5Str, md5)
spiderSync.RemoveSpiderFile(spider.Name)
spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex())
if gfFile.Md5 != md5Str {
log.Infof("md5 is different, fileName=%s, file-md5=%s , gf-file-md5=%s ", spider.Name, md5Str, gfFile.Md5)
spiderSync.RemoveSpiderFile()
spiderSync.Download()
spiderSync.CreateMd5File(gfFile.Md5)
return
}
}

View File

@@ -19,17 +19,18 @@ const (
)
type SpiderSync struct {
Spider model.Spider
}
func (s *SpiderSync) CreateMd5File(md5 string, spiderName string) {
path := filepath.Join(viper.GetString("spider.path"), spiderName)
func (s *SpiderSync) CreateMd5File(md5 string) {
path := filepath.Join(viper.GetString("spider.path"), s.Spider.Name)
utils.CreateFilePath(path)
fileName := filepath.Join(path, Md5File)
file := utils.OpenFile(fileName)
defer file.Close()
if file != nil {
if _, err := file.WriteString(md5); err != nil {
if _, err := file.WriteString(md5 + "\n"); err != nil {
log.Errorf("file write string error: %s", err.Error())
debug.PrintStack()
}
@@ -43,11 +44,11 @@ func (s *SpiderSync) GetLockDownloadKey(spiderId string) string {
}
// 删除本地文件
func (s *SpiderSync) RemoveSpiderFile(spiderName string) {
func (s *SpiderSync) RemoveSpiderFile() {
//爬虫文件有变化,先删除本地文件
_ = os.Remove(filepath.Join(
viper.GetString("spider.path"),
spiderName,
s.Spider.Name,
))
}
@@ -62,7 +63,16 @@ func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, str
}
// 下载爬虫
func (s *SpiderSync) Download(spiderId string, fileId string) {
func (s *SpiderSync) Download() {
spiderId := s.Spider.Id.Hex()
fileId := s.Spider.FileId.Hex()
isDownloading, key := s.CheckDownLoading(spiderId, fileId)
if isDownloading {
log.Infof("spider is downloading, spider_id: %s", spiderId)
return
} else {
_ = database.RedisClient.HSet("spider", key, key)
}
session, gf := database.GetGridFs("files")
defer session.Close()
@@ -85,18 +95,14 @@ func (s *SpiderSync) Download(spiderId string, fileId string) {
}
}
// 创建临时文件
tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip")
tmpFile, err := os.OpenFile(tmpFilePath, os.O_CREATE|os.O_WRONLY, os.ModePerm)
if err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return
}
tmpFile := utils.OpenFile(tmpFilePath)
defer tmpFile.Close()
// 将该文件写入临时文件
if _, err := io.Copy(tmpFile, f); err != nil {
log.Errorf(err.Error())
log.Errorf("copy file error: %s, file_id: %s", err.Error(), f.Id())
debug.PrintStack()
return
}
@@ -124,4 +130,7 @@ func (s *SpiderSync) Download(spiderId string, fileId string) {
debug.PrintStack()
return
}
log.Infof("del key : %s", key)
_ = database.RedisClient.HDel("spider", key)
}