mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
fix 上传的问题
This commit is contained in:
@@ -24,8 +24,6 @@ type Spider struct {
|
||||
Site string `json:"site"` // 爬虫网站
|
||||
Envs []Env `json:"envs" bson:"envs"` // 环境变量
|
||||
Remark string `json:"remark"` // 备注
|
||||
Md5 string `json:"md_5" bson:"md5"` // ZIP文件的MD5
|
||||
OldMd5 string `json:"old_md_5" bson:"old_md5"` //上一次的MD5值
|
||||
// 自定义爬虫
|
||||
Src string `json:"src" bson:"src"` // 源码位置
|
||||
Cmd string `json:"cmd" bson:"cmd"` // 执行命令
|
||||
|
||||
@@ -148,7 +148,7 @@ func PutSpider(c *gin.Context) {
|
||||
}
|
||||
|
||||
// 上传到GridFs
|
||||
fid, md5, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath)
|
||||
fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath)
|
||||
if err != nil {
|
||||
log.Errorf("upload to grid fs error: %s", err.Error())
|
||||
debug.PrintStack()
|
||||
@@ -166,13 +166,8 @@ func PutSpider(c *gin.Context) {
|
||||
Type: constants.Customized,
|
||||
Src: filepath.Join(srcPath, spiderName),
|
||||
FileId: fid,
|
||||
Md5: md5,
|
||||
}
|
||||
_ = spider.Add()
|
||||
} else {
|
||||
spider.OldMd5 = spider.Md5
|
||||
spider.Md5 = md5
|
||||
_ = spider.Save()
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
|
||||
@@ -29,7 +29,7 @@ type SpiderUploadMessage struct {
|
||||
}
|
||||
|
||||
// 上传zip文件到GridFS
|
||||
func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 string, err error) {
|
||||
func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) {
|
||||
fid = ""
|
||||
|
||||
// 获取MongoDB GridFS连接
|
||||
@@ -47,7 +47,7 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 st
|
||||
err = ReadFileByStep(filePath, WriteToGridFS, f)
|
||||
if err != nil {
|
||||
debug.PrintStack()
|
||||
return "", "", err
|
||||
return "", err
|
||||
}
|
||||
|
||||
// 删除zip文件
|
||||
@@ -57,12 +57,12 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 st
|
||||
}
|
||||
// 关闭文件,提交写入
|
||||
if err = f.Close(); err != nil {
|
||||
return "", "", err
|
||||
return "", err
|
||||
}
|
||||
// 文件ID
|
||||
fid = f.Id().(bson.ObjectId)
|
||||
|
||||
return fid, f.MD5(), nil
|
||||
return fid, nil
|
||||
}
|
||||
|
||||
func WriteToGridFS(content []byte, f *mgo.GridFile) {
|
||||
@@ -105,9 +105,9 @@ func PublishAllSpiders() {
|
||||
// 遍历爬虫列表
|
||||
for _, spider := range spiders {
|
||||
// 异步发布爬虫
|
||||
go func() {
|
||||
PublishSpider(spider)
|
||||
}()
|
||||
go func(s model.Spider) {
|
||||
PublishSpider(s)
|
||||
}(spider)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,30 +119,34 @@ func PublishSpider(spider model.Spider) {
|
||||
_ = model.RemoveSpider(spider.FileId)
|
||||
return
|
||||
}
|
||||
spiderSync := spider_handler.SpiderSync{}
|
||||
defer spiderSync.CreateMd5File(gfFile.Md5, spider.Name)
|
||||
spiderSync := spider_handler.SpiderSync{
|
||||
Spider: spider,
|
||||
}
|
||||
|
||||
//目录不存在,则直接下载
|
||||
path := filepath.Join(viper.GetString("spider.path"), spider.Name)
|
||||
if !utils.Exists(path) {
|
||||
log.Infof("path not found: %s", path)
|
||||
spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex())
|
||||
spiderSync.Download()
|
||||
spiderSync.CreateMd5File(gfFile.Md5)
|
||||
return
|
||||
}
|
||||
// md5文件不存在,则下载
|
||||
md5 := filepath.Join(path, spider_handler.Md5File)
|
||||
if !utils.Exists(md5) {
|
||||
log.Infof("md5.txt file not found: %s", md5)
|
||||
spiderSync.RemoveSpiderFile(spider.Name)
|
||||
spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex())
|
||||
log.Infof("md5 file not found: %s", md5)
|
||||
spiderSync.RemoveSpiderFile()
|
||||
spiderSync.Download()
|
||||
spiderSync.CreateMd5File(gfFile.Md5)
|
||||
return
|
||||
}
|
||||
// md5值不一样,则下载
|
||||
md5Str := utils.ReadFile(md5)
|
||||
if spider.Md5 != md5Str {
|
||||
log.Infof("md5 is different: %s:%s ", md5Str, md5)
|
||||
spiderSync.RemoveSpiderFile(spider.Name)
|
||||
spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex())
|
||||
if gfFile.Md5 != md5Str {
|
||||
log.Infof("md5 is different, fileName=%s, file-md5=%s , gf-file-md5=%s ", spider.Name, md5Str, gfFile.Md5)
|
||||
spiderSync.RemoveSpiderFile()
|
||||
spiderSync.Download()
|
||||
spiderSync.CreateMd5File(gfFile.Md5)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,17 +19,18 @@ const (
|
||||
)
|
||||
|
||||
type SpiderSync struct {
|
||||
Spider model.Spider
|
||||
}
|
||||
|
||||
func (s *SpiderSync) CreateMd5File(md5 string, spiderName string) {
|
||||
path := filepath.Join(viper.GetString("spider.path"), spiderName)
|
||||
func (s *SpiderSync) CreateMd5File(md5 string) {
|
||||
path := filepath.Join(viper.GetString("spider.path"), s.Spider.Name)
|
||||
utils.CreateFilePath(path)
|
||||
|
||||
fileName := filepath.Join(path, Md5File)
|
||||
file := utils.OpenFile(fileName)
|
||||
defer file.Close()
|
||||
if file != nil {
|
||||
if _, err := file.WriteString(md5); err != nil {
|
||||
if _, err := file.WriteString(md5 + "\n"); err != nil {
|
||||
log.Errorf("file write string error: %s", err.Error())
|
||||
debug.PrintStack()
|
||||
}
|
||||
@@ -43,11 +44,11 @@ func (s *SpiderSync) GetLockDownloadKey(spiderId string) string {
|
||||
}
|
||||
|
||||
// 删除本地文件
|
||||
func (s *SpiderSync) RemoveSpiderFile(spiderName string) {
|
||||
func (s *SpiderSync) RemoveSpiderFile() {
|
||||
//爬虫文件有变化,先删除本地文件
|
||||
_ = os.Remove(filepath.Join(
|
||||
viper.GetString("spider.path"),
|
||||
spiderName,
|
||||
s.Spider.Name,
|
||||
))
|
||||
}
|
||||
|
||||
@@ -62,7 +63,16 @@ func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, str
|
||||
}
|
||||
|
||||
// 下载爬虫
|
||||
func (s *SpiderSync) Download(spiderId string, fileId string) {
|
||||
func (s *SpiderSync) Download() {
|
||||
spiderId := s.Spider.Id.Hex()
|
||||
fileId := s.Spider.FileId.Hex()
|
||||
isDownloading, key := s.CheckDownLoading(spiderId, fileId)
|
||||
if isDownloading {
|
||||
log.Infof("spider is downloading, spider_id: %s", spiderId)
|
||||
return
|
||||
} else {
|
||||
_ = database.RedisClient.HSet("spider", key, key)
|
||||
}
|
||||
|
||||
session, gf := database.GetGridFs("files")
|
||||
defer session.Close()
|
||||
@@ -85,18 +95,14 @@ func (s *SpiderSync) Download(spiderId string, fileId string) {
|
||||
}
|
||||
}
|
||||
// 创建临时文件
|
||||
|
||||
tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip")
|
||||
tmpFile, err := os.OpenFile(tmpFilePath, os.O_CREATE|os.O_WRONLY, os.ModePerm)
|
||||
if err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
tmpFile := utils.OpenFile(tmpFilePath)
|
||||
defer tmpFile.Close()
|
||||
|
||||
// 将该文件写入临时文件
|
||||
if _, err := io.Copy(tmpFile, f); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
log.Errorf("copy file error: %s, file_id: %s", err.Error(), f.Id())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
@@ -124,4 +130,7 @@ func (s *SpiderSync) Download(spiderId string, fileId string) {
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("del key : %s", key)
|
||||
_ = database.RedisClient.HDel("spider", key)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user