mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
fix 无法及时同步爬虫的问题
This commit is contained in:
@@ -12,12 +12,11 @@ import (
|
||||
"github.com/apex/log"
|
||||
"github.com/globalsign/mgo"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
uuid "github.com/satori/go.uuid"
|
||||
"github.com/satori/go.uuid"
|
||||
"github.com/spf13/viper"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type SpiderFileData struct {
|
||||
@@ -192,21 +191,14 @@ func PublishSpider(spider model.Spider) {
|
||||
md5 := filepath.Join(path, spider_handler.Md5File)
|
||||
if !utils.Exists(md5) {
|
||||
log.Infof("md5 file not found: %s", md5)
|
||||
spiderSync.RemoveSpiderFile()
|
||||
spiderSync.Download()
|
||||
spiderSync.CreateMd5File(gfFile.Md5)
|
||||
spiderSync.RemoveDownCreate(gfFile.Md5)
|
||||
return
|
||||
}
|
||||
// md5值不一样,则下载
|
||||
md5Str := utils.ReadFileOneLine(md5)
|
||||
// 去掉空格以及换行符
|
||||
md5Str = strings.Replace(md5Str, " ", "", -1)
|
||||
md5Str = strings.Replace(md5Str, "\n", "", -1)
|
||||
md5Str := utils.GetSpiderMd5Str(md5)
|
||||
if gfFile.Md5 != md5Str {
|
||||
log.Infof("md5 is different, gf-md5:%s, file-md5:%s", gfFile.Md5, md5Str)
|
||||
spiderSync.RemoveSpiderFile()
|
||||
spiderSync.Download()
|
||||
spiderSync.CreateMd5File(gfFile.Md5)
|
||||
spiderSync.RemoveDownCreate(gfFile.Md5)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,6 +38,12 @@ func (s *SpiderSync) CreateMd5File(md5 string) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SpiderSync) RemoveDownCreate(md5 string) {
|
||||
s.RemoveSpiderFile()
|
||||
s.Download()
|
||||
s.CreateMd5File(md5)
|
||||
}
|
||||
|
||||
// 获得下载锁的key
|
||||
func (s *SpiderSync) GetLockDownloadKey(spiderId string) string {
|
||||
node, _ := model.GetCurrentNode()
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"crawlab/lib/cron"
|
||||
"crawlab/model"
|
||||
"crawlab/services/notification"
|
||||
"crawlab/services/spider_handler"
|
||||
"crawlab/utils"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@@ -450,15 +451,9 @@ func ExecuteTask(id int) {
|
||||
t.Status = constants.StatusRunning // 任务状态
|
||||
t.WaitDuration = t.StartTs.Sub(t.CreateTs).Seconds() // 等待时长
|
||||
|
||||
// 判断爬虫文件是否存在
|
||||
gfFile := model.GetGridFs(spider.FileId)
|
||||
if gfFile == nil {
|
||||
t.Error = "找不到爬虫文件,请重新上传"
|
||||
t.Status = constants.StatusError
|
||||
t.FinishTs = time.Now() // 结束时间
|
||||
t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长
|
||||
t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长
|
||||
_ = t.Save()
|
||||
// 文件检查
|
||||
if err := SpiderFileCheck(t, spider); err != nil {
|
||||
log.Errorf("spider file check error: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
@@ -538,6 +533,30 @@ func ExecuteTask(id int) {
|
||||
log.Infof(GetWorkerPrefix(id) + "任务(ID:" + t.Id + ")" + "执行完毕. 消耗时间:" + durationStr + "秒")
|
||||
}
|
||||
|
||||
func SpiderFileCheck(t model.Task, spider model.Spider) error {
|
||||
// 判断爬虫文件是否存在
|
||||
gfFile := model.GetGridFs(spider.FileId)
|
||||
if gfFile == nil {
|
||||
t.Error = "找不到爬虫文件,请重新上传"
|
||||
t.Status = constants.StatusError
|
||||
t.FinishTs = time.Now() // 结束时间
|
||||
t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长
|
||||
t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长
|
||||
_ = t.Save()
|
||||
return errors.New(t.Error)
|
||||
}
|
||||
|
||||
// 判断md5值是否一致
|
||||
path := filepath.Join(viper.GetString("spider.path"), spider.Name)
|
||||
md5File := filepath.Join(path, spider_handler.Md5File)
|
||||
md5 := utils.GetSpiderMd5Str(md5File)
|
||||
if gfFile.Md5 != md5 {
|
||||
spiderSync := spider_handler.SpiderSync{Spider: spider}
|
||||
spiderSync.RemoveDownCreate(gfFile.Md5)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetTaskLog(id string) (logStr string, err error) {
|
||||
task, err := model.GetTask(id)
|
||||
|
||||
@@ -680,19 +699,6 @@ func AddTask(t model.Task) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func HandleTaskError(t model.Task, err error) {
|
||||
log.Error("handle task error:" + err.Error())
|
||||
t.Status = constants.StatusError
|
||||
t.Error = err.Error()
|
||||
t.FinishTs = time.Now()
|
||||
if err := t.Save(); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
debug.PrintStack()
|
||||
}
|
||||
|
||||
func GetTaskEmailMarkdownContent(t model.Task, s model.Spider) string {
|
||||
n, _ := model.GetNode(t.NodeId)
|
||||
errMsg := ""
|
||||
|
||||
@@ -33,7 +33,14 @@ func ReadFileOneLine(fileName string) string {
|
||||
return ""
|
||||
}
|
||||
return line
|
||||
}
|
||||
|
||||
func GetSpiderMd5Str(file string) string {
|
||||
md5Str := ReadFileOneLine(file)
|
||||
// 去掉空格以及换行符
|
||||
md5Str = strings.Replace(md5Str, " ", "", -1)
|
||||
md5Str = strings.Replace(md5Str, "\n", "", -1)
|
||||
return md5Str
|
||||
}
|
||||
|
||||
// 创建文件
|
||||
|
||||
Reference in New Issue
Block a user