fix 无法及时同步爬虫的问题

This commit is contained in:
陈景阳
2020-01-28 15:43:57 +08:00
parent 3e792a24d2
commit 4825653ae0
4 changed files with 45 additions and 34 deletions

View File

@@ -7,6 +7,7 @@ import (
"crawlab/lib/cron"
"crawlab/model"
"crawlab/services/notification"
"crawlab/services/spider_handler"
"crawlab/utils"
"encoding/json"
"errors"
@@ -450,15 +451,9 @@ func ExecuteTask(id int) {
t.Status = constants.StatusRunning // 任务状态
t.WaitDuration = t.StartTs.Sub(t.CreateTs).Seconds() // 等待时长
// 判断爬虫文件是否存在
gfFile := model.GetGridFs(spider.FileId)
if gfFile == nil {
t.Error = "找不到爬虫文件,请重新上传"
t.Status = constants.StatusError
t.FinishTs = time.Now() // 结束时间
t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长
t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长
_ = t.Save()
// 文件检查
if err := SpiderFileCheck(t, spider); err != nil {
log.Errorf("spider file check error: %s", err.Error())
return
}
@@ -538,6 +533,30 @@ func ExecuteTask(id int) {
log.Infof(GetWorkerPrefix(id) + "任务(ID:" + t.Id + ")" + "执行完毕. 消耗时间:" + durationStr + "秒")
}
// SpiderFileCheck verifies that the spider's file is available before a task runs.
//
// If no GridFS file is found for spider.FileId, the task is marked as failed
// (error message, error status, finish time and durations), saved, and an
// error carrying the same message is returned. Otherwise the md5 string read
// from the spider's local md5 file is compared with the Md5 of the GridFS
// file; on mismatch RemoveDownCreate is invoked with the GridFS md5.
//
// t is received by value, so the field changes made here do not show up on
// the caller's copy of the task.
func SpiderFileCheck(t model.Task, spider model.Spider) error {
	// Make sure the spider file exists in GridFS.
	gfFile := model.GetGridFs(spider.FileId)
	if gfFile == nil {
		t.Error = "找不到爬虫文件,请重新上传"
		t.Status = constants.StatusError
		t.FinishTs = time.Now()                                  // finish time
		t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds()  // run duration
		t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds()   // total duration
		// A failed save would leave the task without its error status; surface
		// it in the log instead of dropping it silently.
		if err := t.Save(); err != nil {
			log.Errorf("save task error: %s", err.Error())
		}
		return errors.New(t.Error)
	}

	// Compare the locally recorded md5 with the one stored in GridFS.
	spiderDir := filepath.Join(viper.GetString("spider.path"), spider.Name)
	md5File := filepath.Join(spiderDir, spider_handler.Md5File)
	localMd5 := utils.GetSpiderMd5Str(md5File)
	if gfFile.Md5 != localMd5 {
		// NOTE(review): presumably this removes the local copy and downloads
		// the current one again — confirm against spider_handler.
		spiderSync := spider_handler.SpiderSync{Spider: spider}
		spiderSync.RemoveDownCreate(gfFile.Md5)
	}
	return nil
}
func GetTaskLog(id string) (logStr string, err error) {
task, err := model.GetTask(id)
@@ -680,19 +699,6 @@ func AddTask(t model.Task) error {
return nil
}
// HandleTaskError logs err, marks the task as failed with err's message and
// the current time as finish time, and saves it. The current stack is printed
// whether or not the save succeeds.
//
// t is received by value, so the field changes made here do not show up on
// the caller's copy of the task. err must be non-nil.
func HandleTaskError(t model.Task, err error) {
	log.Error("handle task error:" + err.Error())
	t.Status = constants.StatusError
	t.Error = err.Error()
	t.FinishTs = time.Now()
	if saveErr := t.Save(); saveErr != nil {
		// Log the message verbatim: passing it as a format string would
		// misinterpret any '%' it contains.
		log.Error(saveErr.Error())
	}
	debug.PrintStack()
}
func GetTaskEmailMarkdownContent(t model.Task, s model.Spider) string {
n, _ := model.GetNode(t.NodeId)
errMsg := ""