mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
Fixed https://github.com/crawlab-team/crawlab/issues/643 and https://github.com/crawlab-team/crawlab/issues/640
This commit is contained in:
@@ -181,6 +181,7 @@ func UploadConfigSpider(c *gin.Context) {
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
|
||||
@@ -363,7 +363,12 @@ func UploadSpider(c *gin.Context) {
|
||||
var gfFile model.GridFs
|
||||
if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil {
|
||||
// 已经存在文件,则删除
|
||||
_ = gf.RemoveId(gfFile.Id)
|
||||
if err := gf.RemoveId(gfFile.Id); err != nil {
|
||||
log.Errorf("remove grid fs error: %s", err.Error())
|
||||
debug.PrintStack()
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 上传到GridFs
|
||||
@@ -506,22 +511,32 @@ func UploadSpiderFromId(c *gin.Context) {
|
||||
|
||||
// 判断文件是否已经存在
|
||||
var gfFile model.GridFs
|
||||
if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil {
|
||||
if err := gf.Find(bson.M{"filename": spider.Name}).One(&gfFile); err == nil {
|
||||
// 已经存在文件,则删除
|
||||
_ = gf.RemoveId(gfFile.Id)
|
||||
if err := gf.RemoveId(gfFile.Id); err != nil {
|
||||
log.Errorf("remove grid fs error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 上传到GridFs
|
||||
fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath)
|
||||
fid, err := services.UploadToGridFs(spider.Name, tmpFilePath)
|
||||
if err != nil {
|
||||
log.Errorf("upload to grid fs error: %s", err.Error())
|
||||
debug.PrintStack()
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 更新file_id
|
||||
spider.FileId = fid
|
||||
_ = spider.Save()
|
||||
if err := spider.Save(); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
|
||||
// 发起同步
|
||||
services.PublishSpider(spider)
|
||||
|
||||
@@ -17,6 +17,7 @@ import (
|
||||
"gopkg.in/yaml.v2"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -214,7 +215,11 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con
|
||||
var gfFile model.GridFs
|
||||
if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil {
|
||||
// 已经存在文件,则删除
|
||||
_ = gf.RemoveId(gfFile.Id)
|
||||
if err := gf.RemoveId(gfFile.Id); err != nil {
|
||||
log.Errorf("remove grid fs error: %s", err.Error())
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// 上传到GridFs
|
||||
|
||||
@@ -60,7 +60,12 @@ func UploadSpiderToGridFsFromMaster(spider model.Spider) error {
|
||||
var gfFile model.GridFs
|
||||
if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil {
|
||||
// 已经存在文件,则删除
|
||||
_ = gf.RemoveId(gfFile.Id)
|
||||
log.Errorf(gfFile.Id.Hex() + " already exists. removing...")
|
||||
if err := gf.RemoveId(gfFile.Id); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// 上传到GridFs
|
||||
@@ -72,7 +77,9 @@ func UploadSpiderToGridFsFromMaster(spider model.Spider) error {
|
||||
|
||||
// 保存爬虫 FileId
|
||||
spider.FileId = fid
|
||||
_ = spider.Save()
|
||||
if err := spider.Save(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 获取爬虫同步实例
|
||||
spiderSync := spider_handler.SpiderSync{
|
||||
@@ -102,27 +109,33 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err er
|
||||
// 创建一个新GridFS文件
|
||||
f, err := gf.Create(fileName)
|
||||
if err != nil {
|
||||
log.Errorf("create file error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
|
||||
//分片读取爬虫zip文件
|
||||
// 分片读取爬虫zip文件
|
||||
err = ReadFileByStep(filePath, WriteToGridFS, f)
|
||||
if err != nil {
|
||||
log.Errorf("read file by step error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
return "", err
|
||||
}
|
||||
|
||||
// 删除zip文件
|
||||
if err = os.Remove(filePath); err != nil {
|
||||
log.Errorf("remove file error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
|
||||
// 关闭文件,提交写入
|
||||
if err = f.Close(); err != nil {
|
||||
log.Errorf("close file error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
return "", err
|
||||
}
|
||||
|
||||
// 文件ID
|
||||
fid = f.Id().(bson.ObjectId)
|
||||
|
||||
@@ -183,8 +196,14 @@ func PublishSpider(spider model.Spider) {
|
||||
// 查询gf file,不存在则标记为爬虫文件不存在
|
||||
gfFile = model.GetGridFs(spider.FileId)
|
||||
if gfFile == nil {
|
||||
spider.FileId = constants.ObjectIdNull
|
||||
_ = spider.Save()
|
||||
log.Errorf("get grid fs file error: cannot find grid fs file")
|
||||
log.Errorf("grid fs file_id: " + spider.FileId.Hex())
|
||||
log.Errorf("spider_name: " + spider.Name)
|
||||
debug.PrintStack()
|
||||
//spider.FileId = constants.ObjectIdNull
|
||||
//if err := spider.Save(); err != nil {
|
||||
// return
|
||||
//}
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -208,6 +227,7 @@ func PublishSpider(spider model.Spider) {
|
||||
spiderSync.CheckIsScrapy()
|
||||
return
|
||||
}
|
||||
|
||||
// md5文件不存在,则下载
|
||||
md5 := filepath.Join(path, spider_handler.Md5File)
|
||||
if !utils.Exists(md5) {
|
||||
@@ -215,6 +235,7 @@ func PublishSpider(spider model.Spider) {
|
||||
spiderSync.RemoveDownCreate(gfFile.Md5)
|
||||
return
|
||||
}
|
||||
|
||||
// md5值不一样,则下载
|
||||
md5Str := utils.GetSpiderMd5Str(md5)
|
||||
if gfFile.Md5 != md5Str {
|
||||
@@ -412,7 +433,7 @@ func CopySpider(spider model.Spider, newName string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func InitDemoSpiders () {
|
||||
func InitDemoSpiders() {
|
||||
// 添加Demo爬虫
|
||||
templateSpidersDir := "./template/spiders"
|
||||
for _, info := range utils.ListDir(templateSpidersDir) {
|
||||
|
||||
@@ -455,7 +455,7 @@ func ExecuteTask(id int) {
|
||||
}
|
||||
|
||||
// 开始执行任务
|
||||
log.Infof(GetWorkerPrefix(id) + "开始执行任务(ID:" + t.Id + ")")
|
||||
log.Infof(GetWorkerPrefix(id) + "start task (id:" + t.Id + ")")
|
||||
|
||||
// 储存任务
|
||||
_ = t.Save()
|
||||
@@ -529,7 +529,7 @@ func ExecuteTask(id int) {
|
||||
// 统计时长
|
||||
duration := toc.Sub(tic).Seconds()
|
||||
durationStr := strconv.FormatFloat(duration, 'f', 6, 64)
|
||||
log.Infof(GetWorkerPrefix(id) + "任务(ID:" + t.Id + ")" + "执行完毕. 消耗时间:" + durationStr + "秒")
|
||||
log.Infof(GetWorkerPrefix(id) + "task (id:" + t.Id + ")" + " finished. elapsed:" + durationStr + " sec")
|
||||
}
|
||||
|
||||
func SpiderFileCheck(t model.Task, spider model.Spider) error {
|
||||
|
||||
Reference in New Issue
Block a user