From 951f6a9f07822d38d3aca3a0e566935417ac514d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 25 Sep 2019 14:38:46 +0800 Subject: [PATCH 01/81] fix --- backend/constants/task.go | 15 ++++++++++---- backend/model/task.go | 26 ++++++++++++++++++++++++ backend/services/msg_handler/msg_task.go | 17 +++++++++++++++- backend/services/node.go | 6 ++++++ backend/services/task.go | 21 +++++++++++++++---- 5 files changed, 76 insertions(+), 9 deletions(-) diff --git a/backend/constants/task.go b/backend/constants/task.go index 5eeee967..b6fb615c 100644 --- a/backend/constants/task.go +++ b/backend/constants/task.go @@ -1,11 +1,18 @@ package constants const ( - StatusPending string = "pending" - StatusRunning string = "running" - StatusFinished string = "finished" - StatusError string = "error" + // 调度中 + StatusPending string = "pending" + // 运行中 + StatusRunning string = "running" + // 已完成 + StatusFinished string = "finished" + // 错误 + StatusError string = "error" + // 取消 StatusCancelled string = "cancelled" + // 节点重启导致的异常终止 + StatusAbnormal string = "abnormal" ) const ( diff --git a/backend/model/task.go b/backend/model/task.go index 177edccb..f568b7fe 100644 --- a/backend/model/task.go +++ b/backend/model/task.go @@ -25,6 +25,7 @@ type Task struct { WaitDuration float64 `json:"wait_duration" bson:"wait_duration"` RuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"` TotalDuration float64 `json:"total_duration" bson:"total_duration"` + Pid int `json:"pid" bson:"pid"` // 前端数据 SpiderName string `json:"spider_name"` @@ -191,6 +192,7 @@ func RemoveTask(id string) error { return nil } +// 删除task by spider_id func RemoveTaskBySpiderId(id bson.ObjectId) error { tasks, err := GetTaskList(bson.M{"spider_id": id}, 0, constants.Infinite, "-create_ts") if err != nil { @@ -206,6 +208,7 @@ func RemoveTaskBySpiderId(id bson.ObjectId) error { return nil } +// task 总数 func GetTaskCount(query interface{}) (int, error) { s, c := database.GetCol("tasks") defer s.Close() @@ -308,6 +311,7 @@ func GetDailyTaskStats(query bson.M) ([]TaskDailyItem, error) { return dailyItems, nil } +// 更新task的结果数 func UpdateTaskResultCount(id string) (err error) { // 获取任务 task, err := GetTask(id) @@ -343,3 +347,25 @@ func UpdateTaskResultCount(id string) (err error) { } return nil } + +func UpdateTaskToAbnormal(nodeId bson.ObjectId) error { + s, c := database.GetCol("tasks") + defer s.Close() + + selector := bson.M{ + "node_id": nodeId, + "status": constants.StatusRunning, + } + update := bson.M{ + "$set": bson.M{ + "status": constants.StatusAbnormal, + }, + } + _, err := c.UpdateAll(selector, update) + if err != nil { + log.Errorf("update task to abnormal error: %s, node_id : %s", err.Error(), nodeId.Hex()) + debug.PrintStack() + return err + } + return nil +} diff --git a/backend/services/msg_handler/msg_task.go b/backend/services/msg_handler/msg_task.go index 1d218264..087217a8 100644 --- a/backend/services/msg_handler/msg_task.go +++ b/backend/services/msg_handler/msg_task.go @@ -2,7 +2,11 @@ package msg_handler import ( "crawlab/constants" + "crawlab/model" "crawlab/utils" + "github.com/apex/log" + "runtime/debug" + "time" ) type Task struct { @@ -12,6 +16,17 @@ type Task struct { func (t *Task) Handle() error { // 取消任务 ch := utils.TaskExecChanMap.ChanBlocked(t.msg.TaskId) - ch <- constants.TaskCancel + if ch != nil { + ch <- constants.TaskCancel + } else { + // 节点可能被重启,找不到chan + t, _ := model.GetTask(t.msg.TaskId) + t.Status = constants.StatusCancelled + t.FinishTs = time.Now() + if err := t.Save(); err != nil { + debug.PrintStack() + log.Infof("cancel task error: %s", err.Error()) + } + } return nil } diff --git a/backend/services/node.go b/backend/services/node.go index 44fa3905..5526cb01 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -328,6 +328,12 @@ func InitNodeService() error { } } + // 更新在当前节点执行的任务状态为:abnormal + if err := model.UpdateTaskToAbnormal(node.Id); err != nil { + debug.PrintStack() + return err + } + c.Start() return nil } diff --git a/backend/services/task.go b/backend/services/task.go index 5f3a4d07..53c96b6b 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -159,13 +159,26 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e } }() - // 开始执行 - if err := cmd.Run(); err != nil { - HandleTaskError(t, err) + // 异步启动进程 + if err := cmd.Start(); err != nil { + log.Errorf("start spider error:{}", err.Error()) + debug.PrintStack() + return err + } + // 保存pid到task + t.Pid = cmd.Process.Pid + if err := t.Save(); err != nil { + log.Errorf("save task pid error: %s", err.Error()) + debug.PrintStack() + return err + } + // 同步等待进程完成 + if err := cmd.Wait(); err != nil { + log.Errorf("wait process finish error: %s", err.Error()) + debug.PrintStack() return err } ch <- constants.TaskFinish - return nil } From 82999225de701b875edee66adfeffc10d5d5ea0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 25 Sep 2019 15:11:27 +0800 Subject: [PATCH 02/81] =?UTF-8?q?fix=20=E6=97=A0=E6=B3=95=E8=8E=B7?= =?UTF-8?q?=E5=8F=96task=E7=9A=84=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 12 ++++++++++-- frontend/src/store/modules/task.js | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/backend/services/task.go b/backend/services/task.go index 53c96b6b..216bdfb7 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -447,6 +447,8 @@ func CancelTask(id string) (err error) { // 获取任务 task, err := model.GetTask(id) if err != nil { + log.Errorf("task not found, task id : %s, error: %s", id, err.Error()) + debug.PrintStack() return err } @@ -458,6 +460,8 @@ func CancelTask(id string) (err error) { // 获取当前节点(默认当前节点为主节点) node, err := GetCurrentNode() if err != nil { + log.Errorf("get current node error: %s", err.Error()) + debug.PrintStack() return err } @@ -466,9 +470,13 @@ func CancelTask(id string) (err error) { // 获取任务执行频道 ch := utils.TaskExecChanMap.ChanBlocked(id) + if ch != nil { + // 发出取消进程信号 + ch <- constants.TaskCancel + } else { + model. + } - // 发出取消进程信号 - ch <- constants.TaskCancel } else { // 任务节点为工作节点 diff --git a/frontend/src/store/modules/task.js b/frontend/src/store/modules/task.js index 1d7e6c09..bb182706 100644 --- a/frontend/src/store/modules/task.js +++ b/frontend/src/store/modules/task.js @@ -139,7 +139,7 @@ const actions = { cancelTask ({ state, dispatch }, id) { return request.post(`/tasks/${id}/cancel`) .then(() => { - dispatch('getTaskData') + dispatch('getTaskData', id) }) } } From fda7e56e1ad57eee1c041c23bb4e2ba1bdf1c7b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 25 Sep 2019 16:19:58 +0800 Subject: [PATCH 03/81] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/database/pubsub.go | 1 - backend/services/msg_handler/msg_task.go | 15 +++++++++++---- backend/services/task.go | 9 ++++++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/backend/database/pubsub.go b/backend/database/pubsub.go index 8487df11..0eb8639b 100644 --- a/backend/database/pubsub.go +++ b/backend/database/pubsub.go @@ -33,7 +33,6 @@ func (r *Redis) subscribe(ctx context.Context, consume ConsumeFunc, channel ...s done <- fmt.Errorf("redis pubsub receive err: %v", msg) return case redis.Message: - fmt.Println(msg) if err := consume(msg); err != nil { fmt.Printf("redis pubsub consume message err: %v", err) continue diff --git a/backend/services/msg_handler/msg_task.go b/backend/services/msg_handler/msg_task.go index 087217a8..5f120f80 100644 --- a/backend/services/msg_handler/msg_task.go +++ b/backend/services/msg_handler/msg_task.go @@ -14,16 +14,23 @@ type Task struct { } func (t *Task) Handle() error { + log.Infof("received cancel task msg, task_id: %s", t.msg.TaskId) // 取消任务 ch := utils.TaskExecChanMap.ChanBlocked(t.msg.TaskId) if ch != nil { ch <- constants.TaskCancel } else { + log.Infof("chan is empty, update status to abnormal") // 节点可能被重启,找不到chan - t, _ := model.GetTask(t.msg.TaskId) - t.Status = constants.StatusCancelled - t.FinishTs = time.Now() - if err := t.Save(); err != nil { + task, err := model.GetTask(t.msg.TaskId) + if err != nil { + log.Errorf("task not found, task_id: %s", t.msg.TaskId) + debug.PrintStack() + return err + } + task.Status = constants.StatusAbnormal + task.FinishTs = time.Now() + if err := task.Save(); err != nil { debug.PrintStack() log.Infof("cancel task error: %s", err.Error()) } diff --git a/backend/services/task.go b/backend/services/task.go index 216bdfb7..bcd5cd2e 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -465,6 +465,9 @@ func CancelTask(id string) (err error) { return err } + log.Infof("current node id is: %s", node.Id.Hex()) + log.Infof("task node id is: %s", task.NodeId.Hex()) + if node.Id == task.NodeId { // 任务节点为主节点 @@ -474,7 +477,11 @@ func CancelTask(id string) (err error) { // 发出取消进程信号 ch <- constants.TaskCancel } else { - model. + if err := model.UpdateTaskToAbnormal(node.Id); err != nil { + log.Errorf("update task to abnormal : {}", err.Error()) + debug.PrintStack() + return + } } } else { From 7218a4f4ded4d93f5a3e7cc0b0db6a2fb9fb74df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 09:37:10 +0800 Subject: [PATCH 04/81] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/services/task.go b/backend/services/task.go index bcd5cd2e..4c02eead 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -139,11 +139,11 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e go func() { // 传入信号,此处阻塞 signal := <-ch - + log.Infof("cancel process signal: %s", signal) if signal == constants.TaskCancel { // 取消进程 if err := cmd.Process.Kill(); err != nil { - log.Errorf(err.Error()) + log.Errorf("process kill error: %s", err.Error()) debug.PrintStack() return } @@ -153,7 +153,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e // 保存任务 t.FinishTs = time.Now() if err := t.Save(); err != nil { - log.Infof(err.Error()) + log.Infof("save task error: %s", err.Error()) debug.PrintStack() return } From e235076a11bc194027b7d0a2867eded3f3f53fbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 10:23:08 +0800 Subject: [PATCH 05/81] =?UTF-8?q?fix=20=E6=97=A0=E6=B3=95=E7=BB=88?= =?UTF-8?q?=E6=AD=A2=E4=BB=BB=E5=8A=A1=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/backend/services/task.go b/backend/services/task.go index 4c02eead..8d8e3a6c 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -148,9 +148,10 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e return } t.Status = constants.StatusCancelled + } else { + // 保存任务 + t.Status = constants.StatusFinished } - - // 保存任务 t.FinishTs = time.Now() if err := t.Save(); err != nil { log.Infof("save task error: %s", err.Error()) @@ -176,6 +177,12 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e if err := cmd.Wait(); err != nil { log.Errorf("wait process finish error: %s", err.Error()) debug.PrintStack() + + // 发生一次也需要保存 + t.Error = err.Error() + t.FinishTs = time.Now() + t.Status = constants.TaskFinish + _ = t.Save() return err } ch <- constants.TaskFinish From 96e926b486d50871b74000d294ce5861867bb54d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 10:29:58 +0800 Subject: [PATCH 06/81] =?UTF-8?q?fix=20=E6=97=A0=E6=B3=95=E7=BB=88?= =?UTF-8?q?=E6=AD=A2=E4=BB=BB=E5=8A=A1=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/services/task.go b/backend/services/task.go index 8d8e3a6c..3b05089b 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -181,7 +181,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e // 发生一次也需要保存 t.Error = err.Error() t.FinishTs = time.Now() - t.Status = constants.TaskFinish + t.Status = constants.StatusFinished _ = t.Save() return err } From 05c28230b70743b6c68d0b93c94f29eef62518ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 11:28:20 +0800 Subject: [PATCH 07/81] =?UTF-8?q?=E7=88=AC=E8=99=AB=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=B8=BA=E4=BB=8EGridFS=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/routes/spider.go | 77 ++++++++------ backend/services/spider.go | 204 +++---------------------------------- backend/services/task.go | 4 +- 3 files changed, 62 insertions(+), 223 deletions(-) diff --git a/backend/routes/spider.go b/backend/routes/spider.go index e0afb1a8..8a9e643f 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -11,7 +11,7 @@ import ( "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" "github.com/pkg/errors" - uuid "github.com/satori/go.uuid" + "github.com/satori/go.uuid" "github.com/spf13/viper" "io/ioutil" "net/http" @@ -152,49 +152,68 @@ func PutSpider(c *gin.Context) { return } - // 读取临时文件 - tmpFile, err := os.OpenFile(tmpFilePath, os.O_RDONLY, 0777) + // 上传到GridFs + fid, err := services.UploadToGridFs(file.Filename, tmpFilePath) if err != nil { + log.Errorf("upload to grid fs error: %s", err.Error()) debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) return } - if err = tmpFile.Close(); err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return + + // 保存爬虫信息 + srcPath := viper.GetString("spider.path") + spider := model.Spider{ + Name: file.Filename, + DisplayName: file.Filename, + Type: constants.Customized, + Src: filepath.Join(srcPath, file.Filename), + FileId: fid, } + _ = spider.Save() + + // 读取临时文件 + //tmpFile, err := os.OpenFile(tmpFilePath, os.O_RDONLY, 0777) + //if err != nil { + // debug.PrintStack() + // HandleError(http.StatusInternalServerError, c, err) + // return + //} + //if err = tmpFile.Close(); err != nil { + // debug.PrintStack() + // HandleError(http.StatusInternalServerError, c, err) + // return + //} // 目标目录 - dstPath := filepath.Join( - viper.GetString("spider.path"), - strings.Replace(file.Filename, ".zip", "", 1), - ) + //dstPath := filepath.Join( + // viper.GetString("spider.path"), + // strings.Replace(file.Filename, ".zip", "", 1), + //) // 如果目标目录已存在,删除目标目录 - if utils.Exists(dstPath) { - if err := os.RemoveAll(dstPath); err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - } - } + //if utils.Exists(dstPath) { + // if err := os.RemoveAll(dstPath); err != nil { + // debug.PrintStack() + // HandleError(http.StatusInternalServerError, c, err) + // } + //} // 将临时文件解压到爬虫目录 - if err := utils.DeCompress(tmpFile, dstPath); err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } + //if err := utils.DeCompress(tmpFile, dstPath); err != nil { + // debug.PrintStack() + // HandleError(http.StatusInternalServerError, c, err) + // return + //} // 删除临时文件 - if err = os.Remove(tmpFilePath); err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } + //if err = os.Remove(tmpFilePath); err != nil { + // debug.PrintStack() + // HandleError(http.StatusInternalServerError, c, err) + // return + //} // 更新爬虫 - services.UpdateSpiders() + // services.UpdateSpiders() c.JSON(http.StatusOK, Response{ Status: "ok", diff --git a/backend/services/spider.go b/backend/services/spider.go index fdf09517..bcf98700 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -13,16 +13,12 @@ import ( "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" "github.com/gomodule/redigo/redis" - "github.com/pkg/errors" "github.com/satori/go.uuid" "github.com/spf13/viper" "io" - "io/ioutil" "os" "path/filepath" "runtime/debug" - "strings" - "syscall" ) type SpiderFileData struct { @@ -36,175 +32,14 @@ type SpiderUploadMessage struct { SpiderId string } -// 从项目目录中获取爬虫列表 -func GetSpidersFromDir() ([]model.Spider, error) { - // 爬虫项目目录路径 - srcPath := viper.GetString("spider.path") - - // 如果爬虫项目目录不存在,则创建一个 - if !utils.Exists(srcPath) { - mask := syscall.Umask(0) // 改为 0000 八进制 - defer syscall.Umask(mask) // 改为原来的 umask - if err := os.MkdirAll(srcPath, 0766); err != nil { - debug.PrintStack() - return []model.Spider{}, err - } - } - - // 获取爬虫项目目录下的所有子项 - items, err := ioutil.ReadDir(srcPath) - if err != nil { - debug.PrintStack() - return []model.Spider{}, err - } - - // 定义爬虫列表 - spiders := make([]model.Spider, 0) - - // 遍历所有子项 - for _, item := range items { - // 忽略不为目录的子项 - if !item.IsDir() { - continue - } - - // 忽略隐藏目录 - if strings.HasPrefix(item.Name(), ".") { - continue - } - - // 构造爬虫 - spider := model.Spider{ - Name: item.Name(), - DisplayName: item.Name(), - Type: constants.Customized, - Src: filepath.Join(srcPath, item.Name()), - FileId: bson.ObjectIdHex(constants.ObjectIdNull), - } - - // 将爬虫加入列表 - spiders = append(spiders, spider) - } - - return spiders, nil -} - -// 将爬虫保存到数据库 -func SaveSpiders(spiders []model.Spider) error { - s, c := database.GetCol("spiders") - defer s.Close() - - if len(spiders) == 0 { - err := model.RemoveAllSpider() - if err != nil { - log.Error("remove all spider error:" + err.Error()) - return err - } - log.Info("get spider from dir is empty,removed all spider") - return nil - } - // 如果该爬虫不存在于数据库,则保存爬虫到数据库 - for _, spider := range spiders { - // 忽略非自定义爬虫 - if spider.Type != constants.Customized { - continue - } - spider_ := []*model.Spider{} - _ = c.Find(bson.M{"src": spider.Src}).All(&spider_) - // 以防出现多个重复的爬虫 - if len(spider_) > 1 { - if _, err := c.RemoveAll(bson.M{"src": spider.Src}); err != nil { - log.Errorf("remove spider error: %v, src:%v", err.Error(), spider.Src) - debug.PrintStack() - continue - } - if err := spider.Add(); err != nil { - log.Errorf("remove spider error: %v, src:%v", err.Error(), spider.Src) - debug.PrintStack() - continue - } - continue - } - if len(spider_) == 0 { - // 不存在 - if err := spider.Add(); err != nil { - log.Errorf("remove spider error: %v, src:%v", err.Error(), spider.Src) - debug.PrintStack() - continue - } - } - } - return nil -} - -// 更新爬虫 -func UpdateSpiders() { - // 从项目目录获取爬虫列表 - spiders, err := GetSpidersFromDir() - if err != nil { - log.Errorf(err.Error()) - return - } - - // 储存爬虫 - if err := SaveSpiders(spiders); err != nil { - log.Errorf(err.Error()) - return - } -} - -// 打包爬虫目录为zip文件 -func ZipSpider(spider model.Spider) (filePath string, err error) { - // 如果源文件夹不存在,抛错 - if !utils.Exists(spider.Src) { - debug.PrintStack() - // 删除该爬虫,否则会一直报错 - _ = model.RemoveSpider(spider.Id) - return "", errors.New("source path does not exist") - } - - // 临时文件路径 - randomId := uuid.NewV4() - - tmpPath := viper.GetString("other.tmppath") - if !utils.Exists(tmpPath) { - if err := os.MkdirAll(tmpPath, 0777); err != nil { - log.Errorf("mkdir other.tmppath error: %v", err.Error()) - return "", err - } - } - filePath = filepath.Join(tmpPath, randomId.String()+".zip") - // 将源文件夹打包为zip文件 - d, err := os.Open(spider.Src) - if err != nil { - debug.PrintStack() - return filePath, err - } - var files []*os.File - files = append(files, d) - if err := utils.Compress(files, filePath); err != nil { - return filePath, err - } - - return filePath, nil -} - // 上传zip文件到GridFS -func UploadToGridFs(spider model.Spider, fileName string, filePath string) (fid bson.ObjectId, err error) { +func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) { fid = "" // 获取MongoDB GridFS连接 s, gf := database.GetGridFs("files") defer s.Close() - // 如果存在FileId删除GridFS上的老文件 - if !utils.IsObjectIdNull(spider.FileId) { - if err = gf.RemoveId(spider.FileId); err != nil { - log.Error("remove gf file:" + err.Error()) - debug.PrintStack() - } - } - // 创建一个新GridFS文件 f, err := gf.Create(fileName) if err != nil { @@ -295,29 +130,24 @@ func PublishAllSpidersJob() { // 2. 上传zip文件到GridFS // 3. 发布消息给工作节点 func PublishSpider(spider model.Spider) (err error) { - // 将源文件夹打包为zip文件 - filePath, err := ZipSpider(spider) - if err != nil { - return err - } - // 上传zip文件到GridFS - fileName := filepath.Base(spider.Src) + ".zip" - fid, err := UploadToGridFs(spider, fileName, filePath) - if err != nil { - return err - } + s, gf := database.GetGridFs("files") + defer s.Close() - // 保存FileId - spider.FileId = fid - if err := spider.Save(); err != nil { + f, err := gf.OpenId(spider.FileId) + defer f.Close() + if err != nil { + log.Errorf("open file id: " + spider.FileId.Hex() + ", spider id:" + spider.Id.Hex() + ", error: " + err.Error()) + debug.PrintStack() + // 爬虫和文件没有对应,则删除爬虫 + _ = model.RemoveSpider(spider.Id) return err } // 发布消息给工作节点 msg := SpiderUploadMessage{ - FileId: fid.Hex(), - FileName: fileName, + FileId: spider.FileId.Hex(), + FileName: f.Name(), SpiderId: spider.Id.Hex(), } msgStr, err := json.Marshal(msg) @@ -349,12 +179,12 @@ func OnFileUpload(message redis.Message) (err error) { // 从GridFS获取该文件 f, err := gf.OpenId(bson.ObjectIdHex(msg.FileId)) + defer f.Close() if err != nil { log.Errorf("open file id: " + msg.FileId + ", spider id:" + msg.SpiderId + ", error: " + err.Error()) debug.PrintStack() return err } - defer f.Close() // 生成唯一ID randomId := uuid.NewV4() @@ -386,7 +216,6 @@ func OnFileUpload(message redis.Message) (err error) { // 解压缩临时文件到目标文件夹 dstPath := filepath.Join( viper.GetString("spider.path"), - // strings.Replace(msg.FileName, ".zip", "", -1), ) if err := utils.DeCompress(tmpFile, dstPath); err != nil { log.Errorf(err.Error()) @@ -418,11 +247,6 @@ func InitSpiderService() error { if IsMaster() { // 主节点 - // 每5秒更新一次爬虫信息 - if _, err := c.AddFunc("*/5 * * * * *", UpdateSpiders); err != nil { - return err - } - // 每60秒同步爬虫给工作节点 if _, err := c.AddFunc("0 * * * * *", PublishAllSpidersJob); err != nil { return err @@ -432,8 +256,6 @@ func InitSpiderService() error { // 订阅文件上传 channel := "files:upload" - - //sub.Connect() ctx := context.Background() return database.RedisClient.Subscribe(ctx, OnFileUpload, channel) diff --git a/backend/services/task.go b/backend/services/task.go index 3b05089b..e8d66a2a 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -145,7 +145,6 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e if err := cmd.Process.Kill(); err != nil { log.Errorf("process kill error: %s", err.Error()) debug.PrintStack() - return } t.Status = constants.StatusCancelled } else { @@ -487,10 +486,9 @@ func CancelTask(id string) (err error) { if err := model.UpdateTaskToAbnormal(node.Id); err != nil { log.Errorf("update task to abnormal : {}", err.Error()) debug.PrintStack() - return + return err } } - } else { // 任务节点为工作节点 From 947b561653b4662165af404066242eec5b6b5c46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 11:38:13 +0800 Subject: [PATCH 08/81] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/routes/spider.go | 44 ------------------------ backend/services/spider.go | 70 ++------------------------------------ 2 files changed, 3 insertions(+), 111 deletions(-) diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 8a9e643f..07e84e21 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -171,50 +171,6 @@ func PutSpider(c *gin.Context) { } _ = spider.Save() - // 读取临时文件 - //tmpFile, err := os.OpenFile(tmpFilePath, os.O_RDONLY, 0777) - //if err != nil { - // debug.PrintStack() - // HandleError(http.StatusInternalServerError, c, err) - // return - //} - //if err = tmpFile.Close(); err != nil { - // debug.PrintStack() - // HandleError(http.StatusInternalServerError, c, err) - // return - //} - - // 目标目录 - //dstPath := filepath.Join( - // viper.GetString("spider.path"), - // strings.Replace(file.Filename, ".zip", "", 1), - //) - - // 如果目标目录已存在,删除目标目录 - //if utils.Exists(dstPath) { - // if err := os.RemoveAll(dstPath); err != nil { - // debug.PrintStack() - // HandleError(http.StatusInternalServerError, c, err) - // } - //} - - // 将临时文件解压到爬虫目录 - //if err := utils.DeCompress(tmpFile, dstPath); err != nil { - // debug.PrintStack() - // HandleError(http.StatusInternalServerError, c, err) - // return - //} - - // 删除临时文件 - //if err = os.Remove(tmpFilePath); err != nil { - // debug.PrintStack() - // HandleError(http.StatusInternalServerError, c, err) - // return - //} - - // 更新爬虫 - // services.UpdateSpiders() - c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", diff --git a/backend/services/spider.go b/backend/services/spider.go index bcf98700..620cede4 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -1,18 +1,15 @@ package services import ( - "context" "crawlab/constants" "crawlab/database" "crawlab/lib/cron" "crawlab/model" "crawlab/utils" - "encoding/json" "fmt" "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" - "github.com/gomodule/redigo/redis" "github.com/satori/go.uuid" "github.com/spf13/viper" "io" @@ -126,11 +123,7 @@ func PublishAllSpidersJob() { } // 发布爬虫 -// 1. 将源文件夹打包为zip文件 -// 2. 上传zip文件到GridFS -// 3. 发布消息给工作节点 func PublishSpider(spider model.Spider) (err error) { - s, gf := database.GetGridFs("files") defer s.Close() @@ -144,48 +137,6 @@ func PublishSpider(spider model.Spider) (err error) { return err } - // 发布消息给工作节点 - msg := SpiderUploadMessage{ - FileId: spider.FileId.Hex(), - FileName: f.Name(), - SpiderId: spider.Id.Hex(), - } - msgStr, err := json.Marshal(msg) - if err != nil { - return - } - channel := "files:upload" - if _, err = database.RedisClient.Publish(channel, utils.BytesToString(msgStr)); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - return -} - -// 上传爬虫回调 -func OnFileUpload(message redis.Message) (err error) { - s, gf := database.GetGridFs("files") - defer s.Close() - - // 反序列化消息 - var msg SpiderUploadMessage - if err := json.Unmarshal(message.Data, &msg); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 从GridFS获取该文件 - f, err := gf.OpenId(bson.ObjectIdHex(msg.FileId)) - defer f.Close() - if err != nil { - log.Errorf("open file id: " + msg.FileId + ", spider id:" + msg.SpiderId + ", error: " + err.Error()) - debug.PrintStack() - return err - } - // 生成唯一ID randomId := uuid.NewV4() tmpPath := viper.GetString("other.tmppath") @@ -197,7 +148,6 @@ func OnFileUpload(message redis.Message) (err error) { } // 创建临时文件 tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - tmpFile, err := os.OpenFile(tmpFilePath, os.O_CREATE|os.O_WRONLY, os.ModePerm) if err != nil { log.Errorf(err.Error()) @@ -236,6 +186,7 @@ func OnFileUpload(message redis.Message) (err error) { debug.PrintStack() return err } + return nil } @@ -243,24 +194,9 @@ func OnFileUpload(message redis.Message) (err error) { func InitSpiderService() error { // 构造定时任务执行器 c := cron.New(cron.WithSeconds()) - - if IsMaster() { - // 主节点 - - // 每60秒同步爬虫给工作节点 - if _, err := c.AddFunc("0 * * * * *", PublishAllSpidersJob); err != nil { - return err - } - } else { - // 非主节点 - - // 订阅文件上传 - channel := "files:upload" - ctx := context.Background() - return database.RedisClient.Subscribe(ctx, OnFileUpload, channel) - + if _, err := c.AddFunc("0 * * * * *", PublishAllSpidersJob); err != nil { + return err } - // 启动定时任务 c.Start() From a11544d809c5b9262fd3783635b7a8bdbe7cfa81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 16:26:32 +0800 Subject: [PATCH 09/81] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/file.go | 25 +++++++++++ backend/model/spider.go | 15 +++++++ backend/routes/spider.go | 67 +++++++++++++++-------------- backend/services/spider.go | 86 ++++++++++++++++++++++---------------- 4 files changed, 127 insertions(+), 66 deletions(-) diff --git a/backend/model/file.go b/backend/model/file.go index 3cea7b39..f0968086 100644 --- a/backend/model/file.go +++ b/backend/model/file.go @@ -1,11 +1,23 @@ package model import ( + "crawlab/database" "crawlab/utils" "github.com/apex/log" + "github.com/globalsign/mgo/bson" "os" + "time" ) +type GridFs struct { + Id bson.ObjectId `json:"_id" bson:"_id"` + ChunkSize int32 `json:"chunk_size" bson:"chunkSize"` + UploadDate time.Time `json:"upload_date" bson:"uploadDate"` + Length int32 `json:"length" bson:"length"` + Md5 string `json:"md_5" bson:"md5"` + Filename string `json:"filename" bson:"filename"` +} + type File struct { Name string `json:"name"` Path string `json:"path"` @@ -13,6 +25,19 @@ type File struct { Size int64 `json:"size"` } +func GetGridFs(id bson.ObjectId) *GridFs { + s, gf := database.GetGridFs("files") + defer s.Close() + + var gfFile GridFs + err := gf.Find(bson.M{"_id": id}).One(&gfFile) + if err != nil { + log.Errorf("get gf file error: %s, file_id: %s", err.Error(), id.Hex()) + return nil + } + return &gfFile +} + func RemoveFile(path string) error { if !utils.Exists(path) { log.Info("file not found: " + path) diff --git a/backend/model/spider.go b/backend/model/spider.go index e0e5f836..c498287d 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -24,6 +24,8 @@ type Spider struct { Site string `json:"site"` // 爬虫网站 Envs []Env `json:"envs" bson:"envs"` // 环境变量 Remark string `json:"remark"` // 备注 + Md5 string `json:"md_5" bson:"md5"` // ZIP文件的MD5 + OldMd5 string `json:"old_md_5" bson:"old_md5"` //上一次的MD5值 // 自定义爬虫 Src string `json:"src" bson:"src"` // 源码位置 Cmd string `json:"cmd" bson:"cmd"` // 执行命令 @@ -122,6 +124,19 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) { return spiders, nil } +func GetSpiderByName(name string) *Spider { + s, c := database.GetCol("spiders") + defer s.Close() + + var result *Spider + if err := c.Find(bson.M{"name": name}).One(result); err != nil { + log.Errorf("get spider error: %s", err.Error()) + debug.PrintStack() + return result + } + return result +} + func GetSpider(id bson.ObjectId) (Spider, error) { s, c := database.GetCol("spiders") defer s.Close() diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 07e84e21..9b8bd50d 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -79,18 +79,6 @@ func PostSpider(c *gin.Context) { }) } -func PublishAllSpiders(c *gin.Context) { - if err := services.PublishAllSpiders(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - func PublishSpider(c *gin.Context) { id := c.Param("id") @@ -104,10 +92,7 @@ func PublishSpider(c *gin.Context) { return } - if err := services.PublishSpider(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } + services.PublishSpider(spider) c.JSON(http.StatusOK, Response{ Status: "ok", @@ -117,7 +102,7 @@ func PublishSpider(c *gin.Context) { func PutSpider(c *gin.Context) { // 从body中获取文件 - file, err := c.FormFile("file") + uploadFile, err := c.FormFile("file") if err != nil { debug.PrintStack() HandleError(http.StatusInternalServerError, c, err) @@ -125,7 +110,7 @@ func PutSpider(c *gin.Context) { } // 如果不为zip文件,返回错误 - if !strings.HasSuffix(file.Filename, ".zip") { + if !strings.HasSuffix(uploadFile.Filename, ".zip") { debug.PrintStack() HandleError(http.StatusBadRequest, c, errors.New("Not a valid zip file")) return @@ -145,31 +130,50 @@ func PutSpider(c *gin.Context) { // 保存到本地临时文件 randomId := uuid.NewV4() tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - if err := c.SaveUploadedFile(file, tmpFilePath); err != nil { + if err := c.SaveUploadedFile(uploadFile, tmpFilePath); err != nil { log.Error("save upload file error: " + err.Error()) debug.PrintStack() HandleError(http.StatusInternalServerError, c, err) return } + s, gf := database.GetGridFs("files") + defer s.Close() + + // 判断文件是否已经存在 + var gfFile model.GridFs + if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil { + // 已经存在文件,则删除 + _ = gf.RemoveId(gfFile.Id) + } + // 上传到GridFs - fid, err := services.UploadToGridFs(file.Filename, tmpFilePath) + fid, md5, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) if err != nil { log.Errorf("upload to grid fs error: %s", err.Error()) debug.PrintStack() return } - - // 保存爬虫信息 - srcPath := viper.GetString("spider.path") - spider := model.Spider{ - Name: file.Filename, - DisplayName: file.Filename, - Type: constants.Customized, - Src: filepath.Join(srcPath, file.Filename), - FileId: fid, + // 判断爬虫是否存在 + spiderName := strings.Replace(uploadFile.Filename, ".zip", "", -1) + spider := model.GetSpiderByName(spiderName) + if spider == nil { + // 保存爬虫信息 + srcPath := viper.GetString("spider.path") + spider := model.Spider{ + Name: spiderName, + DisplayName: spiderName, + Type: constants.Customized, + Src: filepath.Join(srcPath, spiderName), + FileId: fid, + Md5: md5, + } + _ = spider.Add() + } else { + spider.OldMd5 = spider.Md5 + spider.Md5 = md5 + _ = spider.Save() } - _ = spider.Save() c.JSON(http.StatusOK, Response{ Status: "ok", @@ -259,7 +263,8 @@ func GetSpiderDir(c *gin.Context) { } // 获取目录下文件列表 - f, err := ioutil.ReadDir(filepath.Join(spider.Src, path)) + spiderPath := viper.GetString("spider.path") + f, err := ioutil.ReadDir(filepath.Join(spiderPath, spider.Name, path)) if err != nil { HandleError(http.StatusInternalServerError, c, err) return diff --git a/backend/services/spider.go b/backend/services/spider.go index 620cede4..4a32c023 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -30,7 +30,7 @@ type SpiderUploadMessage struct { } // 上传zip文件到GridFS -func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) { +func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 string, err error) { fid = "" // 获取MongoDB GridFS连接 @@ -48,7 +48,7 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err er err = ReadFileByStep(filePath, WriteToGridFS, f) if err != nil { debug.PrintStack() - return "", err + return "", "", err } // 删除zip文件 @@ -58,12 +58,12 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err er } // 关闭文件,提交写入 if err = f.Close(); err != nil { - return "", err + return "", "", err } // 文件ID fid = f.Id().(bson.ObjectId) - return fid, nil + return fid, f.MD5(), nil } func WriteToGridFS(content []byte, f *mgo.GridFile) { @@ -96,45 +96,59 @@ func ReadFileByStep(filePath string, handle func([]byte, *mgo.GridFile), fileCre } // 发布所有爬虫 -func PublishAllSpiders() error { +func PublishAllSpiders() { // 获取爬虫列表 - spiders, err := model.GetSpiderList(nil, 0, constants.Infinite) - if err != nil { - log.Errorf(err.Error()) - return err + spiders, _ := model.GetSpiderList(nil, 0, constants.Infinite) + if len(spiders) == 0 { + return } - // 遍历爬虫列表 for _, spider := range spiders { - // 发布爬虫 - if err := PublishSpider(spider); err != nil { - log.Errorf("publish spider error:" + err.Error()) - // return err - } - } - - return nil -} - -func PublishAllSpidersJob() { - if err := PublishAllSpiders(); err != nil { - log.Errorf(err.Error()) + // 异步发布爬虫 + go func() { + PublishSpider(spider) + }() } } // 发布爬虫 -func PublishSpider(spider model.Spider) (err error) { +func PublishSpider(spider model.Spider) { s, gf := database.GetGridFs("files") defer s.Close() + gfFile := model.GetGridFs(spider.FileId) + if gfFile == nil { + _ = model.RemoveSpider(spider.FileId) + return + } + + // 爬虫文件没有变化 + if spider.Md5 == spider.OldMd5 { + return + } + + //爬虫文件有变化,先删除本地文件 + _ = os.Remove(filepath.Join( + viper.GetString("spider.path"), + spider.Name, + )) + + // 重新下载爬虫文件 + node, _ := GetCurrentNode() + key := node.Id.Hex() + "#" + spider.Id.Hex() + if _, err := database.RedisClient.HGet("spider", key); err == nil { + log.Infof("downloading spider") + return + } + _ = database.RedisClient.HSet("spider", key, key) + defer database.RedisClient.HDel("spider", key) + f, err := gf.OpenId(spider.FileId) defer f.Close() if err != nil { log.Errorf("open file id: " + spider.FileId.Hex() + ", spider id:" + spider.Id.Hex() + ", error: " + err.Error()) debug.PrintStack() - // 爬虫和文件没有对应,则删除爬虫 - _ = model.RemoveSpider(spider.Id) - return err + return } // 生成唯一ID @@ -143,7 +157,7 @@ func PublishSpider(spider model.Spider) (err error) { if !utils.Exists(tmpPath) { if err := os.MkdirAll(tmpPath, 0777); err != nil { log.Errorf("mkdir other.tmppath error: %v", err.Error()) - return err + return } } // 创建临时文件 @@ -152,7 +166,7 @@ func PublishSpider(spider model.Spider) (err error) { if err != nil { log.Errorf(err.Error()) debug.PrintStack() - return err + return } defer tmpFile.Close() @@ -160,7 +174,7 @@ func PublishSpider(spider model.Spider) (err error) { if _, err := io.Copy(tmpFile, f); err != nil { log.Errorf(err.Error()) debug.PrintStack() - return err + return } // 解压缩临时文件到目标文件夹 @@ -170,31 +184,33 @@ func PublishSpider(spider model.Spider) (err error) { if err := utils.DeCompress(tmpFile, dstPath); err != nil { log.Errorf(err.Error()) debug.PrintStack() - return err + return } // 关闭临时文件 if err := tmpFile.Close(); err != nil { log.Errorf(err.Error()) debug.PrintStack() - return err + return } // 删除临时文件 if err := os.Remove(tmpFilePath); err != nil { log.Errorf(err.Error()) debug.PrintStack() - return err + return } - return nil + // 修改spider的MD5和上一次的MD一致 + spider.OldMd5 = spider.Md5 + _ = spider.Save() } // 启动爬虫服务 func InitSpiderService() error { // 构造定时任务执行器 c := cron.New(cron.WithSeconds()) - if _, err := c.AddFunc("0 * * * * *", PublishAllSpidersJob); err != nil { + if _, err := c.AddFunc("0/15 * * * * *", PublishAllSpiders); err != nil { return err } // 启动定时任务 From 31be4c1839a6446126757ede70c6e4def5a52caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 16:43:32 +0800 Subject: [PATCH 10/81] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/spider.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/services/spider.go b/backend/services/spider.go index 4a32c023..f166b1b7 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -210,7 +210,7 @@ func PublishSpider(spider model.Spider) { func InitSpiderService() error { // 构造定时任务执行器 c := cron.New(cron.WithSeconds()) - if _, err := c.AddFunc("0/15 * * * * *", PublishAllSpiders); err != nil { + if _, err := c.AddFunc("0 * * * * *", PublishAllSpiders); err != nil { return err } // 启动定时任务 From 169a7d0df90ce109705c558d0feabbebdae73fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 16:49:01 +0800 Subject: [PATCH 11/81] =?UTF-8?q?fix=20=E5=8F=AF=E8=83=BD=E7=9B=B4?= =?UTF-8?q?=E6=8E=A5=E9=80=80=E5=87=BA=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/services/task.go b/backend/services/task.go index e8d66a2a..1c26b45a 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -140,7 +140,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e // 传入信号,此处阻塞 signal := <-ch log.Infof("cancel process signal: %s", signal) - if signal == constants.TaskCancel { + if signal == constants.TaskCancel && cmd.Process != nil { // 取消进程 if err := cmd.Process.Kill(); err != nil { log.Errorf("process kill error: %s", err.Error()) From 5f158ddb4474763604d2bbd544cff5a6046fe85d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 19:12:02 +0800 Subject: [PATCH 12/81] =?UTF-8?q?=E5=AE=8C=E6=88=90=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/main.go | 5 +- backend/model/node.go | 68 ++++++++++ backend/services/node.go | 78 +---------- backend/services/spider.go | 105 ++++----------- backend/services/spider_handler/spider.go | 127 ++++++++++++++++++ .../services/spider_handler/spider_test.go | 19 +++ backend/services/task.go | 4 +- backend/utils/file.go | 35 +++++ 8 files changed, 282 insertions(+), 159 deletions(-) create mode 100644 backend/services/spider_handler/spider.go create mode 100644 backend/services/spider_handler/spider_test.go diff --git a/backend/main.go b/backend/main.go index bf98674e..47196fe5 100644 --- a/backend/main.go +++ b/backend/main.go @@ -5,6 +5,7 @@ import ( "crawlab/database" "crawlab/lib/validate_bridge" "crawlab/middlewares" + "crawlab/model" "crawlab/routes" "crawlab/services" "github.com/apex/log" @@ -57,7 +58,7 @@ func main() { } log.Info("初始化Redis数据库成功") - if services.IsMaster() { + if model.IsMaster() { // 初始化定时任务 if err := services.InitScheduler(); err != nil { log.Error("init scheduler error:" + err.Error()) @@ -99,7 +100,7 @@ func main() { log.Info("初始化用户服务成功") // 以下为主节点服务 - if services.IsMaster() { + if model.IsMaster() { // 中间件 app.Use(middlewares.CORSMiddleware()) //app.Use(middlewares.AuthorizationMiddleware()) diff --git a/backend/model/node.go b/backend/model/node.go index 6211115c..7af93dbe 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -7,6 +7,7 @@ import ( "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" + "github.com/spf13/viper" "runtime/debug" "time" ) @@ -30,6 +31,73 @@ type Node struct { UpdateTsUnix int64 `json:"update_ts_unix" bson:"update_ts_unix"` } +const ( + Yes = "Y" + No = "N" +) + +// 当前节点是否为主节点 +func IsMaster() bool { + return viper.GetString("server.master") == Yes +} + +// 获取本机节点 +func GetCurrentNode() (Node, error) { + // 获得注册的key值 + key, err := register.GetRegister().GetKey() + if err != nil { + return Node{}, err + } + + // 从数据库中获取当前节点 + var node Node + errNum := 0 + for { + // 如果错误次数超过10次 + if errNum >= 10 { + panic("cannot get current node") + } + + // 尝试获取节点 + node, err = GetNodeByKey(key) + // 如果获取失败 + if err != nil { + // 如果为主节点,表示为第一次注册,插入节点信息 + if IsMaster() { + // 获取本机信息 + ip, mac, key, err := GetNodeBaseInfo() + if err != nil { + debug.PrintStack() + return node, err + } + + // 生成节点 + node = Node{ + Key: key, + Id: bson.NewObjectId(), + Ip: ip, + Name: ip, + Mac: mac, + IsMaster: true, + } + if err := node.Add(); err != nil { + return node, err + } + return node, nil + } + // 增加错误次数 + errNum++ + + // 5秒后重试 + time.Sleep(5 * time.Second) + continue + } + // 跳出循环 + break + } + return node, nil +} + func (n *Node) Save() error { s, c := database.GetCol("nodes") defer s.Close() diff --git a/backend/services/node.go b/backend/services/node.go index 5526cb01..e3397e74 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -14,7 +14,6 @@ import ( "github.com/apex/log" "github.com/globalsign/mgo/bson" "github.com/gomodule/redigo/redis" - "github.com/spf13/viper" "runtime/debug" "time" ) @@ -28,77 +27,10 @@ type Data struct { UpdateTsUnix int64 `json:"update_ts_unix"` } -const ( - Yes = "Y" - No = "N" -) - -// 获取本机节点 -func GetCurrentNode() (model.Node, error) { - // 获得注册的key值 - key, err := register.GetRegister().GetKey() - if err != nil { - return model.Node{}, err - } - - // 从数据库中获取当前节点 - var node model.Node - errNum := 0 - for { - // 如果错误次数超过10次 - if errNum >= 10 { - panic("cannot get current node") - } - - // 尝试获取节点 - node, err = model.GetNodeByKey(key) - // 如果获取失败 - if err != nil { - // 如果为主节点,表示为第一次注册,插入节点信息 - if IsMaster() { - // 获取本机信息 - ip, mac, key, err := model.GetNodeBaseInfo() - if err != nil { - debug.PrintStack() - return node, err - } - - // 生成节点 - node = model.Node{ - Key: key, - Id: bson.NewObjectId(), - Ip: ip, - Name: ip, - Mac: mac, - IsMaster: true, - } - if err := node.Add(); err != nil { - return node, err - } - return node, nil - } - // 增加错误次数 - errNum++ - - // 5秒后重试 - time.Sleep(5 * time.Second) - continue - } - // 跳出循环 - break - } - return node, nil -} - -// 当前节点是否为主节点 -func IsMaster() bool { - return viper.GetString("server.master") == Yes -} - // 所有调用IsMasterNode的方法,都永远会在master节点执行,所以GetCurrentNode方法返回永远是master节点 // 该ID的节点是否为主节点 func IsMasterNode(id string) bool { - curNode, _ := GetCurrentNode() + curNode, _ := model.GetCurrentNode() node, _ := model.GetNode(bson.ObjectIdHex(id)) return curNode.Id == node.Id } @@ -223,7 +155,7 @@ func UpdateNodeData() { Key: key, Mac: mac, Ip: ip, - Master: IsMaster(), + Master: model.IsMaster(), UpdateTs: time.Now(), UpdateTsUnix: time.Now().Unix(), } @@ -297,13 +229,13 @@ func InitNodeService() error { UpdateNodeData() // 获取当前节点 - node, err := GetCurrentNode() + node, err := model.GetCurrentNode() if err != nil { log.Errorf(err.Error()) return err } ctx := context.Background() - if IsMaster() { + if model.IsMaster() { // 如果为主节点,订阅主节点通信频道 channel := "nodes:master" err := database.RedisClient.Subscribe(ctx, MasterNodeCallback, channel) @@ -320,7 +252,7 @@ func InitNodeService() error { } // 如果为主节点,每30秒刷新所有节点信息 - if IsMaster() { + if model.IsMaster() { spec := "*/10 * * * * *" if _, err := c.AddFunc(spec, UpdateNodeStatus); err != nil { debug.PrintStack() diff --git a/backend/services/spider.go b/backend/services/spider.go index f166b1b7..34693bf5 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -5,14 +5,13 @@ import ( "crawlab/database" "crawlab/lib/cron" "crawlab/model" + "crawlab/services/spider_handler" "crawlab/utils" "fmt" "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" - "github.com/satori/go.uuid" "github.com/spf13/viper" - "io" "os" "path/filepath" "runtime/debug" @@ -102,6 +101,7 @@ func PublishAllSpiders() { if len(spiders) == 0 { return } + log.Infof("start sync spider to local, total: %d", len(spiders)) // 遍历爬虫列表 for _, spider := range spiders { // 异步发布爬虫 @@ -113,104 +113,45 @@ func PublishAllSpiders() { // 发布爬虫 func PublishSpider(spider model.Spider) { - s, gf := database.GetGridFs("files") - defer s.Close() - + // 查询gf file,不存在则删除 gfFile := model.GetGridFs(spider.FileId) if gfFile == nil { _ = model.RemoveSpider(spider.FileId) return } + spiderSync := spider_handler.SpiderSync{} + defer spiderSync.CreateMd5File(gfFile.Md5, spider.Name) - // 爬虫文件没有变化 - if spider.Md5 == spider.OldMd5 { + //目录不存在,则直接下载 + path := filepath.Join(viper.GetString("spider.path"), spider.Name) + if !utils.Exists(path) { + log.Infof("path not found: %s", path) + spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) return } - - //爬虫文件有变化,先删除本地文件 - _ = os.Remove(filepath.Join( - viper.GetString("spider.path"), - spider.Name, - )) - - // 重新下载爬虫文件 - node, _ := GetCurrentNode() - key := node.Id.Hex() + "#" + spider.Id.Hex() - if _, err := database.RedisClient.HGet("spider", key); err == nil { - log.Infof("downloading spider") + // md5文件不存在,则下载 + md5 := filepath.Join(path, spider_handler.Md5File) + if !utils.Exists(md5) { + log.Infof("md5.txt file not found: %s", md5) + spiderSync.RemoveSpiderFile(spider.Name) + spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) return } - _ = database.RedisClient.HSet("spider", key, key) - defer database.RedisClient.HDel("spider", key) - - f, err := gf.OpenId(spider.FileId) - defer f.Close() - if err != nil { - log.Errorf("open file id: " + spider.FileId.Hex() + ", spider id:" + spider.Id.Hex() + ", error: " + err.Error()) - debug.PrintStack() + // md5值不一样,则下载 + md5Str := utils.ReadFile(md5) + if spider.Md5 != md5Str { + log.Infof("md5 is different: %s:%s ", md5Str, md5) + spiderSync.RemoveSpiderFile(spider.Name) + spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) return } - - // 生成唯一ID - randomId := uuid.NewV4() - tmpPath := viper.GetString("other.tmppath") - if !utils.Exists(tmpPath) { - if err := os.MkdirAll(tmpPath, 0777); err != nil { - log.Errorf("mkdir other.tmppath error: %v", err.Error()) - return - } - } - // 创建临时文件 - tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - tmpFile, err := os.OpenFile(tmpFilePath, os.O_CREATE|os.O_WRONLY, os.ModePerm) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - defer tmpFile.Close() - - // 将该文件写入临时文件 - if _, err := io.Copy(tmpFile, f); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 解压缩临时文件到目标文件夹 - dstPath := filepath.Join( - viper.GetString("spider.path"), - ) - if err := utils.DeCompress(tmpFile, dstPath); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 关闭临时文件 - if err := tmpFile.Close(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 删除临时文件 - if err := os.Remove(tmpFilePath); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 修改spider的MD5和上一次的MD一致 - spider.OldMd5 = spider.Md5 - _ = spider.Save() } // 启动爬虫服务 func InitSpiderService() error { // 构造定时任务执行器 c := cron.New(cron.WithSeconds()) - if _, err := c.AddFunc("0 * * * * *", PublishAllSpiders); err != nil { + if _, err := c.AddFunc("0/15 * * * * *", PublishAllSpiders); err != nil { return err } // 启动定时任务 diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go new file mode 100644 index 00000000..c78b3d5b --- /dev/null +++ b/backend/services/spider_handler/spider.go @@ -0,0 +1,127 @@ +package spider_handler + +import ( + "crawlab/database" + "crawlab/model" + "crawlab/utils" + "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "github.com/satori/go.uuid" + "github.com/spf13/viper" + "io" + "os" + "path/filepath" + "runtime/debug" +) + +const ( + Md5File = "md5.txt" +) + +type SpiderSync struct { +} + +func (s *SpiderSync) CreateMd5File(md5 string, spiderName string) { + path := filepath.Join(viper.GetString("spider.path"), spiderName) + utils.CreateFilePath(path) + + fileName := filepath.Join(path, Md5File) + file := utils.OpenFile(fileName) + defer file.Close() + if file != nil { + if _, err := file.WriteString(md5); err != nil { + log.Errorf("file write string error: %s", err.Error()) + debug.PrintStack() + } + } +} + +// 获得下载锁的key +func (s *SpiderSync) GetLockDownloadKey(spiderId string) string { + node, _ := model.GetCurrentNode() + return node.Id.Hex() + "#" + spiderId +} + +// 删除本地文件 +func (s *SpiderSync) RemoveSpiderFile(spiderName string) { + //爬虫文件有变化,先删除本地文件 + _ = os.Remove(filepath.Join( + viper.GetString("spider.path"), + spiderName, + )) +} + +// 检测是否已经下载中 +func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, string) { + key := s.GetLockDownloadKey(spiderId) + if _, err := database.RedisClient.HGet("spider", key); err == nil { + log.Infof("downloading spider file, spider_id: %s, file_id:%s", spiderId, fileId) + return true, key + } + return false, key +} + +// 下载爬虫 +func (s *SpiderSync) Download(spiderId string, fileId string) { + + session, gf := database.GetGridFs("files") + defer session.Close() + + f, err := gf.OpenId(bson.ObjectIdHex(fileId)) + defer f.Close() + if err != nil { + log.Errorf("open file id: " + fileId + ", spider id:" + spiderId + ", error: " + err.Error()) + debug.PrintStack() + return + } + + // 生成唯一ID + randomId := uuid.NewV4() + tmpPath := viper.GetString("other.tmppath") + if !utils.Exists(tmpPath) { + if err := os.MkdirAll(tmpPath, 0777); err != nil { + log.Errorf("mkdir other.tmppath error: %v", err.Error()) + return + } + } + // 创建临时文件 + tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") + tmpFile, err := os.OpenFile(tmpFilePath, os.O_CREATE|os.O_WRONLY, os.ModePerm) + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + defer tmpFile.Close() + + // 将该文件写入临时文件 + if _, err := io.Copy(tmpFile, f); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + + // 解压缩临时文件到目标文件夹 + dstPath := filepath.Join( + viper.GetString("spider.path"), + ) + if err := utils.DeCompress(tmpFile, dstPath); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + + // 关闭临时文件 + if err := tmpFile.Close(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + + // 删除临时文件 + if err := os.Remove(tmpFilePath); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } +} diff --git a/backend/services/spider_handler/spider_test.go b/backend/services/spider_handler/spider_test.go new file mode 100644 index 00000000..5289e4d8 --- /dev/null +++ b/backend/services/spider_handler/spider_test.go @@ -0,0 +1,19 @@ +package spider_handler + +import ( + "crawlab/config" + "github.com/apex/log" + "testing" +) + +func init() { + if err := config.InitConfig("../../conf/config.yml"); err != nil { + log.Fatal("Init config failed") + } + log.Infof("初始化配置成功") +} + +func TestSpiderSync_CreateMd5File(t *testing.T) { + s := SpiderSync{} + s.CreateMd5File("asssss", "gongyu_abc") +} diff --git a/backend/services/task.go b/backend/services/task.go index 1c26b45a..2a68f10e 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -258,7 +258,7 @@ func ExecuteTask(id int) { tic := time.Now() // 获取当前节点 - node, err := GetCurrentNode() + node, err := model.GetCurrentNode() if err != nil { log.Errorf(GetWorkerPrefix(id) + err.Error()) return @@ -464,7 +464,7 @@ func CancelTask(id string) (err error) { } // 获取当前节点(默认当前节点为主节点) - node, err := GetCurrentNode() + node, err := model.GetCurrentNode() if err != nil { log.Errorf("get current node error: %s", err.Error()) debug.PrintStack() diff --git a/backend/utils/file.go b/backend/utils/file.go index dda73c13..b5cf059e 100644 --- a/backend/utils/file.go +++ b/backend/utils/file.go @@ -2,6 +2,7 @@ package utils import ( "archive/zip" + "bufio" "github.com/apex/log" "io" "os" @@ -9,6 +10,40 @@ import ( "runtime/debug" ) +func ReadFile(fileName string) string { + file := OpenFile(fileName) + defer file.Close() + buf := bufio.NewReader(file) + line, err := buf.ReadString('\n') + if err != nil { + log.Errorf("read file error: %s", err.Error()) + return "" + } + return line + +} + +// 创建文件 +func OpenFile(fileName string) *os.File { + file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY, os.ModePerm) + if err != nil { + log.Errorf("create file error: %s, file_name: %s", err.Error(), fileName) + debug.PrintStack() + return nil + } + return file +} + +// 创建文件夹 +func CreateFilePath(filePath string) { + if !Exists(filePath) { + if err := os.MkdirAll(filePath, os.ModePerm); err != nil { + log.Errorf("create file error: %s, file_path: %s", err.Error(), filePath) + debug.PrintStack() + } + } +} + // 判断所给路径文件/文件夹是否存在 func Exists(path string) bool { _, err := os.Stat(path) //os.Stat获取文件信息 From 3845e5761298da97cb19ef10435c60856986d678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 19:44:12 +0800 Subject: [PATCH 13/81] =?UTF-8?q?fix=20=E4=B8=8A=E4=BC=A0=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/spider.go | 2 -- backend/routes/spider.go | 7 +---- backend/services/spider.go | 38 +++++++++++++---------- backend/services/spider_handler/spider.go | 35 +++++++++++++-------- 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/backend/model/spider.go b/backend/model/spider.go index c498287d..dd7d505c 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -24,8 +24,6 @@ type Spider struct { Site string `json:"site"` // 爬虫网站 Envs []Env `json:"envs" bson:"envs"` // 环境变量 Remark string `json:"remark"` // 备注 - Md5 string `json:"md_5" bson:"md5"` // ZIP文件的MD5 - OldMd5 string `json:"old_md_5" bson:"old_md5"` //上一次的MD5值 // 自定义爬虫 Src string `json:"src" bson:"src"` // 源码位置 Cmd string `json:"cmd" bson:"cmd"` // 执行命令 diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 9b8bd50d..76f89bf7 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -148,7 +148,7 @@ func PutSpider(c *gin.Context) { } // 上传到GridFs - fid, md5, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) + fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) if err != nil { log.Errorf("upload to grid fs error: %s", err.Error()) debug.PrintStack() @@ -166,13 +166,8 @@ func PutSpider(c *gin.Context) { Type: constants.Customized, Src: filepath.Join(srcPath, spiderName), FileId: fid, - Md5: md5, } _ = spider.Add() - } else { - spider.OldMd5 = spider.Md5 - spider.Md5 = md5 - _ = spider.Save() } c.JSON(http.StatusOK, Response{ diff --git a/backend/services/spider.go b/backend/services/spider.go index 34693bf5..90925346 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -29,7 +29,7 @@ type SpiderUploadMessage struct { } // 上传zip文件到GridFS -func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 string, err error) { +func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) { fid = "" // 获取MongoDB GridFS连接 @@ -47,7 +47,7 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 st err = ReadFileByStep(filePath, WriteToGridFS, f) if err != nil { debug.PrintStack() - return "", "", err + return "", err } // 删除zip文件 @@ -57,12 +57,12 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, md5 st } // 关闭文件,提交写入 if err = f.Close(); err != nil { - return "", "", err + return "", err } // 文件ID fid = f.Id().(bson.ObjectId) - return fid, f.MD5(), nil + return fid, nil } func WriteToGridFS(content []byte, f *mgo.GridFile) { @@ -105,9 +105,9 @@ func PublishAllSpiders() { // 遍历爬虫列表 for _, spider := range spiders { // 异步发布爬虫 - go func() { - PublishSpider(spider) - }() + go func(s model.Spider) { + PublishSpider(s) + }(spider) } } @@ -119,30 +119,34 @@ func PublishSpider(spider model.Spider) { _ = model.RemoveSpider(spider.FileId) return } - spiderSync := spider_handler.SpiderSync{} - defer spiderSync.CreateMd5File(gfFile.Md5, spider.Name) + spiderSync := spider_handler.SpiderSync{ + Spider: spider, + } //目录不存在,则直接下载 path := filepath.Join(viper.GetString("spider.path"), spider.Name) if !utils.Exists(path) { log.Infof("path not found: %s", path) - spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) return } // md5文件不存在,则下载 md5 := filepath.Join(path, spider_handler.Md5File) if !utils.Exists(md5) { - log.Infof("md5.txt file not found: %s", md5) - spiderSync.RemoveSpiderFile(spider.Name) - spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) + log.Infof("md5 file not found: %s", md5) + spiderSync.RemoveSpiderFile() + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) return } // md5值不一样,则下载 md5Str := utils.ReadFile(md5) - if spider.Md5 != md5Str { - log.Infof("md5 is different: %s:%s ", md5Str, md5) - spiderSync.RemoveSpiderFile(spider.Name) - spiderSync.Download(spider.Id.Hex(), spider.FileId.Hex()) + if gfFile.Md5 != md5Str { + log.Infof("md5 is different, fileName=%s, file-md5=%s , gf-file-md5=%s ", spider.Name, md5Str, gfFile.Md5) + spiderSync.RemoveSpiderFile() + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) return } } diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index c78b3d5b..87dedeb0 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -19,17 +19,18 @@ const ( ) type SpiderSync struct { + Spider model.Spider } -func (s *SpiderSync) CreateMd5File(md5 string, spiderName string) { - path := filepath.Join(viper.GetString("spider.path"), spiderName) +func (s *SpiderSync) CreateMd5File(md5 string) { + path := filepath.Join(viper.GetString("spider.path"), s.Spider.Name) utils.CreateFilePath(path) fileName := filepath.Join(path, Md5File) file := utils.OpenFile(fileName) defer file.Close() if file != nil { - if _, err := file.WriteString(md5); err != nil { + if _, err := file.WriteString(md5 + "\n"); err != nil { log.Errorf("file write string error: %s", err.Error()) debug.PrintStack() } @@ -43,11 +44,11 @@ func (s *SpiderSync) GetLockDownloadKey(spiderId string) string { } // 删除本地文件 -func (s *SpiderSync) RemoveSpiderFile(spiderName string) { +func (s *SpiderSync) RemoveSpiderFile() { //爬虫文件有变化,先删除本地文件 _ = os.Remove(filepath.Join( viper.GetString("spider.path"), - spiderName, + s.Spider.Name, )) } @@ -62,7 +63,16 @@ func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, str } // 下载爬虫 -func (s *SpiderSync) Download(spiderId string, fileId string) { +func (s *SpiderSync) Download() { + spiderId := s.Spider.Id.Hex() + fileId := s.Spider.FileId.Hex() + isDownloading, key := s.CheckDownLoading(spiderId, fileId) + if isDownloading { + log.Infof("spider is downloading, spider_id: %s", spiderId) + return + } else { + _ = database.RedisClient.HSet("spider", key, key) + } session, gf := database.GetGridFs("files") defer session.Close() @@ -85,18 +95,14 @@ func (s *SpiderSync) Download(spiderId string, fileId string) { } } // 创建临时文件 + tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - tmpFile, err := os.OpenFile(tmpFilePath, os.O_CREATE|os.O_WRONLY, os.ModePerm) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } + tmpFile := utils.OpenFile(tmpFilePath) defer tmpFile.Close() // 将该文件写入临时文件 if _, err := io.Copy(tmpFile, f); err != nil { - log.Errorf(err.Error()) + log.Errorf("copy file error: %s, file_id: %s", err.Error(), f.Id()) debug.PrintStack() return } @@ -124,4 +130,7 @@ func (s *SpiderSync) Download(spiderId string, fileId string) { debug.PrintStack() return } + + log.Infof("del key : %s", key) + _ = database.RedisClient.HDel("spider", key) } From bdeaa587f9d89c8126ec32666879ff621425edca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 20:52:45 +0800 Subject: [PATCH 14/81] =?UTF-8?q?=E7=88=AC=E8=99=AB=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/package.json | 2 +- frontend/src/store/modules/spider.js | 14 ++-- frontend/src/views/spider/SpiderList.vue | 96 ++++++++---------------- frontend/src/views/task/TaskList.vue | 44 +++++------ 4 files changed, 64 insertions(+), 92 deletions(-) diff --git a/frontend/package.json b/frontend/package.json index e3bc84f8..20e40c7c 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -3,7 +3,7 @@ "version": "0.2.3", "private": true, "scripts": { - "serve": "vue-cli-service serve --ip=0.0.0.0", + "serve": "vue-cli-service serve --ip=0.0.0.0 --mode=development", "serve:prod": "vue-cli-service serve --mode=production --ip=0.0.0.0", "config": "vue ui", "build:dev": "vue-cli-service build --mode development", diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index b7bccd0d..07a0bac3 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -4,6 +4,8 @@ const state = { // list of spiders spiderList: [], + spiderTotal: 0, + // active spider data spiderForm: {}, @@ -38,6 +40,9 @@ const state = { const getters = {} const mutations = { + SET_SPIDER_TOTAL (state, value) { + state.spiderTotal = value + }, SET_SPIDER_FORM (state, value) { state.spiderForm = value }, @@ -71,14 +76,11 @@ const mutations = { } const actions = { - getSpiderList ({ state, commit }) { - let params = {} - if (state.filterSite) { - params.site = state.filterSite - } + getSpiderList ({ state, commit }, params = {}) { return request.get('/spiders', params) .then(response => { - commit('SET_SPIDER_LIST', response.data.data) + commit('SET_SPIDER_LIST', response.data.data.list) + commit('SET_SPIDER_TOTAL', response.data.data.total) }) }, editSpider ({ state, dispatch }) { diff --git a/frontend/src/views/spider/SpiderList.vue b/frontend/src/views/spider/SpiderList.vue index 6ff5cb35..63e0a817 100644 --- a/frontend/src/views/spider/SpiderList.vue +++ b/frontend/src/views/spider/SpiderList.vue @@ -108,19 +108,10 @@
- - - - - -
- - + + +
@@ -143,7 +134,7 @@ - @@ -258,7 +249,7 @@ export default { data () { return { pagination: { - pageNum: 0, + pageNum: 1, pageSize: 10 }, importLoading: false, @@ -284,8 +275,6 @@ export default { { name: 'create_ts', label: 'Create Time', width: '140' }, { name: 'update_ts', label: 'Update Time', width: '140' }, { name: 'remark', label: 'Remark', width: '140' } - // { name: 'last_7d_tasks', label: 'Last 7-Day Tasks', width: '80' }, - // { name: 'last_5_errors', label: 'Last 5-Run Errors', width: '80' } ], spiderFormRules: { name: [{ required: true, message: 'Required Field', trigger: 'change' }] @@ -297,45 +286,24 @@ export default { ...mapState('spider', [ 'importForm', 'spiderList', - 'spiderForm' + 'spiderForm', + 'spiderTotal' ]), ...mapGetters('user', [ 'token' - ]), - filteredTableData () { - return this.spiderList - .filter(d => { - if (this.filterSite) { - return d.site === this.filterSite - } - return true - }) - .filter((d, index) => { - return (this.pagination.pageSize * (this.pagination.pageNum - 1)) <= index && (index < this.pagination.pageSize * this.pagination.pageNum) - }) - // .filter(d => { - // if (!this.filter.keyword) return true - // for (let i = 0; i < this.columns.length; i++) { - // const colName = this.columns[i].name - // if (d[colName] && d[colName].toLowerCase().indexOf(this.filter.keyword.toLowerCase()) > -1) { - // return true - // } - // } - // return false - // }) - }, - filterSite: { - get () { - return this.$store.state.spider.filterSite - }, - set (value) { - this.$store.commit('spider/SET_FILTER_SITE', value) - } - } + ]) }, methods: { - onSearch (value) { - console.log(value) + onPageSizeChange (val) { + this.pagination.pageSize = val + this.getList() + }, + onPageNumChange (val) { + this.pagination.pageNum = val + this.getList() + }, + onSearch () { + this.getList() }, onAdd () { // this.addDialogVisible = true @@ -353,7 +321,7 @@ export default { this.$st.sendEv('爬虫', '添加爬虫-自定义爬虫') }, onRefresh () { - this.$store.dispatch('spider/getSpiderList') + this.getList() this.$st.sendEv('爬虫', '刷新') }, onSubmit () { @@ -376,9 +344,6 @@ export default { this.$store.commit('spider/SET_SPIDER_FORM', {}) this.dialogVisible = false }, - onAddCancel () { - this.addDialogVisible = false - }, onDialogClose () { this.$store.commit('spider/SET_SPIDER_FORM', {}) this.dialogVisible = false @@ -422,9 +387,6 @@ export default { this.$router.push('/spiders/' + row._id) this.$st.sendEv('爬虫', '查看') }, - onPageChange () { - this.$store.dispatch('spider/getSpiderList') - }, onImport () { this.$refs.importForm.validate(valid => { if (valid) { @@ -433,7 +395,7 @@ export default { this.$store.dispatch('spider/importGithub') .then(response => { this.$message.success('Import repo successfully') - this.$store.dispatch('spider/getSpiderList') + this.getList() }) .catch(response => { this.$message.error(response.data.error) @@ -501,7 +463,7 @@ export default { // fetch spider list setTimeout(() => { - this.$store.dispatch('spider/getSpiderList') + this.getList() }, 500) // close popup @@ -515,6 +477,14 @@ export default { if (column.label !== this.$t('Action')) { this.onView(row) } + }, + getList () { + let params = { + pageNum: this.pagination.pageNum, + pageSize: this.pagination.pageSize, + keyword: this.filter.keyword + } + this.$store.dispatch('spider/getSpiderList', params) } }, created () { @@ -522,7 +492,7 @@ export default { this.$store.commit('spider/SET_FILTER_SITE', this.$route.params.domain) // fetch spider list - this.$store.dispatch('spider/getSpiderList') + this.getList() }, mounted () { } diff --git a/frontend/src/views/task/TaskList.vue b/frontend/src/views/task/TaskList.vue index 5ad1b14f..9db3623d 100644 --- a/frontend/src/views/task/TaskList.vue +++ b/frontend/src/views/task/TaskList.vue @@ -4,28 +4,28 @@
- - - - - - - - {{$t('Search')}} - + + + + + + + + + + + + + + + + + + + + + +
From 0ddb29488503b60695279a2e20d3cecf408ed527 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 20:53:05 +0800 Subject: [PATCH 15/81] =?UTF-8?q?=E5=AE=8C=E6=88=90=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/spider.go | 8 +++++--- backend/routes/spider.go | 11 +++++++++-- backend/services/spider.go | 5 ++--- backend/services/spider_handler/spider.go | 3 --- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/backend/model/spider.go b/backend/model/spider.go index dd7d505c..88fdc3ca 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -93,7 +93,7 @@ func (spider *Spider) GetLastTask() (Task, error) { return tasks[0], nil } -func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) { +func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, error) { s, c := database.GetCol("spiders") defer s.Close() @@ -101,7 +101,7 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) { spiders := []Spider{} if err := c.Find(filter).Skip(skip).Limit(limit).Sort("+name").All(&spiders); err != nil { debug.PrintStack() - return spiders, err + return spiders, 0, err } // 遍历爬虫列表 @@ -119,7 +119,9 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) { spiders[i].LastStatus = task.Status } - return spiders, nil + count, _ := c.Find(filter).Count() + + return spiders, count, nil } func GetSpiderByName(name string) *Spider { diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 76f89bf7..0e68bc54 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -24,7 +24,14 @@ import ( ) func GetSpiderList(c *gin.Context) { - results, err := model.GetSpiderList(nil, 0, 0) + pageNumStr, _ := c.GetQuery("pageNum") + pageSizeStr, _ := c.GetQuery("pageSize") + keyword, _ := c.GetQuery("keyword") + pageNum, _ := strconv.Atoi(pageNumStr) + pageSize, _ := strconv.Atoi(pageSizeStr) + skip := pageSize * (pageNum - 1) + filter := bson.M{"name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}}} + results, count, err := model.GetSpiderList(filter, skip, pageSize) if err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -32,7 +39,7 @@ func GetSpiderList(c *gin.Context) { c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", - Data: results, + Data: bson.M{"list": results, "total": count}, }) } diff --git a/backend/services/spider.go b/backend/services/spider.go index 90925346..3e87eef3 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -97,7 +97,7 @@ func ReadFileByStep(filePath string, handle func([]byte, *mgo.GridFile), fileCre // 发布所有爬虫 func PublishAllSpiders() { // 获取爬虫列表 - spiders, _ := model.GetSpiderList(nil, 0, constants.Infinite) + spiders, _, _ := model.GetSpiderList(nil, 0, constants.Infinite) if len(spiders) == 0 { return } @@ -143,7 +143,6 @@ func PublishSpider(spider model.Spider) { // md5值不一样,则下载 md5Str := utils.ReadFile(md5) if gfFile.Md5 != md5Str { - log.Infof("md5 is different, fileName=%s, file-md5=%s , gf-file-md5=%s ", spider.Name, md5Str, gfFile.Md5) spiderSync.RemoveSpiderFile() spiderSync.Download() spiderSync.CreateMd5File(gfFile.Md5) @@ -155,7 +154,7 @@ func PublishSpider(spider model.Spider) { func InitSpiderService() error { // 构造定时任务执行器 c := cron.New(cron.WithSeconds()) - if _, err := c.AddFunc("0/15 * * * * *", PublishAllSpiders); err != nil { + if _, err := c.AddFunc("0 * * * * *", PublishAllSpiders); err != nil { return err } // 启动定时任务 diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index 87dedeb0..1d8a8ae1 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -56,7 +56,6 @@ func (s *SpiderSync) RemoveSpiderFile() { func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, string) { key := s.GetLockDownloadKey(spiderId) if _, err := database.RedisClient.HGet("spider", key); err == nil { - log.Infof("downloading spider file, spider_id: %s, file_id:%s", spiderId, fileId) return true, key } return false, key @@ -68,7 +67,6 @@ func (s *SpiderSync) Download() { fileId := s.Spider.FileId.Hex() isDownloading, key := s.CheckDownLoading(spiderId, fileId) if isDownloading { - log.Infof("spider is downloading, spider_id: %s", spiderId) return } else { _ = database.RedisClient.HSet("spider", key, key) @@ -131,6 +129,5 @@ func (s *SpiderSync) Download() { return } - log.Infof("del key : %s", key) _ = database.RedisClient.HDel("spider", key) } From 141ec6a000be63f4868c58f2e880a336885dd651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 20:54:50 +0800 Subject: [PATCH 16/81] =?UTF-8?q?=E5=AE=8C=E6=88=90=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/utils/file.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/utils/file.go b/backend/utils/file.go index b5cf059e..1888ed46 100644 --- a/backend/utils/file.go +++ b/backend/utils/file.go @@ -25,7 +25,7 @@ func ReadFile(fileName string) string { // 创建文件 func OpenFile(fileName string) *os.File { - file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY, os.ModePerm) + file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY|os.O_APPEND, os.ModePerm) if err != nil { log.Errorf("create file error: %s, file_name: %s", err.Error(), fileName) debug.PrintStack() From 698d240bd63427319ed6b3cda2ec57d3a09fe699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 26 Sep 2019 21:13:25 +0800 Subject: [PATCH 17/81] fix bug --- backend/services/spider.go | 2 +- backend/services/spider_handler/spider.go | 1 - backend/utils/file.go | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/services/spider.go b/backend/services/spider.go index 3e87eef3..ea3d374f 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -116,7 +116,7 @@ func PublishSpider(spider model.Spider) { // 查询gf file,不存在则删除 gfFile := model.GetGridFs(spider.FileId) if gfFile == nil { - _ = model.RemoveSpider(spider.FileId) + _ = model.RemoveSpider(spider.Id) return } spiderSync := spider_handler.SpiderSync{ diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index 1d8a8ae1..d5e377df 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -93,7 +93,6 @@ func (s *SpiderSync) Download() { } } // 创建临时文件 - tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") tmpFile := utils.OpenFile(tmpFilePath) defer tmpFile.Close() diff --git a/backend/utils/file.go b/backend/utils/file.go index 1888ed46..282bee14 100644 --- a/backend/utils/file.go +++ b/backend/utils/file.go @@ -25,7 +25,7 @@ func ReadFile(fileName string) string { // 创建文件 func OpenFile(fileName string) *os.File { - file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY|os.O_APPEND, os.ModePerm) + file, err := os.OpenFile(fileName, os.O_CREATE|os.O_RDWR, os.ModePerm) if err != nil { log.Errorf("create file error: %s, file_name: %s", err.Error(), fileName) debug.PrintStack() From e728be6328abdaac2b4d00d52c1efe6a5e4c623b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 27 Sep 2019 10:14:48 +0800 Subject: [PATCH 18/81] fix --- backend/model/spider.go | 10 ++--- backend/services/spider_handler/spider.go | 1 + .../services/spider_handler/spider_test.go | 38 ++++++++++++++++++- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/backend/model/spider.go b/backend/model/spider.go index 88fdc3ca..5d8fa482 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -18,12 +18,12 @@ type Spider struct { Id bson.ObjectId `json:"_id" bson:"_id"` // 爬虫ID Name string `json:"name" bson:"name"` // 爬虫名称(唯一) DisplayName string `json:"display_name" bson:"display_name"` // 爬虫显示名称 - Type string `json:"type"` // 爬虫类别 + Type string `json:"type" bson:"type"` // 爬虫类别 FileId bson.ObjectId `json:"file_id" bson:"file_id"` // GridFS文件ID - Col string `json:"col"` // 结果储存位置 - Site string `json:"site"` // 爬虫网站 + Col string `json:"col" bson:"col"` // 结果储存位置 + Site string `json:"site" bson:"site"` // 爬虫网站 Envs []Env `json:"envs" bson:"envs"` // 环境变量 - Remark string `json:"remark"` // 备注 + Remark string `json:"remark" bson:"remark"` // 备注 // 自定义爬虫 Src string `json:"src" bson:"src"` // 源码位置 Cmd string `json:"cmd" bson:"cmd"` // 执行命令 @@ -130,7 +130,7 @@ func GetSpiderByName(name string) *Spider { var result *Spider if err := c.Find(bson.M{"name": name}).One(result); err != nil { - log.Errorf("get spider error: %s", err.Error()) + log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name) debug.PrintStack() return result } diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index d5e377df..ac857e0a 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -107,6 +107,7 @@ func (s *SpiderSync) Download() { // 解压缩临时文件到目标文件夹 dstPath := filepath.Join( viper.GetString("spider.path"), + s.Spider.Name, ) if err := utils.DeCompress(tmpFile, dstPath); err != nil { log.Errorf(err.Error()) diff --git a/backend/services/spider_handler/spider_test.go b/backend/services/spider_handler/spider_test.go index 5289e4d8..f7bf97bf 100644 --- a/backend/services/spider_handler/spider_test.go +++ b/backend/services/spider_handler/spider_test.go @@ -2,18 +2,52 @@ package spider_handler import ( "crawlab/config" + "crawlab/database" + "crawlab/model" "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "runtime/debug" "testing" ) +var s SpiderSync + func init() { if err := config.InitConfig("../../conf/config.yml"); err != nil { log.Fatal("Init config failed") } log.Infof("初始化配置成功") + + // 初始化Mongodb数据库 + if err := database.InitMongo(); err != nil { + log.Error("init mongodb error:" + err.Error()) + debug.PrintStack() + panic(err) + } + log.Info("初始化Mongodb数据库成功") + + // 初始化Redis数据库 + if err := database.InitRedis(); err != nil { + log.Error("init redis error:" + err.Error()) + debug.PrintStack() + panic(err) + } + log.Info("初始化Redis数据库成功") + + s = SpiderSync{ + Spider: model.Spider{ + Id: bson.ObjectIdHex("5d8d5e4b44500b000150009c"), + Name: "scrapy-pre_sale", + FileId: bson.ObjectIdHex("5d8d5e4b44500b0001500098"), + Src: "/opt/crawlab/spiders/scrapy-pre_sale", + }, + } } func TestSpiderSync_CreateMd5File(t *testing.T) { - s := SpiderSync{} - s.CreateMd5File("asssss", "gongyu_abc") + s.CreateMd5File("this is md5") +} + +func TestSpiderSync_Download(t *testing.T) { + s.Download() } From 0847c6a991e8608fb6462fb5c25df5729865be62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 27 Sep 2019 10:28:46 +0800 Subject: [PATCH 19/81] fix --- backend/services/spider_handler/spider.go | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index ac857e0a..d5e377df 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -107,7 +107,6 @@ func (s *SpiderSync) Download() { // 解压缩临时文件到目标文件夹 dstPath := filepath.Join( viper.GetString("spider.path"), - s.Spider.Name, ) if err := utils.DeCompress(tmpFile, dstPath); err != nil { log.Errorf(err.Error()) From f5371f94a4121be2d69dcc833f02699374da2839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 27 Sep 2019 11:30:38 +0800 Subject: [PATCH 20/81] =?UTF-8?q?fix=20=E4=B8=8A=E4=BC=A0=E7=88=AC?= =?UTF-8?q?=E8=99=AB=E5=90=8D=E7=A7=B0=E9=94=99=E8=AF=AF=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/file.go | 16 ++++++++++++++++ backend/model/spider.go | 4 ++++ backend/routes/spider.go | 6 +++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/backend/model/file.go b/backend/model/file.go index f0968086..ca2c8ead 100644 --- a/backend/model/file.go +++ b/backend/model/file.go @@ -6,6 +6,7 @@ import ( "github.com/apex/log" "github.com/globalsign/mgo/bson" "os" + "runtime/debug" "time" ) @@ -25,6 +26,19 @@ type File struct { Size int64 `json:"size"` } +func GetAllGridFs() []*GridFs { + s, gf := database.GetGridFs("files") + defer s.Close() + + var files []*GridFs + if err := gf.Find(nil).All(&files); err != nil { + log.Errorf("get all files error: {}", err.Error()) + debug.PrintStack() + return nil + } + return files +} + func GetGridFs(id bson.ObjectId) *GridFs { s, gf := database.GetGridFs("files") defer s.Close() @@ -33,6 +47,7 @@ func GetGridFs(id bson.ObjectId) *GridFs { err := gf.Find(bson.M{"_id": id}).One(&gfFile) if err != nil { log.Errorf("get gf file error: %s, file_id: %s", err.Error(), id.Hex()) + debug.PrintStack() return nil } return &gfFile @@ -41,6 +56,7 @@ func GetGridFs(id bson.ObjectId) *GridFs { func RemoveFile(path string) error { if !utils.Exists(path) { log.Info("file not found: " + path) + debug.PrintStack() return nil } if err := os.Remove(path); err != nil { diff --git a/backend/model/spider.go b/backend/model/spider.go index 5d8fa482..c4e94349 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -124,6 +124,10 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, erro return spiders, count, nil } +func GetSpiderByFileId(fileId bson.ObjectId) { + +} + func GetSpiderByName(name string) *Spider { s, c := database.GetCol("spiders") defer s.Close() diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 0e68bc54..9357ca5e 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -161,8 +161,12 @@ func PutSpider(c *gin.Context) { debug.PrintStack() return } + + idx := strings.LastIndex(uploadFile.Filename, "/") + targetFilename := uploadFile.Filename[idx+1:] + // 判断爬虫是否存在 - spiderName := strings.Replace(uploadFile.Filename, ".zip", "", -1) + spiderName := strings.Replace(targetFilename, ".zip", "", 1) spider := model.GetSpiderByName(spiderName) if spider == nil { // 保存爬虫信息 From bc6dc220148b9f5e9d8d43a7f6c17a43847255d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 27 Sep 2019 11:49:57 +0800 Subject: [PATCH 21/81] =?UTF-8?q?fix=20=E4=B8=8A=E4=BC=A0=E7=88=AC?= =?UTF-8?q?=E8=99=AB=E9=94=99=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/spider_handler/spider.go | 1 + backend/services/spider_handler/spider_test.go | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index d5e377df..ac857e0a 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -107,6 +107,7 @@ func (s *SpiderSync) Download() { // 解压缩临时文件到目标文件夹 dstPath := filepath.Join( viper.GetString("spider.path"), + s.Spider.Name, ) if err := utils.DeCompress(tmpFile, dstPath); err != nil { log.Errorf(err.Error()) diff --git a/backend/services/spider_handler/spider_test.go b/backend/services/spider_handler/spider_test.go index f7bf97bf..66d47455 100644 --- a/backend/services/spider_handler/spider_test.go +++ b/backend/services/spider_handler/spider_test.go @@ -36,9 +36,9 @@ func init() { s = SpiderSync{ Spider: model.Spider{ - Id: bson.ObjectIdHex("5d8d5e4b44500b000150009c"), + Id: bson.ObjectIdHex("5d8d8326bc3c4f000186e5df"), Name: "scrapy-pre_sale", - FileId: bson.ObjectIdHex("5d8d5e4b44500b0001500098"), + FileId: bson.ObjectIdHex("5d8d8326bc3c4f000186e5db"), Src: "/opt/crawlab/spiders/scrapy-pre_sale", }, } From c69d5ee390ec0ff7889dc893f253c1d34bcfa761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 27 Sep 2019 12:00:00 +0800 Subject: [PATCH 22/81] =?UTF-8?q?fix=20=E9=87=8D=E5=A4=8D=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E7=88=AC=E8=99=AB=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/spider.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/model/spider.go b/backend/model/spider.go index c4e94349..13ec12e4 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -133,10 +133,10 @@ func GetSpiderByName(name string) *Spider { defer s.Close() var result *Spider - if err := c.Find(bson.M{"name": name}).One(result); err != nil { + if err := c.Find(bson.M{"name": name}).One(&result); err != nil { log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name) debug.PrintStack() - return result + return nil } return result } From 72b7ef42e881a125e2294004a46b03a361eca79f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 27 Sep 2019 12:07:38 +0800 Subject: [PATCH 23/81] =?UTF-8?q?fix=20=E9=87=8D=E5=A4=8D=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E7=88=AC=E8=99=AB=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/routes/spider.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 9357ca5e..064541bd 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -179,6 +179,10 @@ func PutSpider(c *gin.Context) { FileId: fid, } _ = spider.Add() + } else { + // 更新file_id + spider.FileId = fid + _ = spider.Save() } c.JSON(http.StatusOK, Response{ From 66bef2e6f25ef4b9d0b414e928bd563c34e34e1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 27 Sep 2019 12:31:47 +0800 Subject: [PATCH 24/81] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=87=A0=E4=B8=AA?= =?UTF-8?q?=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/file.go | 9 +++++++++ backend/model/spider.go | 11 ++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/backend/model/file.go b/backend/model/file.go index ca2c8ead..7aa88e3d 100644 --- a/backend/model/file.go +++ b/backend/model/file.go @@ -26,6 +26,15 @@ type File struct { Size int64 `json:"size"` } +func (f *GridFs) Remove() { + s, gf := database.GetGridFs("files") + defer s.Close() + if err := gf.RemoveId(f.Id); err != nil { + log.Errorf("remove file id error: %s, id: %s", err.Error(), f.Id.Hex()) + debug.PrintStack() + } +} + func GetAllGridFs() []*GridFs { s, gf := database.GetGridFs("files") defer s.Close() diff --git a/backend/model/spider.go b/backend/model/spider.go index 13ec12e4..e63c5f57 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -124,8 +124,17 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, erro return spiders, count, nil } -func GetSpiderByFileId(fileId bson.ObjectId) { +func GetSpiderByFileId(fileId bson.ObjectId) *Spider { + s, c := database.GetCol("spiders") + defer s.Close() + var result *Spider + if err := c.Find(bson.M{"file_id": fileId}).One(&result); err != nil { + log.Errorf("get spider error: %s, file_id: %s", err.Error(), fileId.Hex()) + debug.PrintStack() + return nil + } + return result } func GetSpiderByName(name string) *Spider { From bbc5401e317a863afda6cc9552a53ce694fb7914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 27 Sep 2019 14:16:41 +0800 Subject: [PATCH 25/81] =?UTF-8?q?=E6=89=93=E5=8D=B0=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E9=94=99=E8=AF=AF=E7=9A=84=E6=97=A5=E5=BF=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/schedule.go | 3 ++- backend/services/schedule.go | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/backend/model/schedule.go b/backend/model/schedule.go index 1c8db0bd..6415e22b 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -65,7 +65,8 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) { // 获取爬虫名称 spider, err := GetSpider(schedule.SpiderId) if err != nil { - log.Errorf(err.Error()) + log.Errorf("get spider by id: %s, error: %s", schedule.SpiderId.Hex(), err.Error()) + debug.PrintStack() continue } schedules[i].SpiderName = spider.Name diff --git a/backend/services/schedule.go b/backend/services/schedule.go index 1c08e0fd..c35c6a58 100644 --- a/backend/services/schedule.go +++ b/backend/services/schedule.go @@ -62,12 +62,16 @@ func (s *Scheduler) Start() error { // 更新任务列表 if err := s.Update(); err != nil { + log.Errorf("update scheduler error: %s", err.Error()) + debug.PrintStack() return err } // 每30秒更新一次任务列表 spec := "*/30 * * * * *" if _, err := exec.AddFunc(spec, UpdateSchedules); err != nil { + log.Errorf("add func update schedulers error: %s", err.Error()) + debug.PrintStack() return err } @@ -80,12 +84,16 @@ func (s *Scheduler) AddJob(job model.Schedule) error { // 添加任务 eid, err := s.cron.AddFunc(spec, AddTask(job)) if err != nil { + log.Errorf("add func task error: %s", err.Error()) + debug.PrintStack() return err } // 更新EntryID job.EntryId = eid if err := job.Save(); err != nil { + log.Errorf("job save error: %s", err.Error()) + debug.PrintStack() return err } @@ -106,6 +114,8 @@ func (s *Scheduler) Update() error { // 获取所有定时任务 sList, err := model.GetScheduleList(nil) if err != nil { + log.Errorf("get scheduler list error: %s", err.Error()) + debug.PrintStack() return err } @@ -116,6 +126,8 @@ func (s *Scheduler) Update() error { // 添加到定时任务 if err := s.AddJob(job); err != nil { + log.Errorf("add job error: %s", err.Error()) + debug.PrintStack() return err } } @@ -128,6 +140,8 @@ func InitScheduler() error { cron: cron.New(cron.WithSeconds()), } if err := Sched.Start(); err != nil { + log.Errorf("start scheduler error: %s", err.Error()) + debug.PrintStack() return err } return nil From d93bff63e781a7f7be62174cd479db65d6752fc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 30 Sep 2019 09:23:42 +0800 Subject: [PATCH 26/81] =?UTF-8?q?fix=20=E6=97=A0=E6=B3=95=E5=88=A0?= =?UTF-8?q?=E9=99=A4=E7=88=AC=E8=99=AB=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/schedule.go | 2 +- backend/services/spider_handler/spider.go | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/backend/services/schedule.go b/backend/services/schedule.go index c35c6a58..58cdf628 100644 --- a/backend/services/schedule.go +++ b/backend/services/schedule.go @@ -126,7 +126,7 @@ func (s *Scheduler) Update() error { // 添加到定时任务 if err := s.AddJob(job); err != nil { - log.Errorf("add job error: %s", err.Error()) + log.Errorf("add job error: %s, job: %s, cron: %s", err.Error(), job.Name, job.Cron) debug.PrintStack() return err } diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index ac857e0a..53c83b9a 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -45,11 +45,15 @@ func (s *SpiderSync) GetLockDownloadKey(spiderId string) string { // 删除本地文件 func (s *SpiderSync) RemoveSpiderFile() { - //爬虫文件有变化,先删除本地文件 - _ = os.Remove(filepath.Join( + path := filepath.Join( viper.GetString("spider.path"), s.Spider.Name, - )) + ) + //爬虫文件有变化,先删除本地文件 + if err := os.RemoveAll(path); err != nil { + log.Errorf("remove spider files error: %s, path: %s", err.Error(), path) + debug.PrintStack() + } } // 检测是否已经下载中 From 41556cab74c4decabf6c798c7bfb81749db77c4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 30 Sep 2019 12:09:37 +0800 Subject: [PATCH 27/81] =?UTF-8?q?fix=20=E5=88=A0=E9=99=A4=E7=88=AC?= =?UTF-8?q?=E8=99=AB=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/constants/channels.go | 9 ++++ backend/constants/message.go | 1 + backend/entity/node.go | 25 ++++++++++ backend/entity/system.go | 15 ++++++ backend/model/file.go | 2 +- backend/model/spider.go | 4 ++ backend/model/system.go | 23 ++------- backend/routes/spider.go | 28 +---------- backend/services/log.go | 26 +++++----- backend/services/msg_handler/handler.go | 33 ++++-------- backend/services/msg_handler/msg_log.go | 20 ++------ backend/services/msg_handler/msg_spider.go | 24 +++++++++ .../services/msg_handler/msg_system_info.go | 18 ++----- backend/services/msg_handler/msg_task.go | 3 +- backend/services/node.go | 33 ++++++------ backend/services/spider.go | 43 +++++++++++++++- backend/services/system.go | 10 ++-- backend/services/task.go | 4 +- backend/utils/file.go | 11 +++- backend/utils/helpers.go | 50 ++++++++++++++++++- 20 files changed, 240 insertions(+), 142 deletions(-) create mode 100644 backend/constants/channels.go create mode 100644 backend/entity/node.go create mode 100644 backend/entity/system.go create mode 100644 backend/services/msg_handler/msg_spider.go diff --git a/backend/constants/channels.go b/backend/constants/channels.go new file mode 100644 index 00000000..c38a5ac9 --- /dev/null +++ b/backend/constants/channels.go @@ -0,0 +1,9 @@ +package constants + +const ( + ChannelAllNode = "nodes:public" + + ChannelWorkerNode = "nodes:" + + ChannelMasterNode = "nodes:master" +) diff --git a/backend/constants/message.go b/backend/constants/message.go index f76e8fc3..72e5fab2 100644 --- a/backend/constants/message.go +++ b/backend/constants/message.go @@ -5,4 +5,5 @@ const ( MsgTypeGetSystemInfo = "get-sys-info" MsgTypeCancelTask = "cancel-task" MsgTypeRemoveLog = "remove-log" + MsgTypeRemoveSpider = "remove-spider" ) diff --git a/backend/entity/node.go b/backend/entity/node.go new file mode 100644 index 00000000..cf52fafb --- /dev/null +++ b/backend/entity/node.go @@ -0,0 +1,25 @@ +package entity + +type NodeMessage struct { + // 通信类别 + Type string `json:"type"` + + // 任务相关 + TaskId string `json:"task_id"` // 任务ID + + // 节点相关 + NodeId string `json:"node_id"` // 节点ID + + // 日志相关 + LogPath string `json:"log_path"` // 日志路径 + Log string `json:"log"` // 日志 + + // 系统信息 + SysInfo SystemInfo `json:"sys_info"` + + // 爬虫相关 + SpiderId string `json:"spider_id"` //爬虫ID + + // 错误相关 + Error string `json:"error"` +} diff --git a/backend/entity/system.go b/backend/entity/system.go new file mode 100644 index 00000000..dff637b7 --- /dev/null +++ b/backend/entity/system.go @@ -0,0 +1,15 @@ +package entity + +type SystemInfo struct { + ARCH string `json:"arch"` + OS string `json:"os"` + Hostname string `json:"host_name"` + NumCpu int `json:"num_cpu"` + Executables []Executable `json:"executables"` +} + +type Executable struct { + Path string `json:"path"` + FileName string `json:"file_name"` + DisplayName string `json:"display_name"` +} diff --git a/backend/model/file.go b/backend/model/file.go index 7aa88e3d..fe3ece0e 100644 --- a/backend/model/file.go +++ b/backend/model/file.go @@ -68,7 +68,7 @@ func RemoveFile(path string) error { debug.PrintStack() return nil } - if err := os.Remove(path); err != nil { + if err := os.RemoveAll(path); err != nil { return err } return nil diff --git a/backend/model/spider.go b/backend/model/spider.go index e63c5f57..93eafbc5 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -157,6 +157,7 @@ func GetSpider(id bson.ObjectId) (Spider, error) { var result Spider if err := c.FindId(id).One(&result); err != nil { if err != mgo.ErrNotFound { + log.Errorf("get spider error: %s, id: %id", err.Error(), id.Hex()) debug.PrintStack() } return result, err @@ -190,6 +191,8 @@ func RemoveSpider(id bson.ObjectId) error { } if err := c.RemoveId(id); err != nil { + log.Errorf("remove spider error: %s, id:%s", err.Error(), id.Hex()) + debug.PrintStack() return err } @@ -199,6 +202,7 @@ func RemoveSpider(id bson.ObjectId) error { if err := gf.RemoveId(result.FileId); err != nil { log.Error("remove file error, id:" + result.FileId.Hex()) + debug.PrintStack() return err } diff --git a/backend/model/system.go b/backend/model/system.go index 6091c963..5c2f5997 100644 --- a/backend/model/system.go +++ b/backend/model/system.go @@ -1,6 +1,7 @@ package model import ( + "crawlab/entity" "github.com/apex/log" "io/ioutil" "os" @@ -35,21 +36,7 @@ var executableNameMap = map[string]string{ "bash": "bash", } -type SystemInfo struct { - ARCH string `json:"arch"` - OS string `json:"os"` - Hostname string `json:"host_name"` - NumCpu int `json:"num_cpu"` - Executables []Executable `json:"executables"` -} - -type Executable struct { - Path string `json:"path"` - FileName string `json:"file_name"` - DisplayName string `json:"display_name"` -} - -func GetLocalSystemInfo() (sysInfo SystemInfo, err error) { +func GetLocalSystemInfo() (sysInfo entity.SystemInfo, err error) { executables, err := GetExecutables() if err != nil { return sysInfo, err @@ -60,7 +47,7 @@ func GetLocalSystemInfo() (sysInfo SystemInfo, err error) { return sysInfo, err } - return SystemInfo{ + return entity.SystemInfo{ ARCH: runtime.GOARCH, OS: runtime.GOOS, NumCpu: runtime.GOMAXPROCS(0), @@ -78,7 +65,7 @@ func GetPathValues() (paths []string) { return strings.Split(pathEnv, ":") } -func GetExecutables() (executables []Executable, err error) { +func GetExecutables() (executables []entity.Executable, err error) { pathValues := GetPathValues() cache := map[string]string{} @@ -97,7 +84,7 @@ func GetExecutables() (executables []Executable, err error) { if cache[filePath] == "" { if displayName != "" { - executables = append(executables, Executable{ + executables = append(executables, entity.Executable{ Path: filePath, FileName: file.Name(), DisplayName: displayName, diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 064541bd..efd76c0b 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -199,33 +199,7 @@ func DeleteSpider(c *gin.Context) { return } - // 获取该爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 删除爬虫文件目录 - if err := os.RemoveAll(spider.Src); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 从数据库中删除该爬虫 - if err := model.RemoveSpider(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 删除日志文件 - if err := services.RemoveLogBySpiderId(spider.Id); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 删除爬虫对应的task任务 - if err := model.RemoveTaskBySpiderId(spider.Id); err != nil { + if err := services.RemoveSpider(id); err != nil { HandleError(http.StatusInternalServerError, c, err) return } diff --git a/backend/services/log.go b/backend/services/log.go index 95459f8f..485cb7dd 100644 --- a/backend/services/log.go +++ b/backend/services/log.go @@ -3,9 +3,9 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/lib/cron" "crawlab/model" - "crawlab/services/msg_handler" "crawlab/utils" "encoding/json" "github.com/apex/log" @@ -23,7 +23,7 @@ var TaskLogChanMap = utils.NewChanMap() // 获取远端日志 func GetRemoteLog(task model.Task) (logStr string, err error) { // 序列化消息 - msg := msg_handler.NodeMessage{ + msg := entity.NodeMessage{ Type: constants.MsgTypeGetLog, LogPath: task.LogPath, TaskId: task.Id, @@ -85,21 +85,16 @@ func RemoveLocalLog(path string) error { // 删除远程日志 func RemoveRemoteLog(task model.Task) error { - msg := msg_handler.NodeMessage{ + msg := entity.NodeMessage{ Type: constants.MsgTypeRemoveLog, LogPath: task.LogPath, TaskId: task.Id, } - msgBytes, err := json.Marshal(&msg) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } // 发布获取日志消息 channel := "nodes:" + task.NodeId.Hex() - if _, err := database.RedisClient.Publish(channel, utils.BytesToString(msgBytes)); err != nil { - log.Errorf(err.Error()) + if _, err := database.RedisClient.Publish(channel, utils.GetJson(msg)); err != nil { + log.Errorf("publish redis error: %s", err.Error()) + debug.PrintStack() return err } return nil @@ -119,10 +114,12 @@ func RemoveLogByTaskId(id string) error { func removeLog(t model.Task) { if err := RemoveLocalLog(t.LogPath); err != nil { - log.Error("remove local log error:" + err.Error()) + log.Errorf("remove local log error: %s", err.Error()) + debug.PrintStack() } if err := RemoveRemoteLog(t); err != nil { - log.Error("remove remote log error:" + err.Error()) + log.Errorf("remove remote log error: %s", err.Error()) + debug.PrintStack() } } @@ -130,7 +127,8 @@ func removeLog(t model.Task) { func RemoveLogBySpiderId(id bson.ObjectId) error { tasks, err := model.GetTaskList(bson.M{"spider_id": id}, 0, constants.Infinite, "-create_ts") if err != nil { - log.Error("get tasks error:" + err.Error()) + log.Errorf("get tasks error: %s", err.Error()) + debug.PrintStack() } for _, task := range tasks { removeLog(task) diff --git a/backend/services/msg_handler/handler.go b/backend/services/msg_handler/handler.go index 61516bcf..848e0c5d 100644 --- a/backend/services/msg_handler/handler.go +++ b/backend/services/msg_handler/handler.go @@ -2,47 +2,34 @@ package msg_handler import ( "crawlab/constants" - "crawlab/model" + "crawlab/entity" ) type Handler interface { Handle() error } -func GetMsgHandler(msg NodeMessage) Handler { +func GetMsgHandler(msg entity.NodeMessage) Handler { if msg.Type == constants.MsgTypeGetLog || msg.Type == constants.MsgTypeRemoveLog { + // 日志相关 return &Log{ msg: msg, } } else if msg.Type == constants.MsgTypeCancelTask { + // 任务相关 return &Task{ msg: msg, } } else if msg.Type == constants.MsgTypeGetSystemInfo { + // 系统信息相关 return &SystemInfo{ msg: msg, } + } else if msg.Type == constants.MsgTypeRemoveSpider { + // 爬虫相关 + return &Spider{ + SpiderId: msg.SpiderId, + } } return nil } - -type NodeMessage struct { - // 通信类别 - Type string `json:"type"` - - // 任务相关 - TaskId string `json:"task_id"` // 任务ID - - // 节点相关 - NodeId string `json:"node_id"` // 节点ID - - // 日志相关 - LogPath string `json:"log_path"` // 日志路径 - Log string `json:"log"` // 日志 - - // 系统信息 - SysInfo model.SystemInfo `json:"sys_info"` - - // 错误相关 - Error string `json:"error"` -} diff --git a/backend/services/msg_handler/msg_log.go b/backend/services/msg_handler/msg_log.go index 0d09d784..37080bd6 100644 --- a/backend/services/msg_handler/msg_log.go +++ b/backend/services/msg_handler/msg_log.go @@ -2,16 +2,15 @@ package msg_handler import ( "crawlab/constants" - "crawlab/database" + "crawlab/entity" "crawlab/model" "crawlab/utils" - "encoding/json" "github.com/apex/log" "runtime/debug" ) type Log struct { - msg NodeMessage + msg entity.NodeMessage } func (g *Log) Handle() error { @@ -25,31 +24,22 @@ func (g *Log) Handle() error { func (g *Log) get() error { // 发出的消息 - msgSd := NodeMessage{ + msgSd := entity.NodeMessage{ Type: constants.MsgTypeGetLog, TaskId: g.msg.TaskId, } // 获取本地日志 logStr, err := model.GetLocalLog(g.msg.LogPath) - log.Info(utils.BytesToString(logStr)) if err != nil { - log.Errorf(err.Error()) + log.Errorf("get node local log error: %s", err.Error()) debug.PrintStack() msgSd.Error = err.Error() msgSd.Log = err.Error() } else { msgSd.Log = utils.BytesToString(logStr) } - - // 序列化 - msgSdBytes, err := json.Marshal(&msgSd) - if err != nil { - return err - } - // 发布消息给主节点 - log.Info("publish get log msg to master") - if _, err := database.RedisClient.Publish("nodes:master", utils.BytesToString(msgSdBytes)); err != nil { + if err := utils.Pub(constants.ChannelMasterNode, msgSd); err != nil { return err } return nil diff --git a/backend/services/msg_handler/msg_spider.go b/backend/services/msg_handler/msg_spider.go new file mode 100644 index 00000000..dcd6ce06 --- /dev/null +++ b/backend/services/msg_handler/msg_spider.go @@ -0,0 +1,24 @@ +package msg_handler + +import ( + "crawlab/model" + "crawlab/utils" + "github.com/globalsign/mgo/bson" + "github.com/spf13/viper" + "path/filepath" +) + +type Spider struct { + SpiderId string +} + +func (s *Spider) Handle() error { + // 移除本地的爬虫目录 + spider, err := model.GetSpider(bson.ObjectIdHex(s.SpiderId)) + if err != nil { + return err + } + path := filepath.Join(viper.GetString("spider.path"), spider.Name) + utils.RemoveFiles(path) + return nil +} diff --git a/backend/services/msg_handler/msg_system_info.go b/backend/services/msg_handler/msg_system_info.go index c81cb0a0..6b88e2cf 100644 --- a/backend/services/msg_handler/msg_system_info.go +++ b/backend/services/msg_handler/msg_system_info.go @@ -2,16 +2,13 @@ package msg_handler import ( "crawlab/constants" - "crawlab/database" + "crawlab/entity" "crawlab/model" "crawlab/utils" - "encoding/json" - "github.com/apex/log" - "runtime/debug" ) type SystemInfo struct { - msg NodeMessage + msg entity.NodeMessage } func (s *SystemInfo) Handle() error { @@ -20,19 +17,12 @@ func (s *SystemInfo) Handle() error { if err != nil { return err } - msgSd := NodeMessage{ + msgSd := entity.NodeMessage{ Type: constants.MsgTypeGetSystemInfo, NodeId: s.msg.NodeId, SysInfo: sysInfo, } - msgSdBytes, err := json.Marshal(&msgSd) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - if _, err := database.RedisClient.Publish("nodes:master", utils.BytesToString(msgSdBytes)); err != nil { - log.Errorf(err.Error()) + if err := utils.Pub(constants.ChannelMasterNode, msgSd); err != nil { return err } return nil diff --git a/backend/services/msg_handler/msg_task.go b/backend/services/msg_handler/msg_task.go index 5f120f80..21b95430 100644 --- a/backend/services/msg_handler/msg_task.go +++ b/backend/services/msg_handler/msg_task.go @@ -2,6 +2,7 @@ package msg_handler import ( "crawlab/constants" + "crawlab/entity" "crawlab/model" "crawlab/utils" "github.com/apex/log" @@ -10,7 +11,7 @@ import ( ) type Task struct { - msg NodeMessage + msg entity.NodeMessage } func (t *Task) Handle() error { diff --git a/backend/services/node.go b/backend/services/node.go index e3397e74..53af8d32 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -1,9 +1,9 @@ package services import ( - "context" "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/lib/cron" "crawlab/model" "crawlab/services/msg_handler" @@ -175,7 +175,7 @@ func UpdateNodeData() { func MasterNodeCallback(message redis.Message) (err error) { // 反序列化 - var msg msg_handler.NodeMessage + var msg entity.NodeMessage if err := json.Unmarshal(message.Data, &msg); err != nil { return err @@ -183,7 +183,6 @@ func MasterNodeCallback(message redis.Message) (err error) { if msg.Type == constants.MsgTypeGetLog { // 获取日志 - fmt.Println(msg) time.Sleep(10 * time.Millisecond) ch := TaskLogChanMap.ChanBlocked(msg.TaskId) ch <- msg.Log @@ -200,14 +199,8 @@ func MasterNodeCallback(message redis.Message) (err error) { func WorkerNodeCallback(message redis.Message) (err error) { // 反序列化 - msg := msg_handler.NodeMessage{} - if err := json.Unmarshal(message.Data, &msg); err != nil { - - return err - } - - // worker message handle - if err := msg_handler.GetMsgHandler(msg).Handle(); err != nil { + msg := utils.GetMessage(message) + if err := msg_handler.GetMsgHandler(*msg).Handle(); err != nil { return err } return nil @@ -234,23 +227,25 @@ func InitNodeService() error { log.Errorf(err.Error()) return err } - ctx := context.Background() + if model.IsMaster() { // 如果为主节点,订阅主节点通信频道 - channel := "nodes:master" - err := database.RedisClient.Subscribe(ctx, MasterNodeCallback, channel) - if err != nil { + if err := utils.Sub(constants.ChannelMasterNode, MasterNodeCallback); err != nil { return err } } else { // 若为工作节点,订阅单独指定通信频道 - channel := "nodes:" + node.Id.Hex() - err := database.RedisClient.Subscribe(ctx, WorkerNodeCallback, channel) - if err != nil { + channel := constants.ChannelWorkerNode + node.Id.Hex() + if err := utils.Sub(channel, WorkerNodeCallback); err != nil { return err } } + // 订阅全通道 + if err := utils.Sub(constants.ChannelAllNode, WorkerNodeCallback); err != nil { + return err + } + // 如果为主节点,每30秒刷新所有节点信息 if model.IsMaster() { spec := "*/10 * * * * *" @@ -260,7 +255,7 @@ func InitNodeService() error { } } - // 更新在当前节点执行的任务状态为:abnormal + // 更新在当前节点执行中的任务状态为:abnormal if err := model.UpdateTaskToAbnormal(node.Id); err != nil { debug.PrintStack() return err diff --git a/backend/services/spider.go b/backend/services/spider.go index ea3d374f..a2e9a60f 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -3,6 +3,7 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/lib/cron" "crawlab/model" "crawlab/services/spider_handler" @@ -65,6 +66,7 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err er return fid, nil } +// 写入grid fs func WriteToGridFS(content []byte, f *mgo.GridFile) { if _, err := f.Write(content); err != nil { debug.PrintStack() @@ -141,7 +143,7 @@ func PublishSpider(spider model.Spider) { return } // md5值不一样,则下载 - md5Str := utils.ReadFile(md5) + md5Str := utils.ReadFileOneLine(md5) if gfFile.Md5 != md5Str { spiderSync.RemoveSpiderFile() spiderSync.Download() @@ -150,6 +152,45 @@ func PublishSpider(spider model.Spider) { } } +func RemoveSpider(id string) error { + // 获取该爬虫 + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + return err + } + + // 删除爬虫文件目录 + path := filepath.Join(viper.GetString("spider.path"), spider.Name) + utils.RemoveFiles(path) + + // 删除其他节点的爬虫目录 + msg := entity.NodeMessage{ + Type: constants.MsgTypeRemoveSpider, + SpiderId: id, + } + if err := utils.Pub(constants.ChannelAllNode, msg); err != nil { + return err + } + + // 从数据库中删除该爬虫 + if err := model.RemoveSpider(bson.ObjectIdHex(id)); err != nil { + return err + } + + // 删除日志文件 + if err := RemoveLogBySpiderId(spider.Id); err != nil { + return err + } + + // 删除爬虫对应的task任务 + if err := model.RemoveTaskBySpiderId(spider.Id); err != nil { + return err + } + + // TODO 定时任务如何处理 + return nil +} + // 启动爬虫服务 func InitSpiderService() error { // 构造定时任务执行器 diff --git a/backend/services/system.go b/backend/services/system.go index 2c7cd05a..92f9cf96 100644 --- a/backend/services/system.go +++ b/backend/services/system.go @@ -3,17 +3,17 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/model" - "crawlab/services/msg_handler" "crawlab/utils" "encoding/json" ) var SystemInfoChanMap = utils.NewChanMap() -func GetRemoteSystemInfo(id string) (sysInfo model.SystemInfo, err error) { +func GetRemoteSystemInfo(id string) (sysInfo entity.SystemInfo, err error) { // 发送消息 - msg := msg_handler.NodeMessage{ + msg := entity.NodeMessage{ Type: constants.MsgTypeGetSystemInfo, NodeId: id, } @@ -21,7 +21,7 @@ func GetRemoteSystemInfo(id string) (sysInfo model.SystemInfo, err error) { // 序列化 msgBytes, _ := json.Marshal(&msg) if _, err := database.RedisClient.Publish("nodes:"+id, utils.BytesToString(msgBytes)); err != nil { - return model.SystemInfo{}, err + return entity.SystemInfo{}, err } // 通道 @@ -38,7 +38,7 @@ func GetRemoteSystemInfo(id string) (sysInfo model.SystemInfo, err error) { return sysInfo, nil } -func GetSystemInfo(id string) (sysInfo model.SystemInfo, err error) { +func GetSystemInfo(id string) (sysInfo entity.SystemInfo, err error) { if IsMasterNode(id) { sysInfo, err = model.GetLocalSystemInfo() } else { diff --git a/backend/services/task.go b/backend/services/task.go index 2a68f10e..0e8db964 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -3,9 +3,9 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/lib/cron" "crawlab/model" - "crawlab/services/msg_handler" "crawlab/utils" "encoding/json" "errors" @@ -493,7 +493,7 @@ func CancelTask(id string) (err error) { // 任务节点为工作节点 // 序列化消息 - msg := msg_handler.NodeMessage{ + msg := entity.NodeMessage{ Type: constants.MsgTypeCancelTask, TaskId: id, } diff --git a/backend/utils/file.go b/backend/utils/file.go index 282bee14..d65e7ab1 100644 --- a/backend/utils/file.go +++ b/backend/utils/file.go @@ -10,7 +10,16 @@ import ( "runtime/debug" ) -func ReadFile(fileName string) string { +// 删除文件 +func RemoveFiles(path string) { + if err := os.RemoveAll(path); err != nil { + log.Errorf("remove files error: %s, path: %s", err.Error(), path) + debug.PrintStack() + } +} + +// 读取文件一行 +func ReadFileOneLine(fileName string) string { file := OpenFile(fileName) defer file.Close() buf := bufio.NewReader(file) diff --git a/backend/utils/helpers.go b/backend/utils/helpers.go index 8e6de815..edc6200e 100644 --- a/backend/utils/helpers.go +++ b/backend/utils/helpers.go @@ -1,7 +1,55 @@ package utils -import "unsafe" +import ( + "context" + "crawlab/database" + "crawlab/entity" + "encoding/json" + "github.com/apex/log" + "github.com/gomodule/redigo/redis" + "runtime/debug" + "unsafe" +) func BytesToString(b []byte) string { return *(*string)(unsafe.Pointer(&b)) } + +func GetJson(message entity.NodeMessage) string { + msgBytes, err := json.Marshal(&message) + if err != nil { + log.Errorf("node message to json error: %s", err.Error()) + debug.PrintStack() + return "" + } + return BytesToString(msgBytes) +} + +func GetMessage(message redis.Message) *entity.NodeMessage { + msg := entity.NodeMessage{} + if err := json.Unmarshal(message.Data, &msg); err != nil { + log.Errorf("message byte to object error: %s", err.Error()) + debug.PrintStack() + return nil + } + return &msg +} + +func Pub(channel string, msg entity.NodeMessage) error { + if _, err := database.RedisClient.Publish(channel, GetJson(msg)); err != nil { + log.Errorf("publish redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} + +func Sub(channel string, consume database.ConsumeFunc) error { + ctx := context.Background() + if err := database.RedisClient.Subscribe(ctx, consume, channel); err != nil { + log.Errorf("subscribe redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} From cdd7696295f62ccec253dcc91dd49e475a7eb361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 30 Sep 2019 14:43:48 +0800 Subject: [PATCH 28/81] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/entity/spider.go | 6 ++++++ backend/main.go | 1 + backend/model/spider.go | 34 +++++++++++++++++++++++++++++++++- backend/routes/spider.go | 14 ++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 backend/entity/spider.go diff --git a/backend/entity/spider.go b/backend/entity/spider.go new file mode 100644 index 00000000..8129ff40 --- /dev/null +++ b/backend/entity/spider.go @@ -0,0 +1,6 @@ +package entity + +type SpiderType struct { + Type string `json:"type"` + Count int `json:"count"` +} diff --git a/backend/main.go b/backend/main.go index 47196fe5..5d95dbaf 100644 --- a/backend/main.go +++ b/backend/main.go @@ -132,6 +132,7 @@ func main() { authGroup.POST("/spiders/:id/file", routes.PostSpiderFile) // 爬虫目录写入 authGroup.GET("/spiders/:id/dir", routes.GetSpiderDir) // 爬虫目录 authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据 + authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型 // 任务 authGroup.GET("/tasks", routes.GetTaskList) // 任务列表 authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情 diff --git a/backend/model/spider.go b/backend/model/spider.go index 93eafbc5..6e7102d6 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -2,6 +2,7 @@ package model import ( "crawlab/database" + "crawlab/entity" "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" @@ -47,6 +48,7 @@ type Spider struct { UpdateTs time.Time `json:"update_ts" bson:"update_ts"` } +// 更新爬虫 func (spider *Spider) Save() error { s, c := database.GetCol("spiders") defer s.Close() @@ -60,6 +62,7 @@ func (spider *Spider) Save() error { return nil } +// 新增爬虫 func (spider *Spider) Add() error { s, c := database.GetCol("spiders") defer s.Close() @@ -74,6 +77,7 @@ func (spider *Spider) Add() error { return nil } +// 获取爬虫的任务 func (spider *Spider) GetTasks() ([]Task, error) { tasks, err := GetTaskList(bson.M{"spider_id": spider.Id}, 0, 10, "-create_ts") if err != nil { @@ -82,6 +86,7 @@ func (spider *Spider) GetTasks() ([]Task, error) { return tasks, nil } +// 爬虫最新的任务 func (spider *Spider) GetLastTask() (Task, error) { tasks, err := GetTaskList(bson.M{"spider_id": spider.Id}, 0, 1, "-create_ts") if err != nil { @@ -93,6 +98,7 @@ func (spider *Spider) GetLastTask() (Task, error) { return tasks[0], nil } +// 爬虫列表 func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, error) { s, c := database.GetCol("spiders") defer s.Close() @@ -124,6 +130,7 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, erro return spiders, count, nil } +// 获取爬虫 func GetSpiderByFileId(fileId bson.ObjectId) *Spider { s, c := database.GetCol("spiders") defer s.Close() @@ -137,6 +144,7 @@ func GetSpiderByFileId(fileId bson.ObjectId) *Spider { return result } +// 获取爬虫 func GetSpiderByName(name string) *Spider { s, c := database.GetCol("spiders") defer s.Close() @@ -150,6 +158,7 @@ func GetSpiderByName(name string) *Spider { return result } +// 获取爬虫 func GetSpider(id bson.ObjectId) (Spider, error) { s, c := database.GetCol("spiders") defer s.Close() @@ -165,6 +174,7 @@ func GetSpider(id bson.ObjectId) (Spider, error) { return result, nil } +// 更新爬虫 func UpdateSpider(id bson.ObjectId, item Spider) error { s, c := database.GetCol("spiders") defer s.Close() @@ -181,6 +191,7 @@ func UpdateSpider(id bson.ObjectId, item Spider) error { return nil } +// 删除爬虫 func RemoveSpider(id bson.ObjectId) error { s, c := database.GetCol("spiders") defer s.Close() @@ -209,6 +220,7 @@ func RemoveSpider(id bson.ObjectId) error { return nil } +// 删除所有爬虫 func RemoveAllSpider() error { s, c := database.GetCol("spiders") defer s.Close() @@ -227,6 +239,7 @@ func RemoveAllSpider() error { return nil } +// 爬虫总数 func GetSpiderCount() (int, error) { s, c := database.GetCol("spiders") defer s.Close() @@ -235,6 +248,25 @@ func GetSpiderCount() (int, error) { if err != nil { return 0, err } - return count, nil } + +// 爬虫类型 +func GetSpiderTypes() ([]*entity.SpiderType, error) { + s, c := database.GetCol("spiders") + defer s.Close() + + group := bson.M{ + "$group": bson.M{ + "type": bson.M{"$sum": 1}, + }, + } + var types []*entity.SpiderType + if err := c.Pipe([]bson.M{group}).All(&types); err != nil { + log.Errorf("get spider types error: %s", err.Error()) + debug.PrintStack() + return nil, err + } + + return types, nil +} diff --git a/backend/routes/spider.go b/backend/routes/spider.go index efd76c0b..8f8fdd13 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -337,6 +337,20 @@ func PostSpiderFile(c *gin.Context) { }) } +// 爬虫类型 +func GetSpiderTypes(c *gin.Context) { + types, err := model.GetSpiderTypes() + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: types, + }) +} + func GetSpiderStats(c *gin.Context) { type Overview struct { TaskCount int `json:"task_count" bson:"task_count"` From 6b3a6247a1d51d0d0b972b206b50a1afb1d7fa1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 30 Sep 2019 14:44:10 +0800 Subject: [PATCH 29/81] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E5=88=97=E4=B8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/spider/SpiderList.vue | 37 +++++++++++++++--------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/frontend/src/views/spider/SpiderList.vue b/frontend/src/views/spider/SpiderList.vue index 63e0a817..5b93afb4 100644 --- a/frontend/src/views/spider/SpiderList.vue +++ b/frontend/src/views/spider/SpiderList.vue @@ -109,9 +109,18 @@
- - - + + + + + + + + + + + +
@@ -239,7 +248,7 @@ import { import dayjs from 'dayjs' import CrawlConfirmDialog from '../../components/Common/CrawlConfirmDialog' import StatusTag from '../../components/Status/StatusTag' - +import request from '../../api/request' export default { name: 'SpiderList', components: { @@ -262,18 +271,17 @@ export default { crawlConfirmDialogVisible: false, activeSpiderId: undefined, filter: { - keyword: '' + keyword: '', + type: '' }, + types: [], // tableData, columns: [ - { name: 'name', label: 'Name', width: '160', align: 'left' }, - // { name: 'site_name', label: 'Site', width: '140', align: 'left' }, + { name: 'display_name', label: 'Name', width: '160', align: 'left' }, { name: 'type', label: 'Spider Type', width: '120' }, - // { name: 'cmd', label: 'Command Line', width: '200' }, { name: 'last_status', label: 'Last Status', width: '120' }, { name: 'last_run_ts', label: 'Last Run', width: '140' }, - { name: 'create_ts', label: 'Create Time', width: '140' }, - { name: 'update_ts', label: 'Update Time', width: '140' }, + // { name: 'update_ts', label: 'Update Time', width: '140' }, { name: 'remark', label: 'Remark', width: '140' } ], spiderFormRules: { @@ -485,12 +493,15 @@ export default { keyword: this.filter.keyword } this.$store.dispatch('spider/getSpiderList', params) + }, + getTypes () { + request.get(`/spider/types`).then(resp => { + console.log('resp', resp) + }) } }, created () { - // take site from params to filter - this.$store.commit('spider/SET_FILTER_SITE', this.$route.params.domain) - + this.getTypes() // fetch spider list this.getList() }, From b994cd49223ca629005582406453018244638864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 30 Sep 2019 16:35:36 +0800 Subject: [PATCH 30/81] =?UTF-8?q?=E5=AE=8C=E6=88=90=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E7=9A=84=E5=88=86=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/entity/common.go | 17 +++++++++++++ backend/entity/spider.go | 4 ++-- backend/model/spider.go | 6 +++-- backend/routes/spider.go | 23 ++++++++++++------ frontend/src/components/Common/DialogView.vue | 2 +- .../components/InfoView/SpiderInfoView.vue | 24 +++++++------------ frontend/src/views/result/ResultDetail.vue | 2 +- frontend/src/views/result/ResultList.vue | 4 ++-- frontend/src/views/schedule/ScheduleList.vue | 2 +- frontend/src/views/spider/SpiderDetail.vue | 2 +- frontend/src/views/spider/SpiderList.vue | 19 +++++++++------ 11 files changed, 65 insertions(+), 40 deletions(-) create mode 100644 backend/entity/common.go diff --git a/backend/entity/common.go b/backend/entity/common.go new file mode 100644 index 00000000..332cc494 --- /dev/null +++ b/backend/entity/common.go @@ -0,0 +1,17 @@ +package entity + +import "strconv" + +type Page struct { + Skip int + Limit int + PageNum int + PageSize int +} + +func (p *Page)GetPage(pageNum string, pageSize string) { + p.PageNum, _ = strconv.Atoi(pageNum) + p.PageSize, _ = strconv.Atoi(pageSize) + p.Skip = p.PageSize * (p.PageNum - 1) + p.Limit = p.PageSize +} \ No newline at end of file diff --git a/backend/entity/spider.go b/backend/entity/spider.go index 8129ff40..7f5e02b4 100644 --- a/backend/entity/spider.go +++ b/backend/entity/spider.go @@ -1,6 +1,6 @@ package entity type SpiderType struct { - Type string `json:"type"` - Count int `json:"count"` + Type string `json:"type" bson:"_id"` + Count int `json:"count" bson:"count"` } diff --git a/backend/model/spider.go b/backend/model/spider.go index 6e7102d6..1f88acff 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -256,13 +256,15 @@ func GetSpiderTypes() ([]*entity.SpiderType, error) { s, c := database.GetCol("spiders") defer s.Close() + group := bson.M{ "$group": bson.M{ - "type": bson.M{"$sum": 1}, + "_id": "$type", + "count": bson.M{"$sum": 1}, }, } var types []*entity.SpiderType - if err := c.Pipe([]bson.M{group}).All(&types); err != nil { + if err := c.Pipe([]bson.M{ group}).All(&types); err != nil { log.Errorf("get spider types error: %s", err.Error()) debug.PrintStack() return nil, err diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 8f8fdd13..addddd99 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -3,6 +3,7 @@ package routes import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/model" "crawlab/services" "crawlab/utils" @@ -24,14 +25,22 @@ import ( ) func GetSpiderList(c *gin.Context) { - pageNumStr, _ := c.GetQuery("pageNum") - pageSizeStr, _ := c.GetQuery("pageSize") + pageNum, _ := c.GetQuery("pageNum") + pageSize, _ := c.GetQuery("pageSize") keyword, _ := c.GetQuery("keyword") - pageNum, _ := strconv.Atoi(pageNumStr) - pageSize, _ := strconv.Atoi(pageSizeStr) - skip := pageSize * (pageNum - 1) - filter := bson.M{"name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}}} - results, count, err := model.GetSpiderList(filter, skip, pageSize) + t, _ := c.GetQuery("type") + + filter := bson.M{ + "name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}}, + } + + if t != "" { + filter["type"] = t + } + + page := &entity.Page{} + page.GetPage(pageNum, pageSize) + results, count, err := model.GetSpiderList(filter, page.Skip, page.Limit) if err != nil { HandleError(http.StatusInternalServerError, c, err) return diff --git a/frontend/src/components/Common/DialogView.vue b/frontend/src/components/Common/DialogView.vue index 7976171e..3c83d7ce 100644 --- a/frontend/src/components/Common/DialogView.vue +++ b/frontend/src/components/Common/DialogView.vue @@ -151,7 +151,7 @@ export default { } }, mounted () { - if (!this.spiderList || !this.spiderList.length) this.$store.dispatch('spider/getSpiderList') + // if (!this.spiderList || !this.spiderList.length) this.$store.dispatch('spider/getSpiderList') if (!this.nodeList || !this.nodeList.length) this.$store.dispatch('node/getNodeList') } } diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue index 39702a5d..381b253c 100644 --- a/frontend/src/components/InfoView/SpiderInfoView.vue +++ b/frontend/src/components/InfoView/SpiderInfoView.vue @@ -18,10 +18,10 @@ - + - + @@ -39,10 +39,11 @@ - - - - + + + + + @@ -102,16 +103,7 @@ export default { 'spiderForm' ]), isShowRun () { - if (this.isCustomized) { - // customized spider - return !!this.spiderForm.cmd - } else { - // configurable spider - return !!this.spiderForm.fields - } - }, - isCustomized () { - return this.spiderForm.type === 'customized' + return !!this.spiderForm.cmd } }, methods: { diff --git a/frontend/src/views/result/ResultDetail.vue b/frontend/src/views/result/ResultDetail.vue index f42bee5c..df8487ef 100644 --- a/frontend/src/views/result/ResultDetail.vue +++ b/frontend/src/views/result/ResultDetail.vue @@ -59,7 +59,7 @@ export default { }, created () { // get the list of the spiders - this.$store.dispatch('spider/getSpiderList') + // this.$store.dispatch('spider/getSpiderList') // get spider basic info this.$store.dispatch('spider/getSpiderData', this.$route.params.id) diff --git a/frontend/src/views/result/ResultList.vue b/frontend/src/views/result/ResultList.vue index 85c70098..2f3e820b 100644 --- a/frontend/src/views/result/ResultList.vue +++ b/frontend/src/views/result/ResultList.vue @@ -195,7 +195,7 @@ export default { this.dialogVisible = true }, onRefresh () { - this.$store.dispatch('spider/getSpiderList') + // this.$store.dispatch('spider/getSpiderList') }, onSubmit () { const vm = this @@ -257,7 +257,7 @@ export default { } }, created () { - this.$store.dispatch('spider/getSpiderList') + // this.$store.dispatch('spider/getSpiderList') } } diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue index 743a186e..c44d46e2 100644 --- a/frontend/src/views/schedule/ScheduleList.vue +++ b/frontend/src/views/schedule/ScheduleList.vue @@ -269,7 +269,7 @@ export default { }, created () { this.$store.dispatch('schedule/getScheduleList') - this.$store.dispatch('spider/getSpiderList') + // this.$store.dispatch('spider/getSpiderList') this.$store.dispatch('node/getNodeList') } } diff --git a/frontend/src/views/spider/SpiderDetail.vue b/frontend/src/views/spider/SpiderDetail.vue index 69fdd770..916592f4 100644 --- a/frontend/src/views/spider/SpiderDetail.vue +++ b/frontend/src/views/spider/SpiderDetail.vue @@ -87,7 +87,7 @@ export default { }, created () { // get the list of the spiders - this.$store.dispatch('spider/getSpiderList') + // this.$store.dispatch('spider/getSpiderList') // get spider basic info this.$store.dispatch('spider/getSpiderData', this.$route.params.id) diff --git a/frontend/src/views/spider/SpiderList.vue b/frontend/src/views/spider/SpiderList.vue index 5b93afb4..743aabbe 100644 --- a/frontend/src/views/spider/SpiderList.vue +++ b/frontend/src/views/spider/SpiderList.vue @@ -111,12 +111,13 @@
- - + + - + @@ -156,8 +157,7 @@ align="left" :width="col.width"> { - console.log('resp', resp) + this.types = resp.data.data }) } }, From cacb12638576f1159dd6100396e81f85065fcc85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 30 Sep 2019 16:44:35 +0800 Subject: [PATCH 31/81] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E5=8F=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/package.json b/frontend/package.json index 20e40c7c..f5d170c8 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "crawlab", - "version": "0.2.3", + "version": "0.3.2", "private": true, "scripts": { "serve": "vue-cli-service serve --ip=0.0.0.0 --mode=development", From 0aa7d581b222bdba945b7eee7bb0c0daf8a070bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 30 Sep 2019 16:50:16 +0800 Subject: [PATCH 32/81] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E5=8F=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/layout/components/Navbar.vue | 3 +++ 1 file changed, 3 insertions(+) diff --git a/frontend/src/views/layout/components/Navbar.vue b/frontend/src/views/layout/components/Navbar.vue index 3b30c049..25d62e35 100644 --- a/frontend/src/views/layout/components/Navbar.vue +++ b/frontend/src/views/layout/components/Navbar.vue @@ -8,6 +8,9 @@ + + v0.3.2 + {{$t('Logout')}} From 6fbae93545fdfa919c4e311504d4f6dfd259f418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 30 Sep 2019 19:24:35 +0800 Subject: [PATCH 33/81] =?UTF-8?q?fix=20=E7=BC=96=E8=AF=91=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/mock/node.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/mock/node.go b/backend/mock/node.go index 4857a96d..789d0a9a 100644 --- a/backend/mock/node.go +++ b/backend/mock/node.go @@ -1,6 +1,7 @@ package mock import ( + "crawlab/entity" "crawlab/model" "crawlab/services" "github.com/apex/log" @@ -97,14 +98,14 @@ var dataList = []services.Data{ }, } -var executeble = []model.Executable{ +var executeble = []entity.Executable{ { Path: "/test", FileName: "test.py", DisplayName: "test.py", }, } -var systemInfo = model.SystemInfo{ARCH: "x86", +var systemInfo = entity.SystemInfo{ARCH: "x86", OS: "linux", Hostname: "test", NumCpu: 4, From 329b4a6470cfda7e82821ccbc2b336297c0aeb67 Mon Sep 17 00:00:00 2001 From: yaziming Date: Tue, 1 Oct 2019 09:58:24 +0800 Subject: [PATCH 34/81] fix(backend): fix mongo connect error when password has special characters --- backend/database/mongo.go | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/backend/database/mongo.go b/backend/database/mongo.go index 1c2d6433..e72baeaa 100644 --- a/backend/database/mongo.go +++ b/backend/database/mongo.go @@ -3,6 +3,7 @@ package database import ( "github.com/globalsign/mgo" "github.com/spf13/viper" + "net" "time" ) @@ -39,13 +40,28 @@ func InitMongo() error { var mongoAuth = viper.GetString("mongo.authSource") if Session == nil { - var uri string - if mongoUsername == "" { - uri = "mongodb://" + mongoHost + ":" + mongoPort + "/" + mongoDb - } else { - uri = "mongodb://" + mongoUsername + ":" + mongoPassword + "@" + mongoHost + ":" + mongoPort + "/" + mongoDb + "?authSource=" + mongoAuth + var dialInfo mgo.DialInfo + addr := net.JoinHostPort(mongoHost, mongoPort) + timeout := time.Second * 10 + dialInfo = mgo.DialInfo{ + Addrs: []string{addr}, + Timeout: timeout, + Database: mongoDb, + PoolLimit: 100, + PoolTimeout: timeout, + ReadTimeout: timeout, + WriteTimeout: timeout, + AppName: "crawlab", + FailFast: true, + MinPoolSize: 10, + MaxIdleTimeMS: 1000 * 30, } - sess, err := mgo.DialWithTimeout(uri, time.Second*5) + if mongoUsername != "" { + dialInfo.Username = mongoUsername + dialInfo.Password = mongoPassword + dialInfo.Source = mongoAuth + } + sess, err := mgo.DialWithInfo(&dialInfo) if err != nil { return err } From 17442fc89337012c8ea0e883e887325545b0c312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Sun, 6 Oct 2019 18:46:51 +0800 Subject: [PATCH 35/81] add k8s deploy link --- README-zh.md | 1 + README.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README-zh.md b/README-zh.md index 6c2449f2..b4e2b469 100644 --- a/README-zh.md +++ b/README-zh.md @@ -21,6 +21,7 @@ 三种方式: 1. [Docker](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html)(推荐) 2. [直接部署](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html)(了解内核) +3. [Kubernetes](https://mp.weixin.qq.com/s/3Q1BQATUIEE_WXcHPqhYbA) ### 要求(Docker) - Docker 18.03+ diff --git a/README.md b/README.md index 9ae17a77..91a30b34 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ Golang-based distributed web crawler management platform, supporting various lan Two methods: 1. [Docker](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html) (Recommended) 2. [Direct Deploy](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html) (Check Internal Kernel) +3. [Kubernetes](https://mp.weixin.qq.com/s/3Q1BQATUIEE_WXcHPqhYbA) ### Pre-requisite (Docker) - Docker 18.03+ From dabf5cacf156ebac3221340243db077d8a7eecde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 7 Oct 2019 12:21:32 +0800 Subject: [PATCH 36/81] =?UTF-8?q?fix=20=E5=88=9B=E5=BB=BA=E7=9B=AE?= =?UTF-8?q?=E5=BD=95=E9=94=99=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/routes/spider.go | 2 +- backend/services/spider.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/routes/spider.go b/backend/routes/spider.go index addddd99..4c26fcee 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -135,7 +135,7 @@ func PutSpider(c *gin.Context) { // 以防tmp目录不存在 tmpPath := viper.GetString("other.tmppath") if !utils.Exists(tmpPath) { - if err := os.Mkdir(tmpPath, os.ModePerm); err != nil { + if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil { log.Error("mkdir other.tmppath dir error:" + err.Error()) debug.PrintStack() HandleError(http.StatusBadRequest, c, errors.New("Mkdir other.tmppath dir error")) diff --git a/backend/services/spider.go b/backend/services/spider.go index a2e9a60f..c03ebe38 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -145,6 +145,7 @@ func PublishSpider(spider model.Spider) { // md5值不一样,则下载 md5Str := utils.ReadFileOneLine(md5) if gfFile.Md5 != md5Str { + log.Infof("md5 is different, gf-md5:%s, file-md5:%s", gfFile.Md5, md5Str) spiderSync.RemoveSpiderFile() spiderSync.Download() spiderSync.CreateMd5File(gfFile.Md5) From 4a40d38844e88c27150babb4f6cba866bf9eddb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 7 Oct 2019 12:49:37 +0800 Subject: [PATCH 37/81] =?UTF-8?q?fix=20md5=E5=80=BC=E4=B8=8D=E4=B8=80?= =?UTF-8?q?=E8=87=B4=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/spider.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/services/spider.go b/backend/services/spider.go index c03ebe38..7aea456f 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -16,6 +16,7 @@ import ( "os" "path/filepath" "runtime/debug" + "strings" ) type SpiderFileData struct { @@ -144,6 +145,9 @@ func PublishSpider(spider model.Spider) { } // md5值不一样,则下载 md5Str := utils.ReadFileOneLine(md5) + // 去掉空格以及换行符 + md5Str = strings.Replace(md5Str, " ", "", -1) + md5Str = strings.Replace(md5Str, "\n", "", -1) if gfFile.Md5 != md5Str { log.Infof("md5 is different, gf-md5:%s, file-md5:%s", gfFile.Md5, md5Str) spiderSync.RemoveSpiderFile() From 10e8827dd3f716b7b954eb31d7a0f3820850e241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Tue, 8 Oct 2019 14:46:03 +0800 Subject: [PATCH 38/81] =?UTF-8?q?fix=20=E9=9D=9E=E8=87=AA=E5=AE=9A?= =?UTF-8?q?=E4=B9=89=E7=88=AC=E8=99=AB=E7=9C=8B=E4=B8=8D=E5=88=B0=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E7=9A=84=E6=83=85=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/spider/SpiderDetail.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/views/spider/SpiderDetail.vue b/frontend/src/views/spider/SpiderDetail.vue index 916592f4..b42e750d 100644 --- a/frontend/src/views/spider/SpiderDetail.vue +++ b/frontend/src/views/spider/SpiderDetail.vue @@ -16,7 +16,7 @@ - + From 4c8b38f40bdc5b1376dd026d86a2f34b3d51dbe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Tue, 8 Oct 2019 19:41:29 +0800 Subject: [PATCH 39/81] =?UTF-8?q?fix=20=E7=8A=B6=E6=80=81=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/backend/services/task.go b/backend/services/task.go index 0e8db964..6b0effe6 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -14,10 +14,12 @@ import ( "os" "os/exec" "path/filepath" + "reflect" "runtime" "runtime/debug" "strconv" "sync" + "syscall" "time" ) @@ -142,7 +144,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e log.Infof("cancel process signal: %s", signal) if signal == constants.TaskCancel && cmd.Process != nil { // 取消进程 - if err := cmd.Process.Kill(); err != nil { + if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil { log.Errorf("process kill error: %s", err.Error()) debug.PrintStack() } @@ -152,6 +154,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e t.Status = constants.StatusFinished } t.FinishTs = time.Now() + t.Error = "user kill the process ..." if err := t.Save(); err != nil { log.Infof("save task error: %s", err.Error()) debug.PrintStack() @@ -159,6 +162,8 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e } }() + // 在选择所有节点执行的时候,实际就是随机一个节点执行的, + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} // 异步启动进程 if err := cmd.Start(); err != nil { log.Errorf("start spider error:{}", err.Error()) @@ -177,11 +182,12 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e log.Errorf("wait process finish error: %s", err.Error()) debug.PrintStack() + log.Infof("error type is : %s", reflect.TypeOf(err).String()) // 发生一次也需要保存 - t.Error = err.Error() - t.FinishTs = time.Now() - t.Status = constants.StatusFinished - _ = t.Save() + //t.Error = err.Error() + //t.FinishTs = time.Now() + //t.Status = constants.StatusError + //_ = t.Save() return err } ch <- constants.TaskFinish From a081b02c13440a8323e51f1b3ef7165cca5e1127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Tue, 8 Oct 2019 20:16:49 +0800 Subject: [PATCH 40/81] =?UTF-8?q?fix=20=E7=8A=B6=E6=80=81=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/backend/services/task.go b/backend/services/task.go index 6b0effe6..cf0f61a8 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -164,12 +164,14 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e // 在选择所有节点执行的时候,实际就是随机一个节点执行的, cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + // 异步启动进程 if err := cmd.Start(); err != nil { log.Errorf("start spider error:{}", err.Error()) debug.PrintStack() return err } + // 保存pid到task t.Pid = cmd.Process.Pid if err := t.Save(); err != nil { @@ -183,11 +185,18 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e debug.PrintStack() log.Infof("error type is : %s", reflect.TypeOf(err).String()) - // 发生一次也需要保存 - //t.Error = err.Error() - //t.FinishTs = time.Now() - //t.Status = constants.StatusError - //_ = t.Save() + if exitError, ok := err.(*exec.ExitError); ok { + exitCode := exitError.ExitCode() + log.Errorf("exit error, exit code: %d", exitCode) + // 非kill 的错误类型 + if exitCode != 9 { + // 发生一次也需要保存 + t.Error = err.Error() + t.FinishTs = time.Now() + t.Status = constants.StatusError + _ = t.Save() + } + } return err } ch <- constants.TaskFinish From 6b90d86ea28b4559d3c02a328b718e0cb9f459d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Tue, 8 Oct 2019 20:20:48 +0800 Subject: [PATCH 41/81] =?UTF-8?q?fix=20=E7=8A=B6=E6=80=81=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/services/task.go b/backend/services/task.go index cf0f61a8..9654e8d5 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -189,7 +189,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e exitCode := exitError.ExitCode() log.Errorf("exit error, exit code: %d", exitCode) // 非kill 的错误类型 - if exitCode != 9 { + if exitCode != -1 { // 发生一次也需要保存 t.Error = err.Error() t.FinishTs = time.Now() From f5af83dccee7d8b2662bbf741ffeca040f2e5602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Tue, 8 Oct 2019 20:26:08 +0800 Subject: [PATCH 42/81] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E5=8F=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 5 +---- frontend/package.json | 2 +- frontend/src/views/layout/components/Navbar.vue | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/backend/services/task.go b/backend/services/task.go index 9654e8d5..ce62a95e 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -14,7 +14,6 @@ import ( "os" "os/exec" "path/filepath" - "reflect" "runtime" "runtime/debug" "strconv" @@ -183,14 +182,12 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e if err := cmd.Wait(); err != nil { log.Errorf("wait process finish error: %s", err.Error()) debug.PrintStack() - - log.Infof("error type is : %s", reflect.TypeOf(err).String()) if exitError, ok := err.(*exec.ExitError); ok { exitCode := exitError.ExitCode() log.Errorf("exit error, exit code: %d", exitCode) // 非kill 的错误类型 if exitCode != -1 { - // 发生一次也需要保存 + // 非手动kill保存为错误状态 t.Error = err.Error() t.FinishTs = time.Now() t.Status = constants.StatusError diff --git a/frontend/package.json b/frontend/package.json index f5d170c8..60ac5cc8 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "crawlab", - "version": "0.3.2", + "version": "0.3.4", "private": true, "scripts": { "serve": "vue-cli-service serve --ip=0.0.0.0 --mode=development", diff --git a/frontend/src/views/layout/components/Navbar.vue b/frontend/src/views/layout/components/Navbar.vue index 25d62e35..e294ad0c 100644 --- a/frontend/src/views/layout/components/Navbar.vue +++ b/frontend/src/views/layout/components/Navbar.vue @@ -9,7 +9,7 @@ - v0.3.2 + v0.3.4 {{$t('Logout')}} From 63fab3917b5a29fd9770f9f51f1572b9f0420385 Mon Sep 17 00:00:00 2001 From: yaziming Date: Wed, 9 Oct 2019 11:21:01 +0800 Subject: [PATCH 43/81] feat(backend): support server graceful down 1.support server graceful down --- backend/main.go | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/backend/main.go b/backend/main.go index 5d95dbaf..2c92ab37 100644 --- a/backend/main.go +++ b/backend/main.go @@ -1,6 +1,7 @@ package main import ( + "context" "crawlab/config" "crawlab/database" "crawlab/lib/validate_bridge" @@ -12,7 +13,13 @@ import ( "github.com/gin-gonic/gin" "github.com/gin-gonic/gin/binding" "github.com/spf13/viper" + "net" + "net/http" + "os" + "os/signal" "runtime/debug" + "syscall" + "time" ) func main() { @@ -166,8 +173,26 @@ func main() { // 运行服务器 host := viper.GetString("server.host") port := viper.GetString("server.port") - if err := app.Run(host + ":" + port); err != nil { + address := net.JoinHostPort(host, port) + srv := &http.Server{ + Handler: app, + Addr: address, + } + go func() { + if err := srv.ListenAndServe(); err != nil { + if err != http.ErrServerClosed { + log.Error("run server error:" + err.Error()) + } else { + log.Info("server graceful down") + } + } + }() + quit := make(chan os.Signal, 1) + signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) + <-quit + ctx2, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + if err := srv.Shutdown(ctx2); err != nil { log.Error("run server error:" + err.Error()) - panic(err) } } From 7fe2c9a42535fedada07e4efd9eed3cc73aec9f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 10 Oct 2019 17:59:15 +0800 Subject: [PATCH 44/81] =?UTF-8?q?fix=20=E7=88=AC=E8=99=AB=E5=88=97?= =?UTF-8?q?=E8=A1=A8=E6=8C=89=E9=92=AE=E6=97=A0=E6=B3=95=E7=82=B9=E5=87=BB?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/spider/SpiderList.vue | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/frontend/src/views/spider/SpiderList.vue b/frontend/src/views/spider/SpiderList.vue index 743aabbe..8c97339a 100644 --- a/frontend/src/views/spider/SpiderList.vue +++ b/frontend/src/views/spider/SpiderList.vue @@ -424,13 +424,18 @@ export default { this.dialogVisible = true }, isShowRun (row) { - if (this.isCustomized(row)) { - // customized spider - return !!row.cmd + if (row.cmd) { + return true } else { - // configurable spider - return !!row.fields + return false } + // if (this.isCustomized(row)) { + // // customized spider + // return !!row.cmd + // } else { + // // configurable spider + // return !!row.fields + // } }, isCustomized (row) { return row.type === 'customized' From 46d89c8cce2d417b089c8e80bb8f68e9059975e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 10 Oct 2019 19:56:04 +0800 Subject: [PATCH 45/81] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E6=89=93=E5=8D=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/msg_handler/handler.go | 2 ++ backend/services/msg_handler/msg_log.go | 3 +++ backend/services/node.go | 20 +++++++++++++------- backend/services/task.go | 2 +- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/backend/services/msg_handler/handler.go b/backend/services/msg_handler/handler.go index 848e0c5d..b8b8e231 100644 --- a/backend/services/msg_handler/handler.go +++ b/backend/services/msg_handler/handler.go @@ -3,6 +3,7 @@ package msg_handler import ( "crawlab/constants" "crawlab/entity" + "github.com/apex/log" ) type Handler interface { @@ -10,6 +11,7 @@ type Handler interface { } func GetMsgHandler(msg entity.NodeMessage) Handler { + log.Infof("received msg , type is : %s", msg.Type) if msg.Type == constants.MsgTypeGetLog || msg.Type == constants.MsgTypeRemoveLog { // 日志相关 return &Log{ diff --git a/backend/services/msg_handler/msg_log.go b/backend/services/msg_handler/msg_log.go index 37080bd6..b865f4e3 100644 --- a/backend/services/msg_handler/msg_log.go +++ b/backend/services/msg_handler/msg_log.go @@ -40,8 +40,11 @@ func (g *Log) get() error { } // 发布消息给主节点 if err := utils.Pub(constants.ChannelMasterNode, msgSd); err != nil { + log.Errorf("pub log to master node error: %s", err.Error()) + debug.PrintStack() return err } + log.Infof(msgSd.Log) return nil } diff --git a/backend/services/node.go b/backend/services/node.go index 53af8d32..7fc134c5 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -110,13 +110,15 @@ func handleNodeInfo(key string, data Data) { if err := c.Find(bson.M{"key": key}).One(&node); err != nil { // 数据库不存在该节点 node = model.Node{ - Key: key, - Name: data.Ip, - Ip: data.Ip, - Port: "8000", - Mac: data.Mac, - Status: constants.StatusOnline, - IsMaster: data.Master, + Key: key, + Name: data.Ip, + Ip: data.Ip, + Port: "8000", + Mac: data.Mac, + Status: constants.StatusOnline, + IsMaster: data.Master, + UpdateTs: time.Now(), + UpdateTsUnix: time.Now().Unix(), } if err := node.Add(); err != nil { log.Errorf(err.Error()) @@ -125,6 +127,8 @@ func handleNodeInfo(key string, data Data) { } else { // 数据库存在该节点 node.Status = constants.StatusOnline + node.UpdateTs = time.Now() + node.UpdateTsUnix = time.Now().Unix() if err := node.Save(); err != nil { log.Errorf(err.Error()) return @@ -201,6 +205,8 @@ func WorkerNodeCallback(message redis.Message) (err error) { // 反序列化 msg := utils.GetMessage(message) if err := msg_handler.GetMsgHandler(*msg).Handle(); err != nil { + log.Errorf("msg handler error: %s", err.Error()) + debug.PrintStack() return err } return nil diff --git a/backend/services/task.go b/backend/services/task.go index ce62a95e..f515f48d 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -148,12 +148,12 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e debug.PrintStack() } t.Status = constants.StatusCancelled + t.Error = "user kill the process ..." } else { // 保存任务 t.Status = constants.StatusFinished } t.FinishTs = time.Now() - t.Error = "user kill the process ..." if err := t.Save(); err != nil { log.Infof("save task error: %s", err.Error()) debug.PrintStack() From ee808e0e603a341778a6815a90b07a6cc0f7b6e2 Mon Sep 17 00:00:00 2001 From: yaziming Date: Fri, 11 Oct 2019 16:01:57 +0800 Subject: [PATCH 46/81] refactor(all): refactor code remove redundant code and some code refactor --- backend/database/pubsub.go | 5 +- backend/database/redis.go | 35 ++++++-- backend/errors/errors.go | 1 - backend/lib/cron/cron_test.go | 83 +++++++++---------- backend/lib/cron/logger.go | 4 +- backend/lib/cron/option_test.go | 2 +- backend/lib/cron/parser_test.go | 1 + backend/lib/cron/spec.go | 4 +- backend/mock/node.go | 2 +- backend/model/log.go | 3 +- backend/model/node.go | 1 - backend/model/spider.go | 9 +- backend/services/context/context.go | 3 +- backend/services/log.go | 2 +- backend/services/log_test.go | 22 ++--- backend/services/msg_handler/msg_log.go | 3 +- .../services/msg_handler/msg_system_info.go | 4 +- backend/services/node.go | 14 ++-- backend/services/spider.go | 4 +- backend/services/spider_handler/spider.go | 6 +- backend/services/task.go | 24 +++--- backend/services/user.go | 5 +- backend/utils/file.go | 19 ++--- backend/utils/file_test.go | 22 ++--- backend/utils/helpers.go | 23 ++--- backend/utils/model.go | 13 +-- backend/utils/user.go | 2 +- 27 files changed, 155 insertions(+), 161 deletions(-) diff --git a/backend/database/pubsub.go b/backend/database/pubsub.go index 0eb8639b..7f647cda 100644 --- a/backend/database/pubsub.go +++ b/backend/database/pubsub.go @@ -2,6 +2,7 @@ package database import ( "context" + "crawlab/utils" "fmt" "github.com/apex/log" "github.com/gomodule/redigo/redis" @@ -26,7 +27,7 @@ func (r *Redis) subscribe(ctx context.Context, consume ConsumeFunc, channel ...s tick := time.NewTicker(time.Second * 3) defer tick.Stop() go func() { - defer func() { _ = psc.Close() }() + defer utils.Close(psc) for { switch msg := psc.Receive().(type) { case error: @@ -87,7 +88,7 @@ func (r *Redis) Subscribe(ctx context.Context, consume ConsumeFunc, channel ...s } func (r *Redis) Publish(channel, message string) (n int, err error) { conn := r.pool.Get() - defer func() { _ = conn.Close() }() + defer utils.Close(conn) n, err = redis.Int(conn.Do("PUBLISH", channel, message)) if err != nil { return 0, errors2.Wrapf(err, "redis publish %s %s", channel, message) diff --git a/backend/database/redis.go b/backend/database/redis.go index ede229a2..348a74bb 100644 --- a/backend/database/redis.go +++ b/backend/database/redis.go @@ -1,6 +1,10 @@ package database import ( + "context" + "crawlab/entity" + "crawlab/utils" + "github.com/apex/log" "github.com/gomodule/redigo/redis" "github.com/spf13/viper" "runtime/debug" @@ -18,7 +22,7 @@ func NewRedisClient() *Redis { } func (r *Redis) RPush(collection string, value interface{}) error { c := r.pool.Get() - defer c.Close() + defer utils.Close(c) if _, err := c.Do("RPUSH", collection, value); err != nil { debug.PrintStack() @@ -29,7 +33,7 @@ func (r *Redis) RPush(collection string, value interface{}) error { func (r *Redis) LPop(collection string) (string, error) { c := r.pool.Get() - defer c.Close() + defer utils.Close(c) value, err2 := redis.String(c.Do("LPOP", collection)) if err2 != nil { @@ -40,7 +44,7 @@ func (r *Redis) LPop(collection string) (string, error) { func (r *Redis) HSet(collection string, key string, value string) error { c := r.pool.Get() - defer c.Close() + defer utils.Close(c) if _, err := c.Do("HSET", collection, key, value); err != nil { debug.PrintStack() @@ -51,7 +55,7 @@ func (r *Redis) HSet(collection string, key string, value string) error { func (r *Redis) HGet(collection string, key string) (string, error) { c := r.pool.Get() - defer c.Close() + defer utils.Close(c) value, err2 := redis.String(c.Do("HGET", collection, key)) if err2 != nil { @@ -62,7 +66,7 @@ func (r *Redis) HGet(collection string, key string) (string, error) { func (r *Redis) HDel(collection string, key string) error { c := r.pool.Get() - defer c.Close() + defer utils.Close(c) if _, err := c.Do("HDEL", collection, key); err != nil { return err @@ -72,7 +76,7 @@ func (r *Redis) HDel(collection string, key string) error { func (r *Redis) HKeys(collection string) ([]string, error) { c := r.pool.Get() - defer c.Close() + defer utils.Close(c) value, err2 := redis.Strings(c.Do("HKeys", collection)) if err2 != nil { @@ -120,3 +124,22 @@ func InitRedis() error { RedisClient = NewRedisClient() return nil } + +func Pub(channel string, msg entity.NodeMessage) error { + if _, err := RedisClient.Publish(channel, utils.GetJson(msg)); err != nil { + log.Errorf("publish redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} + +func Sub(channel string, consume ConsumeFunc) error { + ctx := context.Background() + if err := RedisClient.Subscribe(ctx, consume, channel); err != nil { + log.Errorf("subscribe redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} diff --git a/backend/errors/errors.go b/backend/errors/errors.go index f191cd3e..d896e4d4 100644 --- a/backend/errors/errors.go +++ b/backend/errors/errors.go @@ -24,7 +24,6 @@ func (O OPError) Error() string { switch O.Scope { case ScopeSystem: scope = "system" - break case ScopeBusiness: scope = "business" } diff --git a/backend/lib/cron/cron_test.go b/backend/lib/cron/cron_test.go index 36f06bf7..35266df1 100644 --- a/backend/lib/cron/cron_test.go +++ b/backend/lib/cron/cron_test.go @@ -44,17 +44,14 @@ func TestFuncPanicRecovery(t *testing.T) { WithChain(Recover(newBufLogger(&buf)))) cron.Start() defer cron.Stop() - cron.AddFunc("* * * * * ?", func() { + _, _ = cron.AddFunc("* * * * * ?", func() { panic("YOLO") }) - - select { - case <-time.After(OneSecond): - if !strings.Contains(buf.String(), "YOLO") { - t.Error("expected a panic to be logged, got none") - } - return + <-time.After(OneSecond) + if !strings.Contains(buf.String(), "YOLO") { + t.Error("expected a panic to be logged, got none") } + } type DummyJob struct{} @@ -71,7 +68,7 @@ func TestJobPanicRecovery(t *testing.T) { WithChain(Recover(newBufLogger(&buf)))) cron.Start() defer cron.Stop() - cron.AddJob("* * * * * ?", job) + _, _ = cron.AddJob("* * * * * ?", job) select { case <-time.After(OneSecond): @@ -102,7 +99,7 @@ func TestStopCausesJobsToNotRun(t *testing.T) { cron := newWithSeconds() cron.Start() cron.Stop() - cron.AddFunc("* * * * * ?", func() { wg.Done() }) + _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) select { case <-time.After(OneSecond): @@ -118,7 +115,7 @@ func TestAddBeforeRunning(t *testing.T) { wg.Add(1) cron := newWithSeconds() - cron.AddFunc("* * * * * ?", func() { wg.Done() }) + _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) cron.Start() defer cron.Stop() @@ -138,7 +135,7 @@ func TestAddWhileRunning(t *testing.T) { cron := newWithSeconds() cron.Start() defer cron.Stop() - cron.AddFunc("* * * * * ?", func() { wg.Done() }) + _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) select { case <-time.After(OneSecond): @@ -154,7 +151,7 @@ func TestAddWhileRunningWithDelay(t *testing.T) { defer cron.Stop() time.Sleep(5 * time.Second) var calls int64 - cron.AddFunc("* * * * * *", func() { atomic.AddInt64(&calls, 1) }) + _, _ = cron.AddFunc("* * * * * *", func() { atomic.AddInt64(&calls, 1) }) <-time.After(OneSecond) if atomic.LoadInt64(&calls) != 1 { @@ -205,7 +202,7 @@ func TestSnapshotEntries(t *testing.T) { wg.Add(1) cron := New() - cron.AddFunc("@every 2s", func() { wg.Done() }) + _, _ = cron.AddFunc("@every 2s", func() { wg.Done() }) cron.Start() defer cron.Stop() @@ -232,12 +229,12 @@ func TestMultipleEntries(t *testing.T) { wg.Add(2) cron := newWithSeconds() - cron.AddFunc("0 0 0 1 1 ?", func() {}) - cron.AddFunc("* * * * * ?", func() { wg.Done() }) + _, _ = cron.AddFunc("0 0 0 1 1 ?", func() {}) + _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) id1, _ := cron.AddFunc("* * * * * ?", func() { t.Fatal() }) id2, _ := cron.AddFunc("* * * * * ?", func() { t.Fatal() }) - cron.AddFunc("0 0 0 31 12 ?", func() {}) - cron.AddFunc("* * * * * ?", func() { wg.Done() }) + _, _ = cron.AddFunc("0 0 0 31 12 ?", func() {}) + _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) cron.Remove(id1) cron.Start() @@ -257,9 +254,9 @@ func TestRunningJobTwice(t *testing.T) { wg.Add(2) cron := newWithSeconds() - cron.AddFunc("0 0 0 1 1 ?", func() {}) - cron.AddFunc("0 0 0 31 12 ?", func() {}) - cron.AddFunc("* * * * * ?", func() { wg.Done() }) + _, _ = cron.AddFunc("0 0 0 1 1 ?", func() {}) + _, _ = cron.AddFunc("0 0 0 31 12 ?", func() {}) + _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) cron.Start() defer cron.Stop() @@ -276,9 +273,9 @@ func TestRunningMultipleSchedules(t *testing.T) { wg.Add(2) cron := newWithSeconds() - cron.AddFunc("0 0 0 1 1 ?", func() {}) - cron.AddFunc("0 0 0 31 12 ?", func() {}) - cron.AddFunc("* * * * * ?", func() { wg.Done() }) + _, _ = cron.AddFunc("0 0 0 1 1 ?", func() {}) + _, _ = cron.AddFunc("0 0 0 31 12 ?", func() {}) + _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) cron.Schedule(Every(time.Minute), FuncJob(func() {})) cron.Schedule(Every(time.Second), FuncJob(func() { wg.Done() })) cron.Schedule(Every(time.Hour), FuncJob(func() {})) @@ -310,7 +307,7 @@ func TestLocalTimezone(t *testing.T) { now.Second()+1, now.Second()+2, now.Minute(), now.Hour(), now.Day(), now.Month()) cron := newWithSeconds() - cron.AddFunc(spec, func() { wg.Done() }) + _, _ = cron.AddFunc(spec, func() { wg.Done() }) cron.Start() defer cron.Stop() @@ -344,7 +341,7 @@ func TestNonLocalTimezone(t *testing.T) { now.Second()+1, now.Second()+2, now.Minute(), now.Hour(), now.Day(), now.Month()) cron := New(WithLocation(loc), WithParser(secondParser)) - cron.AddFunc(spec, func() { wg.Done() }) + _, _ = cron.AddFunc(spec, func() { wg.Done() }) cron.Start() defer cron.Stop() @@ -386,7 +383,7 @@ func TestBlockingRun(t *testing.T) { wg.Add(1) cron := newWithSeconds() - cron.AddFunc("* * * * * ?", func() { wg.Done() }) + _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) var unblockChan = make(chan struct{}) @@ -410,7 +407,7 @@ func TestStartNoop(t *testing.T) { var tickChan = make(chan struct{}, 2) cron := newWithSeconds() - cron.AddFunc("* * * * * ?", func() { + _, _ = cron.AddFunc("* * * * * ?", func() { tickChan <- struct{}{} }) @@ -438,10 +435,10 @@ func TestJob(t *testing.T) { wg.Add(1) cron := newWithSeconds() - cron.AddJob("0 0 0 30 Feb ?", testJob{wg, "job0"}) - cron.AddJob("0 0 0 1 1 ?", testJob{wg, "job1"}) + _, _ = cron.AddJob("0 0 0 30 Feb ?", testJob{wg, "job0"}) + _, _ = cron.AddJob("0 0 0 1 1 ?", testJob{wg, "job1"}) job2, _ := cron.AddJob("* * * * * ?", testJob{wg, "job2"}) - cron.AddJob("1 0 0 1 1 ?", testJob{wg, "job3"}) + _, _ = cron.AddJob("1 0 0 1 1 ?", testJob{wg, "job3"}) cron.Schedule(Every(5*time.Second+5*time.Nanosecond), testJob{wg, "job4"}) job5 := cron.Schedule(Every(5*time.Minute), testJob{wg, "job5"}) @@ -465,7 +462,7 @@ func TestJob(t *testing.T) { // Ensure the entries are in the right order. expecteds := []string{"job2", "job4", "job5", "job1", "job3", "job0"} - var actuals []string + var actuals = make([]string, 0, len(cron.Entries())) for _, entry := range cron.Entries() { actuals = append(actuals, entry.Job.(testJob).name) } @@ -545,7 +542,7 @@ func (*ZeroSchedule) Next(time.Time) time.Time { func TestJobWithZeroTimeDoesNotRun(t *testing.T) { cron := newWithSeconds() var calls int64 - cron.AddFunc("* * * * * *", func() { atomic.AddInt64(&calls, 1) }) + _, _ = cron.AddFunc("* * * * * *", func() { atomic.AddInt64(&calls, 1) }) cron.Schedule(new(ZeroSchedule), FuncJob(func() { t.Error("expected zero task will not run") })) cron.Start() defer cron.Stop() @@ -582,11 +579,11 @@ func TestStopAndWait(t *testing.T) { t.Run("a couple fast jobs added, still returns immediately", func(t *testing.T) { cron := newWithSeconds() - cron.AddFunc("* * * * * *", func() {}) + _, _ = cron.AddFunc("* * * * * *", func() {}) cron.Start() - cron.AddFunc("* * * * * *", func() {}) - cron.AddFunc("* * * * * *", func() {}) - cron.AddFunc("* * * * * *", func() {}) + _, _ = cron.AddFunc("* * * * * *", func() {}) + _, _ = cron.AddFunc("* * * * * *", func() {}) + _, _ = cron.AddFunc("* * * * * *", func() {}) time.Sleep(time.Second) ctx := cron.Stop() select { @@ -598,10 +595,10 @@ func TestStopAndWait(t *testing.T) { t.Run("a couple fast jobs and a slow job added, waits for slow job", func(t *testing.T) { cron := newWithSeconds() - cron.AddFunc("* * * * * *", func() {}) + _, _ = cron.AddFunc("* * * * * *", func() {}) cron.Start() - cron.AddFunc("* * * * * *", func() { time.Sleep(2 * time.Second) }) - cron.AddFunc("* * * * * *", func() {}) + _, _ = cron.AddFunc("* * * * * *", func() { time.Sleep(2 * time.Second) }) + _, _ = cron.AddFunc("* * * * * *", func() {}) time.Sleep(time.Second) ctx := cron.Stop() @@ -625,10 +622,10 @@ func TestStopAndWait(t *testing.T) { t.Run("repeated calls to stop, waiting for completion and after", func(t *testing.T) { cron := newWithSeconds() - cron.AddFunc("* * * * * *", func() {}) - cron.AddFunc("* * * * * *", func() { time.Sleep(2 * time.Second) }) + _, _ = cron.AddFunc("* * * * * *", func() {}) + _, _ = cron.AddFunc("* * * * * *", func() { time.Sleep(2 * time.Second) }) cron.Start() - cron.AddFunc("* * * * * *", func() {}) + _, _ = cron.AddFunc("* * * * * *", func() {}) time.Sleep(time.Second) ctx := cron.Stop() ctx2 := cron.Stop() diff --git a/backend/lib/cron/logger.go b/backend/lib/cron/logger.go index b4efcc05..46314da8 100644 --- a/backend/lib/cron/logger.go +++ b/backend/lib/cron/logger.go @@ -9,10 +9,10 @@ import ( ) // DefaultLogger is used by Cron if none is specified. -var DefaultLogger Logger = PrintfLogger(log.New(os.Stdout, "cron: ", log.LstdFlags)) +var DefaultLogger = PrintfLogger(log.New(os.Stdout, "cron: ", log.LstdFlags)) // DiscardLogger can be used by callers to discard all log messages. -var DiscardLogger Logger = PrintfLogger(log.New(ioutil.Discard, "", 0)) +var DiscardLogger = PrintfLogger(log.New(ioutil.Discard, "", 0)) // Logger is the interface used in this package for logging, so that any backend // can be plugged in. It is a subset of the github.com/go-logr/logr interface. diff --git a/backend/lib/cron/option_test.go b/backend/lib/cron/option_test.go index 8aef1682..57dbaa4b 100644 --- a/backend/lib/cron/option_test.go +++ b/backend/lib/cron/option_test.go @@ -30,7 +30,7 @@ func TestWithVerboseLogger(t *testing.T) { t.Error("expected provided logger") } - c.AddFunc("@every 1s", func() {}) + _, _ = c.AddFunc("@every 1s", func() {}) c.Start() time.Sleep(OneSecond) c.Stop() diff --git a/backend/lib/cron/parser_test.go b/backend/lib/cron/parser_test.go index 41c8c520..f95a54bb 100644 --- a/backend/lib/cron/parser_test.go +++ b/backend/lib/cron/parser_test.go @@ -304,6 +304,7 @@ func TestNormalizeFields_Errors(t *testing.T) { actual, err := normalizeFields(test.input, test.options) if err == nil { t.Errorf("expected an error, got none. results: %v", actual) + return } if !strings.Contains(err.Error(), test.err) { t.Errorf("expected error %q, got %q", test.err, err.Error()) diff --git a/backend/lib/cron/spec.go b/backend/lib/cron/spec.go index fa1e241e..9821a6a2 100644 --- a/backend/lib/cron/spec.go +++ b/backend/lib/cron/spec.go @@ -178,8 +178,8 @@ WRAP: // restrictions are satisfied by the given time. func dayMatches(s *SpecSchedule, t time.Time) bool { var ( - domMatch bool = 1< 0 - dowMatch bool = 1< 0 + domMatch = 1< 0 + dowMatch = 1< 0 ) if s.Dom&starBit > 0 || s.Dow&starBit > 0 { return domMatch && dowMatch diff --git a/backend/mock/node.go b/backend/mock/node.go index 789d0a9a..6c77c32e 100644 --- a/backend/mock/node.go +++ b/backend/mock/node.go @@ -188,7 +188,7 @@ func DeleteNode(c *gin.Context) { id := bson.ObjectId("5d429e6c19f7abede924fee2") for _, node := range NodeList { - if node.Id == bson.ObjectId(id) { + if node.Id == id { log.Infof("Delete a node") } } diff --git a/backend/model/log.go b/backend/model/log.go index ae6973b1..77e5094f 100644 --- a/backend/model/log.go +++ b/backend/model/log.go @@ -1,6 +1,7 @@ package model import ( + "crawlab/utils" "github.com/apex/log" "os" "runtime/debug" @@ -21,7 +22,7 @@ func GetLocalLog(logPath string) (fileBytes []byte, err error) { debug.PrintStack() return nil, err } - defer f.Close() + defer utils.Close(f) const bufLen = 2 * 1024 * 1024 logBuf := make([]byte, bufLen) diff --git a/backend/model/node.go b/backend/model/node.go index 7af93dbe..23dc3f8a 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -33,7 +33,6 @@ type Node struct { const ( Yes = "Y" - No = "N" ) // 当前节点是否为主节点 diff --git a/backend/model/spider.go b/backend/model/spider.go index 1f88acff..df1bf3e5 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -104,7 +104,7 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, erro defer s.Close() // 获取爬虫列表 - spiders := []Spider{} + var spiders []Spider if err := c.Find(filter).Skip(skip).Limit(limit).Sort("+name").All(&spiders); err != nil { debug.PrintStack() return spiders, 0, err @@ -225,7 +225,7 @@ func RemoveAllSpider() error { s, c := database.GetCol("spiders") defer s.Close() - spiders := []Spider{} + var spiders []Spider err := c.Find(nil).All(&spiders) if err != nil { log.Error("get all spiders error:" + err.Error()) @@ -256,15 +256,14 @@ func GetSpiderTypes() ([]*entity.SpiderType, error) { s, c := database.GetCol("spiders") defer s.Close() - group := bson.M{ "$group": bson.M{ - "_id": "$type", + "_id": "$type", "count": bson.M{"$sum": 1}, }, } var types []*entity.SpiderType - if err := c.Pipe([]bson.M{ group}).All(&types); err != nil { + if err := c.Pipe([]bson.M{group}).All(&types); err != nil { log.Errorf("get spider types error: %s", err.Error()) debug.PrintStack() return nil, err diff --git a/backend/services/context/context.go b/backend/services/context/context.go index ce8eb72e..e8b37f8e 100644 --- a/backend/services/context/context.go +++ b/backend/services/context/context.go @@ -66,7 +66,7 @@ func (c *Context) failed(err error, httpCode int, variables ...interface{}) { "message": "error", "error": errStr, }) - break + case validator.ValidationErrors: validatorErrors := causeError.(validator.ValidationErrors) //firstError := validatorErrors[0].(validator.FieldError) @@ -75,7 +75,6 @@ func (c *Context) failed(err error, httpCode int, variables ...interface{}) { "message": "error", "error": validatorErrors.Error(), }) - break default: fmt.Println("deprecated....") c.AbortWithStatusJSON(httpCode, gin.H{ diff --git a/backend/services/log.go b/backend/services/log.go index 485cb7dd..6766cd9e 100644 --- a/backend/services/log.go +++ b/backend/services/log.go @@ -67,7 +67,7 @@ func DeleteLogPeriodically() { for _, fi := range rd { if fi.IsDir() { log.Info(filepath.Join(logDir, fi.Name())) - os.RemoveAll(filepath.Join(logDir, fi.Name())) + _ = os.RemoveAll(filepath.Join(logDir, fi.Name())) log.Info("Delete Log File Success") } } diff --git a/backend/services/log_test.go b/backend/services/log_test.go index 1e9a21c7..1e3f76d4 100644 --- a/backend/services/log_test.go +++ b/backend/services/log_test.go @@ -13,10 +13,8 @@ import ( func TestDeleteLogPeriodically(t *testing.T) { Convey("Test DeleteLogPeriodically", t, func() { - if err := config.InitConfig("../conf/config.yml"); err != nil { - log.Error("init config error:" + err.Error()) - panic(err) - } + err := config.InitConfig("../conf/config.yml") + So(err, ShouldBeNil) log.Info("初始化配置成功") logDir := viper.GetString("log.path") log.Info(logDir) @@ -28,24 +26,16 @@ func TestGetLocalLog(t *testing.T) { //create a log file for test logPath := "../logs/crawlab/test.log" f, err := os.Create(logPath) - defer f.Close() + defer utils.Close(f) if err != nil { - fmt.Println(err.Error()) + fmt.Println(err) } else { _, err = f.WriteString("This is for test") + fmt.Println(err) } - Convey("Test GetLocalLog", t, func() { - Convey("Test response", func() { - logStr, err := GetLocalLog(logPath) - log.Info(utils.BytesToString(logStr)) - fmt.Println(err) - So(err, ShouldEqual, nil) - - }) - }) //delete the test log file - os.Remove(logPath) + _ = os.Remove(logPath) } diff --git a/backend/services/msg_handler/msg_log.go b/backend/services/msg_handler/msg_log.go index 37080bd6..3b1416eb 100644 --- a/backend/services/msg_handler/msg_log.go +++ b/backend/services/msg_handler/msg_log.go @@ -2,6 +2,7 @@ package msg_handler import ( "crawlab/constants" + "crawlab/database" "crawlab/entity" "crawlab/model" "crawlab/utils" @@ -39,7 +40,7 @@ func (g *Log) get() error { msgSd.Log = utils.BytesToString(logStr) } // 发布消息给主节点 - if err := utils.Pub(constants.ChannelMasterNode, msgSd); err != nil { + if err := database.Pub(constants.ChannelMasterNode, msgSd); err != nil { return err } return nil diff --git a/backend/services/msg_handler/msg_system_info.go b/backend/services/msg_handler/msg_system_info.go index 6b88e2cf..9de5c74a 100644 --- a/backend/services/msg_handler/msg_system_info.go +++ b/backend/services/msg_handler/msg_system_info.go @@ -2,9 +2,9 @@ package msg_handler import ( "crawlab/constants" + "crawlab/database" "crawlab/entity" "crawlab/model" - "crawlab/utils" ) type SystemInfo struct { @@ -22,7 +22,7 @@ func (s *SystemInfo) Handle() error { NodeId: s.msg.NodeId, SysInfo: sysInfo, } - if err := utils.Pub(constants.ChannelMasterNode, msgSd); err != nil { + if err := database.Pub(constants.ChannelMasterNode, msgSd); err != nil { return err } return nil diff --git a/backend/services/node.go b/backend/services/node.go index 53af8d32..de33f6d3 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -100,7 +100,7 @@ func handleNodeInfo(key string, data Data) { // 同个key可能因为并发,被注册多次 var nodes []model.Node _ = c.Find(bson.M{"key": key}).All(&nodes) - if nodes != nil && len(nodes) > 1 { + if len(nodes) > 1 { for _, node := range nodes { _ = c.RemoveId(node.Id) } @@ -149,7 +149,11 @@ func UpdateNodeData() { } // 获取redis的key key, err := register.GetRegister().GetKey() - + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } // 构造节点数据 data := Data{ Key: key, @@ -230,19 +234,19 @@ func InitNodeService() error { if model.IsMaster() { // 如果为主节点,订阅主节点通信频道 - if err := utils.Sub(constants.ChannelMasterNode, MasterNodeCallback); err != nil { + if err := database.Sub(constants.ChannelMasterNode, MasterNodeCallback); err != nil { return err } } else { // 若为工作节点,订阅单独指定通信频道 channel := constants.ChannelWorkerNode + node.Id.Hex() - if err := utils.Sub(channel, WorkerNodeCallback); err != nil { + if err := database.Sub(channel, WorkerNodeCallback); err != nil { return err } } // 订阅全通道 - if err := utils.Sub(constants.ChannelAllNode, WorkerNodeCallback); err != nil { + if err := database.Sub(constants.ChannelAllNode, WorkerNodeCallback); err != nil { return err } diff --git a/backend/services/spider.go b/backend/services/spider.go index 7aea456f..84d218bb 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -82,7 +82,7 @@ func ReadFileByStep(filePath string, handle func([]byte, *mgo.GridFile), fileCre log.Infof("can't opened this file") return err } - defer f.Close() + defer utils.Close(f) s := make([]byte, 4096) for { switch nr, err := f.Read(s[:]); true { @@ -173,7 +173,7 @@ func RemoveSpider(id string) error { Type: constants.MsgTypeRemoveSpider, SpiderId: id, } - if err := utils.Pub(constants.ChannelAllNode, msg); err != nil { + if err := database.Pub(constants.ChannelAllNode, msg); err != nil { return err } diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index 53c83b9a..cce025dc 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -28,7 +28,7 @@ func (s *SpiderSync) CreateMd5File(md5 string) { fileName := filepath.Join(path, Md5File) file := utils.OpenFile(fileName) - defer file.Close() + defer utils.Close(file) if file != nil { if _, err := file.WriteString(md5 + "\n"); err != nil { log.Errorf("file write string error: %s", err.Error()) @@ -80,7 +80,7 @@ func (s *SpiderSync) Download() { defer session.Close() f, err := gf.OpenId(bson.ObjectIdHex(fileId)) - defer f.Close() + defer utils.Close(f) if err != nil { log.Errorf("open file id: " + fileId + ", spider id:" + spiderId + ", error: " + err.Error()) debug.PrintStack() @@ -99,7 +99,7 @@ func (s *SpiderSync) Download() { // 创建临时文件 tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") tmpFile := utils.OpenFile(tmpFilePath) - defer tmpFile.Close() + defer utils.Close(tmpFile) // 将该文件写入临时文件 if _, err := io.Copy(tmpFile, f); err != nil { diff --git a/backend/services/task.go b/backend/services/task.go index ce62a95e..80f063ff 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -122,7 +122,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e HandleTaskError(t, err) return err } - defer fLog.Close() + defer utils.Close(fLog) cmd.Stdout = fLog cmd.Stderr = fLog @@ -432,33 +432,29 @@ func ExecuteTask(id int) { func GetTaskLog(id string) (logStr string, err error) { task, err := model.GetTask(id) + if err != nil { - return "", err + return } - logStr = "" if IsMasterNode(task.NodeId.Hex()) { // 若为主节点,获取本机日志 logBytes, err := model.GetLocalLog(task.LogPath) - logStr = utils.BytesToString(logBytes) if err != nil { log.Errorf(err.Error()) logStr = err.Error() - // return "", err } else { logStr = utils.BytesToString(logBytes) } - - } else { - // 若不为主节点,获取远端日志 - logStr, err = GetRemoteLog(task) - if err != nil { - log.Errorf(err.Error()) - return "", err - } + return logStr, err } + // 若不为主节点,获取远端日志 + logStr, err = GetRemoteLog(task) + if err != nil { + log.Errorf(err.Error()) - return logStr, nil + } + return logStr, err } func CancelTask(id string) (err error) { diff --git a/backend/services/user.go b/backend/services/user.go index 4811f767..231300ee 100644 --- a/backend/services/user.go +++ b/backend/services/user.go @@ -17,10 +17,7 @@ func InitUserService() error { Password: utils.EncryptPassword("admin"), Role: constants.RoleAdmin, } - if err := adminUser.Add(); err != nil { - // pass - } - return nil + return adminUser.Add() } func MakeToken(user *model.User) (tokenStr string, err error) { token := jwt.NewWithClaims(jwt.SigningMethodHS256, jwt.MapClaims{ diff --git a/backend/utils/file.go b/backend/utils/file.go index d65e7ab1..babc0d69 100644 --- a/backend/utils/file.go +++ b/backend/utils/file.go @@ -21,7 +21,7 @@ func RemoveFiles(path string) { // 读取文件一行 func ReadFileOneLine(fileName string) string { file := OpenFile(fileName) - defer file.Close() + defer Close(file) buf := bufio.NewReader(file) line, err := buf.ReadString('\n') if err != nil { @@ -57,10 +57,7 @@ func CreateFilePath(filePath string) { func Exists(path string) bool { _, err := os.Stat(path) //os.Stat获取文件信息 if err != nil { - if os.IsExist(err) { - return true - } - return false + return os.IsExist(err) } return true } @@ -88,7 +85,7 @@ func DeCompressByPath(tarFile, dest string) error { if err != nil { return err } - defer srcFile.Close() + defer Close(srcFile) return DeCompress(srcFile, dest) } @@ -112,7 +109,7 @@ func DeCompress(srcFile *os.File, dstPath string) error { debug.PrintStack() return err } - defer zipFile.Close() + defer Close(zipFile) // 遍历zip内所有文件和目录 for _, innerFile := range zipFile.File { @@ -156,7 +153,7 @@ func DeCompress(srcFile *os.File, dstPath string) error { debug.PrintStack() continue } - defer newFile.Close() + defer Close(newFile) // 拷贝该文件到新文件中 if _, err := io.Copy(newFile, srcFile); err != nil { @@ -184,9 +181,9 @@ func DeCompress(srcFile *os.File, dstPath string) error { //dest 压缩文件存放地址 func Compress(files []*os.File, dest string) error { d, _ := os.Create(dest) - defer d.Close() + defer Close(d) w := zip.NewWriter(d) - defer w.Close() + defer Close(w) for _, file := range files { err := _Compress(file, "", w) if err != nil { @@ -234,7 +231,7 @@ func _Compress(file *os.File, prefix string, zw *zip.Writer) error { return err } _, err = io.Copy(writer, file) - file.Close() + Close(file) if err != nil { debug.PrintStack() return err diff --git a/backend/utils/file_test.go b/backend/utils/file_test.go index 64f2df6d..4af32d0d 100644 --- a/backend/utils/file_test.go +++ b/backend/utils/file_test.go @@ -60,7 +60,7 @@ func TestCompress(t *testing.T) { So(er, ShouldEqual, nil) }) }) - os.RemoveAll("testCompress") + _ = os.RemoveAll("testCompress") } func Zip(zipFile string, fileList []string) error { @@ -69,16 +69,11 @@ func Zip(zipFile string, fileList []string) error { if err != nil { log.Fatal() } - defer fw.Close() + defer Close(fw) // 实例化新的 zip.Writer zw := zip.NewWriter(fw) - defer func() { - // 检测一下是否成功关闭 - if err := zw.Close(); err != nil { - log.Fatalln(err) - } - }() + defer Close(zw) for _, fileName := range fileList { fr, err := os.Open(fileName) @@ -91,6 +86,9 @@ func Zip(zipFile string, fileList []string) error { } // 写入文件的头信息 fh, err := zip.FileInfoHeader(fi) + if err != nil { + return err + } w, err := zw.CreateHeader(fh) if err != nil { return err @@ -106,6 +104,10 @@ func Zip(zipFile string, fileList []string) error { func TestDeCompress(t *testing.T) { err := os.Mkdir("testDeCompress", os.ModePerm) + if err != nil { + t.Error(err) + + } err = Zip("demo.zip", []string{}) if err != nil { t.Error("create zip file failed") @@ -121,7 +123,7 @@ func TestDeCompress(t *testing.T) { err := DeCompress(tmpFile, dstPath) So(err, ShouldEqual, nil) }) - os.RemoveAll("testDeCompress") - os.Remove("demo.zip") + _ = os.RemoveAll("testDeCompress") + _ = os.Remove("demo.zip") } diff --git a/backend/utils/helpers.go b/backend/utils/helpers.go index edc6200e..541d9002 100644 --- a/backend/utils/helpers.go +++ b/backend/utils/helpers.go @@ -1,12 +1,11 @@ package utils import ( - "context" - "crawlab/database" "crawlab/entity" "encoding/json" "github.com/apex/log" "github.com/gomodule/redigo/redis" + "io" "runtime/debug" "unsafe" ) @@ -35,21 +34,9 @@ func GetMessage(message redis.Message) *entity.NodeMessage { return &msg } -func Pub(channel string, msg entity.NodeMessage) error { - if _, err := database.RedisClient.Publish(channel, GetJson(msg)); err != nil { - log.Errorf("publish redis error: %s", err.Error()) - debug.PrintStack() - return err +func Close(c io.Closer) { + err := c.Close() + if err != nil { + log.WithError(err).Error("关闭资源文件失败。") } - return nil -} - -func Sub(channel string, consume database.ConsumeFunc) error { - ctx := context.Background() - if err := database.RedisClient.Subscribe(ctx, consume, channel); err != nil { - log.Errorf("subscribe redis error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil } diff --git a/backend/utils/model.go b/backend/utils/model.go index 867ae620..21a295d6 100644 --- a/backend/utils/model.go +++ b/backend/utils/model.go @@ -12,15 +12,16 @@ func IsObjectIdNull(id bson.ObjectId) bool { } func InterfaceToString(value interface{}) string { - switch value.(type) { + switch realValue := value.(type) { case bson.ObjectId: - return value.(bson.ObjectId).Hex() + return realValue.Hex() case string: - return value.(string) + return realValue case int: - return strconv.Itoa(value.(int)) + return strconv.Itoa(realValue) case time.Time: - return value.(time.Time).String() + return realValue.String() + default: + return "" } - return "" } diff --git a/backend/utils/user.go b/backend/utils/user.go index 9d1bdceb..46933f9e 100644 --- a/backend/utils/user.go +++ b/backend/utils/user.go @@ -8,7 +8,7 @@ import ( func EncryptPassword(str string) string { w := md5.New() - io.WriteString(w, str) + _, _ = io.WriteString(w, str) md5str := fmt.Sprintf("%x", w.Sum(nil)) return md5str } From 738d607d7851c2a58cd4452732697577d1c0c1c8 Mon Sep 17 00:00:00 2001 From: yaziming Date: Fri, 11 Oct 2019 16:54:13 +0800 Subject: [PATCH 47/81] ignore add super admin account error --- backend/services/user.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/services/user.go b/backend/services/user.go index 231300ee..61fd952e 100644 --- a/backend/services/user.go +++ b/backend/services/user.go @@ -17,7 +17,8 @@ func InitUserService() error { Password: utils.EncryptPassword("admin"), Role: constants.RoleAdmin, } - return adminUser.Add() + _ = adminUser.Add() + return nil } func MakeToken(user *model.User) (tokenStr string, err error) { token := jwt.NewWithClaims(jwt.SigningMethodHS256, jwt.MapClaims{ From 973251a0fbe7a2184ac0da09e0404a17c736aee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 11 Oct 2019 21:57:25 +0800 Subject: [PATCH 48/81] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=8A=82=E7=82=B9?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/node.go | 6 ++++-- backend/model/schedule.go | 19 ++++++++++++++----- backend/model/spider.go | 11 ++++++++--- backend/model/task.go | 13 ++++--------- backend/routes/node.go | 10 +++++----- backend/services/node.go | 4 +++- 6 files changed, 38 insertions(+), 25 deletions(-) diff --git a/backend/model/node.go b/backend/model/node.go index 7af93dbe..1a1ebce5 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -157,10 +157,12 @@ func GetNodeList(filter interface{}) ([]Node, error) { } func GetNode(id bson.ObjectId) (Node, error) { + var node Node + if id.Hex() == "" { + return node, nil + } s, c := database.GetCol("nodes") defer s.Close() - - var node Node if err := c.FindId(id).One(&node); err != nil { if err != mgo.ErrNotFound { log.Errorf(err.Error()) diff --git a/backend/model/schedule.go b/backend/model/schedule.go index 6415e22b..8ec065fb 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -38,6 +38,12 @@ func (sch *Schedule) Save() error { return nil } +func (sch *Schedule) Delete() error { + s, c := database.GetCol("schedules") + defer s.Close() + return c.RemoveId(sch.Id) +} + func GetScheduleList(filter interface{}) ([]Schedule, error) { s, c := database.GetCol("schedules") defer s.Close() @@ -47,11 +53,12 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) { return schedules, err } - for i, schedule := range schedules { + var schs []Schedule + for _, schedule := range schedules { // 获取节点名称 if schedule.NodeId == bson.ObjectIdHex(constants.ObjectIdNull) { // 选择所有节点 - schedules[i].NodeName = "All Nodes" + schedule.NodeName = "All Nodes" } else { // 选择单一节点 node, err := GetNode(schedule.NodeId) @@ -59,7 +66,7 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) { log.Errorf(err.Error()) continue } - schedules[i].NodeName = node.Name + schedule.NodeName = node.Name } // 获取爬虫名称 @@ -67,11 +74,13 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) { if err != nil { log.Errorf("get spider by id: %s, error: %s", schedule.SpiderId.Hex(), err.Error()) debug.PrintStack() + _ = schedule.Delete() continue } - schedules[i].SpiderName = spider.Name + schedule.SpiderName = spider.Name + schs = append(schs, schedule) } - return schedules, nil + return schs, nil } func GetSchedule(id bson.ObjectId) (Schedule, error) { diff --git a/backend/model/spider.go b/backend/model/spider.go index 1f88acff..efd93c3d 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -98,6 +98,12 @@ func (spider *Spider) GetLastTask() (Task, error) { return tasks[0], nil } +func (spider *Spider) Delete() error { + s, c := database.GetCol("spiders") + defer s.Close() + return c.RemoveId(spider.Id) +} + // 爬虫列表 func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, error) { s, c := database.GetCol("spiders") @@ -256,15 +262,14 @@ func GetSpiderTypes() ([]*entity.SpiderType, error) { s, c := database.GetCol("spiders") defer s.Close() - group := bson.M{ "$group": bson.M{ - "_id": "$type", + "_id": "$type", "count": bson.M{"$sum": 1}, }, } var types []*entity.SpiderType - if err := c.Pipe([]bson.M{ group}).All(&types); err != nil { + if err := c.Pipe([]bson.M{group}).All(&types); err != nil { log.Errorf("get spider types error: %s", err.Error()) debug.PrintStack() return nil, err diff --git a/backend/model/task.go b/backend/model/task.go index f568b7fe..df046ecc 100644 --- a/backend/model/task.go +++ b/backend/model/task.go @@ -4,7 +4,6 @@ import ( "crawlab/constants" "crawlab/database" "github.com/apex/log" - "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" "runtime/debug" "time" @@ -118,20 +117,16 @@ func GetTaskList(filter interface{}, skip int, limit int, sortKey string) ([]Tas for i, task := range tasks { // 获取爬虫名称 spider, err := task.GetSpider() - if err == mgo.ErrNotFound { - // do nothing - } else if err != nil { - return tasks, err + if spider.Id.Hex() == "" || err != nil { + _ = spider.Delete() } else { tasks[i].SpiderName = spider.DisplayName } // 获取节点名称 node, err := task.GetNode() - if err == mgo.ErrNotFound { - // do nothing - } else if err != nil { - return tasks, err + if node.Id.Hex() == "" || err != nil { + _ = task.Delete() } else { tasks[i].NodeName = node.Name } diff --git a/backend/routes/node.go b/backend/routes/node.go index f86c152d..7d030773 100644 --- a/backend/routes/node.go +++ b/backend/routes/node.go @@ -15,9 +15,9 @@ func GetNodeList(c *gin.Context) { return } - for i, node := range nodes { - nodes[i].IsMaster = services.IsMasterNode(node.Id.Hex()) - } + //for i, node := range nodes { + // nodes[i].IsMaster = services.IsMasterNode(node.Id.Hex()) + //} c.JSON(http.StatusOK, Response{ Status: "ok", @@ -109,11 +109,11 @@ func GetSystemInfo(c *gin.Context) { }) } -func DeleteNode(c *gin.Context) { +func DeleteNode(c *gin.Context) { id := c.Param("id") node, err := model.GetNode(bson.ObjectIdHex(id)) if err != nil { - HandleError(http.StatusInternalServerError, c ,err) + HandleError(http.StatusInternalServerError, c, err) return } err = node.Delete() diff --git a/backend/services/node.go b/backend/services/node.go index 7fc134c5..144cdbd8 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -88,6 +88,8 @@ func UpdateNodeStatus() { handleNodeInfo(key, data) } + // 重新获取list + list, _ = database.RedisClient.HKeys("nodes") // 重置不在redis的key为offline model.ResetNodeStatusToOffline(list) } @@ -225,7 +227,7 @@ func InitNodeService() error { } // 首次更新节点数据(注册到Redis) - UpdateNodeData() + // UpdateNodeData() // 获取当前节点 node, err := model.GetCurrentNode() From 311f72da19094e3fa05ab4af49812f58843d8d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 11 Oct 2019 21:57:59 +0800 Subject: [PATCH 49/81] =?UTF-8?q?fix=20=E7=9B=B4=E6=8E=A5=E7=82=B9?= =?UTF-8?q?=E5=87=BB=E5=AE=9A=E6=97=B6=E4=BB=BB=E5=8A=A1=E6=97=A0=E6=B3=95?= =?UTF-8?q?=E6=89=BE=E5=88=B0=E7=88=AC=E8=99=AB=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/schedule/ScheduleList.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue index c44d46e2..743a186e 100644 --- a/frontend/src/views/schedule/ScheduleList.vue +++ b/frontend/src/views/schedule/ScheduleList.vue @@ -269,7 +269,7 @@ export default { }, created () { this.$store.dispatch('schedule/getScheduleList') - // this.$store.dispatch('spider/getSpiderList') + this.$store.dispatch('spider/getSpiderList') this.$store.dispatch('node/getNodeList') } } From 7dae91ab50a99901e03a72d52c673167ae4267de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 11 Oct 2019 22:48:06 +0800 Subject: [PATCH 50/81] fix --- backend/model/schedule.go | 9 +++++++++ backend/routes/schedule.go | 6 +++--- backend/services/node.go | 14 +++++++------- backend/services/schedule.go | 18 +++++++++++++++--- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/backend/model/schedule.go b/backend/model/schedule.go index 8ec065fb..951cb043 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -16,6 +16,7 @@ type Schedule struct { Description string `json:"description" bson:"description"` SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"` NodeId bson.ObjectId `json:"node_id" bson:"node_id"` + NodeKey string `json:"node_key" bson:"node_key"` Cron string `json:"cron" bson:"cron"` EntryId cron.EntryID `json:"entry_id" bson:"entry_id"` Param string `json:"param" bson:"param"` @@ -113,9 +114,17 @@ func AddSchedule(item Schedule) error { s, c := database.GetCol("schedules") defer s.Close() + node, err := GetNode(item.NodeId) + if err != nil { + log.Errorf("get node error: %s", err.Error()) + debug.PrintStack() + return nil + } + item.Id = bson.NewObjectId() item.CreateTs = time.Now() item.UpdateTs = time.Now() + item.NodeKey = node.Key if err := c.Insert(&item); err != nil { debug.PrintStack() diff --git a/backend/routes/schedule.go b/backend/routes/schedule.go index b447abb5..4ca245b3 100644 --- a/backend/routes/schedule.go +++ b/backend/routes/schedule.go @@ -81,9 +81,9 @@ func PutSchedule(c *gin.Context) { } // 如果node_id为空,则置为空ObjectId - if item.NodeId == "" { - item.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - } + //if item.NodeId == "" { + // item.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) + //} // 更新数据库 if err := model.AddSchedule(item); err != nil { diff --git a/backend/services/node.go b/backend/services/node.go index 144cdbd8..04cbc0ef 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -100,13 +100,13 @@ func handleNodeInfo(key string, data Data) { defer s.Close() // 同个key可能因为并发,被注册多次 - var nodes []model.Node - _ = c.Find(bson.M{"key": key}).All(&nodes) - if nodes != nil && len(nodes) > 1 { - for _, node := range nodes { - _ = c.RemoveId(node.Id) - } - } + //var nodes []model.Node + //_ = c.Find(bson.M{"key": key}).All(&nodes) + //if nodes != nil && len(nodes) > 1 { + // for _, node := range nodes { + // _ = c.RemoveId(node.Id) + // } + //} var node model.Node if err := c.Find(bson.M{"key": key}).One(&node); err != nil { diff --git a/backend/services/schedule.go b/backend/services/schedule.go index 58cdf628..f011f02a 100644 --- a/backend/services/schedule.go +++ b/backend/services/schedule.go @@ -17,7 +17,19 @@ type Scheduler struct { func AddTask(s model.Schedule) func() { return func() { - nodeId := s.NodeId + node, err := model.GetNodeByKey(s.NodeKey) + if err != nil || node.Id.Hex() == "" { + log.Errorf("get node by key error: %s", err.Error()) + debug.PrintStack() + return + } + + spider := model.GetSpiderByName(s.SpiderName) + if spider == nil || spider.Id.Hex() == "" { + log.Errorf("get spider by name error: %s", err.Error()) + debug.PrintStack() + return + } // 生成任务ID id := uuid.NewV4() @@ -25,8 +37,8 @@ func AddTask(s model.Schedule) func() { // 生成任务模型 t := model.Task{ Id: id.String(), - SpiderId: s.SpiderId, - NodeId: nodeId, + SpiderId: spider.Id, + NodeId: node.Id, Status: constants.StatusPending, Param: s.Param, } From 8eef98e082c9dcdd7423797cfcbb70cc100aa277 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 11 Oct 2019 22:48:22 +0800 Subject: [PATCH 51/81] fix --- frontend/src/views/schedule/ScheduleList.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue index 743a186e..d16a6f69 100644 --- a/frontend/src/views/schedule/ScheduleList.vue +++ b/frontend/src/views/schedule/ScheduleList.vue @@ -16,7 +16,7 @@ - + Date: Fri, 11 Oct 2019 23:22:25 +0800 Subject: [PATCH 52/81] =?UTF-8?q?fix=20=E5=AE=9A=E6=97=B6=E4=BB=BB?= =?UTF-8?q?=E5=8A=A1=E7=AE=A1=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/log.go | 11 +++++++++-- backend/services/task.go | 3 +++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/backend/services/log.go b/backend/services/log.go index 485cb7dd..81140c0a 100644 --- a/backend/services/log.go +++ b/backend/services/log.go @@ -15,6 +15,7 @@ import ( "os" "path/filepath" "runtime/debug" + "time" ) // 任务日志频道映射 @@ -45,8 +46,14 @@ func GetRemoteLog(task model.Task) (logStr string, err error) { // 生成频道,等待获取log ch := TaskLogChanMap.ChanBlocked(task.Id) - // 此处阻塞,等待结果 - logStr = <-ch + select { + case logStr = <-ch: + log.Infof("get remote log") + break + case <-time.After(5 * time.Second): + logStr = "get remote log timeout" + break + } return logStr, nil } diff --git a/backend/services/task.go b/backend/services/task.go index f515f48d..12f0330e 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -285,6 +285,9 @@ func ExecuteTask(id int) { // 节点队列任务 var msg string msg, err = database.RedisClient.LPop(queueCur) + if msg != "" { + log.Infof("queue cur: %s", msg) + } if err != nil { if msg == "" { // 节点队列没有任务,获取公共队列任务 From d853948718b440a0f42a5c5dbf9f7e57df191459 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 11 Oct 2019 23:23:04 +0800 Subject: [PATCH 53/81] =?UTF-8?q?fix=20=E5=AE=9A=E6=97=B6=E4=BB=BB?= =?UTF-8?q?=E5=8A=A1=E7=AE=A1=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/schedule/ScheduleList.vue | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue index d16a6f69..4d283966 100644 --- a/frontend/src/views/schedule/ScheduleList.vue +++ b/frontend/src/views/schedule/ScheduleList.vue @@ -132,6 +132,7 @@ From 5bd30d8046ef5bafb483223a5a66f1f8531ed036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Fri, 11 Oct 2019 23:53:07 +0800 Subject: [PATCH 54/81] =?UTF-8?q?fix=20=E5=AE=9A=E6=97=B6=E4=BB=BB?= =?UTF-8?q?=E5=8A=A1=E7=AE=A1=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/schedule.go | 7 +++++++ backend/routes/schedule.go | 7 +++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/backend/model/schedule.go b/backend/model/schedule.go index 951cb043..bcd051e3 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -104,6 +104,13 @@ func UpdateSchedule(id bson.ObjectId, item Schedule) error { return err } + node, err := GetNode(item.NodeId) + if err != nil { + log.Errorf("get node error: %s", err.Error()) + debug.PrintStack() + return nil + } + item.NodeKey = node.Key if err := item.Save(); err != nil { return err } diff --git a/backend/routes/schedule.go b/backend/routes/schedule.go index 4ca245b3..24df0c0f 100644 --- a/backend/routes/schedule.go +++ b/backend/routes/schedule.go @@ -1,7 +1,6 @@ package routes import ( - "crawlab/constants" "crawlab/model" "crawlab/services" "github.com/gin-gonic/gin" @@ -49,9 +48,9 @@ func PostSchedule(c *gin.Context) { newItem.Id = bson.ObjectIdHex(id) // 如果node_id为空,则置为空ObjectId - if newItem.NodeId == "" { - newItem.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - } + //if newItem.NodeId == "" { + // newItem.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) + //} // 更新数据库 if err := model.UpdateSchedule(bson.ObjectIdHex(id), newItem); err != nil { From 9d8b0fd13767f60b4bf69ba69005b5df3acf4ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Sat, 12 Oct 2019 06:28:53 +0800 Subject: [PATCH 55/81] =?UTF-8?q?=E4=BF=AE=E6=94=B9Dockerfile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 893cf6fe..52c668e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,7 @@ ADD ./frontend /app WORKDIR /app # install frontend +RUN npm config set unsafe-perm true RUN npm install -g yarn && yarn install RUN npm run build:prod @@ -56,4 +57,4 @@ EXPOSE 8080 EXPOSE 8000 # start backend -CMD ["/bin/sh", "/app/docker_init.sh"] \ No newline at end of file +CMD ["/bin/sh", "/app/docker_init.sh"] From decb662c12361e4c6fc0290c8c885f319d1c7293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Sat, 12 Oct 2019 06:29:52 +0800 Subject: [PATCH 56/81] =?UTF-8?q?=E4=BF=AE=E6=94=B9Dockerfile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 52c668e9..0809a0ba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ WORKDIR /app # install frontend RUN npm config set unsafe-perm true -RUN npm install -g yarn && yarn install +RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org RUN npm run build:prod From 6af06efc17685a9e232e8c2b5fd819ec7d2d1674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Sat, 12 Oct 2019 07:16:11 +0800 Subject: [PATCH 57/81] =?UTF-8?q?fix=20worker=E6=97=A0=E6=B3=95=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E5=88=B0=E5=BD=93=E5=89=8DNode=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/node.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/services/node.go b/backend/services/node.go index 04cbc0ef..d3409ed2 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -227,7 +227,7 @@ func InitNodeService() error { } // 首次更新节点数据(注册到Redis) - // UpdateNodeData() + UpdateNodeData() // 获取当前节点 node, err := model.GetCurrentNode() From d4c152f93a15d60994c4617fe4f4d9a6ba77ba80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Tue, 15 Oct 2019 17:23:03 +0800 Subject: [PATCH 58/81] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/node.go | 14 ++-- backend/services/task.go | 173 ++++++++++++++++++++++++--------------- 2 files changed, 113 insertions(+), 74 deletions(-) diff --git a/backend/services/node.go b/backend/services/node.go index d3409ed2..8b1f998a 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -100,13 +100,13 @@ func handleNodeInfo(key string, data Data) { defer s.Close() // 同个key可能因为并发,被注册多次 - //var nodes []model.Node - //_ = c.Find(bson.M{"key": key}).All(&nodes) - //if nodes != nil && len(nodes) > 1 { - // for _, node := range nodes { - // _ = c.RemoveId(node.Id) - // } - //} + var nodes []model.Node + _ = c.Find(bson.M{"key": key}).All(&nodes) + if nodes != nil && len(nodes) > 1 { + for _, node := range nodes { + _ = c.RemoveId(node.Id) + } + } var node model.Node if err := c.Find(bson.M{"key": key}).One(&node); err != nil { diff --git a/backend/services/task.go b/backend/services/task.go index 12f0330e..b79dbe7a 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -100,10 +100,104 @@ func AssignTask(task model.Task) error { return nil } +// 设置环境变量 +func SetEnv(cmd *exec.Cmd, envs []model.Env, taskId string, dataCol string) *exec.Cmd { + // 默认环境变量 + cmd.Env = append(cmd.Env, "CRAWLAB_TASK_ID="+taskId) + cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+dataCol) + cmd.Env = append(cmd.Env, "PYTHONUNBUFFERED=0") + cmd.Env = append(cmd.Env, "PYTHONIOENCODING=utf-8") + + //任务环境变量 + for _, env := range envs { + cmd.Env = append(cmd.Env, env.Name+"="+env.Value) + } + + // TODO 全局环境变量 + return cmd +} + +func SetLogConfig(cmd *exec.Cmd, path string) error { + fLog, err := os.Create(path) + if err != nil { + log.Errorf("create task log file error: %s", path) + debug.PrintStack() + return err + } + defer fLog.Close() + cmd.Stdout = fLog + cmd.Stderr = fLog + return nil +} + +func FinishOrCancelTask(ch chan string, cmd *exec.Cmd, t model.Task) { + // 传入信号,此处阻塞 + signal := <-ch + log.Infof("process received signal: %s", signal) + + if signal == constants.TaskCancel && cmd.Process != nil { + // 取消进程 + if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil { + log.Errorf("process kill error: %s", err.Error()) + debug.PrintStack() + + t.Error = "kill process error: " + err.Error() + t.Status = constants.StatusError + } else { + t.Error = "user kill the process ..." + t.Status = constants.StatusCancelled + } + } else { + // 保存任务 + t.Status = constants.StatusFinished + } + + t.FinishTs = time.Now() + _ = t.Save() +} + +func StartTaskProcess(cmd *exec.Cmd, t model.Task) error { + if err := cmd.Start(); err != nil { + log.Errorf("start spider error:{}", err.Error()) + debug.PrintStack() + + t.Error = "start task error: " + err.Error() + t.Status = constants.StatusError + t.FinishTs = time.Now() + _ = t.Save() + return err + } + return nil +} + +func WaitTaskProcess(cmd *exec.Cmd, t model.Task) error { + if err := cmd.Wait(); err != nil { + log.Errorf("wait process finish error: %s", err.Error()) + debug.PrintStack() + + if exitError, ok := err.(*exec.ExitError); ok { + exitCode := exitError.ExitCode() + log.Errorf("exit error, exit code: %d", exitCode) + + // 非kill 的错误类型 + if exitCode != -1 { + // 非手动kill保存为错误状态 + t.Error = err.Error() + t.FinishTs = time.Now() + t.Status = constants.StatusError + _ = t.Save() + } + } + + return err + } + return nil +} + // 执行shell命令 func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (err error) { - log.Infof("cwd: " + cwd) - log.Infof("cmd: " + cmdStr) + log.Infof("cwd: %s", cwd) + log.Infof("cmd: %s", cmdStr) // 生成执行命令 var cmd *exec.Cmd @@ -116,84 +210,29 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e // 工作目录 cmd.Dir = cwd - // 指定stdout, stderr日志位置 - fLog, err := os.Create(t.LogPath) - if err != nil { - HandleTaskError(t, err) + // 日志配置 + if err := SetLogConfig(cmd, t.LogPath); err != nil { return err } - defer fLog.Close() - cmd.Stdout = fLog - cmd.Stderr = fLog - // 添加默认环境变量 - cmd.Env = append(cmd.Env, "CRAWLAB_TASK_ID="+t.Id) - cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+s.Col) - - // 添加任务环境变量 - for _, env := range s.Envs { - cmd.Env = append(cmd.Env, env.Name+"="+env.Value) - } + // 环境变量配置 + cmd = SetEnv(cmd, s.Envs, t.Id, s.Col) // 起一个goroutine来监控进程 ch := utils.TaskExecChanMap.ChanBlocked(t.Id) - go func() { - // 传入信号,此处阻塞 - signal := <-ch - log.Infof("cancel process signal: %s", signal) - if signal == constants.TaskCancel && cmd.Process != nil { - // 取消进程 - if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil { - log.Errorf("process kill error: %s", err.Error()) - debug.PrintStack() - } - t.Status = constants.StatusCancelled - t.Error = "user kill the process ..." - } else { - // 保存任务 - t.Status = constants.StatusFinished - } - t.FinishTs = time.Now() - if err := t.Save(); err != nil { - log.Infof("save task error: %s", err.Error()) - debug.PrintStack() - return - } - }() - // 在选择所有节点执行的时候,实际就是随机一个节点执行的, + go FinishOrCancelTask(ch, cmd, t) + + // kill的时候,可以kill所有的子进程 cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} - // 异步启动进程 - if err := cmd.Start(); err != nil { - log.Errorf("start spider error:{}", err.Error()) - debug.PrintStack() + // 启动进程 + if err := StartTaskProcess(cmd, t); err != nil { return err } - // 保存pid到task - t.Pid = cmd.Process.Pid - if err := t.Save(); err != nil { - log.Errorf("save task pid error: %s", err.Error()) - debug.PrintStack() - return err - } // 同步等待进程完成 - if err := cmd.Wait(); err != nil { - log.Errorf("wait process finish error: %s", err.Error()) - debug.PrintStack() - if exitError, ok := err.(*exec.ExitError); ok { - exitCode := exitError.ExitCode() - log.Errorf("exit error, exit code: %d", exitCode) - // 非kill 的错误类型 - if exitCode != -1 { - // 非手动kill保存为错误状态 - t.Error = err.Error() - t.FinishTs = time.Now() - t.Status = constants.StatusError - _ = t.Save() - } - } + if err := WaitTaskProcess(cmd, t); err != nil { return err } ch <- constants.TaskFinish From f92d2d426643e873a894b2357b060df2c2c7ff76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 09:45:50 +0800 Subject: [PATCH 59/81] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 49 +++++++++++++--------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/backend/services/task.go b/backend/services/task.go index b79dbe7a..50b902cb 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -311,70 +311,53 @@ func ExecuteTask(id int) { // 获取当前节点 node, err := model.GetCurrentNode() if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) + log.Errorf("execute task get current node error: %s", err.Error()) + debug.PrintStack() return } - // 公共队列 - queuePub := "tasks:public" - // 节点队列 queueCur := "tasks:node:" + node.Id.Hex() - // 节点队列任务 var msg string - msg, err = database.RedisClient.LPop(queueCur) - if msg != "" { - log.Infof("queue cur: %s", msg) - } - if err != nil { - if msg == "" { - // 节点队列没有任务,获取公共队列任务 - msg, err = database.RedisClient.LPop(queuePub) - if err != nil { - if msg == "" { - // 公共队列没有任务 - log.Debugf(GetWorkerPrefix(id) + "没有任务...") - return - } else { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - debug.PrintStack() - return - } - } - } else { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - debug.PrintStack() - return + if msg, err = database.RedisClient.LPop(queueCur); err != nil { + log.Errorf("get current node task error: %s", err.Error()) + // 节点队列没有任务,获取公共队列任务 + queuePub := "tasks:public" + if msg, err = database.RedisClient.LPop(queuePub); err != nil { + log.Errorf("get public task error: %s", err.Error()) } } + if msg == "" { + return + } + // 反序列化 tMsg := TaskMessage{} if err := json.Unmarshal([]byte(msg), &tMsg); err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - debug.PrintStack() + log.Errorf("json string to struct error: %s", err.Error()) return } // 获取任务 t, err := model.GetTask(tMsg.Id) if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) + log.Errorf("execute task, get task error: %s", err.Error()) return } // 获取爬虫 spider, err := t.GetSpider() if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) + log.Errorf("execute task, get spider error: %s", err.Error()) return } // 创建日志目录 fileDir, err := MakeLogDir(t) if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) + log.Errorf("execute task, make log dir error: %s", err.Error()) return } From 467796b55a5b84bb299fbad5b4e9d49a1269e0bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 10:11:00 +0800 Subject: [PATCH 60/81] =?UTF-8?q?fix=20=E4=B8=80=E4=BA=9B=E4=B8=9C?= =?UTF-8?q?=E8=A5=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/backend/services/task.go b/backend/services/task.go index 50b902cb..78c11f17 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -124,7 +124,6 @@ func SetLogConfig(cmd *exec.Cmd, path string) error { debug.PrintStack() return err } - defer fLog.Close() cmd.Stdout = fLog cmd.Stderr = fLog return nil @@ -247,6 +246,7 @@ func MakeLogDir(t model.Task) (fileDir string, err error) { // 如果日志目录不存在,生成该目录 if !utils.Exists(fileDir) { if err := os.MkdirAll(fileDir, 0777); err != nil { + log.Errorf("execute task, make log dir error: %s", err.Error()) debug.PrintStack() return "", err } @@ -321,11 +321,9 @@ func ExecuteTask(id int) { // 节点队列任务 var msg string if msg, err = database.RedisClient.LPop(queueCur); err != nil { - log.Errorf("get current node task error: %s", err.Error()) // 节点队列没有任务,获取公共队列任务 queuePub := "tasks:public" if msg, err = database.RedisClient.LPop(queuePub); err != nil { - log.Errorf("get public task error: %s", err.Error()) } } @@ -355,24 +353,13 @@ func ExecuteTask(id int) { } // 创建日志目录 - fileDir, err := MakeLogDir(t) - if err != nil { - log.Errorf("execute task, make log dir error: %s", err.Error()) + var fileDir string + if fileDir, err = MakeLogDir(t); err != nil { return } - // 获取日志文件路径 t.LogPath = GetLogFilePaths(fileDir) - // 创建日志目录文件夹 - fileStdoutDir := filepath.Dir(t.LogPath) - if !utils.Exists(fileStdoutDir) { - if err := os.MkdirAll(fileStdoutDir, os.ModePerm); err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - return - } - } - // 工作目录 cwd := filepath.Join( viper.GetString("spider.path"), From 3fc97e78d8cd5fd4c3b41c18dd61873b14572c59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 11:26:15 +0800 Subject: [PATCH 61/81] =?UTF-8?q?fix=20=E6=97=B6=E5=8C=BA=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/services/task.go b/backend/services/task.go index 78c11f17..5bcb54e8 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -107,6 +107,7 @@ func SetEnv(cmd *exec.Cmd, envs []model.Env, taskId string, dataCol string) *exe cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+dataCol) cmd.Env = append(cmd.Env, "PYTHONUNBUFFERED=0") cmd.Env = append(cmd.Env, "PYTHONIOENCODING=utf-8") + cmd.Env = append(cmd.Env, "TZ=Asia/Shanghai") //任务环境变量 for _, env := range envs { @@ -203,6 +204,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e if runtime.GOOS == constants.Windows { cmd = exec.Command("cmd", "/C", cmdStr) } else { + cmd = exec.Command("") cmd = exec.Command("sh", "-c", cmdStr) } From b3b2efd6112c32b51a83d0bc921447d6f29e69e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 15:27:12 +0800 Subject: [PATCH 62/81] =?UTF-8?q?fix=20=E5=AE=9A=E6=97=B6=E4=BB=BB?= =?UTF-8?q?=E5=8A=A1=E5=8F=82=E6=95=B0=E9=94=99=E8=AF=AF=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/node.go | 14 +++++++++----- backend/model/schedule.go | 31 ++++++++++++++++++++++++------- backend/routes/schedule.go | 22 ++++++++++++---------- backend/services/schedule.go | 17 ++++++++++++++++- 4 files changed, 61 insertions(+), 23 deletions(-) diff --git a/backend/model/node.go b/backend/model/node.go index 1a1ebce5..2beb9e1c 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -4,6 +4,7 @@ import ( "crawlab/constants" "crawlab/database" "crawlab/services/register" + "errors" "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" @@ -158,16 +159,19 @@ func GetNodeList(filter interface{}) ([]Node, error) { func GetNode(id bson.ObjectId) (Node, error) { var node Node + if id.Hex() == "" { - return node, nil + log.Infof("id is empty") + debug.PrintStack() + return node, errors.New("id is empty") } + s, c := database.GetCol("nodes") defer s.Close() + if err := c.FindId(id).One(&node); err != nil { - if err != mgo.ErrNotFound { - log.Errorf(err.Error()) - debug.PrintStack() - } + log.Errorf(err.Error()) + debug.PrintStack() return node, err } return node, nil diff --git a/backend/model/schedule.go b/backend/model/schedule.go index bcd051e3..36799ac3 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -45,6 +45,27 @@ func (sch *Schedule) Delete() error { return c.RemoveId(sch.Id) } +func (sch *Schedule) SyncNodeIdAndSpiderId(node Node, spider Spider) { + sch.syncNodeId(node) + sch.syncSpiderId(spider) +} + +func (sch *Schedule) syncNodeId(node Node) { + if node.Id.Hex() == sch.NodeId.Hex() { + return + } + sch.NodeId = node.Id + _ = sch.Save() +} + +func (sch *Schedule) syncSpiderId(spider Spider) { + if spider.Id.Hex() == sch.SpiderId.Hex() { + return + } + sch.SpiderId = spider.Id + _ = sch.Save() +} + func GetScheduleList(filter interface{}) ([]Schedule, error) { s, c := database.GetCol("schedules") defer s.Close() @@ -103,13 +124,11 @@ func UpdateSchedule(id bson.ObjectId, item Schedule) error { if err := c.FindId(id).One(&result); err != nil { return err } - node, err := GetNode(item.NodeId) if err != nil { - log.Errorf("get node error: %s", err.Error()) - debug.PrintStack() - return nil + return err } + item.NodeKey = node.Key if err := item.Save(); err != nil { return err @@ -123,9 +142,7 @@ func AddSchedule(item Schedule) error { node, err := GetNode(item.NodeId) if err != nil { - log.Errorf("get node error: %s", err.Error()) - debug.PrintStack() - return nil + return err } item.Id = bson.NewObjectId() diff --git a/backend/routes/schedule.go b/backend/routes/schedule.go index 24df0c0f..73b75323 100644 --- a/backend/routes/schedule.go +++ b/backend/routes/schedule.go @@ -45,13 +45,14 @@ func PostSchedule(c *gin.Context) { HandleError(http.StatusBadRequest, c, err) return } + + // 验证cron表达式 + if err := services.ParserCron(newItem.Cron); err != nil { + HandleError(http.StatusOK, c, err) + return + } + newItem.Id = bson.ObjectIdHex(id) - - // 如果node_id为空,则置为空ObjectId - //if newItem.NodeId == "" { - // newItem.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - //} - // 更新数据库 if err := model.UpdateSchedule(bson.ObjectIdHex(id), newItem); err != nil { HandleError(http.StatusInternalServerError, c, err) @@ -79,10 +80,11 @@ func PutSchedule(c *gin.Context) { return } - // 如果node_id为空,则置为空ObjectId - //if item.NodeId == "" { - // item.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - //} + // 验证cron表达式 + if err := services.ParserCron(item.Cron); err != nil { + HandleError(http.StatusOK, c, err) + return + } // 更新数据库 if err := model.AddSchedule(item); err != nil { diff --git a/backend/services/schedule.go b/backend/services/schedule.go index f011f02a..d4c1635b 100644 --- a/backend/services/schedule.go +++ b/backend/services/schedule.go @@ -5,7 +5,7 @@ import ( "crawlab/lib/cron" "crawlab/model" "github.com/apex/log" - uuid "github.com/satori/go.uuid" + "github.com/satori/go.uuid" "runtime/debug" ) @@ -31,6 +31,9 @@ func AddTask(s model.Schedule) func() { return } + // 同步ID到定时任务 + s.SyncNodeIdAndSpiderId(node, *spider) + // 生成任务ID id := uuid.NewV4() @@ -119,6 +122,18 @@ func (s *Scheduler) RemoveAll() { } } +// 验证cron表达式是否正确 +func ParserCron(spec string) error { + parser := cron.NewParser( + cron.Second | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor, + ) + + if _, err := parser.Parse(spec); err != nil { + return err + } + return nil +} + func (s *Scheduler) Update() error { // 删除所有定时任务 s.RemoveAll() From 2e3ec18d676bd2d1ea4a0aac1553dd3d9e034049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 15:27:35 +0800 Subject: [PATCH 63/81] =?UTF-8?q?fix=20=E5=AE=9A=E6=97=B6=E4=BB=BB?= =?UTF-8?q?=E5=8A=A1=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/schedule/ScheduleList.vue | 101 ++++++++++--------- 1 file changed, 52 insertions(+), 49 deletions(-) diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue index 4d283966..b170c9ed 100644 --- a/frontend/src/views/schedule/ScheduleList.vue +++ b/frontend/src/views/schedule/ScheduleList.vue @@ -14,7 +14,7 @@ - + - - - + + + + + + + + + + + - {{$t('schedules.add_cron')}} + + {{$t('Cancel')}} {{$t('Submit')}} @@ -76,9 +78,9 @@ - - - + + + @@ -131,7 +133,7 @@ diff --git a/frontend/src/views/spider/SpiderList.vue b/frontend/src/views/spider/SpiderList.vue index 8c97339a..eb1e548f 100644 --- a/frontend/src/views/spider/SpiderList.vue +++ b/frontend/src/views/spider/SpiderList.vue @@ -429,13 +429,6 @@ export default { } else { return false } - // if (this.isCustomized(row)) { - // // customized spider - // return !!row.cmd - // } else { - // // configurable spider - // return !!row.fields - // } }, isCustomized (row) { return row.type === 'customized' From 4ab4892471965d6342d30385578ca60dc51f8ad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 15:39:25 +0800 Subject: [PATCH 65/81] =?UTF-8?q?fix=20=E7=8E=AF=E5=A2=83=E5=8F=98?= =?UTF-8?q?=E9=87=8F=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/services/task.go b/backend/services/task.go index 5bcb54e8..03038613 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -103,7 +103,7 @@ func AssignTask(task model.Task) error { // 设置环境变量 func SetEnv(cmd *exec.Cmd, envs []model.Env, taskId string, dataCol string) *exec.Cmd { // 默认环境变量 - cmd.Env = append(cmd.Env, "CRAWLAB_TASK_ID="+taskId) + cmd.Env = append(os.Environ(), "CRAWLAB_TASK_ID="+taskId) cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+dataCol) cmd.Env = append(cmd.Env, "PYTHONUNBUFFERED=0") cmd.Env = append(cmd.Env, "PYTHONIOENCODING=utf-8") From 3d6fb33d3c7729cb9c3da0bcf36b419828283af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 16:00:01 +0800 Subject: [PATCH 66/81] =?UTF-8?q?fix=20=E6=97=A5=E5=BF=97=E4=B8=8D?= =?UTF-8?q?=E4=BC=9A=E8=87=AA=E5=8A=A8=E5=88=B7=E6=96=B0=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/task/TaskDetail.vue | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/frontend/src/views/task/TaskDetail.vue b/frontend/src/views/task/TaskDetail.vue index b4ec0652..c1bb4ff0 100644 --- a/frontend/src/views/task/TaskDetail.vue +++ b/frontend/src/views/task/TaskDetail.vue @@ -35,6 +35,7 @@ import { import TaskOverview from '../../components/Overview/TaskOverview' import GeneralTableView from '../../components/TableView/GeneralTableView' import LogView from '../../components/ScrollView/LogView' +import request from '../../api/request' export default { name: 'TaskDetail', @@ -46,12 +47,12 @@ export default { data () { return { activeTabName: 'overview', - handle: undefined + handle: undefined, + taskLog: '' } }, computed: { ...mapState('task', [ - 'taskLog', 'taskResultsData', 'taskResultsTotalCount' ]), @@ -97,18 +98,22 @@ export default { downloadCSV () { this.$store.dispatch('task/getTaskResultExcel', this.$route.params.id) this.$st.sendEv('任务详情-结果', '下载CSV') + }, + getTaskLog () { + if (this.$route.params.id) { + request.get(`/tasks/${this.$route.params.id}/log`).then(response => { + this.taskLog = response.data.data + }) + } } }, async created () { - await this.$store.dispatch('task/getTaskData', this.$route.params.id) - this.$store.dispatch('task/getTaskLog', this.$route.params.id) + this.$store.dispatch('task/getTaskData', this.$route.params.id) this.$store.dispatch('task/getTaskResults', this.$route.params.id) - if (this.taskForm && ['running'].includes(this.taskForm.status)) { - this.handle = setInterval(() => { - this.$store.dispatch('task/getTaskLog', this.$route.params.id) - }, 5000) - } + this.handle = setInterval(() => { + this.getTaskLog() + }, 5000) }, destroyed () { clearInterval(this.handle) From c028c4d1f3b13c3cbb0550cba42a487032b09f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 16:00:36 +0800 Subject: [PATCH 67/81] =?UTF-8?q?fix=20=E6=97=A5=E5=BF=97=E4=B8=8D?= =?UTF-8?q?=E4=BC=9A=E8=87=AA=E5=8A=A8=E5=88=B7=E6=96=B0=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/task/TaskDetail.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/views/task/TaskDetail.vue b/frontend/src/views/task/TaskDetail.vue index c1bb4ff0..5ba4ab4d 100644 --- a/frontend/src/views/task/TaskDetail.vue +++ b/frontend/src/views/task/TaskDetail.vue @@ -107,7 +107,7 @@ export default { } } }, - async created () { + created () { this.$store.dispatch('task/getTaskData', this.$route.params.id) this.$store.dispatch('task/getTaskResults', this.$route.params.id) From f9882cf0f516d99543ce3538272626795af2b4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Wed, 16 Oct 2019 16:29:54 +0800 Subject: [PATCH 68/81] =?UTF-8?q?fix=20=E6=97=A5=E5=BF=97=E8=8E=B7?= =?UTF-8?q?=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/views/task/TaskDetail.vue | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/views/task/TaskDetail.vue b/frontend/src/views/task/TaskDetail.vue index 5ba4ab4d..d61394e8 100644 --- a/frontend/src/views/task/TaskDetail.vue +++ b/frontend/src/views/task/TaskDetail.vue @@ -111,6 +111,7 @@ export default { this.$store.dispatch('task/getTaskData', this.$route.params.id) this.$store.dispatch('task/getTaskResults', this.$route.params.id) + this.getTaskLog() this.handle = setInterval(() => { this.getTaskLog() }, 5000) From 418d728825d949ab7480bb2723d6b41a68f56f29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Thu, 17 Oct 2019 17:54:19 +0800 Subject: [PATCH 69/81] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/log.go | 2 +- backend/services/log.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/model/log.go b/backend/model/log.go index ae6973b1..fc2cc79d 100644 --- a/backend/model/log.go +++ b/backend/model/log.go @@ -23,7 +23,7 @@ func GetLocalLog(logPath string) (fileBytes []byte, err error) { } defer f.Close() - const bufLen = 2 * 1024 * 1024 + const bufLen = 1 * 1024 * 1024 logBuf := make([]byte, bufLen) off := int64(0) diff --git a/backend/services/log.go b/backend/services/log.go index 81140c0a..47280fe5 100644 --- a/backend/services/log.go +++ b/backend/services/log.go @@ -50,7 +50,7 @@ func GetRemoteLog(task model.Task) (logStr string, err error) { case logStr = <-ch: log.Infof("get remote log") break - case <-time.After(5 * time.Second): + case <-time.After(30 * time.Second): logStr = "get remote log timeout" break } From 5d8f6f0c56768a6e58f5e46cbf5adff8c7819228 Mon Sep 17 00:00:00 2001 From: Hai Nguyen Date: Mon, 21 Oct 2019 08:27:22 +0700 Subject: [PATCH 70/81] Fix environment variable replacement for CRAWLAB_API_ADDRESS --- docker_init.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker_init.sh b/docker_init.sh index 4d571769..09f63e9b 100755 --- a/docker_init.sh +++ b/docker_init.sh @@ -6,7 +6,7 @@ then : else jspath=`ls /app/dist/js/app.*.js` - sed -i "s?localhost:8000?${CRAWLAB_API_ADDRESS}?g" ${jspath} + sed -i "s?http://localhost:8000?${CRAWLAB_API_ADDRESS}?g" ${jspath} fi # replace base url From a33060f2061119bae2d91500fb069214c91ed397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 28 Oct 2019 11:32:36 +0800 Subject: [PATCH 71/81] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E5=8F=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/package.json | 2 +- frontend/src/views/layout/components/Navbar.vue | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/package.json b/frontend/package.json index 60ac5cc8..5b835a60 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "crawlab", - "version": "0.3.4", + "version": "0.3.5", "private": true, "scripts": { "serve": "vue-cli-service serve --ip=0.0.0.0 --mode=development", diff --git a/frontend/src/views/layout/components/Navbar.vue b/frontend/src/views/layout/components/Navbar.vue index e294ad0c..0bd4c6d1 100644 --- a/frontend/src/views/layout/components/Navbar.vue +++ b/frontend/src/views/layout/components/Navbar.vue @@ -9,7 +9,7 @@ - v0.3.4 + v0.3.5 {{$t('Logout')}} From 5525c82835b8d5b1c3c2ba16ec40d8fbb4de1f2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 28 Oct 2019 11:38:39 +0800 Subject: [PATCH 72/81] =?UTF-8?q?=E8=AF=BB=E5=8F=96=E6=97=A5=E5=BF=97?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=B8=BA2M?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/log.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/model/log.go b/backend/model/log.go index abb77ed9..77e5094f 100644 --- a/backend/model/log.go +++ b/backend/model/log.go @@ -24,7 +24,7 @@ func GetLocalLog(logPath string) (fileBytes []byte, err error) { } defer utils.Close(f) - const bufLen = 1 * 1024 * 1024 + const bufLen = 2 * 1024 * 1024 logBuf := make([]byte, bufLen) off := int64(0) From 091bbb8b5326b089f37951ada5a24ed5b77b4193 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Mon, 28 Oct 2019 11:43:05 +0800 Subject: [PATCH 73/81] Update CHANGELOG.md --- CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cd54497..04139677 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,17 @@ +# 0.3.5 (2019-10-28) +### Features / Enhancement +- **Graceful Showdown**. https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385 +- **Node Info Optimization**. https://github.com/crawlab-team/crawlab/commit/973251a0fbe7a2184ac0da09e0404a17c736aee7 +- **Append System Environment Variables to Tasks**. https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3 +- **Auto Refresh Task Log**. https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3 +- **Enable HTTPS Deployment**. https://github.com/crawlab-team/crawlab/commit/5d8f6f0c56768a6e58f5e46cbf5adff8c7819228 + +### Bug Fixes +- **Unable to fetch spider list info in schedule jobs**. https://github.com/crawlab-team/crawlab/commit/311f72da19094e3fa05ab4af49812f58843d8d93 +- **Unable to fetch node info from worker nodes**. https://github.com/crawlab-team/crawlab/commit/6af06efc17685a9e232e8c2b5fd819ec7d2d1674 +- **Unable to select node when trying to run spider tasks**. https://github.com/crawlab-team/crawlab/commit/31f8e03234426e97aed9b0bce6a50562f957edad + + # 0.3.1 (2019-08-25) ### Features / Enhancement - **Docker Image Optimization**. Split docker further into master, worker, frontend with alpine image. From 76ab8f1fcd43928c986f7f5ef0cd7b46fabf6526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 28 Oct 2019 12:03:02 +0800 Subject: [PATCH 74/81] =?UTF-8?q?fix=20=E4=BC=98=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/schedule.go | 3 ++- backend/model/task.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/model/schedule.go b/backend/model/schedule.go index 36799ac3..ef758fb6 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -5,6 +5,7 @@ import ( "crawlab/database" "crawlab/lib/cron" "github.com/apex/log" + "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" "runtime/debug" "time" @@ -93,7 +94,7 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) { // 获取爬虫名称 spider, err := GetSpider(schedule.SpiderId) - if err != nil { + if err != nil && err == mgo.ErrNotFound { log.Errorf("get spider by id: %s, error: %s", schedule.SpiderId.Hex(), err.Error()) debug.PrintStack() _ = schedule.Delete() diff --git a/backend/model/task.go b/backend/model/task.go index df046ecc..64f06cd7 100644 --- a/backend/model/task.go +++ b/backend/model/task.go @@ -117,7 +117,7 @@ func GetTaskList(filter interface{}, skip int, limit int, sortKey string) ([]Tas for i, task := range tasks { // 获取爬虫名称 spider, err := task.GetSpider() - if spider.Id.Hex() == "" || err != nil { + if err != nil || spider.Id.Hex() == "" { _ = spider.Delete() } else { tasks[i].SpiderName = spider.DisplayName From 383d5e61353a1b17b959c90810406e9c01670494 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Mon, 28 Oct 2019 12:18:09 +0800 Subject: [PATCH 75/81] Update CHANGELOG.md --- CHANGELOG.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 04139677..e38cd5d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,17 @@ # 0.3.5 (2019-10-28) ### Features / Enhancement -- **Graceful Showdown**. https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385 -- **Node Info Optimization**. https://github.com/crawlab-team/crawlab/commit/973251a0fbe7a2184ac0da09e0404a17c736aee7 -- **Append System Environment Variables to Tasks**. https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3 -- **Auto Refresh Task Log**. https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3 -- **Enable HTTPS Deployment**. https://github.com/crawlab-team/crawlab/commit/5d8f6f0c56768a6e58f5e46cbf5adff8c7819228 +- **[Graceful Showdown](https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385)** +- **[Node Info Optimization](https://github.com/crawlab-team/crawlab/commit/973251a0fbe7a2184ac0da09e0404a17c736aee7)** +- **[Append System Environment Variables to Tasks](https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3)** +- **[Auto Refresh Task Log](https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3)** +- **[Enable HTTPS Deployment](https://github.com/crawlab-team/crawlab/commit/5d8f6f0c56768a6e58f5e46cbf5adff8c7819228)** ### Bug Fixes -- **Unable to fetch spider list info in schedule jobs**. https://github.com/crawlab-team/crawlab/commit/311f72da19094e3fa05ab4af49812f58843d8d93 -- **Unable to fetch node info from worker nodes**. https://github.com/crawlab-team/crawlab/commit/6af06efc17685a9e232e8c2b5fd819ec7d2d1674 -- **Unable to select node when trying to run spider tasks**. https://github.com/crawlab-team/crawlab/commit/31f8e03234426e97aed9b0bce6a50562f957edad +- **Unable to fetch spider list info in schedule jobs**. [detail](https://github.com/crawlab-team/crawlab/commit/311f72da19094e3fa05ab4af49812f58843d8d93) +- **Unable to fetch node info from worker nodes**. [detail](https://github.com/crawlab-team/crawlab/commit/6af06efc17685a9e232e8c2b5fd819ec7d2d1674) +- **Unable to select node when trying to run spider tasks**. [detail](https://github.com/crawlab-team/crawlab/commit/31f8e03234426e97aed9b0bce6a50562f957edad) +- **Unable to fetch result count when result volume is large**. [#260](https://github.com/crawlab-team/crawlab/issues/260) +- **Node issue in schedule tasks**. [#244](https://github.com/crawlab-team/crawlab/issues/244) # 0.3.1 (2019-08-25) From ba85c5312cf40ea7f4d3afdaf9a23ae6356548d6 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Mon, 28 Oct 2019 12:19:26 +0800 Subject: [PATCH 76/81] Update CHANGELOG.md --- CHANGELOG.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e38cd5d6..6e85f8ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,10 @@ # 0.3.5 (2019-10-28) ### Features / Enhancement -- **[Graceful Showdown](https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385)** -- **[Node Info Optimization](https://github.com/crawlab-team/crawlab/commit/973251a0fbe7a2184ac0da09e0404a17c736aee7)** -- **[Append System Environment Variables to Tasks](https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3)** -- **[Auto Refresh Task Log](https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3)** -- **[Enable HTTPS Deployment](https://github.com/crawlab-team/crawlab/commit/5d8f6f0c56768a6e58f5e46cbf5adff8c7819228)** +- **Graceful Showdown**. [detail](https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385)** +- **Node Info Optimization**. [detail](https://github.com/crawlab-team/crawlab/commit/973251a0fbe7a2184ac0da09e0404a17c736aee7) +- **Append System Environment Variables to Tasks**. [detail](https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3) +- **Auto Refresh Task Log**. [detail](https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3) +- **Enable HTTPS Deployment**. [detail](https://github.com/crawlab-team/crawlab/commit/5d8f6f0c56768a6e58f5e46cbf5adff8c7819228) ### Bug Fixes - **Unable to fetch spider list info in schedule jobs**. [detail](https://github.com/crawlab-team/crawlab/commit/311f72da19094e3fa05ab4af49812f58843d8d93) From 7b01e650296a91c36ad5e00969aa13b14813cf4d Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Mon, 28 Oct 2019 12:19:45 +0800 Subject: [PATCH 77/81] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e85f8ce..95ef9cd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # 0.3.5 (2019-10-28) ### Features / Enhancement -- **Graceful Showdown**. [detail](https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385)** +- **Graceful Showdown**. [detail](https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385) - **Node Info Optimization**. [detail](https://github.com/crawlab-team/crawlab/commit/973251a0fbe7a2184ac0da09e0404a17c736aee7) - **Append System Environment Variables to Tasks**. [detail](https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3) - **Auto Refresh Task Log**. [detail](https://github.com/crawlab-team/crawlab/commit/4ab4892471965d6342d30385578ca60dc51f8ad3) From 965553b285fd0ca28d537ced66f0de8bf5f21545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 28 Oct 2019 12:36:00 +0800 Subject: [PATCH 78/81] =?UTF-8?q?fix=20=E4=BB=A3=E7=A0=81=E9=81=97?= =?UTF-8?q?=E6=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/utils/helpers.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/backend/utils/helpers.go b/backend/utils/helpers.go index 541d9002..1c8c96e3 100644 --- a/backend/utils/helpers.go +++ b/backend/utils/helpers.go @@ -1,6 +1,8 @@ package utils import ( + "context" + "crawlab/database" "crawlab/entity" "encoding/json" "github.com/apex/log" @@ -40,3 +42,22 @@ func Close(c io.Closer) { log.WithError(err).Error("关闭资源文件失败。") } } + +func Pub(channel string, msg entity.NodeMessage) error { + if _, err := database.RedisClient.Publish(channel, GetJson(msg)); err != nil { + log.Errorf("publish redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} + +func Sub(channel string, consume database.ConsumeFunc) error { + ctx := context.Background() + if err := database.RedisClient.Subscribe(ctx, consume, channel); err != nil { + log.Errorf("subscribe redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} From 9427283e3a6689c52cd2ba3a0734a49ccab28fea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=99=AF=E9=98=B3?= <1656488874@qq.com> Date: Mon, 28 Oct 2019 13:51:12 +0800 Subject: [PATCH 79/81] =?UTF-8?q?fix=20=E5=BE=AA=E7=8E=AF=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/services/msg_handler/msg_log.go | 2 +- backend/utils/helpers.go | 21 --------------------- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/backend/services/msg_handler/msg_log.go b/backend/services/msg_handler/msg_log.go index 993fad9a..2a17ed99 100644 --- a/backend/services/msg_handler/msg_log.go +++ b/backend/services/msg_handler/msg_log.go @@ -40,7 +40,7 @@ func (g *Log) get() error { msgSd.Log = utils.BytesToString(logStr) } // 发布消息给主节点 - if err := utils.Pub(constants.ChannelMasterNode, msgSd); err != nil { + if err := database.Pub(constants.ChannelMasterNode, msgSd); err != nil { log.Errorf("pub log to master node error: %s", err.Error()) debug.PrintStack() return err diff --git a/backend/utils/helpers.go b/backend/utils/helpers.go index 1c8c96e3..541d9002 100644 --- a/backend/utils/helpers.go +++ b/backend/utils/helpers.go @@ -1,8 +1,6 @@ package utils import ( - "context" - "crawlab/database" "crawlab/entity" "encoding/json" "github.com/apex/log" @@ -42,22 +40,3 @@ func Close(c io.Closer) { log.WithError(err).Error("关闭资源文件失败。") } } - -func Pub(channel string, msg entity.NodeMessage) error { - if _, err := database.RedisClient.Publish(channel, GetJson(msg)); err != nil { - log.Errorf("publish redis error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func Sub(channel string, consume database.ConsumeFunc) error { - ctx := context.Background() - if err := database.RedisClient.Subscribe(ctx, consume, channel); err != nil { - log.Errorf("subscribe redis error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} From e2759ec426090c6d9fd2748eb690760121b55f5c Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Fri, 1 Nov 2019 11:16:35 +0800 Subject: [PATCH 80/81] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 91a30b34..70822b1d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ services: image: tikazyq/crawlab:latest container_name: master environment: - CRAWLAB_API_ADDRESS: "localhost:8000" + CRAWLAB_API_ADDRESS: "http://localhost:8000" CRAWLAB_SERVER_MASTER: "Y" CRAWLAB_MONGO_HOST: "mongo" CRAWLAB_REDIS_ADDRESS: "redis" From 765d0717c4b1053c9d61b688eec3e7d271583c19 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Fri, 1 Nov 2019 11:18:50 +0800 Subject: [PATCH 81/81] Update README-zh.md --- README-zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README-zh.md b/README-zh.md index b4e2b469..a12eacc4 100644 --- a/README-zh.md +++ b/README-zh.md @@ -47,7 +47,7 @@ services: image: tikazyq/crawlab:latest container_name: master environment: - CRAWLAB_API_ADDRESS: "localhost:8000" + CRAWLAB_API_ADDRESS: "http://localhost:8000" CRAWLAB_SERVER_MASTER: "Y" CRAWLAB_MONGO_HOST: "mongo" CRAWLAB_REDIS_ADDRESS: "redis"