From b0806d2e94be4bdedad4fdd81e73297a7d359d21 Mon Sep 17 00:00:00 2001 From: marvzhang Date: Sun, 19 Apr 2020 17:49:21 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BB=93=E6=9E=9C=E9=9B=86=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E7=94=9F=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/model/task.go | 12 ++++---- backend/routes/project.go | 28 +++++++++--------- backend/services/spider.go | 4 ++- backend/services/task.go | 29 +++++++++---------- backend/utils/spider.go | 8 +++++ .../components/InfoView/SpiderInfoView.vue | 4 +-- frontend/src/views/project/ProjectList.vue | 2 +- frontend/src/views/spider/SpiderList.vue | 8 +++-- 8 files changed, 55 insertions(+), 40 deletions(-) create mode 100644 backend/utils/spider.go diff --git a/backend/model/task.go b/backend/model/task.go index 415a0234..35e738ab 100644 --- a/backend/model/task.go +++ b/backend/model/task.go @@ -3,6 +3,7 @@ package model import ( "crawlab/constants" "crawlab/database" + "crawlab/utils" "github.com/apex/log" "github.com/globalsign/mgo/bson" "runtime/debug" @@ -89,11 +90,9 @@ func (t *Task) GetResults(pageNum int, pageSize int) (results []interface{}, tot return } - if spider.Col == "" { - return - } + col := utils.GetSpiderCol(spider.Col, spider.Name) - s, c := database.GetCol(spider.Col) + s, c := database.GetCol(col) defer s.Close() query := bson.M{ @@ -388,8 +387,11 @@ func UpdateTaskResultCount(id string) (err error) { return err } + // default results collection + col := utils.GetSpiderCol(spider.Col, spider.Name) + // 获取结果数量 - s, c := database.GetCol(spider.Col) + s, c := database.GetCol(col) defer s.Close() resultCount, err := c.Find(bson.M{"task_id": task.Id}).Count() if err != nil { diff --git a/backend/routes/project.go b/backend/routes/project.go index 4fdb6b6d..f0dd1198 100644 --- a/backend/routes/project.go +++ b/backend/routes/project.go @@ -47,20 +47,20 @@ func GetProjectList(c *gin.Context) { } // 获取未被分配的爬虫数量 - //if tag == "" { - // noProject := model.Project{ - // Id: bson.ObjectIdHex(constants.ObjectIdNull), - // Name: "No Project", - // Description: "Not assigned to any project", - // } - // spiders, err := noProject.GetSpiders() - // if err != nil { - // HandleError(http.StatusInternalServerError, c, err) - // return - // } - // noProject.Spiders = spiders - // projects = append(projects, noProject) - //} + if tag == "" { + noProject := model.Project{ + Id: bson.ObjectIdHex(constants.ObjectIdNull), + Name: "No Project", + Description: "Not assigned to any project", + } + spiders, err := noProject.GetSpiders() + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + noProject.Spiders = spiders + projects = append(projects, noProject) + } c.JSON(http.StatusOK, ListResponse{ Status: "ok", diff --git a/backend/services/spider.go b/backend/services/spider.go index 7d14c287..e8fc37c2 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -437,7 +437,9 @@ func CopySpider(spider model.Spider, newName string) error { } func UpdateSpiderDedup(spider model.Spider) error { - s, c := database.GetCol(spider.Col) + col := utils.GetSpiderCol(spider.Col, spider.Name) + + s, c := database.GetCol(col) defer s.Close() if !spider.IsDedup { diff --git a/backend/services/task.go b/backend/services/task.go index 3be237a0..df8b4071 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -120,9 +120,12 @@ func SetEnv(cmd *exec.Cmd, envs []model.Env, task model.Task, spider model.Spide } _ = os.Setenv("NODE_PATH", nodePath) + // default results collection + col := utils.GetSpiderCol(spider.Col, spider.Name) + // 默认环境变量 cmd.Env = append(os.Environ(), "CRAWLAB_TASK_ID="+task.Id) - cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+spider.Col) + cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+col) cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_HOST="+viper.GetString("mongo.host")) cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_PORT="+viper.GetString("mongo.port")) if viper.GetString("mongo.db") != "" { @@ -571,17 +574,15 @@ func ExecuteTask(id int) { _ = t.Save() // 起一个cron执行器来统计任务结果数 - if spider.Col != "" { - cronExec := cron.New(cron.WithSeconds()) - _, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id)) - if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - debug.PrintStack() - return - } - cronExec.Start() - defer cronExec.Stop() + cronExec := cron.New(cron.WithSeconds()) + _, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id)) + if err != nil { + log.Errorf(GetWorkerPrefix(id) + err.Error()) + debug.PrintStack() + return } + cronExec.Start() + defer cronExec.Stop() // 起一个cron来更新错误日志 cronExecErrLog := cron.New(cron.WithSeconds()) @@ -652,10 +653,8 @@ func ExecuteTask(id int) { func FinishUpTask(s model.Spider, t model.Task) { // 更新任务结果数 go func() { - if s.Col != "" { - if err := model.UpdateTaskResultCount(t.Id); err != nil { - return - } + if err := model.UpdateTaskResultCount(t.Id); err != nil { + return } }() diff --git a/backend/utils/spider.go b/backend/utils/spider.go new file mode 100644 index 00000000..4484ccf0 --- /dev/null +++ b/backend/utils/spider.go @@ -0,0 +1,8 @@ +package utils + +func GetSpiderCol(col string, name string) string { + if col == "" { + return "results_" + name + } + return col +} diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue index 359cea46..e9139adc 100644 --- a/frontend/src/components/InfoView/SpiderInfoView.vue +++ b/frontend/src/components/InfoView/SpiderInfoView.vue @@ -45,10 +45,10 @@ /> - + diff --git a/frontend/src/views/project/ProjectList.vue b/frontend/src/views/project/ProjectList.vue index 12d5f3f8..d2ee0c9e 100644 --- a/frontend/src/views/project/ProjectList.vue +++ b/frontend/src/views/project/ProjectList.vue @@ -77,7 +77,7 @@