Mirror of https://github.com/crawlab-team/crawlab.git (synced 2026-01-22 17:31:03 +01:00)
Auto-generate results collection
@@ -3,6 +3,7 @@ package model
 import (
 	"crawlab/constants"
 	"crawlab/database"
+	"crawlab/utils"
 	"github.com/apex/log"
 	"github.com/globalsign/mgo/bson"
 	"runtime/debug"
@@ -89,11 +90,9 @@ func (t *Task) GetResults(pageNum int, pageSize int) (results []interface{}, tot
 		return
 	}
 
-	if spider.Col == "" {
-		return
-	}
+	col := utils.GetSpiderCol(spider.Col, spider.Name)
 
-	s, c := database.GetCol(spider.Col)
+	s, c := database.GetCol(col)
 	defer s.Close()
 
 	query := bson.M{
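(Note: the hunk above is cut off after query := bson.M{. For orientation only, a hedged sketch of how a paginated result query against the resolved collection typically looks with globalsign/mgo; the helper name fetchTaskResults and the exact filter are illustrative, not part of this commit.)

package model

import (
	"github.com/globalsign/mgo"
	"github.com/globalsign/mgo/bson"
)

// fetchTaskResults is an illustrative helper, not the actual GetResults body:
// it pages the documents of a single task out of the results collection.
func fetchTaskResults(c *mgo.Collection, taskId string, pageNum, pageSize int) (results []interface{}, total int, err error) {
	query := bson.M{"task_id": taskId}
	if err = c.Find(query).Skip((pageNum - 1) * pageSize).Limit(pageSize).All(&results); err != nil {
		return nil, 0, err
	}
	if total, err = c.Find(query).Count(); err != nil {
		return nil, 0, err
	}
	return results, total, nil
}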
@@ -388,8 +387,11 @@ func UpdateTaskResultCount(id string) (err error) {
 		return err
 	}
 
+	// default results collection
+	col := utils.GetSpiderCol(spider.Col, spider.Name)
+
 	// get the result count
-	s, c := database.GetCol(spider.Col)
+	s, c := database.GetCol(col)
 	defer s.Close()
 	resultCount, err := c.Find(bson.M{"task_id": task.Id}).Count()
 	if err != nil {
@@ -47,20 +47,20 @@ func GetProjectList(c *gin.Context) {
 	}
 
 	// get the number of spiders not assigned to any project
-	//if tag == "" {
-	//	noProject := model.Project{
-	//		Id:          bson.ObjectIdHex(constants.ObjectIdNull),
-	//		Name:        "No Project",
-	//		Description: "Not assigned to any project",
-	//	}
-	//	spiders, err := noProject.GetSpiders()
-	//	if err != nil {
-	//		HandleError(http.StatusInternalServerError, c, err)
-	//		return
-	//	}
-	//	noProject.Spiders = spiders
-	//	projects = append(projects, noProject)
-	//}
+	if tag == "" {
+		noProject := model.Project{
+			Id:          bson.ObjectIdHex(constants.ObjectIdNull),
+			Name:        "No Project",
+			Description: "Not assigned to any project",
+		}
+		spiders, err := noProject.GetSpiders()
+		if err != nil {
+			HandleError(http.StatusInternalServerError, c, err)
+			return
+		}
+		noProject.Spiders = spiders
+		projects = append(projects, noProject)
+	}
 
 	c.JSON(http.StatusOK, ListResponse{
 		Status: "ok",
@@ -437,7 +437,9 @@ func CopySpider(spider model.Spider, newName string) error {
 }
 
 func UpdateSpiderDedup(spider model.Spider) error {
-	s, c := database.GetCol(spider.Col)
+	col := utils.GetSpiderCol(spider.Col, spider.Name)
+
+	s, c := database.GetCol(col)
 	defer s.Close()
 
 	if !spider.IsDedup {
@@ -120,9 +120,12 @@ func SetEnv(cmd *exec.Cmd, envs []model.Env, task model.Task, spider model.Spide
 	}
 	_ = os.Setenv("NODE_PATH", nodePath)
 
+	// default results collection
+	col := utils.GetSpiderCol(spider.Col, spider.Name)
+
 	// default environment variables
 	cmd.Env = append(os.Environ(), "CRAWLAB_TASK_ID="+task.Id)
-	cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+spider.Col)
+	cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+col)
 	cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_HOST="+viper.GetString("mongo.host"))
 	cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_PORT="+viper.GetString("mongo.port"))
 	if viper.GetString("mongo.db") != "" {
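(For illustration: a minimal sketch of a spider-side process reading the variables injected above. The variable names come from this hunk; the program itself is hypothetical.)

package main

import (
	"fmt"
	"os"
)

func main() {
	// These are set by the worker before the spider process starts (see SetEnv above).
	taskId := os.Getenv("CRAWLAB_TASK_ID")
	col := os.Getenv("CRAWLAB_COLLECTION") // now defaults to "results_<spider_name>" when no collection is configured
	host := os.Getenv("CRAWLAB_MONGO_HOST")
	port := os.Getenv("CRAWLAB_MONGO_PORT")

	fmt.Printf("task %s writes results to collection %q at %s:%s\n", taskId, col, host, port)
}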
@@ -571,17 +574,15 @@ func ExecuteTask(id int) {
 	_ = t.Save()
 
 	// start a cron executor to count task results
-	if spider.Col != "" {
-		cronExec := cron.New(cron.WithSeconds())
-		_, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id))
-		if err != nil {
-			log.Errorf(GetWorkerPrefix(id) + err.Error())
-			debug.PrintStack()
-			return
-		}
-		cronExec.Start()
-		defer cronExec.Stop()
-	}
+	cronExec := cron.New(cron.WithSeconds())
+	_, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id))
+	if err != nil {
+		log.Errorf(GetWorkerPrefix(id) + err.Error())
+		debug.PrintStack()
+		return
+	}
+	cronExec.Start()
+	defer cronExec.Stop()
 
 	// start a cron to update the error log
 	cronExecErrLog := cron.New(cron.WithSeconds())
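(Context: the six-field spec "*/5 * * * * *" needs a cron runner with seconds support, which is what cron.New(cron.WithSeconds()) enables. A standalone sketch of the same 5-second schedule, assuming the robfig/cron/v3 API that these calls suggest:)

package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3"
)

func main() {
	// WithSeconds enables the leading seconds field, so "*/5 * * * * *" fires every 5 seconds.
	c := cron.New(cron.WithSeconds())
	if _, err := c.AddFunc("*/5 * * * * *", func() {
		fmt.Println("count task results at", time.Now().Format(time.RFC3339))
	}); err != nil {
		panic(err)
	}
	c.Start()
	defer c.Stop()
	time.Sleep(12 * time.Second) // let the job fire a couple of times
}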
@@ -652,10 +653,8 @@ func ExecuteTask(id int) {
 func FinishUpTask(s model.Spider, t model.Task) {
 	// update the task result count
 	go func() {
-		if s.Col != "" {
-			if err := model.UpdateTaskResultCount(t.Id); err != nil {
-				return
-			}
+		if err := model.UpdateTaskResultCount(t.Id); err != nil {
+			return
 		}
 	}()
backend/utils/spider.go (new file, 8 lines)
@@ -0,0 +1,8 @@
+package utils
+
+func GetSpiderCol(col string, name string) string {
+	if col == "" {
+		return "results_" + name
+	}
+	return col
+}
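(A quick standalone check of the fallback behaviour introduced by this file; the spider names below are made up.)

package main

import "fmt"

// Copy of backend/utils/spider.go so the demo is self-contained.
func GetSpiderCol(col string, name string) string {
	if col == "" {
		return "results_" + name
	}
	return col
}

func main() {
	fmt.Println(GetSpiderCol("", "douban_movies"))           // results_douban_movies
	fmt.Println(GetSpiderCol("my_results", "douban_movies")) // my_results
}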
@@ -45,10 +45,10 @@
 				/>
 			</el-form-item>
 		</template>
-		<el-form-item :label="$t('Results Collection')" prop="col" required>
+		<el-form-item :label="$t('Results Collection')" prop="col">
 			<el-input
 				v-model="spiderForm.col"
-				:placeholder="$t('Results Collection')"
+				:placeholder="$t('By default: ') + 'results_<spider_name>'"
 				:disabled="isView || isPublic"
 			/>
 		</el-form-item>
@@ -77,7 +77,7 @@
 		<ul v-else class="list">
 			<li
 				class="item"
-				v-for="(item, index) in projectList"
+				v-for="item in projectList.filter(d => d._id !== '000000000000000000000000')"
 				:key="item._id"
 				@click="onView(item)"
 			>
@@ -52,8 +52,12 @@
 			:disabled="spiderForm.is_scrapy"
 		/>
 	</el-form-item>
-	<el-form-item :label="$t('Results')" prop="col" required>
-		<el-input id="col" v-model="spiderForm.col" :placeholder="$t('Results')"/>
+	<el-form-item :label="$t('Results')" prop="col">
+		<el-input
+			id="col"
+			v-model="spiderForm.col"
+			:placeholder="$t('By default: ') + 'results_<spider_name>'"
+		/>
 	</el-form-item>
 	<el-form-item :label="$t('Upload Zip File')" label-width="120px" name="site">
 		<el-upload