结果集自动生成

This commit is contained in:
marvzhang
2020-04-19 17:49:21 +08:00
parent 86ea474c9a
commit b0806d2e94
8 changed files with 55 additions and 40 deletions

View File

@@ -3,6 +3,7 @@ package model
import (
"crawlab/constants"
"crawlab/database"
"crawlab/utils"
"github.com/apex/log"
"github.com/globalsign/mgo/bson"
"runtime/debug"
@@ -89,11 +90,9 @@ func (t *Task) GetResults(pageNum int, pageSize int) (results []interface{}, tot
return
}
if spider.Col == "" {
return
}
col := utils.GetSpiderCol(spider.Col, spider.Name)
s, c := database.GetCol(spider.Col)
s, c := database.GetCol(col)
defer s.Close()
query := bson.M{
@@ -388,8 +387,11 @@ func UpdateTaskResultCount(id string) (err error) {
return err
}
// default results collection
col := utils.GetSpiderCol(spider.Col, spider.Name)
// 获取结果数量
s, c := database.GetCol(spider.Col)
s, c := database.GetCol(col)
defer s.Close()
resultCount, err := c.Find(bson.M{"task_id": task.Id}).Count()
if err != nil {

View File

@@ -47,20 +47,20 @@ func GetProjectList(c *gin.Context) {
}
// 获取未被分配的爬虫数量
//if tag == "" {
// noProject := model.Project{
// Id: bson.ObjectIdHex(constants.ObjectIdNull),
// Name: "No Project",
// Description: "Not assigned to any project",
// }
// spiders, err := noProject.GetSpiders()
// if err != nil {
// HandleError(http.StatusInternalServerError, c, err)
// return
// }
// noProject.Spiders = spiders
// projects = append(projects, noProject)
//}
if tag == "" {
noProject := model.Project{
Id: bson.ObjectIdHex(constants.ObjectIdNull),
Name: "No Project",
Description: "Not assigned to any project",
}
spiders, err := noProject.GetSpiders()
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
noProject.Spiders = spiders
projects = append(projects, noProject)
}
c.JSON(http.StatusOK, ListResponse{
Status: "ok",

View File

@@ -437,7 +437,9 @@ func CopySpider(spider model.Spider, newName string) error {
}
func UpdateSpiderDedup(spider model.Spider) error {
s, c := database.GetCol(spider.Col)
col := utils.GetSpiderCol(spider.Col, spider.Name)
s, c := database.GetCol(col)
defer s.Close()
if !spider.IsDedup {

View File

@@ -120,9 +120,12 @@ func SetEnv(cmd *exec.Cmd, envs []model.Env, task model.Task, spider model.Spide
}
_ = os.Setenv("NODE_PATH", nodePath)
// default results collection
col := utils.GetSpiderCol(spider.Col, spider.Name)
// 默认环境变量
cmd.Env = append(os.Environ(), "CRAWLAB_TASK_ID="+task.Id)
cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+spider.Col)
cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+col)
cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_HOST="+viper.GetString("mongo.host"))
cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_PORT="+viper.GetString("mongo.port"))
if viper.GetString("mongo.db") != "" {
@@ -571,17 +574,15 @@ func ExecuteTask(id int) {
_ = t.Save()
// 起一个cron执行器来统计任务结果数
if spider.Col != "" {
cronExec := cron.New(cron.WithSeconds())
_, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id))
if err != nil {
log.Errorf(GetWorkerPrefix(id) + err.Error())
debug.PrintStack()
return
}
cronExec.Start()
defer cronExec.Stop()
cronExec := cron.New(cron.WithSeconds())
_, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id))
if err != nil {
log.Errorf(GetWorkerPrefix(id) + err.Error())
debug.PrintStack()
return
}
cronExec.Start()
defer cronExec.Stop()
// 起一个cron来更新错误日志
cronExecErrLog := cron.New(cron.WithSeconds())
@@ -652,10 +653,8 @@ func ExecuteTask(id int) {
func FinishUpTask(s model.Spider, t model.Task) {
// 更新任务结果数
go func() {
if s.Col != "" {
if err := model.UpdateTaskResultCount(t.Id); err != nil {
return
}
if err := model.UpdateTaskResultCount(t.Id); err != nil {
return
}
}()

8
backend/utils/spider.go Normal file
View File

@@ -0,0 +1,8 @@
package utils
// GetSpiderCol resolves the name of the results collection for a spider.
//
// When col is non-empty it is returned unchanged. Otherwise the default
// name is built by prefixing the spider name with "results_".
func GetSpiderCol(col string, name string) string {
	// An explicitly configured collection always takes precedence.
	if col != "" {
		return col
	}
	// No collection configured: fall back to the per-spider default.
	return "results_" + name
}

View File

@@ -45,10 +45,10 @@
/>
</el-form-item>
</template>
<el-form-item :label="$t('Results Collection')" prop="col" required>
<el-form-item :label="$t('Results Collection')" prop="col">
<el-input
v-model="spiderForm.col"
:placeholder="$t('Results Collection')"
:placeholder="$t('By default: ') + 'results_<spider_name>'"
:disabled="isView || isPublic"
/>
</el-form-item>

View File

@@ -77,7 +77,7 @@
<ul v-else class="list">
<li
class="item"
v-for="(item, index) in projectList"
v-for="item in projectList.filter(d => d._id !== '000000000000000000000000')"
:key="item._id"
@click="onView(item)"
>

View File

@@ -52,8 +52,12 @@
:disabled="spiderForm.is_scrapy"
/>
</el-form-item>
<el-form-item :label="$t('Results')" prop="col" required>
<el-input id="col" v-model="spiderForm.col" :placeholder="$t('Results')"/>
<el-form-item :label="$t('Results')" prop="col">
<el-input
id="col"
v-model="spiderForm.col"
:placeholder="$t('By default: ') + 'results_<spider_name>'"
/>
</el-form-item>
<el-form-item :label="$t('Upload Zip File')" label-width="120px" name="site">
<el-upload