Merge pull request #359 from crawlab-team/develop

v0.4.1
This commit is contained in:
Marvin Zhang
2019-12-13 14:22:33 +08:00
committed by GitHub
43 changed files with 790 additions and 543 deletions

View File

@@ -1,3 +1,23 @@
# 0.4.1 (2019-12-13)
### Features / Enhancement
- **Spiderfile Optimization**. Stages changed from dictionary to array. [#358](https://github.com/crawlab-team/crawlab/issues/358)
- **Baidu Tongji Update**.
### Bug Fixes
- **Unable to display schedule tasks**. [#353](https://github.com/crawlab-team/crawlab/issues/353)
- **Duplicate node registration**. [#334](https://github.com/crawlab-team/crawlab/issues/334)
# 0.4.0 (2019-12-06)
### Features / Enhancement
- **Configurable Spider**. Allow users to add spiders using *Spiderfile* to configure crawling rules.
- **Execution Mode**. Allow users to select 3 modes for task execution: *All Nodes*, *Selected Nodes* and *Random*.
### Bug Fixes
- **Task accidentally killed**. [#306](https://github.com/crawlab-team/crawlab/issues/306)
- **Documentation fix**. [#301](https://github.com/crawlab-team/crawlab/issues/301) [#258](https://github.com/crawlab-team/crawlab/issues/258)
- **Direct deploy incompatible with Windows**. [#288](https://github.com/crawlab-team/crawlab/issues/288)
- **Log files lost**. [#269](https://github.com/crawlab-team/crawlab/issues/269)
# 0.3.5 (2019-10-28)
### Features / Enhancement
- **Graceful Shutdown**. [detail](https://github.com/crawlab-team/crawlab/commit/63fab3917b5a29fd9770f9f51f1572b9f0420385)

View File

@@ -0,0 +1,10 @@
package constants
// Status values assigned to a schedule's Status field, followed by the
// messages assigned to its Message field when the schedule is flagged as
// errored.
const (
// ScheduleStatusStop is set when a schedule is stopped; schedules with this
// status are skipped when the scheduler reloads its jobs.
ScheduleStatusStop = "stop"
// ScheduleStatusRunning is set when a schedule is added as a cron job.
ScheduleStatusRunning = "running"
// ScheduleStatusError is set on a listed schedule whose spider cannot be found.
ScheduleStatusError = "error"
// ScheduleStatusErrorNotFoundNode is the message for a schedule whose node is
// missing. NOTE(review): only referenced from commented-out code in this change.
ScheduleStatusErrorNotFoundNode = "Not Found Node"
// ScheduleStatusErrorNotFoundSpider is the message for a schedule whose
// spider is missing.
ScheduleStatusErrorNotFoundSpider = "Not Found Spider"
)

View File

@@ -4,10 +4,12 @@ import (
"context"
"crawlab/entity"
"crawlab/utils"
"errors"
"github.com/apex/log"
"github.com/gomodule/redigo/redis"
"github.com/spf13/viper"
"runtime/debug"
"strings"
"time"
)
@@ -17,9 +19,18 @@ type Redis struct {
pool *redis.Pool
}
// Mutex groups the settings of a named lock.
//
// NOTE(review): the Redis methods visible alongside this type (Lock, UnLock)
// do not use it, and every field except Name is unexported — confirm where it
// is consumed before relying on the field notes below.
type Mutex struct {
// Name identifies the lock.
Name string
// expiry is presumably how long the lock lives before it lapses — TODO confirm.
expiry time.Duration
// tries is presumably the maximum number of acquire attempts — TODO confirm.
tries int
// delay is presumably the wait between acquire attempts — TODO confirm.
delay time.Duration
// value is presumably the token stored under the lock key — TODO confirm.
value string
}
// NewRedisClient returns a Redis wrapper whose connection pool is obtained
// from NewRedisPool.
func NewRedisClient() *Redis {
	client := new(Redis)
	client.pool = NewRedisPool()
	return client
}
func (r *Redis) RPush(collection string, value interface{}) error {
c := r.pool.Get()
defer utils.Close(c)
@@ -143,3 +154,59 @@ func Sub(channel string, consume ConsumeFunc) error {
}
return nil
}
// getLockKey builds the Redis key used for a lock: every ":" in lockKey is
// replaced with "-" and the result is prefixed with "nodes:lock:".
func (r *Redis) getLockKey(lockKey string) string {
	sanitized := strings.Replace(lockKey, ":", "-", -1)
	return "nodes:lock:" + sanitized
}
// Lock tries to acquire the lock identified by lockKey.
//
// It issues SET <key> <unix-seconds> NX PX 30000, so the key is written only
// when it does not exist yet and expires on its own after 30 seconds.
//
// On success the stored timestamp is returned; pass it to UnLock to release
// the lock. An error is returned when the command fails, or when the reply is
// nil, which is treated as the lock already being held.
func (r *Redis) Lock(lockKey string) (int64, error) {
	conn := r.pool.Get()
	defer utils.Close(conn)

	key := r.getLockKey(lockKey)
	now := time.Now().Unix()

	reply, err := conn.Do("SET", key, now, "NX", "PX", 30000)
	switch {
	case err != nil:
		log.Errorf("get lock fail with error: %s", err.Error())
		debug.PrintStack()
		return 0, err
	case reply == nil:
		log.Errorf("the lockKey is locked: key=%s", key)
		return 0, errors.New("the lockKey is locked")
	}
	return now, nil
}
// UnLock releases the lock identified by lockKey, but only when the value
// currently stored under the key equals value (the timestamp returned by
// Lock).
//
// The comparison and the deletion run inside a single Lua script so they are
// atomic. A separate GET followed by DEL leaves a window in which the key can
// expire and be re-acquired by another holder, whose lock would then be
// deleted by mistake.
//
// Failures are logged and otherwise ignored; nothing is returned.
func (r *Redis) UnLock(lockKey string, value int64) {
	c := r.pool.Get()
	defer utils.Close(c)
	lockKey = r.getLockKey(lockKey)

	// Returns the DEL count (1) when the caller still owns the lock, 0 when
	// the key is missing or holds a different value.
	const compareAndDelete = `
if redis.call("GET", KEYS[1]) == ARGV[1] then
	return redis.call("DEL", KEYS[1])
end
return 0`

	deleted, err := redis.Int64(c.Do("EVAL", compareAndDelete, 1, lockKey, value))
	if err != nil {
		log.Errorf("unlock failed, error: %s", err.Error())
		debug.PrintStack()
		return
	}
	if deleted == 0 {
		log.Errorf("unlock failed, lockKey is missing or held with another value: key=%s, value=%d", lockKey, value)
		return
	}
}

View File

@@ -5,7 +5,7 @@ type ConfigSpiderData struct {
Engine string `yaml:"engine" json:"engine"`
StartUrl string `yaml:"start_url" json:"start_url"`
StartStage string `yaml:"start_stage" json:"start_stage"`
Stages map[string]Stage `yaml:"stages" json:"stages"`
Stages []Stage `yaml:"stages" json:"stages"`
Settings map[string]string `yaml:"settings" json:"settings"`
}

View File

@@ -154,17 +154,20 @@ func main() {
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情
authGroup.PUT("/tasks", routes.PutTask) // 派发任务
authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务
authGroup.DELETE("/tasks_multiple", routes.DeleteMultipleTask) // 删除多个任务
authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) //删除指定状态的任务
authGroup.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务
authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志
authGroup.GET("/tasks/:id/results", routes.GetTaskResults) // 任务结果
authGroup.GET("/tasks/:id/results/download", routes.DownloadTaskResultsCsv) // 下载任务结果
// 定时任务
authGroup.GET("/schedules", routes.GetScheduleList) // 定时任务列表
authGroup.GET("/schedules/:id", routes.GetSchedule) // 定时任务详情
authGroup.PUT("/schedules", routes.PutSchedule) // 创建定时任务
authGroup.POST("/schedules/:id", routes.PostSchedule) // 修改定时任务
authGroup.DELETE("/schedules/:id", routes.DeleteSchedule) // 删除定时任务
authGroup.GET("/schedules", routes.GetScheduleList) // 定时任务列表
authGroup.GET("/schedules/:id", routes.GetSchedule) // 定时任务详情
authGroup.PUT("/schedules", routes.PutSchedule) // 创建定时任务
authGroup.POST("/schedules/:id", routes.PostSchedule) // 修改定时任务
authGroup.DELETE("/schedules/:id", routes.DeleteSchedule) // 删除定时任务
authGroup.POST("/schedules/:id/stop", routes.StopSchedule) // 停止定时任务
authGroup.POST("/schedules/:id/run", routes.RunSchedule) // 运行定时任务
// 统计数据
authGroup.GET("/stats/home", routes.GetHomeStats) // 首页统计数据
// 用户

View File

@@ -15,16 +15,12 @@ func GetAllFields(data entity.ConfigSpiderData) []entity.Field {
func GetStartStageName(data entity.ConfigSpiderData) string {
// 如果 start_stage 设置了且在 stages 里,则返回
if data.StartStage != "" {
for stageName := range data.Stages {
if stageName == data.StartStage {
return data.StartStage
}
}
return data.StartStage
}
// 否则返回第一个 stage
for stageName := range data.Stages {
return stageName
for _, stage := range data.Stages {
return stage.Name
}
return ""
}

View File

@@ -83,7 +83,8 @@ func (g ScrapyGenerator) ProcessSpider() error {
// 替换 parsers
strParser := ""
for stageName, stage := range g.ConfigData.Stages {
for _, stage := range g.ConfigData.Stages {
stageName := stage.Name
stageStr := g.GetParserString(stageName, stage)
strParser += stageStr
}

View File

@@ -169,7 +169,7 @@ func GetNode(id bson.ObjectId) (Node, error) {
defer s.Close()
if err := c.FindId(id).One(&node); err != nil {
log.Errorf(err.Error())
log.Errorf("get node error: %s, id: %s", err.Error(), id.Hex())
debug.PrintStack()
return node, err
}

View File

@@ -12,19 +12,25 @@ import (
)
type Schedule struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
Name string `json:"name" bson:"name"`
Description string `json:"description" bson:"description"`
SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"`
NodeId bson.ObjectId `json:"node_id" bson:"node_id"`
NodeKey string `json:"node_key" bson:"node_key"`
Cron string `json:"cron" bson:"cron"`
EntryId cron.EntryID `json:"entry_id" bson:"entry_id"`
Param string `json:"param" bson:"param"`
Id bson.ObjectId `json:"_id" bson:"_id"`
Name string `json:"name" bson:"name"`
Description string `json:"description" bson:"description"`
SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"`
//NodeId bson.ObjectId `json:"node_id" bson:"node_id"`
//NodeKey string `json:"node_key" bson:"node_key"`
Cron string `json:"cron" bson:"cron"`
EntryId cron.EntryID `json:"entry_id" bson:"entry_id"`
Param string `json:"param" bson:"param"`
RunType string `json:"run_type" bson:"run_type"`
NodeIds []bson.ObjectId `json:"node_ids" bson:"node_ids"`
// 状态
Status string `json:"status" bson:"status"`
// 前端展示
SpiderName string `json:"spider_name" bson:"spider_name"`
NodeName string `json:"node_name" bson:"node_name"`
Message string `json:"message" bson:"message"`
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
@@ -46,26 +52,26 @@ func (sch *Schedule) Delete() error {
return c.RemoveId(sch.Id)
}
func (sch *Schedule) SyncNodeIdAndSpiderId(node Node, spider Spider) {
sch.syncNodeId(node)
sch.syncSpiderId(spider)
}
//func (sch *Schedule) SyncNodeIdAndSpiderId(node Node, spider Spider) {
// sch.syncNodeId(node)
// sch.syncSpiderId(spider)
//}
func (sch *Schedule) syncNodeId(node Node) {
if node.Id.Hex() == sch.NodeId.Hex() {
return
}
sch.NodeId = node.Id
_ = sch.Save()
}
//func (sch *Schedule) syncNodeId(node Node) {
// if node.Id.Hex() == sch.NodeId.Hex() {
// return
// }
// sch.NodeId = node.Id
// _ = sch.Save()
//}
func (sch *Schedule) syncSpiderId(spider Spider) {
if spider.Id.Hex() == sch.SpiderId.Hex() {
return
}
sch.SpiderId = spider.Id
_ = sch.Save()
}
//func (sch *Schedule) syncSpiderId(spider Spider) {
// if spider.Id.Hex() == sch.SpiderId.Hex() {
// return
// }
// sch.SpiderId = spider.Id
// _ = sch.Save()
//}
func GetScheduleList(filter interface{}) ([]Schedule, error) {
s, c := database.GetCol("schedules")
@@ -78,29 +84,31 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) {
var schs []Schedule
for _, schedule := range schedules {
// 获取节点名称
if schedule.NodeId == bson.ObjectIdHex(constants.ObjectIdNull) {
// 选择所有节点
schedule.NodeName = "All Nodes"
} else {
// 选择单一节点
node, err := GetNode(schedule.NodeId)
if err != nil {
log.Errorf(err.Error())
continue
}
schedule.NodeName = node.Name
}
// TODO: 获取节点名称
//if schedule.NodeId == bson.ObjectIdHex(constants.ObjectIdNull) {
// // 选择所有节点
// schedule.NodeName = "All Nodes"
//} else {
// // 选择单一节点
// node, err := GetNode(schedule.NodeId)
// if err != nil {
// schedule.Status = constants.ScheduleStatusError
// schedule.Message = constants.ScheduleStatusErrorNotFoundNode
// } else {
// schedule.NodeName = node.Name
// }
//}
// 获取爬虫名称
spider, err := GetSpider(schedule.SpiderId)
if err != nil && err == mgo.ErrNotFound {
log.Errorf("get spider by id: %s, error: %s", schedule.SpiderId.Hex(), err.Error())
debug.PrintStack()
_ = schedule.Delete()
continue
schedule.Status = constants.ScheduleStatusError
schedule.Message = constants.ScheduleStatusErrorNotFoundSpider
} else {
schedule.SpiderName = spider.Name
}
schedule.SpiderName = spider.Name
schs = append(schs, schedule)
}
return schs, nil
@@ -125,12 +133,13 @@ func UpdateSchedule(id bson.ObjectId, item Schedule) error {
if err := c.FindId(id).One(&result); err != nil {
return err
}
node, err := GetNode(item.NodeId)
if err != nil {
return err
}
//node, err := GetNode(item.NodeId)
//if err != nil {
// return err
//}
item.NodeKey = node.Key
item.UpdateTs = time.Now()
//item.NodeKey = node.Key
if err := item.Save(); err != nil {
return err
}
@@ -141,15 +150,15 @@ func AddSchedule(item Schedule) error {
s, c := database.GetCol("schedules")
defer s.Close()
node, err := GetNode(item.NodeId)
if err != nil {
return err
}
//node, err := GetNode(item.NodeId)
//if err != nil {
// return err
//}
item.Id = bson.NewObjectId()
item.CreateTs = time.Now()
item.UpdateTs = time.Now()
item.NodeKey = node.Key
//item.NodeKey = node.Key
if err := c.Insert(&item); err != nil {
debug.PrintStack()

View File

@@ -319,11 +319,5 @@ func GetConfigSpiderData(spider Spider) (entity.ConfigSpiderData, error) {
return configData, err
}
// 赋值 stage_name
for stageName, stage := range configData.Stages {
stage.Name = stageName
configData.Stages[stageName] = stage
}
return configData, nil
}

View File

@@ -61,6 +61,7 @@ func (t *Task) Save() error {
defer s.Close()
t.UpdateTs = time.Now()
if err := c.UpdateId(t.Id, t); err != nil {
log.Errorf("update task error: %s", err.Error())
debug.PrintStack()
return err
}
@@ -152,14 +153,13 @@ func GetTask(id string) (Task, error) {
var task Task
if err := c.FindId(id).One(&task); err != nil {
log.Infof("get task error: %s, id: %s", err.Error(), id)
debug.PrintStack()
return task, err
}
return task, nil
}
func AddTask(item Task) error {
s, c := database.GetCol("tasks")
defer s.Close()

View File

@@ -14,11 +14,7 @@ func GetScheduleList(c *gin.Context) {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: results,
})
HandleSuccessData(c, results)
}
func GetSchedule(c *gin.Context) {
@@ -29,11 +25,8 @@ func GetSchedule(c *gin.Context) {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: result,
})
HandleSuccessData(c, result)
}
func PostSchedule(c *gin.Context) {
@@ -48,7 +41,7 @@ func PostSchedule(c *gin.Context) {
// 验证cron表达式
if err := services.ParserCron(newItem.Cron); err != nil {
HandleError(http.StatusOK, c, err)
HandleError(http.StatusInternalServerError, c, err)
return
}
@@ -65,10 +58,7 @@ func PostSchedule(c *gin.Context) {
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
HandleSuccess(c)
}
func PutSchedule(c *gin.Context) {
@@ -82,7 +72,7 @@ func PutSchedule(c *gin.Context) {
// 验证cron表达式
if err := services.ParserCron(item.Cron); err != nil {
HandleError(http.StatusOK, c, err)
HandleError(http.StatusInternalServerError, c, err)
return
}
@@ -98,10 +88,7 @@ func PutSchedule(c *gin.Context) {
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
HandleSuccess(c)
}
func DeleteSchedule(c *gin.Context) {
@@ -119,8 +106,25 @@ func DeleteSchedule(c *gin.Context) {
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
HandleSuccess(c)
}
// StopSchedule stops the schedule named by the ":id" path parameter
// (registered as POST /schedules/:id/stop).
//
// It replies 400 when the id is not a valid ObjectId hex string, 500 when the
// scheduler fails to stop the schedule, and the standard success response
// otherwise.
func StopSchedule(c *gin.Context) {
	id := c.Param("id")

	// bson.ObjectIdHex panics on a malformed id, so reject it up front.
	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	if err := services.Sched.Stop(bson.ObjectIdHex(id)); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}
	HandleSuccess(c)
}
// RunSchedule starts the schedule named by the ":id" path parameter
// (registered as POST /schedules/:id/run).
//
// It replies 400 when the id is not a valid ObjectId hex string, 500 when the
// scheduler fails to run the schedule, and the standard success response
// otherwise.
func RunSchedule(c *gin.Context) {
	id := c.Param("id")

	// bson.ObjectIdHex panics on a malformed id, so reject it up front.
	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	if err := services.Sched.Run(bson.ObjectIdHex(id)); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}
	HandleSuccess(c)
}

View File

@@ -29,7 +29,7 @@ func GetTaskList(c *gin.Context) {
// 绑定数据
data := TaskListRequestData{}
if err := c.ShouldBindQuery(&data); err != nil {
HandleError(http.StatusBadRequest, c, err)
HandleError(http.StatusInternalServerError, c, err)
return
}
if data.PageNum == 0 {
@@ -82,11 +82,7 @@ func GetTask(c *gin.Context) {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: result,
})
HandleSuccessData(c, result)
}
func PutTask(c *gin.Context) {
@@ -100,7 +96,7 @@ func PutTask(c *gin.Context) {
// 绑定数据
var reqBody TaskRequestBody
if err := c.ShouldBindJSON(&reqBody); err != nil {
HandleError(http.StatusBadRequest, c, err)
HandleError(http.StatusInternalServerError, c, err)
return
}
@@ -123,7 +119,6 @@ func PutTask(c *gin.Context) {
return
}
}
} else if reqBody.RunType == constants.RunTypeRandom {
// 随机
t := model.Task{
@@ -134,7 +129,6 @@ func PutTask(c *gin.Context) {
HandleError(http.StatusInternalServerError, c, err)
return
}
} else if reqBody.RunType == constants.RunTypeSelectedNodes {
// 指定节点
for _, nodeId := range reqBody.NodeIds {
@@ -149,16 +143,11 @@ func PutTask(c *gin.Context) {
return
}
}
} else {
HandleErrorF(http.StatusBadRequest, c, "invalid run_type")
HandleErrorF(http.StatusInternalServerError, c, "invalid run_type")
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
HandleSuccess(c)
}
func DeleteTaskByStatus(c *gin.Context) {
@@ -176,12 +165,31 @@ func DeleteTaskByStatus(c *gin.Context) {
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
HandleSuccess(c)
}
// DeleteMultipleTask deletes several tasks in one request.
//
// The JSON body is decoded into a map and the task ids are read from its
// "ids" entry. For each id the task log is removed first, then the task
// itself. Processing stops at the first failure, which is reported with a 500
// response; if every id succeeds the standard success response is sent.
func DeleteMultipleTask(c *gin.Context) {
	payload := map[string][]string{}
	if err := c.ShouldBindJSON(&payload); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	for _, taskId := range payload["ids"] {
		// Remove the log before the task; skip the task removal if that fails.
		err := services.RemoveLogByTaskId(taskId)
		if err == nil {
			err = model.RemoveTask(taskId)
		}
		if err != nil {
			HandleError(http.StatusInternalServerError, c, err)
			return
		}
	}

	HandleSuccess(c)
}
// 删除单个任务
func DeleteTask(c *gin.Context) {
id := c.Param("id")
@@ -190,33 +198,22 @@ func DeleteTask(c *gin.Context) {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 删除task
if err := model.RemoveTask(id); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
HandleSuccess(c)
}
func GetTaskLog(c *gin.Context) {
id := c.Param("id")
logStr, err := services.GetTaskLog(id)
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: logStr,
})
HandleSuccessData(c, logStr)
}
func GetTaskResults(c *gin.Context) {
@@ -225,7 +222,7 @@ func GetTaskResults(c *gin.Context) {
// 绑定数据
data := TaskResultsRequestData{}
if err := c.ShouldBindQuery(&data); err != nil {
HandleError(http.StatusBadRequest, c, err)
HandleError(http.StatusInternalServerError, c, err)
return
}
@@ -327,9 +324,5 @@ func CancelTask(c *gin.Context) {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
HandleSuccess(c)
}

View File

@@ -1,17 +1,15 @@
package routes
import (
"github.com/apex/log"
"github.com/gin-gonic/gin"
"net/http"
"runtime/debug"
)
func HandleError(statusCode int, c *gin.Context, err error) {
log.Errorf("handle error:" + err.Error())
debug.PrintStack()
c.AbortWithStatusJSON(statusCode, Response{
Status: "ok",
Message: "error",
Status: "error",
Message: "failure",
Error: err.Error(),
})
}
@@ -24,3 +22,18 @@ func HandleErrorF(statusCode int, c *gin.Context, err string) {
Error: err,
})
}
// HandleSuccess replies with HTTP 200 and a Response whose Status is "ok" and
// Message is "success", using gin's AbortWithStatusJSON.
func HandleSuccess(c *gin.Context) {
	resp := Response{Status: "ok", Message: "success"}
	c.AbortWithStatusJSON(http.StatusOK, resp)
}
// HandleSuccessData replies with HTTP 200 and a Response whose Status is
// "ok", Message is "success" and Data is the given payload, using gin's
// AbortWithStatusJSON.
func HandleSuccessData(c *gin.Context, data interface{}) {
	resp := Response{Status: "ok", Message: "success"}
	resp.Data = data
	c.AbortWithStatusJSON(http.StatusOK, resp)
}

View File

@@ -61,7 +61,9 @@ func ValidateSpiderfile(configData entity.ConfigSpiderData) error {
// 校验stages
dict := map[string]int{}
for stageName, stage := range configData.Stages {
for _, stage := range configData.Stages {
stageName := stage.Name
// stage 名称不能为空
if stageName == "" {
return errors.New("spiderfile invalid: stage name is empty")
@@ -152,12 +154,6 @@ func IsUniqueConfigSpiderFields(fields []entity.Field) bool {
func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
spiderDir := spider.Src
// 赋值 stage_name
for stageName, stage := range configData.Stages {
stage.Name = stageName
configData.Stages[stageName] = stage
}
// 删除已有的爬虫文件
for _, fInfo := range utils.ListDir(spiderDir) {
// 不删除Spiderfile

View File

@@ -95,19 +95,17 @@ func UpdateNodeStatus() {
}
func handleNodeInfo(key string, data Data) {
// 添加同步锁
v, err := database.RedisClient.Lock(key)
if err != nil {
return
}
defer database.RedisClient.UnLock(key, v)
// 更新节点信息到数据库
s, c := database.GetCol("nodes")
defer s.Close()
// 同个key可能因为并发被注册多次
var nodes []model.Node
_ = c.Find(bson.M{"key": key}).All(&nodes)
if len(nodes) > 1 {
for _, node := range nodes {
_ = c.RemoveId(node.Id)
}
}
var node model.Node
if err := c.Find(bson.M{"key": key}).One(&node); err != nil {
// 数据库不存在该节点

View File

@@ -4,8 +4,10 @@ import (
"crawlab/constants"
"crawlab/lib/cron"
"crawlab/model"
"errors"
"github.com/apex/log"
"github.com/satori/go.uuid"
"github.com/globalsign/mgo/bson"
uuid "github.com/satori/go.uuid"
"runtime/debug"
)
@@ -17,48 +19,87 @@ type Scheduler struct {
func AddScheduleTask(s model.Schedule) func() {
return func() {
node, err := model.GetNodeByKey(s.NodeKey)
if err != nil || node.Id.Hex() == "" {
log.Errorf("get node by key error: %s", err.Error())
debug.PrintStack()
return
}
spider := model.GetSpiderByName(s.SpiderName)
if spider == nil || spider.Id.Hex() == "" {
log.Errorf("get spider by name error: %s", err.Error())
debug.PrintStack()
return
}
// 同步ID到定时任务
s.SyncNodeIdAndSpiderId(node, *spider)
// 生成任务ID
id := uuid.NewV4()
// 生成任务模型
t := model.Task{
Id: id.String(),
SpiderId: spider.Id,
NodeId: node.Id,
Status: constants.StatusPending,
Param: s.Param,
}
if s.RunType == constants.RunTypeAllNodes {
// 所有节点
nodes, err := model.GetNodeList(nil)
if err != nil {
return
}
for _, node := range nodes {
t := model.Task{
Id: id.String(),
SpiderId: s.SpiderId,
NodeId: node.Id,
Param: s.Param,
}
// 将任务存入数据库
if err := model.AddTask(t); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
if err := AddTask(t); err != nil {
return
}
if err := AssignTask(t); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return
}
}
} else if s.RunType == constants.RunTypeRandom {
// 随机
t := model.Task{
Id: id.String(),
SpiderId: s.SpiderId,
Param: s.Param,
}
if err := AddTask(t); err != nil {
return
}
if err := AssignTask(t); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return
}
} else if s.RunType == constants.RunTypeSelectedNodes {
// 指定节点
for _, nodeId := range s.NodeIds {
t := model.Task{
Id: id.String(),
SpiderId: s.SpiderId,
NodeId: nodeId,
Param: s.Param,
}
if err := AddTask(t); err != nil {
return
}
if err := AssignTask(t); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return
}
}
} else {
return
}
// 加入任务队列
if err := AssignTask(t); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return
}
//node, err := model.GetNodeByKey(s.NodeKey)
//if err != nil || node.Id.Hex() == "" {
// log.Errorf("get node by key error: %s", err.Error())
// debug.PrintStack()
// return
//}
//
//spider := model.GetSpiderByName(s.SpiderName)
//if spider == nil || spider.Id.Hex() == "" {
// log.Errorf("get spider by name error: %s", err.Error())
// debug.PrintStack()
// return
//}
//
//// 同步ID到定时任务
//s.SyncNodeIdAndSpiderId(node, *spider)
}
}
@@ -106,6 +147,7 @@ func (s *Scheduler) AddJob(job model.Schedule) error {
// 更新EntryID
job.EntryId = eid
job.Status = constants.ScheduleStatusRunning
if err := job.Save(); err != nil {
log.Errorf("job save error: %s", err.Error())
debug.PrintStack()
@@ -134,6 +176,36 @@ func ParserCron(spec string) error {
return nil
}
// Stop removes the cron entry of the schedule with the given id and saves the
// schedule with status constants.ScheduleStatusStop.
//
// It returns the lookup error if the schedule cannot be loaded, an error when
// the schedule has no cron entry (EntryId == 0), or the error from saving.
func (s *Scheduler) Stop(id bson.ObjectId) error {
	sch, err := model.GetSchedule(id)
	if err != nil {
		return err
	}
	if sch.EntryId == 0 {
		return errors.New("entry id not found")
	}

	s.cron.Remove(sch.EntryId)

	// Persist the new status.
	sch.Status = constants.ScheduleStatusStop
	return sch.Save()
}
// Run loads the schedule with the given id and registers it through AddJob.
// It returns the lookup error or the error reported by AddJob.
func (s *Scheduler) Run(id bson.ObjectId) error {
	sch, err := model.GetSchedule(id)
	if err != nil {
		return err
	}
	return s.AddJob(sch)
}
func (s *Scheduler) Update() error {
// 删除所有定时任务
s.RemoveAll()
@@ -151,6 +223,10 @@ func (s *Scheduler) Update() error {
// 单个任务
job := sList[i]
if job.Status == constants.ScheduleStatusStop {
continue
}
// 添加到定时任务
if err := s.AddJob(job); err != nil {
log.Errorf("add job error: %s, job: %s, cron: %s", err.Error(), job.Name, job.Cron)

View File

@@ -418,15 +418,23 @@ func ExecuteTask(id int) {
t.Status = constants.StatusRunning // 任务状态
t.WaitDuration = t.StartTs.Sub(t.CreateTs).Seconds() // 等待时长
// 判断爬虫文件是否存在
gfFile := model.GetGridFs(spider.FileId)
if gfFile == nil {
t.Error = "找不到爬虫文件,请重新上传"
t.Status = constants.StatusError
t.FinishTs = time.Now() // 结束时间
t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长
t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长
_ = t.Save()
return
}
// 开始执行任务
log.Infof(GetWorkerPrefix(id) + "开始执行任务(ID:" + t.Id + ")")
// 储存任务
if err := t.Save(); err != nil {
log.Errorf(err.Error())
HandleTaskError(t, err)
return
}
_ = t.Save()
// 起一个cron执行器来统计任务结果数
if spider.Col != "" {

View File

@@ -4,17 +4,17 @@ start_url: "http://news.163.com/special/0001386F/rank_news.html"
start_stage: "list"
engine: "scrapy"
stages:
list:
is_list: true
list_css: "table tr:not(:first-child)"
fields:
- name: "title"
css: "td:nth-child(1) > a"
- name: "url"
css: "td:nth-child(1) > a"
attr: "href"
- name: "clicks"
css: "td.cBlue"
- name: list
is_list: true
list_css: "table tr:not(:first-child)"
fields:
- name: "title"
css: "td:nth-child(1) > a"
- name: "url"
css: "td:nth-child(1) > a"
attr: "href"
- name: "clicks"
css: "td.cBlue"
settings:
ROBOTSTXT_OBEY: false
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36

View File

@@ -4,19 +4,19 @@ start_url: http://www.baidu.com/s?wd=crawlab
start_stage: list
engine: scrapy
stages:
list:
is_list: true
list_xpath: //*[contains(@class, "c-container")]
page_xpath: //*[@id="page"]//a[@class="n"][last()]
page_attr: href
fields:
- name: title
xpath: .//h3/a
- name: url
xpath: .//h3/a
attr: href
- name: abstract
xpath: .//*[@class="c-abstract"]
- name: list
is_list: true
list_xpath: //*[contains(@class, "c-container")]
page_xpath: //*[@id="page"]//a[@class="n"][last()]
page_attr: href
fields:
- name: title
xpath: .//h3/a
- name: url
xpath: .//h3/a
attr: href
- name: abstract
xpath: .//*[@class="c-abstract"]
settings:
ROBOTSTXT_OBEY: false
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36

View File

@@ -4,25 +4,25 @@ start_url: "http://books.toscrape.com"
start_stage: "list"
engine: "scrapy"
stages:
list:
is_list: true
list_css: "section article.product_pod"
page_css: "ul.pager li.next a"
page_attr: "href"
fields:
- name: "title"
css: "h3 > a"
- name: "url"
css: "h3 > a"
attr: "href"
next_stage: "detail"
- name: "price"
css: ".product_price > .price_color"
detail:
is_list: false
fields:
- name: "description"
css: "#product_description + p"
- name: list
is_list: true
list_css: "section article.product_pod"
page_css: "ul.pager li.next a"
page_attr: "href"
fields:
- name: "title"
css: "h3 > a"
- name: "url"
css: "h3 > a"
attr: "href"
next_stage: "detail"
- name: "price"
css: ".product_price > .price_color"
- name: detail
is_list: false
fields:
- name: "description"
css: "#product_description + p"
settings:
ROBOTSTXT_OBEY: true
AUTOTHROTTLE_ENABLED: true

View File

@@ -1,6 +1,6 @@
{
"name": "crawlab",
"version": "0.3.5",
"version": "0.4.1",
"private": true,
"scripts": {
"serve": "vue-cli-service serve --ip=0.0.0.0 --mode=development",

View File

@@ -30,10 +30,10 @@ export default {
document.querySelector('.el-message__closeBtn').click()
if (value === 1) {
this.$st.sendPv('/allow_stats')
this.$st.sendEv('全局', '允许/禁止统计', 'value', 'allow')
this.$st.sendEv('全局', '允许/禁止统计', '允许')
} else {
this.$st.sendPv('/disallow_stats')
this.$st.sendEv('全局', '允许/禁止统计', 'value', 'disallow')
this.$st.sendEv('全局', '允许/禁止统计', '禁止')
}
}

View File

@@ -1,53 +1,39 @@
import axios from 'axios'
import router from '../router'
import { Message } from 'element-ui'
let baseUrl = process.env.VUE_APP_BASE_URL ? process.env.VUE_APP_BASE_URL : 'http://localhost:8000'
const request = async (method, path, params, data, others = {}) => {
try {
const url = baseUrl + path
const headers = {
'Authorization': window.localStorage.getItem('token')
const request = (method, path, params, data, others = {}) => {
const url = baseUrl + path
const headers = {
'Authorization': window.localStorage.getItem('token')
}
return axios({
method,
url,
params,
data,
headers,
...others
}).then((response) => {
if (response.status === 200) {
return Promise.resolve(response)
}
const response = await axios({
method,
url,
params,
data,
headers,
...others
})
// console.log(response)
return response
} catch (e) {
if (e.response.status === 401 && router.currentRoute.path !== '/login') {
return Promise.reject(response)
}).catch((e) => {
let response = e.response
if (response.status === 400) {
Message.error(response.data.error)
}
if (response.status === 401 && router.currentRoute.path !== '/login') {
console.log('login')
router.push('/login')
}
await Promise.reject(e)
}
// return new Promise((resolve, reject) => {
// const url = baseUrl + path
// const headers = {
// 'Authorization': window.localStorage.getItem('token')
// }
// axios({
// method,
// url,
// params,
// data,
// headers,
// ...others
// })
// .then(resolve)
// .catch(error => {
// console.log(error)
// if (error.response.status === 401) {
// router.push('/login')
// }
// reject(error)
// })
// })
if (response.status === 500) {
Message.error(response.data.error)
}
})
}
const get = (path, params) => {
@@ -63,7 +49,7 @@ const put = (path, data) => {
}
const del = (path, data) => {
return request('DELETE', path)
return request('DELETE', path, {}, data)
}
export default {

View File

@@ -80,7 +80,7 @@ export default {
this.$message.success(this.$t('A task has been scheduled successfully'))
})
this.$emit('close')
this.$st.sendEv('爬虫', '运行')
this.$st.sendEv('爬虫确认', '确认运行', this.form.runType)
})
}
},

View File

@@ -68,9 +68,10 @@
v-model="spiderForm.config.start_stage"
:placeholder="$t('Start Stage')"
:class="startStageClass"
@change="$st.sendEv('爬虫详情', '配置', '改变起始阶段')"
>
<el-option
v-for="n in Object.keys(spiderForm.config.stages)"
v-for="n in spiderForm.config.stages.map(s => s.name)"
:key="n"
:value="n"
:label="n"
@@ -133,9 +134,9 @@
:value="activeNames"
>
<el-collapse-item
v-for="(stage, stageName) in spiderForm.config.stages"
:key="stageName"
:name="stageName"
v-for="(stage, index) in spiderForm.config.stages"
:key="index"
:name="stage.name"
>
<template slot="title">
<ul class="stage-list">
@@ -271,7 +272,7 @@
title="List Page Fields"
:fields="stage.fields"
:stage="stage"
:stage-names="Object.keys(spiderForm.config.stages)"
:stage-names="spiderForm.config.stages.map(s => s.name)"
/>
</el-collapse-item>
</el-collapse>
@@ -397,7 +398,7 @@ export default {
},
isCss () {
let i = 0
Object.values(this.spiderForm.config.stages).forEach(stage => {
this.spiderForm.config.stages.forEach(stage => {
stage.fields.forEach(field => {
if (!field.css) i++
})
@@ -406,7 +407,7 @@ export default {
},
isXpath () {
let i = 0
Object.values(this.spiderForm.config.stages).forEach(stage => {
this.spiderForm.config.stages.forEach(stage => {
stage.fields.forEach(field => {
if (!field.xpath) i++
})
@@ -414,7 +415,7 @@ export default {
return i === 0
},
activeNames () {
return Object.values(this.spiderForm.config.stages).map(d => d.name)
return this.spiderForm.config.stages.map(d => d.name)
},
startUrlClass () {
if (!this.spiderForm.config.start_url) {
@@ -464,7 +465,7 @@ export default {
// 加入剩余阶段
i = 0
const restStages = Object.values(this.spiderForm.config.stages)
const restStages = this.spiderForm.config.stages
.filter(stage => !allStageNames.has(stage.name))
restStages.forEach(stage => {
// 加入节点信息
@@ -479,17 +480,10 @@ export default {
})
return nodes
// const stages = Object.values(this.spiderForm.config.stages)
// return stages.map((stage, i) => {
// return {
// name: stage.name,
// ...stage
// }
// })
},
stageEdges () {
const edges = []
const stages = Object.values(this.spiderForm.config.stages)
const stages = this.spiderForm.config.stages
stages.forEach(stage => {
for (let i = 0; i < stage.fields.length; i++) {
const field = stage.fields[i]
@@ -509,15 +503,20 @@ export default {
this.spiderForm.crawl_type = value
},
async onSave () {
this.$st.sendEv('爬虫详情-配置', '保存')
this.$st.sendEv('爬虫详情', '配置', '保存')
this.saveLoading = true
try {
await this.$store.dispatch('spider/postConfigSpiderConfig')
this.$message.success(this.$t('Spider info has been saved successfully'))
const res = await this.$store.dispatch('spider/postConfigSpiderConfig')
if (!res.data.error) {
this.$message.success(this.$t('Spider info has been saved successfully'))
return true
}
return false
} catch (e) {
this.$message.error(this.$t('Something wrong happened'))
return false
} finally {
this.saveLoading = false
}
this.saveLoading = false
},
onDialogClose () {
this.dialogVisible = false
@@ -544,15 +543,17 @@ export default {
.finally(() => {
this.previewLoading = false
})
this.$st.sendEv('爬虫详情-配置', '预览')
this.$st.sendEv('爬虫详情', '配置', '预览')
})
}
})
},
async onCrawl () {
await this.onSave()
this.crawlConfirmDialogVisible = true
this.$st.sendEv('爬虫详情-配置', '点击运行')
this.$st.sendEv('爬虫详情', '配置', '点击运行')
const res = await this.onSave()
if (res) {
this.crawlConfirmDialogVisible = true
}
},
onExtractFields () {
this.$refs['form'].validate(res => {
@@ -580,7 +581,7 @@ export default {
.finally(() => {
this.extractFieldsLoading = false
})
this.$st.sendEv('爬虫详情-配置', '提取字段')
this.$st.sendEv('爬虫详情', '配置', '提取字段')
})
}
})
@@ -595,7 +596,8 @@ export default {
return value
},
onClickSelectorType (selectorType) {
Object.values(this.spiderForm.config.stages).forEach(stage => {
this.$st.sendEv('爬虫详情', '配置', `点击阶段选择器类别-${selectorType}`)
this.spiderForm.config.stages.forEach(stage => {
// 列表
if (selectorType === 'css') {
if (stage.list_xpath) stage.list_xpath = ''
@@ -627,40 +629,29 @@ export default {
})
},
onStageNameFocus (ev) {
console.log(ev)
ev.stopPropagation()
// ev.preventDefault()
},
onEditStage (stage) {
this.$st.sendEv('爬虫详情', '配置', '更改阶段名称')
this.$set(stage, 'isEdit', !stage.isEdit)
setTimeout(() => {
this.$refs[`stage-name-${stage.name}`][0].focus()
}, 0)
},
onCopyStage (stage) {
this.$st.sendEv('爬虫详情', '配置', '复制阶段')
const stages = this.spiderForm.config.stages
const ts = Math.floor(new Date().getTime()).toString()
const newStage = JSON.parse(JSON.stringify(stage))
newStage.name = `stage_${ts}`
this.$set(this.spiderForm.config.stages, newStage.name, newStage)
},
onRemoveStage (stage) {
const stages = JSON.parse(JSON.stringify(this.spiderForm.config.stages))
delete stages[stage.name]
this.$set(this.spiderForm.config, 'stages', stages)
if (Object.keys(stages).length === 0) {
this.onAddStage()
newStage.name = `${stage.name}_copy_${ts}`
for (let i = 0; i < stages.length; i++) {
if (stage.name === stages[i].name) {
stages.splice(i + 1, 0, newStage)
}
}
// 重置next_stage被设置为该stage的field
Object.values(this.spiderForm.config.stages).forEach(_stage => {
_stage.fields.forEach(field => {
if (field.next_stage === stage.name) {
this.$set(field, 'next_stage', '')
}
})
})
},
onAddStage (stage) {
const stages = JSON.parse(JSON.stringify(this.spiderForm.config.stages))
addStage (index) {
const stages = this.spiderForm.config.stages
const ts = Math.floor(new Date().getTime()).toString()
const newStageName = `stage_${ts}`
const newField = { name: `field_${ts}`, next_stage: '' }
@@ -671,15 +662,46 @@ export default {
} else {
newField['xpath'] = '//body'
}
stages[newStageName] = {
stages.splice(index + 1, 0, {
name: newStageName,
list_css: this.isCss ? 'body' : '',
list_xpath: this.isXpath ? '//body' : '',
page_css: '',
page_xpath: '',
fields: [newField]
})
},
onRemoveStage (stage) {
this.$st.sendEv('爬虫详情', '配置', '删除阶段')
const stages = this.spiderForm.config.stages
for (let i = 0; i < stages.length; i++) {
if (stage.name === stages[i].name) {
stages.splice(i, 1)
break
}
}
// 如果只剩一个stage加入新的stage
if (stages.length === 0) {
this.addStage(0)
}
// 重置next_stage被设置为该stage的field
stages.forEach(_stage => {
_stage.fields.forEach(field => {
if (field.next_stage === stage.name) {
this.$set(field, 'next_stage', '')
}
})
})
},
onAddStage (stage) {
this.$st.sendEv('爬虫详情', '配置', '添加阶段')
const stages = this.spiderForm.config.stages
for (let i = 0; i < stages.length; i++) {
if (stage.name === stages[i].name) {
this.addStage(i)
break
}
}
this.$set(this.spiderForm.config, 'stages', stages)
},
renderProcessChart () {
const option = {
@@ -797,10 +819,12 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
},
onCheckIsList (value, stage) {
if (value) {
this.$st('爬虫详情', '配置', '勾选列表页')
if (!stage.list_css && !stage.list_xpath) {
stage.list_xpath = '//body'
}
} else {
this.$st('爬虫详情', '配置', '取消勾选列表页')
stage.list_css = ''
stage.list_xpath = ''
}
@@ -822,10 +846,12 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
},
onCheckIsPage (value, stage) {
if (value) {
this.$st('爬虫详情', '配置', '勾选分页')
if (!stage.page_css && !stage.page_xpath) {
stage.page_xpath = '//body'
}
} else {
this.$st('爬虫详情', '配置', '取消勾选分页')
stage.page_css = ''
stage.page_xpath = ''
}
@@ -852,7 +878,7 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
}
},
mounted () {
this.activeNames = Object.keys(this.spiderForm.config.stages)
this.activeNames = this.spiderForm.config.stages.map(stage => stage.name)
}
}
</script>

View File

@@ -49,11 +49,11 @@ export default {
name: '',
value: ''
})
this.$st.sendEv('爬虫详情-环境', '添加')
this.$st.sendEv('爬虫详情', '环境', '添加')
},
deleteEnv (index) {
this.spiderForm.envs.splice(index, 1)
this.$st.sendEv('爬虫详情-环境', '删除')
this.$st.sendEv('爬虫详情', '环境', '删除')
},
save () {
this.$store.dispatch('spider/editSpider')
@@ -63,7 +63,7 @@ export default {
.catch(error => {
this.$message.error(error)
})
this.$st.sendEv('爬虫详情-环境', '保存')
this.$st.sendEv('爬虫详情', '环境', '保存')
}
}
}

View File

@@ -112,7 +112,7 @@ export default {
methods: {
onCrawl () {
this.crawlConfirmDialogVisible = true
this.$st.sendEv('爬虫详情-概览', '点击运行')
this.$st.sendEv('爬虫详情', '概览', '点击运行')
},
onSave () {
this.$refs['spiderForm'].validate(res => {
@@ -126,7 +126,7 @@ export default {
})
}
})
this.$st.sendEv('爬虫详情-概览', '保存')
this.$st.sendEv('爬虫详情', '概览', '保存')
},
fetchSiteSuggestions (keyword, callback) {
this.$request.get('/sites', {

View File

@@ -52,11 +52,11 @@ export default {
methods: {
onClickNodeTitle () {
this.$router.push(`/nodes/${this.nodeForm._id}`)
this.$st.sendEv('任务详情-概览', '点击节点详情')
this.$st.sendEv('任务详情', '概览', '点击节点详情')
},
onClickSpiderTitle () {
this.$router.push(`/spiders/${this.spiderForm._id}`)
this.$st.sendEv('任务详情-概览', '点击爬虫详情')
this.$st.sendEv('任务详情', '概览', '点击爬虫详情')
}
},
created () {

View File

@@ -1,11 +1,5 @@
<template>
<div class="fields-table-view">
<!-- <el-row class="button-group-container">-->
<!-- <label class="title">{{$t(this.title)}}</label>-->
<!-- <div class="button-group">-->
<!-- <el-button type="primary" size="small" @click="addField" icon="el-icon-plus">{{$t('Add Field')}}</el-button>-->
<!-- </div>-->
<!-- </el-row>-->
<el-row>
<el-table :data="fields"
class="table edit"
@@ -171,32 +165,14 @@ export default {
}
},
methods: {
addField () {
this.fields.push({
type: 'css',
extract_type: 'text'
})
this.$st.sendEv('爬虫详情-配置', '添加字段')
},
deleteField (index) {
this.fields.splice(index, 1)
this.$st.sendEv('爬虫详情-配置', '删除字段')
},
onNameChange (row) {
if (this.fields.filter(d => d.name === row.name).length > 1) {
this.$message.error(this.$t(`Duplicated field names for ${row.name}`))
}
this.$st.sendEv('爬虫详情-配置', '更改字段')
},
onCheck (row) {
this.fields.forEach(d => {
if (row.name !== d.name) {
this.$set(d, 'is_detail', false)
}
})
this.$st.sendEv('爬虫详情-配置', '设置详情页URL')
this.$st.sendEv('爬虫详情', '配置', '更改字段')
},
onClickSelectorType (row, selectorType) {
this.$st.sendEv('爬虫详情', '配置', `点击字段选择器类别-${selectorType}`)
if (selectorType === 'css') {
if (row.xpath) this.$set(row, 'xpath', '')
if (!row.css) this.$set(row, 'css', 'body')
@@ -206,6 +182,7 @@ export default {
}
},
onClickIsAttribute (row, isAttribute) {
this.$st.sendEv('爬虫详情', '配置', '设置字段属性')
if (!isAttribute) {
// 文本
if (row.attr) this.$set(row, 'attr', '')
@@ -224,6 +201,7 @@ export default {
}
},
onRemoveField (row) {
this.$st.sendEv('爬虫详情', '配置', '删除字段')
for (let i = 0; i < this.fields.length; i++) {
if (row.name === this.fields[i].name) {
this.fields.splice(i, 1)
@@ -238,6 +216,7 @@ export default {
}
},
onAddField (row) {
this.$st.sendEv('爬虫详情', '配置', '添加字段')
for (let i = 0; i < this.fields.length; i++) {
if (row.name === this.fields[i].name) {
this.fields.splice(i + 1, 0, {

View File

@@ -77,51 +77,14 @@ export default {
}
},
methods: {
addField () {
this.list.push({
type: 'css',
extract_type: 'text'
})
this.$st.sendEv('爬虫详情-配置', '添加字段')
},
deleteField (index) {
this.list.splice(index, 1)
this.$st.sendEv('爬虫详情-配置', '删除字段')
},
onChange (row) {
if (this.list.filter(d => d.name === row.name).length > 1) {
this.$message.error(this.$t(`Duplicated field names for ${row.name}`))
}
this.$store.commit('spider/SET_SPIDER_FORM_CONFIG_SETTINGS', this.list)
this.$st.sendEv('爬虫详情-配置', '更改字段')
},
onCheck (row) {
this.list.forEach(d => {
if (row.name !== d.name) {
this.$set(d, 'is_detail', false)
}
})
this.$st.sendEv('爬虫详情-配置', '设置详情页URL')
},
onClickSelectorType (row, selectorType) {
if (selectorType === 'css') {
if (row.xpath) this.$set(row, 'xpath', '')
if (!row.css) this.$set(row, 'css', 'body')
} else {
if (row.css) this.$set(row, 'css', '')
if (!row.xpath) this.$set(row, 'xpath', '//body')
}
},
onClickIsAttribute (row, isAttribute) {
if (!isAttribute) {
// 文本
if (row.attr) this.$set(row, 'attr', '')
} else {
// 属性
if (!row.attr) this.$set(row, 'attr', 'href')
}
},
onRemoveField (row) {
this.$st.sendEv('爬虫详情', '配置', '删除设置')
const list = JSON.parse(JSON.stringify(this.list))
for (let i = 0; i < list.length; i++) {
if (row.name === list[i].name) {
@@ -137,6 +100,7 @@ export default {
this.$store.commit('spider/SET_SPIDER_FORM_CONFIG_SETTINGS', list)
},
onAddField (row) {
this.$st.sendEv('爬虫详情', '配置', '添加设置')
const list = JSON.parse(JSON.stringify(this.list))
for (let i = 0; i < list.length; i++) {
if (row.name === list[i].name) {

View File

@@ -212,6 +212,11 @@ export default {
'Schedule Description': '定时任务描述',
'Parameters': '参数',
'Add Schedule': '添加定时任务',
'stop': '暂停',
'running': '运行',
'error': '错误',
'Not Found Node': '节点配置错误',
'Not Found Spider': '爬虫配置错误',
// 网站
'Site': '网站',
@@ -254,7 +259,7 @@ export default {
'Executables': '执行文件',
// 弹出框
Notification: '提示',
'Notification': '提示',
'Are you sure to delete this node?': '你确定要删除该节点?',
'Are you sure to run this spider?': '你确定要运行该爬虫?',
'Node info has been saved successfully': '节点信息已成功保存',
@@ -274,6 +279,7 @@ export default {
'Saved successfully': '成功保存',
'Please zip your spider files from the root directory': '爬虫文件请从根目录下开始压缩。',
'English': 'English',
'Are you sure to delete the schedule task?': '确定删除定时任务?',
// 登录
'Sign in': '登录',
'Sign-in': '登录',

View File

@@ -1,8 +1,9 @@
import request from '../../api/request'
const state = {
scheduleList: [],
scheduleForm: {}
scheduleForm: {
node_ids: []
}
}
const getters = {}
@@ -31,6 +32,12 @@ const actions = {
},
removeSchedule ({ state }, id) {
request.delete(`/schedules/${id}`)
},
stopSchedule ({ state, dispatch }, id) {
return request.post(`/schedules/${id}/stop`)
},
runSchedule ({ state, dispatch }, id) {
return request.post(`/schedules/${id}/run`)
}
}

View File

@@ -1,6 +1,5 @@
import Vue from 'vue'
import request from '../../api/request'
import axisModelCommonMixin from 'echarts/src/coord/axisModelCommonMixin'
const state = {
// list of spiders

View File

@@ -102,6 +102,11 @@ const actions = {
dispatch('getTaskList')
})
},
deleteTaskMultiple ({ state }, ids) {
return request.delete(`/tasks_multiple`, {
ids: ids
})
},
getTaskLog ({ state, commit }, id) {
commit('SET_TASK_LOG', '')
return request.get(`/tasks/${id}/log`)

View File

@@ -8,9 +8,9 @@
<i class="el-icon-arrow-down el-icon--right"></i>
</span>
<el-dropdown-menu slot="dropdown" class="user-dropdown">
<el-dropdown-item>
<span style="display:block;">v0.3.5</span>
</el-dropdown-item>
<!-- <el-dropdown-item>-->
<!-- <span style="display:block;">v0.4.1</span>-->
<!-- </el-dropdown-item>-->
<el-dropdown-item>
<span style="display:block;" @click="logout">{{$t('Logout')}}</span>
</el-dropdown-item>
@@ -74,7 +74,7 @@ export default {
this.$i18n.locale = lang
this.$store.commit('lang/SET_LANG', lang)
this.$st.sendEv('全局', '切换中英文', 'lang', lang)
this.$st.sendEv('全局', '切换中英文', lang)
}
}
}

View File

@@ -1,7 +1,7 @@
<template>
<el-scrollbar wrap-class="scrollbar-wrapper">
<div class="sidebar-logo" :class="isCollapse ? 'collapsed' : ''">
<span>C</span><span v-show="!isCollapse">rawlab</span>
<span>C</span><span v-show="!isCollapse">rawlab<span class="version">v{{version}}</span></span>
</div>
<el-menu
:show-timeout="200"
@@ -48,6 +48,11 @@ export default {
isCollapse () {
return !this.sidebar.opened
}
},
data () {
return {
version: '0.4.1'
}
}
}
</script>
@@ -73,4 +78,10 @@ export default {
.sidebar-container .sidebar-logo.collapsed {
padding-left: 8px;
}
.sidebar-container .sidebar-logo .version {
margin-left: 5px;
font-weight: normal;
font-size: 12px;
}
</style>

View File

@@ -202,7 +202,7 @@ export default {
},
onRefresh () {
this.$store.dispatch('node/getNodeList')
this.$st.sendEv('节点', '刷新')
this.$st.sendEv('节点列表', '刷新')
},
onSubmit () {
const vm = this
@@ -246,13 +246,13 @@ export default {
message: 'Deleted successfully'
})
})
this.$st.sendEv('节点', '删除', 'id', row._id)
this.$st.sendEv('节点列表', '删除节点')
})
},
onView (row) {
this.$router.push(`/nodes/${row._id}`)
this.$st.sendEv('节点', '查看', 'id', row._id)
this.$st.sendEv('节点列表', '查看节点')
},
onPageChange () {
this.$store.dispatch('node/getNodeList')

View File

@@ -14,9 +14,15 @@
<el-form-item :label="$t('Schedule Name')" prop="name" required>
<el-input v-model="scheduleForm.name" :placeholder="$t('Schedule Name')"></el-input>
</el-form-item>
<el-form-item :label="$t('Node')" prop="node_id" required>
<el-select v-model="scheduleForm.node_id">
<!--<el-option :label="$t('All Nodes')" value="000000000000000000000000"></el-option>-->
<el-form-item :label="$t('Run Type')" prop="run_type" required>
<el-select v-model="scheduleForm.run_type" :placeholder="$t('Run Type')">
<el-option value="all-nodes" :label="$t('All Nodes')"/>
<el-option value="selected-nodes" :label="$t('Selected Nodes')"/>
<el-option value="random" :label="$t('Random')"/>
</el-select>
</el-form-item>
<el-form-item v-if="scheduleForm.run_type === 'selected-nodes'" :label="$t('Nodes')" prop="node_ids" required>
<el-select v-model="scheduleForm.node_ids" :placeholder="$t('Nodes')" multiple filterable>
<el-option
v-for="op in nodeList"
:key="op._id"
@@ -27,30 +33,19 @@
</el-select>
</el-form-item>
<el-form-item :label="$t('Spider')" prop="spider_id" required>
<el-select v-model="scheduleForm.spider_id" filterable>
<el-select v-model="scheduleForm.spider_id" :placeholder="$t('Spider')" filterable>
<el-option
v-for="op in spiderList"
:key="op._id"
:value="op._id"
:label="op.name"
:disabled="!op.cmd"
:label="`${op.display_name} (${op.name})`"
:disabled="isDisabledSpider(op)"
>
</el-option>
</el-select>
</el-form-item>
<!--:rules="cronRules"-->
<el-form-item :label="$t('schedules.cron')" prop="cron" required>
<!--<template slot="label">-->
<!--<el-tooltip :content="$t('schedules.cron_format')"-->
<!--placement="top">-->
<!--<span>-->
<!--{{$t('schedules.cron')}}-->
<!--<i class="fa fa-exclamation-circle"></i>-->
<!--</span>-->
<!--</el-tooltip>-->
<!--</template>-->
<el-input style="padding-right:10px"
v-model="scheduleForm.cron"
<el-form-item :label="$t('Cron')" prop="cron" required>
<el-input v-model="scheduleForm.cron"
:placeholder="$t('schedules.cron')">
</el-input>
<!--<el-button size="small" style="width:100px" type="primary" @click="onShowCronDialog">{{$t('schedules.add_cron')}}</el-button>-->
@@ -79,7 +74,7 @@
<!--cron generation popup-->
<!--<el-dialog title="生成 Cron" :visible.sync="showCron">-->
<!--<vcrontab @hide="showCron=false" @fill="onCrontabFill" :expression="expression"></vcrontab>-->
<!--<vcrontab @hide="showCron=false" @fill="onCrontabFill" :expression="expression"></vcrontab>-->
<!--</el-dialog>-->
<el-card style="border-radius: 0">
@@ -102,26 +97,53 @@
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
border>
<template v-for="col in columns">
<el-table-column :key="col.name"
<el-table-column v-if="col.name === 'status'"
:key="col.name"
:property="col.name"
:label="$t(col.label)"
:sortable="col.sortable"
:align="col.align"
:width="col.width">
<template slot-scope="scope">
{{$t(scope.row[col.name])}}
<el-tooltip v-if="scope.row[col.name] === 'error'" :content="$t(scope.row['message'])" placement="top">
<el-tag class="status-tag" type="danger">
{{scope.row[col.name] ? $t(scope.row[col.name]) : $t('NA')}}
</el-tag>
</el-tooltip>
<el-tag class="status-tag" v-else>
{{scope.row[col.name] ? $t(scope.row[col.name]) : $t('NA')}}
</el-tag>
</template>
</el-table-column>
<el-table-column v-else-if="col.name === 'run_type'" :key="col.name" :label="$t(col.label)">
<template slot-scope="scope">
<template v-if="scope.row.run_type === 'all-nodes'">{{$t('All Nodes')}}</template>
<template v-else-if="scope.row.run_type === 'selected-nodes'">{{$t('Selected Nodes')}}</template>
<template v-else-if="scope.row.run_type === 'random'">{{$t('Random')}}</template>
</template>
</el-table-column>
<el-table-column v-else :key="col.name"
:property="col.name"
:label="$t(col.label)"
:sortable="col.sortable"
:align="col.align"
:width="col.width">
<template slot-scope="scope">
{{scope.row[col.name]}}
</template>
</el-table-column>
</template>
<el-table-column :label="$t('Action')" align="left" width="150px" fixed="right">
<el-table-column :label="$t('Action')" align="left" width="180px" fixed="right">
<template slot-scope="scope">
<!-- 编辑 -->
<el-tooltip :content="$t('Edit')" placement="top">
<el-button type="warning" icon="el-icon-edit" size="mini" @click="onEdit(scope.row)"></el-button>
</el-tooltip>
<!-- 删除 -->
<el-tooltip :content="$t('Remove')" placement="top">
<el-button type="danger" icon="el-icon-delete" size="mini" @click="onRemove(scope.row)"></el-button>
</el-tooltip>
<el-tooltip v-if="isShowRun(scope.row)" :content="$t('Run')" placement="top">
<el-tooltip v-if="false" :content="$t(getStatusTooltip(scope.row))" placement="top">
<el-button type="success" icon="fa fa-bug" size="mini" @click="onCrawl(scope.row)"></el-button>
</el-tooltip>
</template>
@@ -145,11 +167,13 @@ export default {
return {
columns: [
{ name: 'name', label: 'Name', width: '180' },
{ name: 'cron', label: 'schedules.cron', width: '120' },
{ name: 'cron', label: 'Cron', width: '120' },
{ name: 'run_type', label: 'Run Type', width: '150' },
{ name: 'node_name', label: 'Node', width: '150' },
{ name: 'spider_name', label: 'Spider', width: '150' },
{ name: 'param', label: 'Parameters', width: '150' },
{ name: 'description', label: 'Description', width: 'auto' }
{ name: 'description', label: 'Description', width: 'auto' },
{ name: 'status', label: 'Status', width: 'auto' }
],
isEdit: false,
dialogTitle: '',
@@ -187,8 +211,8 @@ export default {
onAdd () {
this.isEdit = false
this.dialogVisible = true
this.$store.commit('schedule/SET_SCHEDULE_FORM', {})
this.$st.sendEv('定时任务', '添加')
this.$store.commit('schedule/SET_SCHEDULE_FORM', { node_ids: [] })
this.$st.sendEv('定时任务', '添加定时任务')
},
onAddSubmit () {
this.$refs.scheduleForm.validate(res => {
@@ -214,45 +238,92 @@ export default {
}
}
})
this.$st.sendEv('定时任务', '提交')
this.$st.sendEv('定时任务', '提交定时任务')
},
isShowRun () {
isShowRun (row) {
},
onEdit (row) {
this.$store.commit('schedule/SET_SCHEDULE_FORM', row)
this.dialogVisible = true
this.isEdit = true
this.$st.sendEv('定时任务', '修改', 'id', row._id)
this.$st.sendEv('定时任务', '修改定时任务')
},
onRemove (row) {
this.$store.dispatch('schedule/removeSchedule', row._id)
.then(() => {
setTimeout(() => {
this.$store.dispatch('schedule/getScheduleList')
this.$message.success(`Schedule "${row.name}" has been removed`)
}, 100)
this.$confirm(this.$t('Are you sure to delete the schedule task?'), this.$t('Notification'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),
type: 'warning'
}).then(() => {
this.$store.dispatch('schedule/removeSchedule', row._id)
.then(() => {
setTimeout(() => {
this.$store.dispatch('schedule/getScheduleList')
this.$message.success(`Schedule "${row.name}" has been removed`)
}, 100)
})
}).catch(() => {
})
this.$st.sendEv('定时任务', '删除定时任务')
},
onCrawl (row) {
// 停止定时任务
if (!row.status || row.status === 'running') {
this.$confirm(this.$t('Are you sure to delete the schedule task?'), this.$t('Notification'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),
type: 'warning'
}).then(() => {
this.$store.dispatch('schedule/stopSchedule', row._id)
.then((resp) => {
if (resp.data.status === 'ok') {
this.$store.dispatch('schedule/getScheduleList')
return
}
this.$message({
type: 'error',
message: resp.data.error
})
})
}).catch(() => {
})
this.$st.sendEv('定时任务', '删除', 'id', row._id)
},
onCrawl () {
},
onCrontabFill (value) {
value = value.replace(/[?]/g, '*')
this.$set(this.scheduleForm, 'cron', value)
this.$st.sendEv('定时任务', '提交生成Cron', 'cron', this.scheduleForm.cron)
},
onShowCronDialog () {
this.showCron = true
if (this.expression.split(' ').length < 7) {
// this.expression = (this.scheduleForm.cron + ' ').replace(/[?]/g, '*')
this.expression = this.scheduleForm.cron + ' '
} else {
// this.expression = this.scheduleForm.cron.replace(/[?]/g, '*')
this.expression = this.scheduleForm.cron
}
this.$st.sendEv('定时任务', '点击生成Cron', 'cron', this.scheduleForm.cron)
// 运行定时任务
if (row.status === 'stop') {
this.$confirm(this.$t('Are you sure to delete the schedule task?'), this.$t('Notification'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),
type: 'warning'
}).then(() => {
this.$store.dispatch('schedule/runSchedule', row._id)
.then((resp) => {
if (resp.data.status === 'ok') {
this.$store.dispatch('schedule/getScheduleList')
return
}
this.$message({
type: 'error',
message: resp.data.error
})
})
}).catch(() => {
})
}
},
isDisabledSpider (spider) {
if (spider.type === 'customized') {
return !spider.cmd
} else {
return false
}
},
getStatusTooltip (row) {
if (row.status === 'stop') {
return 'Start'
} else if (row.status === 'running') {
return 'Stop'
} else if (row.status === 'error') {
return 'Start'
}
}
},
created () {
@@ -289,4 +360,8 @@ export default {
min-height: 360px;
margin-top: 10px;
}
.status-tag {
cursor: pointer;
}
</style>

View File

@@ -370,17 +370,17 @@ export default {
}
await this.$store.dispatch('spider/getSpiderList')
this.$router.push(`/spiders/${res2.data.data._id}`)
this.$st.sendEv('爬虫', '添加爬虫-可配置爬虫')
this.$st.sendEv('爬虫列表', '添加爬虫', '可配置爬虫')
})
},
onAddCustomized () {
this.addDialogVisible = false
this.addCustomizedDialogVisible = true
this.$st.sendEv('爬虫', '添加爬虫-自定义爬虫')
this.$st.sendEv('爬虫列表', '添加爬虫', '自定义爬虫')
},
onRefresh () {
this.getList()
this.$st.sendEv('爬虫', '刷新')
this.$st.sendEv('爬虫列表', '刷新')
},
onSubmit () {
const vm = this
@@ -434,19 +434,19 @@ export default {
message: 'Deleted successfully'
})
})
this.$st.sendEv('爬虫', '删除')
this.$st.sendEv('爬虫列表', '删除爬虫')
})
},
onCrawl (row, ev) {
ev.stopPropagation()
this.crawlConfirmDialogVisible = true
this.activeSpiderId = row._id
this.$st.sendEv('爬虫', '点击运行')
this.$st.sendEv('爬虫列表', '点击运行')
},
onView (row, ev) {
ev.stopPropagation()
this.$router.push('/spiders/' + row._id)
this.$st.sendEv('爬虫', '查看')
this.$st.sendEv('爬虫列表', '查看爬虫')
},
onImport () {
this.$refs.importForm.validate(valid => {
@@ -467,7 +467,7 @@ export default {
})
}
})
this.$st.sendEv('爬虫', '导入爬虫')
this.$st.sendEv('爬虫列表', '导入爬虫')
},
openImportDialog () {
this.dialogVisible = true
@@ -495,10 +495,6 @@ export default {
callback(data)
})
},
onSiteSelect (item) {
this.$store.commit('spider/SET_FILTER_SITE', item._id)
this.$st.sendEv('爬虫', '搜索网站')
},
onAddConfigurableSiteSelect (item) {
this.spiderForm.site = item._id
},

View File

@@ -97,7 +97,7 @@ export default {
},
downloadCSV () {
this.$store.dispatch('task/getTaskResultExcel', this.$route.params.id)
this.$st.sendEv('任务详情-结果', '下载CSV')
this.$st.sendEv('任务详情', '结果', '下载CSV')
},
getTaskLog () {
if (this.$route.params.id) {

View File

@@ -4,31 +4,12 @@
<!--filter-->
<div class="filter">
<div class="left">
<!--<el-select size="small" class="filter-select"-->
<!--v-model="filter.node_id"-->
<!--:placeholder="$t('Node')"-->
<!--filterable-->
<!--clearable-->
<!--@change="onSelectNode">-->
<!--<el-option v-for="op in nodeList" :key="op._id" :value="op._id" :label="op.name"></el-option>-->
<!--</el-select>-->
<!--<el-select size="small" class="filter-select"-->
<!--v-model="filter.spider_id"-->
<!--:placeholder="$t('Spider')"-->
<!--filterable-->
<!--clearable-->
<!--@change="onSelectSpider">-->
<!--<el-option v-for="op in spiderList" :key="op._id" :value="op._id" :label="op.name"></el-option>-->
<!--</el-select>-->
<!--<el-button size="small" type="success"-->
<!--icon="el-icon-search"-->
<!--class="refresh"-->
<!--@click="onRefresh">-->
<!--{{$t('Search')}}-->
<!--</el-button>-->
</div>
<!--<div class="right">-->
<!--</div>-->
<div class="right">
<el-button @click="onRemoveMultipleTask" size="small" type="danger">
删除任务
</el-button>
</div>
</div>
<!--./filter-->
@@ -38,7 +19,9 @@
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
border
@row-click="onRowClick"
@selection-change="onSelectionChange">
>
<el-table-column type="selection" width="55"/>
<template v-for="col in columns">
<el-table-column v-if="col.name === 'spider_name'"
:key="col.name"
@@ -181,7 +164,9 @@ export default {
{ name: 'total_duration', label: 'Total Duration (sec)', width: '80', align: 'right' },
{ name: 'result_count', label: 'Results Count', width: '80' }
// { name: 'avg_num_results', label: 'Average Results Count per Second', width: '80' }
]
],
multipleSelection: []
}
},
computed: {
@@ -228,12 +213,6 @@ export default {
}
return false
})
// .filter((d, index) => {
// // pagination
// const pageNum = this.pageNum
// const pageSize = this.pageSize
// return (pageSize * (pageNum - 1) <= index) && (index < pageSize * pageNum)
// })
}
},
methods: {
@@ -242,13 +221,37 @@ export default {
},
onRefresh () {
this.$store.dispatch('task/getTaskList')
this.$st.sendEv('任务', '搜索')
this.$st.sendEv('任务列表', '搜索')
},
onSelectNode () {
this.$st.sendEv('任务', '选择节点')
},
onSelectSpider () {
this.$st.sendEv('任务', '选择爬虫')
onRemoveMultipleTask () {
if (this.multipleSelection.length === 0) {
this.$message({
type: 'error',
message: '选择要删除的任务'
})
return
}
this.$confirm('确定删除任务', '提示', {
confirmButtonText: '确定',
cancelButtonText: '取消',
type: 'warning'
}).then(() => {
let ids = this.multipleSelection.map(item => item._id)
this.$store.dispatch('task/deleteTaskMultiple', ids).then((resp) => {
if (resp.data.status === 'ok') {
this.$message({
type: 'success',
message: '删除任务成功'
})
this.$store.dispatch('task/getTaskList')
return
}
this.$message({
type: 'error',
message: resp.data.error
})
})
}).catch(() => {})
},
onRemove (row, ev) {
ev.stopPropagation()
@@ -264,20 +267,20 @@ export default {
message: 'Deleted successfully'
})
})
this.$st.sendEv('任务', '删除', 'id', row._id)
this.$st.sendEv('任务列表', '删除任务')
})
},
onView (row) {
this.$router.push(`/tasks/${row._id}`)
this.$st.sendEv('任务', '搜索', 'id', row._id)
this.$st.sendEv('任务列表', '查看任务')
},
onClickSpider (row) {
this.$router.push(`/spiders/${row.spider_id}`)
this.$st.sendEv('任务', '点击爬虫详情', 'id', row.spider_id)
this.$st.sendEv('任务列表', '点击爬虫详情')
},
onClickNode (row) {
this.$router.push(`/nodes/${row.node_id}`)
this.$st.sendEv('任务', '点击节点详情', 'id', row.node_id)
this.$st.sendEv('任务列表', '点击节点详情')
},
onPageChange () {
setTimeout(() => {
@@ -304,6 +307,9 @@ export default {
if (column.label !== this.$t('Action')) {
this.onView(row)
}
},
// Stores the value passed by the table's `selection-change` event so that
// onRemoveMultipleTask can read it from `multipleSelection` later.
onSelectionChange (val) {
this.multipleSelection = val
}
},
created () {
@@ -312,10 +318,9 @@ export default {
this.$store.dispatch('node/getNodeList')
},
mounted () {
// request task list every 5 seconds
this.handle = setInterval(() => {
this.$store.dispatch('task/getTaskList')
}, 5000)
// this.handle = setInterval(() => {
// this.$store.dispatch('task/getTaskList')
// }, 5000)
},
destroyed () {
clearInterval(this.handle)

View File

@@ -161,7 +161,7 @@ export default {
message: this.$t('Deleted successfully')
})
})
this.$st.sendEv('用户', '删除', 'id', row._id)
this.$st.sendEv('用户列表', '删除用户')
})
// this.$store.commit('user/SET_USER_FORM', row)
},
@@ -178,7 +178,7 @@ export default {
})
}
})
this.$st.sendEv('用户', '编辑')
this.$st.sendEv('用户列表', '编辑用户')
}
},
created () {