mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-29 18:00:51 +01:00
added spider stats
This commit is contained in:
@@ -95,6 +95,7 @@ func main() {
|
||||
app.GET("/spiders/:id/file", routes.GetSpiderFile) // 爬虫文件读取
|
||||
app.POST("/spiders/:id/file", routes.PostSpiderFile) // 爬虫目录写入
|
||||
app.GET("/spiders/:id/dir", routes.GetSpiderDir) // 爬虫目录
|
||||
app.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据
|
||||
// 任务
|
||||
app.GET("/tasks", routes.GetTaskList) // 任务列表
|
||||
app.GET("/tasks/:id", routes.GetTask) // 任务详情
|
||||
@@ -110,6 +111,8 @@ func main() {
|
||||
app.PUT("/schedules", routes.PutSchedule) // 创建定时任务
|
||||
app.POST("/schedules/:id", routes.PostSchedule) // 修改定时任务
|
||||
app.DELETE("/schedules/:id", routes.DeleteSchedule) // 删除定时任务
|
||||
// 统计数据
|
||||
app.GET("/stats/home", routes.GetHomeStats) // 首页统计数据
|
||||
}
|
||||
|
||||
// 路由ping
|
||||
|
||||
@@ -137,3 +137,15 @@ func GetNodeTaskList(id bson.ObjectId) ([]Task, error) {
|
||||
}
|
||||
return tasks, nil
|
||||
}
|
||||
|
||||
func GetNodeCount(query interface{}) (int, error) {
|
||||
s, c := database.GetCol("nodes")
|
||||
defer s.Close()
|
||||
|
||||
count, err := c.Find(query).Count()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
@@ -129,3 +129,15 @@ func RemoveSchedule(id bson.ObjectId) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetScheduleCount() (int, error) {
|
||||
s, c := database.GetCol("schedules")
|
||||
defer s.Close()
|
||||
|
||||
count, err := c.Count()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
@@ -92,6 +92,8 @@ func (spider *Spider) GetLastTask() (Task, error) {
|
||||
return tasks[0], nil
|
||||
}
|
||||
|
||||
|
||||
|
||||
func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) {
|
||||
s, c := database.GetCol("spiders")
|
||||
defer s.Close()
|
||||
@@ -165,3 +167,15 @@ func RemoveSpider(id bson.ObjectId) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetSpiderCount() (int, error) {
|
||||
s, c := database.GetCol("spiders")
|
||||
defer s.Close()
|
||||
|
||||
count, err := c.Count()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"crawlab/constants"
|
||||
"crawlab/database"
|
||||
"github.com/apex/log"
|
||||
"github.com/globalsign/mgo"
|
||||
@@ -10,25 +11,34 @@ import (
|
||||
)
|
||||
|
||||
type Task struct {
|
||||
Id string `json:"_id" bson:"_id"`
|
||||
SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"`
|
||||
StartTs time.Time `json:"start_ts" bson:"start_ts"`
|
||||
FinishTs time.Time `json:"finish_ts" bson:"finish_ts"`
|
||||
Status string `json:"status" bson:"status"`
|
||||
NodeId bson.ObjectId `json:"node_id" bson:"node_id"`
|
||||
LogPath string `json:"log_path" bson:"log_path"`
|
||||
Cmd string `json:"cmd" bson:"cmd"`
|
||||
Error string `json:"error" bson:"error"`
|
||||
Id string `json:"_id" bson:"_id"`
|
||||
SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"`
|
||||
StartTs time.Time `json:"start_ts" bson:"start_ts"`
|
||||
FinishTs time.Time `json:"finish_ts" bson:"finish_ts"`
|
||||
Status string `json:"status" bson:"status"`
|
||||
NodeId bson.ObjectId `json:"node_id" bson:"node_id"`
|
||||
LogPath string `json:"log_path" bson:"log_path"`
|
||||
Cmd string `json:"cmd" bson:"cmd"`
|
||||
Error string `json:"error" bson:"error"`
|
||||
ResultCount int `json:"result_count" bson:"result_count"`
|
||||
WaitDuration float64 `json:"wait_duration" bson:"wait_duration"`
|
||||
RuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"`
|
||||
TotalDuration float64 `json:"total_duration" bson:"total_duration"`
|
||||
|
||||
// 前端数据
|
||||
SpiderName string `json:"spider_name"`
|
||||
NodeName string `json:"node_name"`
|
||||
NumResults int `json:"num_results"`
|
||||
|
||||
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
|
||||
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
|
||||
}
|
||||
|
||||
// TaskDailyItem is one day's entry in the daily task statistics.
type TaskDailyItem struct {
	// Date is the day as a "YYYYMMDD" string; it is decoded from the
	// aggregation result's _id field.
	Date string `json:"date" bson:"_id"`
	// TaskCount is the number of tasks counted for the day.
	TaskCount int `json:"task_count" bson:"task_count"`
	// AvgRuntimeDuration is the summed runtime_duration of the day's tasks
	// divided by TaskCount.
	AvgRuntimeDuration float64 `json:"avg_runtime_duration" bson:"avg_runtime_duration"`
}
|
||||
|
||||
func (t *Task) GetSpider() (Spider, error) {
|
||||
spider, err := GetSpider(t.SpiderId)
|
||||
if err != nil {
|
||||
@@ -123,17 +133,6 @@ func GetTaskList(filter interface{}, skip int, limit int, sortKey string) ([]Tas
|
||||
} else {
|
||||
tasks[i].NodeName = node.Name
|
||||
}
|
||||
|
||||
// 获取结果数
|
||||
if spider.Col == "" {
|
||||
continue
|
||||
}
|
||||
s, c := database.GetCol(spider.Col)
|
||||
tasks[i].NumResults, err = c.Find(bson.M{"task_id": task.Id}).Count()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
s.Close()
|
||||
}
|
||||
return tasks, nil
|
||||
}
|
||||
@@ -190,3 +189,141 @@ func RemoveTask(id string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetTaskCount(query interface{}) (int, error) {
|
||||
s, c := database.GetCol("tasks")
|
||||
defer s.Close()
|
||||
|
||||
count, err := c.Find(query).Count()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func GetDailyTaskStats(query bson.M) ([]TaskDailyItem, error) {
|
||||
s, c := database.GetCol("tasks")
|
||||
defer s.Close()
|
||||
|
||||
// 起始日期
|
||||
startDate := time.Now().Add(- 30 * 24 * time.Hour)
|
||||
endDate := time.Now()
|
||||
|
||||
// query
|
||||
query["create_ts"] = bson.M{
|
||||
"$gte": startDate,
|
||||
"$lt": endDate,
|
||||
}
|
||||
|
||||
// match
|
||||
op1 := bson.M{
|
||||
"$match": query,
|
||||
}
|
||||
|
||||
// project
|
||||
op2 := bson.M{
|
||||
"$project": bson.M{
|
||||
"date": bson.M{
|
||||
"$dateToString": bson.M{
|
||||
"format": "%Y%m%d",
|
||||
"date": "$create_ts",
|
||||
"timezone": "Asia/Shanghai",
|
||||
},
|
||||
},
|
||||
"success_count": bson.M{
|
||||
"$cond": []interface{}{
|
||||
bson.M{
|
||||
"$eq": []string{
|
||||
"$status",
|
||||
constants.StatusFinished,
|
||||
},
|
||||
},
|
||||
1,
|
||||
0,
|
||||
},
|
||||
},
|
||||
"runtime_duration": "$runtime_duration",
|
||||
},
|
||||
}
|
||||
|
||||
// group
|
||||
op3 := bson.M{
|
||||
"$group": bson.M{
|
||||
"_id": "$date",
|
||||
"task_count": bson.M{"$sum": 1},
|
||||
"runtime_duration": bson.M{"$sum": "$runtime_duration"},
|
||||
},
|
||||
}
|
||||
|
||||
op4 := bson.M{
|
||||
"$project": bson.M{
|
||||
"task_count": "$task_count",
|
||||
"date": "$date",
|
||||
"avg_runtime_duration": bson.M{
|
||||
"$divide": []string{"$runtime_duration", "$task_count"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// run aggregation
|
||||
var items []TaskDailyItem
|
||||
if err := c.Pipe([]bson.M{op1, op2, op3, op4}).All(&items); err != nil {
|
||||
return items, err
|
||||
}
|
||||
|
||||
// 缓存每日数据
|
||||
dict := make(map[string]TaskDailyItem)
|
||||
for _, item := range items {
|
||||
dict[item.Date] = item
|
||||
}
|
||||
|
||||
// 遍历日期
|
||||
var dailyItems []TaskDailyItem
|
||||
for date := startDate; endDate.Sub(date) > 0; date = date.Add(24 * time.Hour) {
|
||||
dateStr := date.Format("20060102")
|
||||
dailyItems = append(dailyItems, TaskDailyItem{
|
||||
Date: dateStr,
|
||||
TaskCount: dict[dateStr].TaskCount,
|
||||
AvgRuntimeDuration: dict[dateStr].AvgRuntimeDuration,
|
||||
})
|
||||
}
|
||||
|
||||
return dailyItems, nil
|
||||
}
|
||||
|
||||
func UpdateTaskResultCount(id string) (err error) {
|
||||
// 获取任务
|
||||
task, err := GetTask(id)
|
||||
if err != nil {
|
||||
log.Errorf(err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
// 获取爬虫
|
||||
spider, err := GetSpider(task.SpiderId)
|
||||
if err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
|
||||
// 获取结果数量
|
||||
s, c := database.GetCol(spider.Col)
|
||||
defer s.Close()
|
||||
resultCount, err := c.Find(bson.M{"task_id": task.Id}).Count()
|
||||
if err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
|
||||
// 保存结果数量
|
||||
task.ResultCount = resultCount
|
||||
if err := task.Save(); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
72
backend/routes/stats.go
Normal file
72
backend/routes/stats.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"crawlab/constants"
|
||||
"crawlab/model"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func GetHomeStats(c *gin.Context) {
|
||||
type DataOverview struct {
|
||||
TaskCount int `json:"task_count"`
|
||||
SpiderCount int `json:"spider_count"`
|
||||
ActiveNodeCount int `json:"active_node_count"`
|
||||
ScheduleCount int `json:"schedule_count"`
|
||||
}
|
||||
|
||||
type Data struct {
|
||||
Overview DataOverview `json:"overview"`
|
||||
Daily []model.TaskDailyItem `json:"daily"`
|
||||
}
|
||||
|
||||
// 任务总数
|
||||
taskCount, err := model.GetTaskCount(nil)
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 在线节点总数
|
||||
activeNodeCount, err := model.GetNodeCount(bson.M{"status": constants.StatusOnline})
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 爬虫总数
|
||||
spiderCount, err := model.GetSpiderCount()
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 定时任务数
|
||||
scheduleCount, err := model.GetScheduleCount()
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 每日任务数
|
||||
items, err := model.GetDailyTaskStats(bson.M{})
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: Data{
|
||||
Overview: DataOverview{
|
||||
ActiveNodeCount: activeNodeCount,
|
||||
TaskCount: taskCount,
|
||||
SpiderCount: spiderCount,
|
||||
ScheduleCount: scheduleCount,
|
||||
},
|
||||
Daily: items,
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -104,6 +104,27 @@ func GetCurrentNode() (model.Node, error) {
|
||||
|
||||
// 如果获取失败
|
||||
if err != nil {
|
||||
// 如果为主节点,表示为第一次注册,插入节点信息
|
||||
if IsMaster() {
|
||||
// 获取本机IP地址
|
||||
ip, err := GetIp()
|
||||
if err != nil {
|
||||
debug.PrintStack()
|
||||
return model.Node{}, err
|
||||
}
|
||||
// 生成节点
|
||||
node = model.Node{
|
||||
Id: bson.NewObjectId(),
|
||||
Ip: ip,
|
||||
Name: mac,
|
||||
Mac: mac,
|
||||
IsMaster: true,
|
||||
}
|
||||
if err := node.Add(); err != nil {
|
||||
return node, err
|
||||
}
|
||||
return node, nil
|
||||
}
|
||||
// 增加错误次数
|
||||
errNum++
|
||||
|
||||
|
||||
@@ -142,9 +142,6 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
|
||||
return
|
||||
}
|
||||
t.Status = constants.StatusCancelled
|
||||
} else if signal == constants.TaskFinish {
|
||||
// 完成进程
|
||||
t.Status = constants.StatusFinished
|
||||
}
|
||||
|
||||
// 保存任务
|
||||
@@ -205,6 +202,17 @@ func GetWorkerPrefix(id int) string {
|
||||
return "[Worker " + strconv.Itoa(id) + "] "
|
||||
}
|
||||
|
||||
// 统计任务结果数
|
||||
func SaveTaskResultCount(id string) func() {
|
||||
return func() {
|
||||
if err := model.UpdateTaskResultCount(id); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 执行任务
|
||||
func ExecuteTask(id int) {
|
||||
if LockList[id] {
|
||||
@@ -315,9 +323,10 @@ func ExecuteTask(id int) {
|
||||
}
|
||||
|
||||
// 任务赋值
|
||||
t.NodeId = node.Id // 任务节点信息
|
||||
t.StartTs = time.Now() // 任务开始时间
|
||||
t.Status = constants.StatusRunning // 任务状态
|
||||
t.NodeId = node.Id // 任务节点信息
|
||||
t.StartTs = time.Now() // 任务开始时间
|
||||
t.Status = constants.StatusRunning // 任务状态
|
||||
t.WaitDuration = t.StartTs.Sub(t.CreateTs).Seconds() // 等待时长
|
||||
|
||||
// 开始执行任务
|
||||
log.Infof(GetWorkerPrefix(id) + "开始执行任务(ID:" + t.Id + ")")
|
||||
@@ -329,12 +338,45 @@ func ExecuteTask(id int) {
|
||||
return
|
||||
}
|
||||
|
||||
// 起一个cron执行器来统计任务结果数
|
||||
cronExec := cron.New(cron.WithSeconds())
|
||||
_, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id))
|
||||
if err != nil {
|
||||
log.Errorf(GetWorkerPrefix(id) + err.Error())
|
||||
return
|
||||
}
|
||||
cronExec.Start()
|
||||
defer cronExec.Stop()
|
||||
|
||||
// 执行Shell命令
|
||||
if err := ExecuteShellCmd(cmd, cwd, t, spider); err != nil {
|
||||
log.Errorf(GetWorkerPrefix(id) + err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// 更新任务结果数
|
||||
if err := model.UpdateTaskResultCount(t.Id); err != nil {
|
||||
log.Errorf(GetWorkerPrefix(id) + err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// 完成进程
|
||||
t, err = model.GetTask(t.Id)
|
||||
if err != nil {
|
||||
log.Errorf(GetWorkerPrefix(id) + err.Error())
|
||||
return
|
||||
}
|
||||
t.Status = constants.StatusFinished // 任务状态: 已完成
|
||||
t.FinishTs = time.Now() // 结束时间
|
||||
t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长
|
||||
t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长
|
||||
|
||||
// 保存任务
|
||||
if err := t.Save(); err != nil {
|
||||
log.Errorf(GetWorkerPrefix(id) + err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// 结束计时
|
||||
toc := time.Now()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user