From 946bb7ceba61a810dd088805997db48aa6519dff Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Fri, 26 Jul 2019 17:07:24 +0800 Subject: [PATCH] added spider stats --- backend/main.go | 3 + backend/model/node.go | 12 + backend/model/schedule.go | 12 + backend/model/spider.go | 14 + backend/model/task.go | 179 ++++++++++-- backend/routes/stats.go | 72 +++++ backend/services/node.go | 21 ++ backend/services/task.go | 54 +++- .../Environment/EnvironmentList.vue | 4 + frontend/src/components/File/FileList.vue | 14 - .../src/components/InfoView/TaskInfoView.vue | 50 +++- frontend/src/components/Node/NodeNetwork.vue | 17 +- frontend/src/components/Stats/MetricCard.vue | 1 - frontend/src/components/Stats/SpiderStats.vue | 82 +----- .../components/TableView/TaskTableView.vue | 27 +- frontend/src/i18n/zh.js | 6 +- frontend/src/store/modules/spider.js | 10 +- frontend/src/views/home/Home.vue | 24 +- frontend/src/views/schedule/ScheduleList.vue | 81 +++--- frontend/src/views/spider/SpiderDetail.vue | 19 +- frontend/src/views/spider/SpiderList.vue | 238 +++++++-------- frontend/src/views/task/TaskList.vue | 274 +++++++++--------- 22 files changed, 763 insertions(+), 451 deletions(-) create mode 100644 backend/routes/stats.go diff --git a/backend/main.go b/backend/main.go index e4669ce1..3d83a251 100644 --- a/backend/main.go +++ b/backend/main.go @@ -95,6 +95,7 @@ func main() { app.GET("/spiders/:id/file", routes.GetSpiderFile) // 爬虫文件读取 app.POST("/spiders/:id/file", routes.PostSpiderFile) // 爬虫目录写入 app.GET("/spiders/:id/dir", routes.GetSpiderDir) // 爬虫目录 + app.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据 // 任务 app.GET("/tasks", routes.GetTaskList) // 任务列表 app.GET("/tasks/:id", routes.GetTask) // 任务详情 @@ -110,6 +111,8 @@ func main() { app.PUT("/schedules", routes.PutSchedule) // 创建定时任务 app.POST("/schedules/:id", routes.PostSchedule) // 修改定时任务 app.DELETE("/schedules/:id", routes.DeleteSchedule) // 删除定时任务 + // 统计数据 + app.GET("/stats/home", routes.GetHomeStats) // 首页统计数据 } // 路由ping diff --git a/backend/model/node.go b/backend/model/node.go index 4c51e41d..7bdffb1c 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -137,3 +137,15 @@ func GetNodeTaskList(id bson.ObjectId) ([]Task, error) { } return tasks, nil } + +func GetNodeCount(query interface{}) (int, error) { + s, c := database.GetCol("nodes") + defer s.Close() + + count, err := c.Find(query).Count() + if err != nil { + return 0, err + } + + return count, nil +} diff --git a/backend/model/schedule.go b/backend/model/schedule.go index eee32dcf..9f77c452 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -129,3 +129,15 @@ func RemoveSchedule(id bson.ObjectId) error { return nil } + +func GetScheduleCount() (int, error) { + s, c := database.GetCol("schedules") + defer s.Close() + + count, err := c.Count() + if err != nil { + return 0, err + } + + return count, nil +} diff --git a/backend/model/spider.go b/backend/model/spider.go index 037e6b06..406c4bdc 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -92,6 +92,8 @@ func (spider *Spider) GetLastTask() (Task, error) { return tasks[0], nil } + + func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) { s, c := database.GetCol("spiders") defer s.Close() @@ -165,3 +167,15 @@ func RemoveSpider(id bson.ObjectId) error { return nil } + +func GetSpiderCount() (int, error) { + s, c := database.GetCol("spiders") + defer s.Close() + + count, err := c.Count() + if err != nil { + return 0, err + } + + return count, nil +} diff --git a/backend/model/task.go b/backend/model/task.go index e2384ed5..8ae782b5 100644 --- a/backend/model/task.go +++ b/backend/model/task.go @@ -1,6 +1,7 @@ package model import ( + "crawlab/constants" "crawlab/database" "github.com/apex/log" "github.com/globalsign/mgo" @@ -10,25 +11,34 @@ import ( ) type Task struct { - Id string `json:"_id" bson:"_id"` - SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"` - StartTs time.Time `json:"start_ts" bson:"start_ts"` - FinishTs time.Time `json:"finish_ts" bson:"finish_ts"` - Status string `json:"status" bson:"status"` - NodeId bson.ObjectId `json:"node_id" bson:"node_id"` - LogPath string `json:"log_path" bson:"log_path"` - Cmd string `json:"cmd" bson:"cmd"` - Error string `json:"error" bson:"error"` + Id string `json:"_id" bson:"_id"` + SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"` + StartTs time.Time `json:"start_ts" bson:"start_ts"` + FinishTs time.Time `json:"finish_ts" bson:"finish_ts"` + Status string `json:"status" bson:"status"` + NodeId bson.ObjectId `json:"node_id" bson:"node_id"` + LogPath string `json:"log_path" bson:"log_path"` + Cmd string `json:"cmd" bson:"cmd"` + Error string `json:"error" bson:"error"` + ResultCount int `json:"result_count" bson:"result_count"` + WaitDuration float64 `json:"wait_duration" bson:"wait_duration"` + RuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"` + TotalDuration float64 `json:"total_duration" bson:"total_duration"` // 前端数据 SpiderName string `json:"spider_name"` NodeName string `json:"node_name"` - NumResults int `json:"num_results"` CreateTs time.Time `json:"create_ts" bson:"create_ts"` UpdateTs time.Time `json:"update_ts" bson:"update_ts"` } +type TaskDailyItem struct { + Date string `json:"date" bson:"_id"` + TaskCount int `json:"task_count" bson:"task_count"` + AvgRuntimeDuration float64 `json:"avg_runtime_duration" bson:"avg_runtime_duration"` +} + func (t *Task) GetSpider() (Spider, error) { spider, err := GetSpider(t.SpiderId) if err != nil { @@ -123,17 +133,6 @@ func GetTaskList(filter interface{}, skip int, limit int, sortKey string) ([]Tas } else { tasks[i].NodeName = node.Name } - - // 获取结果数 - if spider.Col == "" { - continue - } - s, c := database.GetCol(spider.Col) - tasks[i].NumResults, err = c.Find(bson.M{"task_id": task.Id}).Count() - if err != nil { - continue - } - s.Close() } return tasks, nil } @@ -190,3 +189,141 @@ func RemoveTask(id string) error { return nil } + +func GetTaskCount(query interface{}) (int, error) { + s, c := database.GetCol("tasks") + defer s.Close() + + count, err := c.Find(query).Count() + if err != nil { + return 0, err + } + + return count, nil +} + +func GetDailyTaskStats(query bson.M) ([]TaskDailyItem, error) { + s, c := database.GetCol("tasks") + defer s.Close() + + // 起始日期 + startDate := time.Now().Add(- 30 * 24 * time.Hour) + endDate := time.Now() + + // query + query["create_ts"] = bson.M{ + "$gte": startDate, + "$lt": endDate, + } + + // match + op1 := bson.M{ + "$match": query, + } + + // project + op2 := bson.M{ + "$project": bson.M{ + "date": bson.M{ + "$dateToString": bson.M{ + "format": "%Y%m%d", + "date": "$create_ts", + "timezone": "Asia/Shanghai", + }, + }, + "success_count": bson.M{ + "$cond": []interface{}{ + bson.M{ + "$eq": []string{ + "$status", + constants.StatusFinished, + }, + }, + 1, + 0, + }, + }, + "runtime_duration": "$runtime_duration", + }, + } + + // group + op3 := bson.M{ + "$group": bson.M{ + "_id": "$date", + "task_count": bson.M{"$sum": 1}, + "runtime_duration": bson.M{"$sum": "$runtime_duration"}, + }, + } + + op4 := bson.M{ + "$project": bson.M{ + "task_count": "$task_count", + "date": "$date", + "avg_runtime_duration": bson.M{ + "$divide": []string{"$runtime_duration", "$task_count"}, + }, + }, + } + + // run aggregation + var items []TaskDailyItem + if err := c.Pipe([]bson.M{op1, op2, op3, op4}).All(&items); err != nil { + return items, err + } + + // 缓存每日数据 + dict := make(map[string]TaskDailyItem) + for _, item := range items { + dict[item.Date] = item + } + + // 遍历日期 + var dailyItems []TaskDailyItem + for date := startDate; endDate.Sub(date) > 0; date = date.Add(24 * time.Hour) { + dateStr := date.Format("20060102") + dailyItems = append(dailyItems, TaskDailyItem{ + Date: dateStr, + TaskCount: dict[dateStr].TaskCount, + AvgRuntimeDuration: dict[dateStr].AvgRuntimeDuration, + }) + } + + return dailyItems, nil +} + +func UpdateTaskResultCount(id string) (err error) { + // 获取任务 + task, err := GetTask(id) + if err != nil { + log.Errorf(err.Error()) + return err + } + + // 获取爬虫 + spider, err := GetSpider(task.SpiderId) + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + + // 获取结果数量 + s, c := database.GetCol(spider.Col) + defer s.Close() + resultCount, err := c.Find(bson.M{"task_id": task.Id}).Count() + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + + // 保存结果数量 + task.ResultCount = resultCount + if err := task.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + return nil +} diff --git a/backend/routes/stats.go b/backend/routes/stats.go new file mode 100644 index 00000000..8590bbd7 --- /dev/null +++ b/backend/routes/stats.go @@ -0,0 +1,72 @@ +package routes + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/gin-gonic/gin" + "github.com/globalsign/mgo/bson" + "net/http" +) + +func GetHomeStats(c *gin.Context) { + type DataOverview struct { + TaskCount int `json:"task_count"` + SpiderCount int `json:"spider_count"` + ActiveNodeCount int `json:"active_node_count"` + ScheduleCount int `json:"schedule_count"` + } + + type Data struct { + Overview DataOverview `json:"overview"` + Daily []model.TaskDailyItem `json:"daily"` + } + + // 任务总数 + taskCount, err := model.GetTaskCount(nil) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 在线节点总数 + activeNodeCount, err := model.GetNodeCount(bson.M{"status": constants.StatusOnline}) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 爬虫总数 + spiderCount, err := model.GetSpiderCount() + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 定时任务数 + scheduleCount, err := model.GetScheduleCount() + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 每日任务数 + items, err := model.GetDailyTaskStats(bson.M{}) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: Data{ + Overview: DataOverview{ + ActiveNodeCount: activeNodeCount, + TaskCount: taskCount, + SpiderCount: spiderCount, + ScheduleCount: scheduleCount, + }, + Daily: items, + }, + }) +} diff --git a/backend/services/node.go b/backend/services/node.go index 1bf5ed3d..0c2c9527 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -104,6 +104,27 @@ func GetCurrentNode() (model.Node, error) { // 如果获取失败 if err != nil { + // 如果为主节点,表示为第一次注册,插入节点信息 + if IsMaster() { + // 获取本机IP地址 + ip, err := GetIp() + if err != nil { + debug.PrintStack() + return model.Node{}, err + } + // 生成节点 + node = model.Node{ + Id: bson.NewObjectId(), + Ip: ip, + Name: mac, + Mac: mac, + IsMaster: true, + } + if err := node.Add(); err != nil { + return node, err + } + return node, nil + } // 增加错误次数 errNum++ diff --git a/backend/services/task.go b/backend/services/task.go index 306faf19..51b596a7 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -142,9 +142,6 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e return } t.Status = constants.StatusCancelled - } else if signal == constants.TaskFinish { - // 完成进程 - t.Status = constants.StatusFinished } // 保存任务 @@ -205,6 +202,17 @@ func GetWorkerPrefix(id int) string { return "[Worker " + strconv.Itoa(id) + "] " } +// 统计任务结果数 +func SaveTaskResultCount(id string) func() { + return func() { + if err := model.UpdateTaskResultCount(id); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + } +} + // 执行任务 func ExecuteTask(id int) { if LockList[id] { @@ -315,9 +323,10 @@ func ExecuteTask(id int) { } // 任务赋值 - t.NodeId = node.Id // 任务节点信息 - t.StartTs = time.Now() // 任务开始时间 - t.Status = constants.StatusRunning // 任务状态 + t.NodeId = node.Id // 任务节点信息 + t.StartTs = time.Now() // 任务开始时间 + t.Status = constants.StatusRunning // 任务状态 + t.WaitDuration = t.StartTs.Sub(t.CreateTs).Seconds() // 等待时长 // 开始执行任务 log.Infof(GetWorkerPrefix(id) + "开始执行任务(ID:" + t.Id + ")") @@ -329,12 +338,45 @@ func ExecuteTask(id int) { return } + // 起一个cron执行器来统计任务结果数 + cronExec := cron.New(cron.WithSeconds()) + _, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id)) + if err != nil { + log.Errorf(GetWorkerPrefix(id) + err.Error()) + return + } + cronExec.Start() + defer cronExec.Stop() + // 执行Shell命令 if err := ExecuteShellCmd(cmd, cwd, t, spider); err != nil { log.Errorf(GetWorkerPrefix(id) + err.Error()) return } + // 更新任务结果数 + if err := model.UpdateTaskResultCount(t.Id); err != nil { + log.Errorf(GetWorkerPrefix(id) + err.Error()) + return + } + + // 完成进程 + t, err = model.GetTask(t.Id) + if err != nil { + log.Errorf(GetWorkerPrefix(id) + err.Error()) + return + } + t.Status = constants.StatusFinished // 任务状态: 已完成 + t.FinishTs = time.Now() // 结束时间 + t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长 + t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长 + + // 保存任务 + if err := t.Save(); err != nil { + log.Errorf(GetWorkerPrefix(id) + err.Error()) + return + } + // 结束计时 toc := time.Now() diff --git a/frontend/src/components/Environment/EnvironmentList.vue b/frontend/src/components/Environment/EnvironmentList.vue index 6858aba4..dbc2c9bb 100644 --- a/frontend/src/components/Environment/EnvironmentList.vue +++ b/frontend/src/components/Environment/EnvironmentList.vue @@ -74,4 +74,8 @@ export default { width: 100%; text-align: right; } + + .el-table { + min-height: 360px; + } diff --git a/frontend/src/components/File/FileList.vue b/frontend/src/components/File/FileList.vue index 88f48b46..4d263a7a 100644 --- a/frontend/src/components/File/FileList.vue +++ b/frontend/src/components/File/FileList.vue @@ -87,23 +87,9 @@ export default { } }, methods: { - getIcon (type) { - if (type === 1) { - return 'fa-file-o' - } else if (type === 2) { - return 'fa-folder' - } - }, onEdit () { this.isEdit = true }, - onChange (path) { - this.$store.commit('file/SET_CURRENT_PATH', path) - }, - onChangeSubmit () { - this.isEdit = false - this.$store.dispatch('file/getFileList', { path: this.currentPath }) - }, onItemClick (item) { if (item.is_dir) { // 目录 diff --git a/frontend/src/components/InfoView/TaskInfoView.vue b/frontend/src/components/InfoView/TaskInfoView.vue index 5eaefb3b..bfe6419a 100644 --- a/frontend/src/components/InfoView/TaskInfoView.vue +++ b/frontend/src/components/InfoView/TaskInfoView.vue @@ -13,24 +13,33 @@ - + - - + + - - + + - - + + + + + + + + + + + - - - - - + + + + +
{{ taskForm.error }} @@ -50,6 +59,7 @@ import { mapState } from 'vuex' import StatusTag from '../Status/StatusTag' +import dayjs from 'dayjs' export default { name: 'NodeInfoView', @@ -70,6 +80,22 @@ export default { .then(() => { this.$message.success(`Task "${this.$route.params.id}" has been sent signal to stop`) }) + }, + getTime (str) { + if (!str || str.match('^0001')) return 'NA' + return dayjs(str).format('YYYY-MM-DD HH:mm:ss') + }, + getWaitDuration (row) { + if (row.start_ts.match('^0001')) return 'NA' + return dayjs(row.start_ts).diff(row.create_ts, 'second') + }, + getRuntimeDuration (row) { + if (row.finish_ts.match('^0001')) return 'NA' + return dayjs(row.finish_ts).diff(row.start_ts, 'second') + }, + getTotalDuration (row) { + if (row.finish_ts.match('^0001')) return 'NA' + return dayjs(row.finish_ts).diff(row.create_ts, 'second') } } } diff --git a/frontend/src/components/Node/NodeNetwork.vue b/frontend/src/components/Node/NodeNetwork.vue index e185be09..3932216b 100644 --- a/frontend/src/components/Node/NodeNetwork.vue +++ b/frontend/src/components/Node/NodeNetwork.vue @@ -100,14 +100,14 @@ export default { if (this.masterNode.id === this.nodes[i].id) continue // master - links.push({ - source: this.masterNode.id, - target: this.nodes[i].id, - value: 0.5, - lineStyle: { - color: '#409EFF' - } - }) + // links.push({ + // source: this.masterNode.id, + // target: this.nodes[i].id, + // value: 0.5, + // lineStyle: { + // color: '#409EFF' + // } + // }) } return links } @@ -120,6 +120,7 @@ export default { }, tooltip: { formatter: params => { + if (!params.data.name) return let str = '' if (params.data.name) str += '' + params.data.name + '
' if (params.data.ip) str += 'IP: ' + params.data.ip + '
' diff --git a/frontend/src/components/Stats/MetricCard.vue b/frontend/src/components/Stats/MetricCard.vue index 14658dbd..1858553d 100644 --- a/frontend/src/components/Stats/MetricCard.vue +++ b/frontend/src/components/Stats/MetricCard.vue @@ -27,7 +27,6 @@ export default { default: '' }, value: { - type: String, default: '' }, type: { diff --git a/frontend/src/components/Stats/SpiderStats.vue b/frontend/src/components/Stats/SpiderStats.vue index 516c6b0e..f239f823 100644 --- a/frontend/src/components/Stats/SpiderStats.vue +++ b/frontend/src/components/Stats/SpiderStats.vue @@ -17,20 +17,14 @@ type="success"/>
- - -

{{$t('Tasks by Status')}}

-
-
-
- +

{{$t('Daily Tasks')}}

@@ -39,13 +33,7 @@
- - -

{{$t('Tasks by Node')}}

-
-
-
- +

{{$t('Daily Avg Duration (sec)')}}

@@ -71,64 +59,6 @@ export default { } }, methods: { - renderTaskPieStatus () { - const chart = echarts.init(this.$el.querySelector('#task-pie-status')) - const option = { - tooltip: { - show: true - }, - series: [{ - name: '', - type: 'pie', - // radius: ['50%', '70%'], - data: this.statusStats.map(d => { - let color - if (d.name === 'SUCCESS') { - color = '#67c23a' - } else if (d.name === 'STARTED') { - color = '#e6a23c' - } else if (d.name === 'FAILURE') { - color = '#f56c6c' - } else { - color = 'grey' - } - return { - name: this.$t(d.name), - value: d.value, - itemStyle: { - color - } - } - }) - }] - } - chart.setOption(option) - }, - - renderTaskPieNode () { - const chart = echarts.init(this.$el.querySelector('#task-pie-node')) - const option = { - tooltip: { - show: true - }, - series: [{ - name: '', - type: 'pie', - // radius: ['50%', '70%'], - data: this.nodeStats.map(d => { - return { - name: d.name, - value: d.value - // itemStyle: { - // color - // } - } - }) - }] - } - chart.setOption(option) - }, - renderTaskLine () { const chart = echarts.init(this.$el.querySelector('#task-line')) const option = { @@ -145,7 +75,7 @@ export default { }, series: [{ type: 'line', - data: this.dailyStats.map(d => d.count), + data: this.dailyStats.map(d => d.task_count), areaStyle: {}, smooth: true }], @@ -173,7 +103,7 @@ export default { }, series: [{ type: 'line', - data: this.dailyStats.map(d => d.duration), + data: this.dailyStats.map(d => d.avg_runtime_duration), areaStyle: {}, smooth: true }], @@ -186,9 +116,7 @@ export default { }, render () { - this.renderTaskPieStatus() this.renderTaskLine() - this.renderTaskPieNode() this.renderDurationLine() }, diff --git a/frontend/src/components/TableView/TaskTableView.vue b/frontend/src/components/TableView/TaskTableView.vue index afd61540..cd2ec10c 100644 --- a/frontend/src/components/TableView/TaskTableView.vue +++ b/frontend/src/components/TableView/TaskTableView.vue @@ -4,7 +4,7 @@
{{title}}
- + + + + @@ -23,7 +28,7 @@ - + @@ -276,6 +280,7 @@ export default { } .table { + min-height: 360px; margin-top: 10px; } diff --git a/frontend/src/views/spider/SpiderDetail.vue b/frontend/src/views/spider/SpiderDetail.vue index 0925e9fd..0875759e 100644 --- a/frontend/src/views/spider/SpiderDetail.vue +++ b/frontend/src/views/spider/SpiderDetail.vue @@ -9,7 +9,7 @@ - + @@ -107,18 +107,31 @@ export default { display: flex; align-items: center; position: absolute; - right: 20px; + right: 48px; /*float: right;*/ z-index: 999; - margin-top: -7px; + margin-top: 5px; } .selector .el-select { + height: 30px; + line-height: 30px; padding-left: 10px; + width: 180px; + border-radius: 0; + } + + .selector .el-select >>> .el-input__icon, + .selector .el-select >>> .el-input__inner { + border-radius: 0; + height: 30px; + line-height: 30px; } .label { text-align: right; width: 80px; + color: #909399; + font-weight: 100; } diff --git a/frontend/src/views/spider/SpiderList.vue b/frontend/src/views/spider/SpiderList.vue index 92cf36c2..12d16e93 100644 --- a/frontend/src/views/spider/SpiderList.vue +++ b/frontend/src/views/spider/SpiderList.vue @@ -94,41 +94,6 @@ - -
- - - - - - -
- - -
-
- - {{$t('Import Spiders')}} - - - {{$t('Add Spider')}} - - - {{$t('Refresh')}} - -
-
- - - - - - diff --git a/frontend/src/views/task/TaskList.vue b/frontend/src/views/task/TaskList.vue index ae50c11a..bf011453 100644 --- a/frontend/src/views/task/TaskList.vue +++ b/frontend/src/views/task/TaskList.vue @@ -1,142 +1,146 @@