From 57f1edc20775cb2c52e1bbe82b24aa9c90fb2799 Mon Sep 17 00:00:00 2001 From: marvzhang Date: Sun, 23 Feb 2020 09:19:18 +0800 Subject: [PATCH] updated dockerpush.yml --- .github/workflows/dockerpush.yml | 3 + backend/main.go | 1 + backend/routes/spider.go | 329 ++++++++++++++++++------------- backend/services/spider.go | 3 + 4 files changed, 195 insertions(+), 141 deletions(-) diff --git a/.github/workflows/dockerpush.yml b/.github/workflows/dockerpush.yml index ba65f700..81b1da4c 100644 --- a/.github/workflows/dockerpush.yml +++ b/.github/workflows/dockerpush.yml @@ -54,6 +54,9 @@ jobs: - name: Deploy run: | + # Strip git ref prefix from version + VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') + if [ $VERSION == "release"]; then apt-get install -y curl curl ${{ secrets.JENKINS_RELEASE_URL }} diff --git a/backend/main.go b/backend/main.go index 7ddfdbf8..a35291ca 100644 --- a/backend/main.go +++ b/backend/main.go @@ -164,6 +164,7 @@ func main() { authGroup.POST("/spiders/:id/upload", routes.UploadSpiderFromId) // 上传爬虫(ID) authGroup.DELETE("/spiders", routes.DeleteSelectedSpider) // 删除选择的爬虫 authGroup.DELETE("/spiders/:id", routes.DeleteSpider) // 删除爬虫 + authGroup.POST("/spiders/:id/copy", routes.CopySpider) // 拷贝爬虫 authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks) // 爬虫任务列表 authGroup.GET("/spiders/:id/file/tree", routes.GetSpiderFileTree) // 爬虫文件目录树读取 authGroup.GET("/spiders/:id/file", routes.GetSpiderFile) // 爬虫文件读取 diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 9ad77948..16480ffa 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -26,6 +26,8 @@ import ( "time" ) +// ======== 爬虫管理 ======== + func GetSpiderList(c *gin.Context) { pageNum, _ := c.GetQuery("page_num") pageSize, _ := c.GetQuery("page_size") @@ -240,6 +242,39 @@ func PutSpider(c *gin.Context) { }) } +func CopySpider(c *gin.Context) { + type ReqBody struct { + Name string `json:"name"` + } + + id := c.Param("id") + + if 
!bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "invalid id") + } + + var reqBody ReqBody + if err := c.ShouldBindJSON(&reqBody); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + if err := services.CopySpider(spider, reqBody.Name); err != nil { + HandleError(http.StatusInternalServerError, c, err) + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + func UploadSpider(c *gin.Context) { // 从body中获取文件 uploadFile, err := c.FormFile("file") @@ -647,7 +682,151 @@ func GetSpiderTasks(c *gin.Context) { }) } -// 爬虫文件管理 +func GetSpiderStats(c *gin.Context) { + type Overview struct { + TaskCount int `json:"task_count" bson:"task_count"` + ResultCount int `json:"result_count" bson:"result_count"` + SuccessCount int `json:"success_count" bson:"success_count"` + SuccessRate float64 `json:"success_rate"` + TotalWaitDuration float64 `json:"wait_duration" bson:"wait_duration"` + TotalRuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"` + AvgWaitDuration float64 `json:"avg_wait_duration"` + AvgRuntimeDuration float64 `json:"avg_runtime_duration"` + } + + type Data struct { + Overview Overview `json:"overview"` + Daily []model.TaskDailyItem `json:"daily"` + } + + id := c.Param("id") + + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + log.Errorf(err.Error()) + HandleError(http.StatusInternalServerError, c, err) + return + } + + s, col := database.GetCol("tasks") + defer s.Close() + + // 起始日期 + startDate := time.Now().Add(-time.Hour * 24 * 30) + endDate := time.Now() + + // match + op1 := bson.M{ + "$match": bson.M{ + "spider_id": spider.Id, + "create_ts": bson.M{ + "$gte": startDate, + "$lt": endDate, + }, + }, + } + + // project + op2 := bson.M{ + "$project": bson.M{ + "success_count": bson.M{ + "$cond": []interface{}{ + 
bson.M{ + "$eq": []string{ + "$status", + constants.StatusFinished, + }, + }, + 1, + 0, + }, + }, + "result_count": "$result_count", + "wait_duration": "$wait_duration", + "runtime_duration": "$runtime_duration", + }, + } + + // group + op3 := bson.M{ + "$group": bson.M{ + "_id": nil, + "task_count": bson.M{"$sum": 1}, + "success_count": bson.M{"$sum": "$success_count"}, + "result_count": bson.M{"$sum": "$result_count"}, + "wait_duration": bson.M{"$sum": "$wait_duration"}, + "runtime_duration": bson.M{"$sum": "$runtime_duration"}, + }, + } + + // run aggregation pipeline + var overview Overview + if err := col.Pipe([]bson.M{op1, op2, op3}).One(&overview); err != nil { + if err == mgo.ErrNotFound { + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: Data{ + Overview: overview, + Daily: []model.TaskDailyItem{}, + }, + }) + return + } + log.Errorf(err.Error()) + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 后续处理 + successCount, _ := strconv.ParseFloat(strconv.Itoa(overview.SuccessCount), 64) + taskCount, _ := strconv.ParseFloat(strconv.Itoa(overview.TaskCount), 64) + overview.SuccessRate = successCount / taskCount + overview.AvgWaitDuration = overview.TotalWaitDuration / taskCount + overview.AvgRuntimeDuration = overview.TotalRuntimeDuration / taskCount + + items, err := model.GetDailyTaskStats(bson.M{"spider_id": spider.Id}) + if err != nil { + log.Errorf(err.Error()) + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: Data{ + Overview: overview, + Daily: items, + }, + }) +} + +func GetSpiderSchedules(c *gin.Context) { + id := c.Param("id") + + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") + return + } + + // 获取定时任务 + list, err := model.GetScheduleList(bson.M{"spider_id": bson.ObjectIdHex(id)}) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + 
return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: list, + }) +} + +// ======== ./爬虫管理 ======== + +// ======== 爬虫文件管理 ======== func GetSpiderDir(c *gin.Context) { // 爬虫ID @@ -946,147 +1125,9 @@ func RenameSpiderFile(c *gin.Context) { }) } -func GetSpiderStats(c *gin.Context) { - type Overview struct { - TaskCount int `json:"task_count" bson:"task_count"` - ResultCount int `json:"result_count" bson:"result_count"` - SuccessCount int `json:"success_count" bson:"success_count"` - SuccessRate float64 `json:"success_rate"` - TotalWaitDuration float64 `json:"wait_duration" bson:"wait_duration"` - TotalRuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"` - AvgWaitDuration float64 `json:"avg_wait_duration"` - AvgRuntimeDuration float64 `json:"avg_runtime_duration"` - } +// ======== ./爬虫文件管理 ======== - type Data struct { - Overview Overview `json:"overview"` - Daily []model.TaskDailyItem `json:"daily"` - } - - id := c.Param("id") - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - log.Errorf(err.Error()) - HandleError(http.StatusInternalServerError, c, err) - return - } - - s, col := database.GetCol("tasks") - defer s.Close() - - // 起始日期 - startDate := time.Now().Add(-time.Hour * 24 * 30) - endDate := time.Now() - - // match - op1 := bson.M{ - "$match": bson.M{ - "spider_id": spider.Id, - "create_ts": bson.M{ - "$gte": startDate, - "$lt": endDate, - }, - }, - } - - // project - op2 := bson.M{ - "$project": bson.M{ - "success_count": bson.M{ - "$cond": []interface{}{ - bson.M{ - "$eq": []string{ - "$status", - constants.StatusFinished, - }, - }, - 1, - 0, - }, - }, - "result_count": "$result_count", - "wait_duration": "$wait_duration", - "runtime_duration": "$runtime_duration", - }, - } - - // group - op3 := bson.M{ - "$group": bson.M{ - "_id": nil, - "task_count": bson.M{"$sum": 1}, - "success_count": bson.M{"$sum": "$success_count"}, - "result_count": bson.M{"$sum": "$result_count"}, - 
"wait_duration": bson.M{"$sum": "$wait_duration"}, - "runtime_duration": bson.M{"$sum": "$runtime_duration"}, - }, - } - - // run aggregation pipeline - var overview Overview - if err := col.Pipe([]bson.M{op1, op2, op3}).One(&overview); err != nil { - if err == mgo.ErrNotFound { - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: Data{ - Overview: overview, - Daily: []model.TaskDailyItem{}, - }, - }) - return - } - log.Errorf(err.Error()) - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 后续处理 - successCount, _ := strconv.ParseFloat(strconv.Itoa(overview.SuccessCount), 64) - taskCount, _ := strconv.ParseFloat(strconv.Itoa(overview.TaskCount), 64) - overview.SuccessRate = successCount / taskCount - overview.AvgWaitDuration = overview.TotalWaitDuration / taskCount - overview.AvgRuntimeDuration = overview.TotalRuntimeDuration / taskCount - - items, err := model.GetDailyTaskStats(bson.M{"spider_id": spider.Id}) - if err != nil { - log.Errorf(err.Error()) - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: Data{ - Overview: overview, - Daily: items, - }, - }) -} - -func GetSpiderSchedules(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - // 获取定时任务 - list, err := model.GetScheduleList(bson.M{"spider_id": bson.ObjectIdHex(id)}) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: list, - }) -} +// ======== Scrapy 部分 ======== func GetSpiderScrapySpiders(c *gin.Context) { id := c.Param("id") @@ -1328,6 +1369,10 @@ func GetSpiderScrapySpiderFilepath(c *gin.Context) { }) } +// ======== ./Scrapy 部分 ======== + +// ======== Git 部分 ======== + func PostSpiderSyncGit(c *gin.Context) { id := c.Param("id") @@ -1377,3 
+1422,5 @@ func PostSpiderResetGit(c *gin.Context) { Message: "success", }) } + +// ======== ./Git 部分 ======== diff --git a/backend/services/spider.go b/backend/services/spider.go index 6d450ef1..27805da4 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -293,6 +293,9 @@ func CancelSpider(id string) error { return nil } +func CopySpider(spider model.Spider, newName string) error { +} + // 启动爬虫服务 func InitSpiderService() error { // 构造定时任务执行器