package routes

import (
	"crawlab/constants"
	"crawlab/database"
	"crawlab/model"
	"crawlab/services"
	"crawlab/utils"
	"github.com/apex/log"
	"github.com/gin-gonic/gin"
	"github.com/globalsign/mgo"
	"github.com/globalsign/mgo/bson"
	"github.com/pkg/errors"
	uuid "github.com/satori/go.uuid"
	"github.com/spf13/viper"
	"io/ioutil"
	"net/http"
	"os"
	"path/filepath"
	"runtime/debug"
	"strconv"
	"strings"
	"time"
)

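// GetSpiderList returns the list of all spiders.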
func GetSpiderList(c *gin.Context) {
	results, err := model.GetSpiderList(nil, 0, 0)
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}
	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data:    results,
	})
}

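// GetSpider returns a single spider by its ObjectId.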
func GetSpider(c *gin.Context) {
	id := c.Param("id")

	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	result, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}
	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data:    result,
	})
}

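// PostSpider updates an existing spider with the JSON body of the request.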
func PostSpider(c *gin.Context) {
	id := c.Param("id")

	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	var item model.Spider
	if err := c.ShouldBindJSON(&item); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}

	if err := model.UpdateSpider(bson.ObjectIdHex(id), item); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}

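// PublishAllSpiders publishes all spiders via the services layer.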
func PublishAllSpiders(c *gin.Context) {
	if err := services.PublishAllSpiders(); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}

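// PublishSpider publishes a single spider identified by its ObjectId.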
func PublishSpider(c *gin.Context) {
	id := c.Param("id")

	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	if err := services.PublishSpider(spider); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}

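// PutSpider accepts an uploaded zip archive, extracts it into the spider directory,
// and calls services.UpdateSpiders to refresh the spider records.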
func PutSpider(c *gin.Context) {
	// get the uploaded file from the request body
	file, err := c.FormFile("file")
	if err != nil {
		debug.PrintStack()
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// return an error if the file is not a zip archive
	if !strings.HasSuffix(file.Filename, ".zip") {
		debug.PrintStack()
		HandleError(http.StatusBadRequest, c, errors.New("Not a valid zip file"))
		return
	}

	// create the tmp directory in case it does not exist
	tmpPath := viper.GetString("other.tmppath")
	if !utils.Exists(tmpPath) {
		if err := os.Mkdir(tmpPath, os.ModePerm); err != nil {
			log.Error("mkdir other.tmppath dir error:" + err.Error())
			debug.PrintStack()
			HandleError(http.StatusBadRequest, c, errors.New("Mkdir other.tmppath dir error"))
			return
		}
	}

	// save the upload to a local temporary file
	randomId := uuid.NewV4()
	tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip")
	if err := c.SaveUploadedFile(file, tmpFilePath); err != nil {
		log.Error("save upload file error: " + err.Error())
		debug.PrintStack()
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// open the temporary file
	tmpFile, err := os.OpenFile(tmpFilePath, os.O_RDONLY, 0777)
	if err != nil {
		debug.PrintStack()
		HandleError(http.StatusInternalServerError, c, err)
		return
	}
	if err = tmpFile.Close(); err != nil {
		debug.PrintStack()
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// destination directory
	dstPath := filepath.Join(
		viper.GetString("spider.path"),
		strings.Replace(file.Filename, ".zip", "", 1),
	)

	// if the destination directory already exists, remove it
	if utils.Exists(dstPath) {
		if err := os.RemoveAll(dstPath); err != nil {
			debug.PrintStack()
			HandleError(http.StatusInternalServerError, c, err)
			return
		}
	}

	// decompress the temporary file into the spider directory
	if err := utils.DeCompress(tmpFile, dstPath); err != nil {
		debug.PrintStack()
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// remove the temporary file
	if err = os.Remove(tmpFilePath); err != nil {
		debug.PrintStack()
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// update the spiders
	services.UpdateSpiders()

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}

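// DeleteSpider removes a spider's source directory and deletes it from the database.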
func DeleteSpider(c *gin.Context) {
	id := c.Param("id")

	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	// get the spider
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// remove the spider's source directory
	if err := os.RemoveAll(spider.Src); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// remove the spider from the database
	if err := model.RemoveSpider(bson.ObjectIdHex(id)); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}

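// GetSpiderTasks returns the tasks associated with a spider.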
func GetSpiderTasks(c *gin.Context) {
	id := c.Param("id")

	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	tasks, err := spider.GetTasks()
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data:    tasks,
	})
}

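// GetSpiderDir lists the files under a relative path inside a spider's source directory.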
func GetSpiderDir(c *gin.Context) {
	// spider ID
	id := c.Param("id")

	// relative directory path
	path := c.Query("path")

	// get the spider
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// list the files in the directory
	f, err := ioutil.ReadDir(filepath.Join(spider.Src, path))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// iterate over the file list
	var fileList []model.File
	for _, file := range f {
		fileList = append(fileList, model.File{
			Name:  file.Name(),
			IsDir: file.IsDir(),
			Size:  file.Size(),
			Path:  filepath.Join(path, file.Name()),
		})
	}

	// return the result
	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data:    fileList,
	})
}

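// GetSpiderFile returns the content of a file inside a spider's source directory.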
func GetSpiderFile(c *gin.Context) {
	// spider ID
	id := c.Param("id")

	// relative file path
	path := c.Query("path")

	// get the spider
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// read the file
	fileBytes, err := ioutil.ReadFile(filepath.Join(spider.Src, path))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// return the result
	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data:    string(fileBytes),
	})
}

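// SpiderFileReqBody is the request body for writing a spider file.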
type SpiderFileReqBody struct {
	Path    string `json:"path"`
	Content string `json:"content"`
}

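// PostSpiderFile writes the given content to a file inside a spider's source directory.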
func PostSpiderFile(c *gin.Context) {
	// spider ID
	id := c.Param("id")

	// relative file path and content
	var reqBody SpiderFileReqBody
	if err := c.ShouldBindJSON(&reqBody); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}

	// get the spider
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// write the file
	if err := ioutil.WriteFile(filepath.Join(spider.Src, reqBody.Path), []byte(reqBody.Content), os.ModePerm); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// return the result
	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}

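// GetSpiderStats aggregates task statistics for a spider (overview over the last 30 days,
// plus daily task stats) and returns them as a single response.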
func GetSpiderStats(c *gin.Context) {
	type Overview struct {
		TaskCount            int     `json:"task_count" bson:"task_count"`
		ResultCount          int     `json:"result_count" bson:"result_count"`
		SuccessCount         int     `json:"success_count" bson:"success_count"`
		SuccessRate          float64 `json:"success_rate"`
		TotalWaitDuration    float64 `json:"wait_duration" bson:"wait_duration"`
		TotalRuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"`
		AvgWaitDuration      float64 `json:"avg_wait_duration"`
		AvgRuntimeDuration   float64 `json:"avg_runtime_duration"`
	}

	type Data struct {
		Overview Overview              `json:"overview"`
		Daily    []model.TaskDailyItem `json:"daily"`
	}

	id := c.Param("id")

	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		log.Errorf(err.Error())
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	s, col := database.GetCol("tasks")
	defer s.Close()

	// time range: the last 30 days
	startDate := time.Now().Add(-time.Hour * 24 * 30)
	endDate := time.Now()

	// match stage: tasks of this spider created within the time range
	op1 := bson.M{
		"$match": bson.M{
			"spider_id": spider.Id,
			"create_ts": bson.M{
				"$gte": startDate,
				"$lt":  endDate,
			},
		},
	}

	// project stage: count finished tasks as successes
	op2 := bson.M{
		"$project": bson.M{
			"success_count": bson.M{
				"$cond": []interface{}{
					bson.M{
						"$eq": []string{
							"$status",
							constants.StatusFinished,
						},
					},
					1,
					0,
				},
			},
			"result_count":     "$result_count",
			"wait_duration":    "$wait_duration",
			"runtime_duration": "$runtime_duration",
		},
	}

	// group stage: sum up counts and durations
	op3 := bson.M{
		"$group": bson.M{
			"_id":              nil,
			"task_count":       bson.M{"$sum": 1},
			"success_count":    bson.M{"$sum": "$success_count"},
			"result_count":     bson.M{"$sum": "$result_count"},
			"wait_duration":    bson.M{"$sum": "$wait_duration"},
			"runtime_duration": bson.M{"$sum": "$runtime_duration"},
		},
	}

	// run the aggregation pipeline
	var overview Overview
	if err := col.Pipe([]bson.M{op1, op2, op3}).One(&overview); err != nil {
		if err == mgo.ErrNotFound {
			c.JSON(http.StatusOK, Response{
				Status:  "ok",
				Message: "success",
				Data: Data{
					Overview: overview,
					Daily:    []model.TaskDailyItem{},
				},
			})
			return
		}
		log.Errorf(err.Error())
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// derive the success rate and average durations
	successCount, _ := strconv.ParseFloat(strconv.Itoa(overview.SuccessCount), 64)
	taskCount, _ := strconv.ParseFloat(strconv.Itoa(overview.TaskCount), 64)
	overview.SuccessRate = successCount / taskCount
	overview.AvgWaitDuration = overview.TotalWaitDuration / taskCount
	overview.AvgRuntimeDuration = overview.TotalRuntimeDuration / taskCount

	items, err := model.GetDailyTaskStats(bson.M{"spider_id": spider.Id})
	if err != nil {
		log.Errorf(err.Error())
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data: Data{
			Overview: overview,
			Daily:    items,
		},
	})
}