Files
crawlab/backend/routes/spider.go
2019-08-18 18:45:19 +08:00

483 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package routes
import (
"crawlab/constants"
"crawlab/database"
"crawlab/model"
"crawlab/services"
"crawlab/utils"
"github.com/apex/log"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"github.com/pkg/errors"
uuid "github.com/satori/go.uuid"
"github.com/spf13/viper"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"runtime/debug"
"strconv"
"strings"
"time"
)
func GetSpiderList(c *gin.Context) {
results, err := model.GetSpiderList(nil, 0, 0)
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: results,
})
}
func GetSpider(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
}
result, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: result,
})
}
func PostSpider(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
}
var item model.Spider
if err := c.ShouldBindJSON(&item); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
if err := model.UpdateSpider(bson.ObjectIdHex(id), item); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func PublishAllSpiders(c *gin.Context) {
if err := services.PublishAllSpiders(); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func PublishSpider(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
}
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
if err := services.PublishSpider(spider); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func PutSpider(c *gin.Context) {
// 从body中获取文件
file, err := c.FormFile("file")
if err != nil {
debug.PrintStack()
HandleError(http.StatusInternalServerError, c, err)
return
}
// 如果不为zip文件返回错误
if !strings.HasSuffix(file.Filename, ".zip") {
debug.PrintStack()
HandleError(http.StatusBadRequest, c, errors.New("Not a valid zip file"))
return
}
// 以防tmp目录不存在
tmpPath := viper.GetString("other.tmppath")
if !utils.Exists(tmpPath) {
if err := os.Mkdir(tmpPath, os.ModePerm); err != nil {
log.Error("mkdir other.tmppath dir error:" + err.Error())
debug.PrintStack()
HandleError(http.StatusBadRequest, c, errors.New("Mkdir other.tmppath dir error"))
return
}
}
// 保存到本地临时文件
randomId := uuid.NewV4()
tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip")
if err := c.SaveUploadedFile(file, tmpFilePath); err != nil {
log.Error("save upload file error: " + err.Error())
debug.PrintStack()
HandleError(http.StatusInternalServerError, c, err)
return
}
// 读取临时文件
tmpFile, err := os.OpenFile(tmpFilePath, os.O_RDONLY, 0777)
if err != nil {
debug.PrintStack()
HandleError(http.StatusInternalServerError, c, err)
return
}
if err = tmpFile.Close(); err != nil {
debug.PrintStack()
HandleError(http.StatusInternalServerError, c, err)
return
}
// 目标目录
dstPath := filepath.Join(
viper.GetString("spider.path"),
strings.Replace(file.Filename, ".zip", "", 1),
)
// 如果目标目录已存在,删除目标目录
if utils.Exists(dstPath) {
if err := os.RemoveAll(dstPath); err != nil {
debug.PrintStack()
HandleError(http.StatusInternalServerError, c, err)
}
}
// 将临时文件解压到爬虫目录
if err := utils.DeCompress(tmpFile, dstPath); err != nil {
debug.PrintStack()
HandleError(http.StatusInternalServerError, c, err)
return
}
// 删除临时文件
if err = os.Remove(tmpFilePath); err != nil {
debug.PrintStack()
HandleError(http.StatusInternalServerError, c, err)
return
}
// 更新爬虫
services.UpdateSpiders()
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func DeleteSpider(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
// 获取该爬虫
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 删除爬虫文件目录
if err := os.RemoveAll(spider.Src); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 从数据库中删除该爬虫
if err := model.RemoveSpider(bson.ObjectIdHex(id)); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func GetSpiderTasks(c *gin.Context) {
id := c.Param("id")
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
tasks, err := spider.GetTasks()
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: tasks,
})
}
func GetSpiderDir(c *gin.Context) {
// 爬虫ID
id := c.Param("id")
// 目录相对路径
path := c.Query("path")
// 获取爬虫
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 获取目录下文件列表
f, err := ioutil.ReadDir(filepath.Join(spider.Src, path))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 遍历文件列表
var fileList []model.File
for _, file := range f {
fileList = append(fileList, model.File{
Name: file.Name(),
IsDir: file.IsDir(),
Size: file.Size(),
Path: filepath.Join(path, file.Name()),
})
}
// 返回结果
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: fileList,
})
}
func GetSpiderFile(c *gin.Context) {
// 爬虫ID
id := c.Param("id")
// 文件相对路径
path := c.Query("path")
// 获取爬虫
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 读取文件
fileBytes, err := ioutil.ReadFile(filepath.Join(spider.Src, path))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
}
// 返回结果
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: string(fileBytes),
})
}
type SpiderFileReqBody struct {
Path string `json:"path"`
Content string `json:"content"`
}
func PostSpiderFile(c *gin.Context) {
// 爬虫ID
id := c.Param("id")
// 文件相对路径
var reqBody SpiderFileReqBody
if err := c.ShouldBindJSON(&reqBody); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
// 获取爬虫
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 写文件
if err := ioutil.WriteFile(filepath.Join(spider.Src, reqBody.Path), []byte(reqBody.Content), os.ModePerm); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 返回结果
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func GetSpiderStats(c *gin.Context) {
type Overview struct {
TaskCount int `json:"task_count" bson:"task_count"`
ResultCount int `json:"result_count" bson:"result_count"`
SuccessCount int `json:"success_count" bson:"success_count"`
SuccessRate float64 `json:"success_rate"`
TotalWaitDuration float64 `json:"wait_duration" bson:"wait_duration"`
TotalRuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"`
AvgWaitDuration float64 `json:"avg_wait_duration"`
AvgRuntimeDuration float64 `json:"avg_runtime_duration"`
}
type Data struct {
Overview Overview `json:"overview"`
Daily []model.TaskDailyItem `json:"daily"`
}
id := c.Param("id")
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
log.Errorf(err.Error())
HandleError(http.StatusInternalServerError, c, err)
return
}
s, col := database.GetCol("tasks")
defer s.Close()
// 起始日期
startDate := time.Now().Add(-time.Hour * 24 * 30)
endDate := time.Now()
// match
op1 := bson.M{
"$match": bson.M{
"spider_id": spider.Id,
"create_ts": bson.M{
"$gte": startDate,
"$lt": endDate,
},
},
}
// project
op2 := bson.M{
"$project": bson.M{
"success_count": bson.M{
"$cond": []interface{}{
bson.M{
"$eq": []string{
"$status",
constants.StatusFinished,
},
},
1,
0,
},
},
"result_count": "$result_count",
"wait_duration": "$wait_duration",
"runtime_duration": "$runtime_duration",
},
}
// group
op3 := bson.M{
"$group": bson.M{
"_id": nil,
"task_count": bson.M{"$sum": 1},
"success_count": bson.M{"$sum": "$success_count"},
"result_count": bson.M{"$sum": "$result_count"},
"wait_duration": bson.M{"$sum": "$wait_duration"},
"runtime_duration": bson.M{"$sum": "$runtime_duration"},
},
}
// run aggregation pipeline
var overview Overview
if err := col.Pipe([]bson.M{op1, op2, op3}).One(&overview); err != nil {
if err == mgo.ErrNotFound {
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: Data{
Overview: overview,
Daily: []model.TaskDailyItem{},
},
})
return
}
log.Errorf(err.Error())
HandleError(http.StatusInternalServerError, c, err)
return
}
// 后续处理
successCount, _ := strconv.ParseFloat(strconv.Itoa(overview.SuccessCount), 64)
taskCount, _ := strconv.ParseFloat(strconv.Itoa(overview.TaskCount), 64)
overview.SuccessRate = successCount / taskCount
overview.AvgWaitDuration = overview.TotalWaitDuration / taskCount
overview.AvgRuntimeDuration = overview.TotalRuntimeDuration / taskCount
items, err := model.GetDailyTaskStats(bson.M{"spider_id": spider.Id})
if err != nil {
log.Errorf(err.Error())
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: Data{
Overview: overview,
Daily: items,
},
})
}