Merge pull request #695 from crawlab-team/develop

Develop
This commit is contained in:
Marvin Zhang
2020-04-21 14:30:19 +08:00
committed by GitHub
42 changed files with 1396 additions and 313 deletions

View File

@@ -1,3 +1,20 @@
# 0.4.10 (2020-04-21)
### 功能 / 优化
- **优化日志管理**. 集中化管理日志，储存在 MongoDB，减少对 PubSub 的依赖，允许日志异常检测.
- **自动安装依赖**. 允许从 `requirements.txt` 或 `package.json` 自动安装依赖.
- **API Token**. 允许用户生成 API Token，并利用它们来集成到自己的系统中.
- **Web Hook**. 当任务开始或结束时触发 Web Hook http 请求到预定义好的 URL.
- **自动生成结果集**. 如果没有设置，自动设置结果集为 `results_<spider_name>`.
- **优化项目列表**. 项目列表中不展示 "No Project".
- **升级 Node.js**. Node.js 版本从 v8.12 升级到 v10.19.
- **定时任务增加运行按钮**. 允许用户在定时任务界面手动运行爬虫任务.
### Bug 修复
- **无法注册**. [#670](https://github.com/crawlab-team/crawlab/issues/670)
- **爬虫定时任务标签 Cron 表达式显示秒**. [#678](https://github.com/crawlab-team/crawlab/issues/678)
- **爬虫每日数据缺失**. [#684](https://github.com/crawlab-team/crawlab/issues/684)
- **结果数量未即时更新**. [#689](https://github.com/crawlab-team/crawlab/issues/689)
# 0.4.9 (2020-03-31)
### 功能 / 优化
- **挑战**. 用户可以完成不同的趣味挑战.
@@ -10,6 +27,7 @@
- **支持任务重试**. 允许任务重新触发历史任务.
### Bug 修复
- **无法注册**. [#670](https://github.com/crawlab-team/crawlab/issues/670)
- **CLI 无法在 Windows 上使用**. [#580](https://github.com/crawlab-team/crawlab/issues/580)
- **重新上传错误**. [#643](https://github.com/crawlab-team/crawlab/issues/643) [#640](https://github.com/crawlab-team/crawlab/issues/640)
- **上传丢失文件目录**. [#646](https://github.com/crawlab-team/crawlab/issues/646)

View File

@@ -1,3 +1,20 @@
# 0.4.10 (2020-04-21)
### Features / Enhancement
- **Enhanced Log Management**. Centralized log storage in MongoDB, reducing the dependency on PubSub and enabling log error detection.
- **API Token**. Allow users to generate API tokens and use them to integrate into their own systems.
- **Web Hook**. Trigger a Web Hook HTTP request to a pre-defined URL when a task starts or finishes.
- **Auto Install Dependencies**. Allow installing dependencies automatically from `requirements.txt` or `package.json`.
- **Auto Results Collection**. Set results collection to `results_<spider_name>` if it is not set.
- **Optimized Project List**. Do not display the "No Project" item in the project list.
- **Upgrade Node.js**. Upgrade Node.js version from v8.12 to v10.19.
- **Add Run Button in Schedule Page**. Allow users to manually run spider tasks from the Schedule page.
### Bug Fixes
- **Cannot register**. [#670](https://github.com/crawlab-team/crawlab/issues/670)
- **Spider schedule tab cron expression shows seconds**. [#678](https://github.com/crawlab-team/crawlab/issues/678)
- **Missing daily stats in spider**. [#684](https://github.com/crawlab-team/crawlab/issues/684)
- **Results count not updated in time**. [#689](https://github.com/crawlab-team/crawlab/issues/689)
# 0.4.9 (2020-03-31)
### Features / Enhancement
- **Challenges**. Users can achieve different challenges based on their actions.

View File

@@ -38,13 +38,14 @@ task:
workers: 4
other:
tmppath: "/tmp"
version: 0.4.9
version: 0.4.10
setting:
allowRegister: "N"
enableTutorial: "N"
runOnMaster: "Y"
demoSpiders: "N"
checkScrapy: "Y"
autoInstall: "Y"
notification:
mail:
server: ''

5
backend/constants/log.go Normal file
View File

@@ -0,0 +1,5 @@
package constants
const (
// ErrorRegexPattern is the default regular expression used to detect error
// lines in task logs. It matches the words "error", "exception" or
// "traceback" (optionally followed by "s") when they are delimited on each
// side by a space, colon, comma, period, or the start/end of the message.
ErrorRegexPattern = "(?:[ :,.]|^)((?:error|exception|traceback)s?)(?:[ :,.]|$)"
)

View File

@@ -22,6 +22,8 @@ type Lang struct {
LockPath string `json:"lock_path"`
InstallScript string `json:"install_script"`
InstallStatus string `json:"install_status"`
DepFileName string `json:"dep_file_name"`
InstallDepArgs string `json:"install_dep_cmd"`
}
type Dependency struct {
@@ -30,3 +32,7 @@ type Dependency struct {
Description string `json:"description"`
Installed bool `json:"installed"`
}
// PackageJson holds the part of a JSON manifest that is decoded here: its
// "dependencies" object. Presumably this is a Node.js package.json file —
// TODO confirm against the code that unmarshals into it.
type PackageJson struct {
// Dependencies maps each key of the "dependencies" object to its string value.
Dependencies map[string]string `json:"dependencies"`
}

View File

@@ -218,6 +218,7 @@ func main() {
authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) // 删除指定状态的任务
authGroup.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务
authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志
authGroup.GET("/tasks/:id/error-log", routes.GetTaskErrorLog) // 任务错误日志
authGroup.GET("/tasks/:id/results", routes.GetTaskResults) // 任务结果
authGroup.GET("/tasks/:id/results/download", routes.DownloadTaskResultsCsv) // 下载任务结果
authGroup.POST("/tasks/:id/restart", routes.RestartTask) // 重新开始任务
@@ -274,6 +275,12 @@ func main() {
authGroup.PUT("/actions", routes.PutAction) // 新增操作
//authGroup.POST("/actions/:id", routes.PostAction) // 修改操作
}
// API Token
{
authGroup.GET("/tokens", routes.GetTokens) // 获取 Tokens
authGroup.PUT("/tokens", routes.PutToken) // 添加 Token
authGroup.DELETE("/tokens/:id", routes.DeleteToken) // 删除 Token
}
// 统计数据
authGroup.GET("/stats/home", routes.GetHomeStats) // 首页统计数据
// 文件

View File

@@ -11,14 +11,6 @@ import (
func AuthorizationMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
// 如果为登录或注册,不用校验
//if c.Request.URL.Path == "/login" ||
// (c.Request.URL.Path == "/users" && c.Request.Method == "PUT") ||
// strings.HasSuffix(c.Request.URL.Path, "download") {
// c.Next()
// return
//}
// 获取token string
tokenStr := c.GetHeader("Authorization")
@@ -46,6 +38,8 @@ func AuthorizationMiddleware() gin.HandlerFunc {
return
}
}
// 设置用户
c.Set(constants.ContextUser, &user)
// 校验成功

View File

@@ -4,6 +4,7 @@ import (
"crawlab/database"
"crawlab/utils"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"os"
"runtime/debug"
@@ -11,11 +12,22 @@ import (
)
type LogItem struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
Message string `json:"msg" bson:"msg"`
TaskId string `json:"task_id" bson:"task_id"`
IsError bool `json:"is_error" bson:"is_error"`
Ts time.Time `json:"ts" bson:"ts"`
Id bson.ObjectId `json:"_id" bson:"_id"`
Message string `json:"msg" bson:"msg"`
TaskId string `json:"task_id" bson:"task_id"`
Seq int64 `json:"seq" bson:"seq"`
Ts time.Time `json:"ts" bson:"ts"`
ExpireTs time.Time `json:"expire_ts" bson:"expire_ts"`
}
type ErrorLogItem struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
TaskId string `json:"task_id" bson:"task_id"`
Message string `json:"msg" bson:"msg"`
LogId bson.ObjectId `json:"log_id" bson:"log_id"`
Seq int64 `json:"seq" bson:"seq"`
Ts time.Time `json:"ts" bson:"ts"`
ExpireTs time.Time `json:"expire_ts" bson:"expire_ts"`
}
// 获取本地日志
@@ -65,15 +77,91 @@ func AddLogItem(l LogItem) error {
return nil
}
func GetLogItemList(filter interface{}, skip int, limit int, sortStr string) ([]LogItem, error) {
// AddLogItems inserts a batch of log items into the "logs" collection.
//
// An empty batch is a no-op that returns nil. When the insert fails the error
// is logged, the stack is printed, and the error is returned.
func AddLogItems(ls []LogItem) error {
	if len(ls) == 0 {
		return nil
	}

	session, col := database.GetCol("logs")
	defer session.Close()

	// Insert is variadic over interface{}, so box every item first.
	batch := make([]interface{}, 0, len(ls))
	for i := range ls {
		batch = append(batch, ls[i])
	}

	insertErr := col.Insert(batch...)
	if insertErr == nil {
		return nil
	}
	log.Errorf("insert log error: " + insertErr.Error())
	debug.PrintStack()
	return insertErr
}
// AddErrorLogItem stores e in the "error_logs" collection unless an error log
// referencing the same source log entry (log_id) already exists.
//
// It returns nil when the item was inserted or was already present, and the
// underlying error when either the lookup or the insert fails.
func AddErrorLogItem(e ErrorLogItem) error {
	s, c := database.GetCol("error_logs")
	defer s.Close()

	// Query on the log_id field. FindId would compare the selector against _id,
	// which never matches, so every scan would insert the same error log again.
	var existing ErrorLogItem
	err := c.Find(bson.M{"log_id": e.LogId}).One(&existing)
	switch err {
	case nil:
		// already recorded for this log entry
		return nil
	case mgo.ErrNotFound:
		if err := c.Insert(e); err != nil {
			log.Errorf("insert log error: %s", err.Error())
			debug.PrintStack()
			return err
		}
		return nil
	default:
		// a real lookup failure must not be mistaken for "already present"
		log.Errorf("find error log error: %s", err.Error())
		debug.PrintStack()
		return err
	}
}
func GetLogItemList(query bson.M, keyword string, skip int, limit int, sortStr string) ([]LogItem, error) {
s, c := database.GetCol("logs")
defer s.Close()
filter := query
var logItems []LogItem
if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortStr).All(&logItems); err != nil {
debug.PrintStack()
return logItems, err
if keyword == "" {
filter["seq"] = bson.M{
"$gte": skip,
"$lt": skip + limit,
}
if err := c.Find(filter).Sort(sortStr).All(&logItems); err != nil {
debug.PrintStack()
return logItems, err
}
} else {
filter["msg"] = bson.M{
"$regex": bson.RegEx{
Pattern: keyword,
Options: "i",
},
}
if err := c.Find(filter).Sort(sortStr).Skip(skip).Limit(limit).All(&logItems); err != nil {
debug.PrintStack()
return logItems, err
}
}
return logItems, nil
}
// GetLogItemTotal counts the documents in the "logs" collection that match
// query and, when keyword is non-empty, whose "msg" field matches keyword as a
// case-insensitive regular expression.
//
// The caller's query map is not modified. On failure the stack is printed and
// the error is returned together with the count reported by the driver.
func GetLogItemTotal(query bson.M, keyword string) (int, error) {
	s, c := database.GetCol("logs")
	defer s.Close()

	// Work on a copy: adding the keyword condition to the caller's map would leak
	// it into later queries built from the same map, and would panic on a nil map.
	filter := make(bson.M, len(query)+1)
	for k, v := range query {
		filter[k] = v
	}

	if keyword != "" {
		filter["msg"] = bson.M{
			"$regex": bson.RegEx{
				Pattern: keyword,
				Options: "i",
			},
		}
	}

	total, err := c.Find(filter).Count()
	if err != nil {
		debug.PrintStack()
		return total, err
	}
	return total, nil
}

View File

@@ -64,6 +64,10 @@ type Spider struct {
DedupField string `json:"dedup_field" bson:"dedup_field"` // 去重字段
DedupMethod string `json:"dedup_method" bson:"dedup_method"` // 去重方式
// Web Hook
IsWebHook bool `json:"is_web_hook" bson:"is_web_hook"` // 是否开启 Web Hook
WebHookUrl string `json:"web_hook_url" bson:"web_hook_url"` // Web Hook URL
// 前端展示
LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间
LastStatus string `json:"last_status"` // 最后执行状态

View File

@@ -3,6 +3,7 @@ package model
import (
"crawlab/constants"
"crawlab/database"
"crawlab/utils"
"github.com/apex/log"
"github.com/globalsign/mgo/bson"
"runtime/debug"
@@ -21,6 +22,7 @@ type Task struct {
Param string `json:"param" bson:"param"`
Error string `json:"error" bson:"error"`
ResultCount int `json:"result_count" bson:"result_count"`
ErrorLogCount int `json:"error_log_count" bson:"error_log_count"`
WaitDuration float64 `json:"wait_duration" bson:"wait_duration"`
RuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"`
TotalDuration float64 `json:"total_duration" bson:"total_duration"`
@@ -88,11 +90,9 @@ func (t *Task) GetResults(pageNum int, pageSize int) (results []interface{}, tot
return
}
if spider.Col == "" {
return
}
col := utils.GetSpiderCol(spider.Col, spider.Name)
s, c := database.GetCol(spider.Col)
s, c := database.GetCol(col)
defer s.Close()
query := bson.M{
@@ -109,17 +109,39 @@ func (t *Task) GetResults(pageNum int, pageSize int) (results []interface{}, tot
return
}
func (t *Task) GetLogItems() (logItems []LogItem, err error) {
func (t *Task) GetLogItems(keyword string, page int, pageSize int) (logItems []LogItem, logTotal int, err error) {
query := bson.M{
"task_id": t.Id,
}
logItems, err = GetLogItemList(query, 0, constants.Infinite, "+_id")
logTotal, err = GetLogItemTotal(query, keyword)
if err != nil {
return logItems, err
return logItems, logTotal, err
}
return logItems, nil
logItems, err = GetLogItemList(query, keyword, (page-1)*pageSize, pageSize, "+_id")
if err != nil {
return logItems, logTotal, err
}
return logItems, logTotal, nil
}
// GetErrorLogItems loads at most n documents from the "error_logs" collection
// whose task_id equals this task's id.
//
// A query failure is logged, the stack is printed, and the error is returned
// along with whatever items were decoded.
func (t *Task) GetErrorLogItems(n int) (errLogItems []ErrorLogItem, err error) {
	session, col := database.GetCol("error_logs")
	defer session.Close()

	filter := bson.M{"task_id": t.Id}
	findErr := col.Find(filter).Limit(n).All(&errLogItems)
	if findErr == nil {
		return errLogItems, nil
	}

	log.Errorf("find error logs error: " + findErr.Error())
	debug.PrintStack()
	return errLogItems, findErr
}
func GetTaskList(filter interface{}, skip int, limit int, sortKey string) ([]Task, error) {
@@ -365,8 +387,11 @@ func UpdateTaskResultCount(id string) (err error) {
return err
}
// default results collection
col := utils.GetSpiderCol(spider.Col, spider.Name)
// 获取结果数量
s, c := database.GetCol(spider.Col)
s, c := database.GetCol(col)
defer s.Close()
resultCount, err := c.Find(bson.M{"task_id": task.Id}).Count()
if err != nil {
@@ -385,6 +410,41 @@ func UpdateTaskResultCount(id string) (err error) {
return nil
}
// UpdateErrorLogCount counts the documents in "error_logs" that belong to the
// task with the given id and stores that number in the task's error_log_count
// field.
//
// Only error_log_count is written (via $set). Replacing the whole task document
// with a previously read copy could overwrite changes, such as a status update,
// saved by another goroutine in between.
//
// Returns the error from the count query or from the update (including the
// driver's not-found error when no task has this id).
func UpdateErrorLogCount(id string) (err error) {
	s, c := database.GetCol("error_logs")
	defer s.Close()

	count, err := c.Find(bson.M{"task_id": id}).Count()
	if err != nil {
		log.Errorf("update error log count error: %s", err.Error())
		debug.PrintStack()
		return err
	}

	st, ct := database.GetCol("tasks")
	defer st.Close()

	update := bson.M{"$set": bson.M{"error_log_count": count}}
	if err := ct.UpdateId(id, update); err != nil {
		log.Errorf("update error log count error: %s", err.Error())
		debug.PrintStack()
		return err
	}
	return nil
}
// convert all running tasks to abnormal tasks
func UpdateTaskToAbnormal(nodeId bson.ObjectId) error {
s, c := database.GetCol("tasks")
defer s.Close()
@@ -406,3 +466,45 @@ func UpdateTaskToAbnormal(nodeId bson.ObjectId) error {
}
return nil
}
// UpdateTaskErrorLogs scans the logs of the given task for error-like messages
// and records every match through AddErrorLogItem.
//
// errorRegexPattern is matched case-insensitively against each log message;
// when it is empty, constants.ErrorRegexPattern is used instead. The function
// returns the first error hit while querying the logs or storing an item.
func UpdateTaskErrorLogs(taskId string, errorRegexPattern string) error {
	s, c := database.GetCol("logs")
	defer s.Close()

	if errorRegexPattern == "" {
		errorRegexPattern = constants.ErrorRegexPattern
	}

	query := bson.M{
		"task_id": taskId,
		"msg": bson.M{
			"$regex": bson.RegEx{
				Pattern: errorRegexPattern,
				Options: "i",
			},
		},
	}
	var logs []LogItem
	if err := c.Find(query).All(&logs); err != nil {
		log.Errorf("find error logs error: %s", err.Error())
		debug.PrintStack()
		return err
	}

	for i := range logs {
		l := &logs[i]
		e := ErrorLogItem{
			Id:      bson.NewObjectId(),
			TaskId:  l.TaskId,
			Message: l.Message,
			LogId:   l.Id,
			Seq:     l.Seq,
			Ts:      time.Now(),
			// carry over the expiry of the source log instead of leaving it zero
			ExpireTs: l.ExpireTs,
		}
		if err := AddErrorLogItem(e); err != nil {
			return err
		}
	}

	return nil
}

80
backend/model/token.go Normal file
View File

@@ -0,0 +1,80 @@
package model
import (
"crawlab/database"
"github.com/apex/log"
"github.com/globalsign/mgo/bson"
"runtime/debug"
"time"
)
// Token is an API token record stored in the "tokens" collection.
type Token struct {
// Id is the document's _id.
Id bson.ObjectId `json:"_id" bson:"_id"`
// Token is the token string itself.
Token string `json:"token" bson:"token"`
// UserId is the id of the user the token was created for.
UserId bson.ObjectId `json:"user_id" bson:"user_id"`
// CreateTs and UpdateTs are timestamps; both are set to the current time when the token is created.
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
}
// Add inserts the token into the "tokens" collection.
// An insert failure is logged, the stack is printed, and the error is returned.
func (t *Token) Add() error {
	session, col := database.GetCol("tokens")
	defer session.Close()

	insertErr := col.Insert(t)
	if insertErr == nil {
		return nil
	}

	log.Errorf("insert token error: " + insertErr.Error())
	debug.PrintStack()
	return insertErr
}
// Delete removes the token with this token's Id from the "tokens" collection.
// A removal failure is logged, the stack is printed, and the error is returned.
func (t *Token) Delete() error {
	s, c := database.GetCol("tokens")
	defer s.Close()

	if err := c.RemoveId(t.Id); err != nil {
		// the message previously said "insert", which mislabelled delete failures
		log.Errorf("delete token error: %s", err.Error())
		debug.PrintStack()
		return err
	}

	return nil
}
// GetTokenById looks up the token whose _id equals id in the "tokens"
// collection and returns it together with any lookup error.
func GetTokenById(id bson.ObjectId) (t Token, err error) {
	session, col := database.GetCol("tokens")
	defer session.Close()

	err = col.FindId(id).One(&t)
	return t, err
}
// GetTokensByUserId returns every token in the "tokens" collection whose
// user_id equals uid. A query failure is logged, the stack is printed, and the
// error is returned.
func GetTokensByUserId(uid bson.ObjectId) (tokens []Token, err error) {
	session, col := database.GetCol("tokens")
	defer session.Close()

	owner := bson.M{"user_id": uid}
	err = col.Find(owner).All(&tokens)
	if err == nil {
		return tokens, nil
	}

	log.Errorf("find tokens error: " + err.Error())
	debug.PrintStack()
	return tokens, err
}
// DeleteTokenById loads the token with the given id and deletes it.
// It returns the lookup error if the token cannot be found, otherwise the
// result of the delete.
func DeleteTokenById(id bson.ObjectId) error {
	token, findErr := GetTokenById(id)
	if findErr != nil {
		return findErr
	}
	return token.Delete()
}

View File

@@ -29,6 +29,9 @@ type UserSetting struct {
DingTalkRobotWebhook string `json:"ding_talk_robot_webhook" bson:"ding_talk_robot_webhook"`
WechatRobotWebhook string `json:"wechat_robot_webhook" bson:"wechat_robot_webhook"`
EnabledNotifications []string `json:"enabled_notifications" bson:"enabled_notifications"`
ErrorRegexPattern string `json:"error_regex_pattern" bson:"error_regex_pattern"`
MaxErrorLog int `json:"max_error_log" bson:"max_error_log"`
LogExpireDuration int64 `json:"log_expire_duration" bson:"log_expire_duration"`
}
func (user *User) Save() error {

View File

@@ -234,13 +234,43 @@ func DeleteTask(c *gin.Context) {
}
func GetTaskLog(c *gin.Context) {
type RequestData struct {
PageNum int `form:"page_num"`
PageSize int `form:"page_size"`
Keyword string `form:"keyword"`
}
id := c.Param("id")
logItems, err := services.GetTaskLog(id)
var reqData RequestData
if err := c.ShouldBindQuery(&reqData); err != nil {
HandleErrorF(http.StatusBadRequest, c, "invalid request")
return
}
logItems, logTotal, err := services.GetTaskLog(id, reqData.Keyword, reqData.PageNum, reqData.PageSize)
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
HandleSuccessData(c, logItems)
c.JSON(http.StatusOK, ListResponse{
Status: "ok",
Message: "success",
Data: logItems,
Total: logTotal,
})
}
// GetTaskErrorLog handles GET /tasks/:id/error-log. It responds with the error
// log items of the task, limited by the current user's MaxErrorLog setting, or
// with a 500 error when they cannot be loaded.
func GetTaskErrorLog(c *gin.Context) {
	taskId := c.Param("id")
	currentUser := services.GetCurrentUser(c)

	items, err := services.GetTaskErrorLog(taskId, currentUser.Setting.MaxErrorLog)
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	resp := Response{
		Status:  "ok",
		Message: "success",
		Data:    items,
	}
	c.JSON(http.StatusOK, resp)
}
func GetTaskResults(c *gin.Context) {
@@ -364,4 +394,4 @@ func RestartTask(c *gin.Context) {
return
}
HandleSuccess(c)
}
}

68
backend/routes/token.go Normal file
View File

@@ -0,0 +1,68 @@
package routes
import (
"crawlab/model"
"crawlab/services"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo/bson"
"net/http"
"time"
)
// GetTokens handles GET /tokens. It responds with the tokens that belong to the
// current user, or with a 500 error when they cannot be loaded.
func GetTokens(c *gin.Context) {
	currentUser := services.GetCurrentUser(c)

	userTokens, err := model.GetTokensByUserId(currentUser.Id)
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	resp := Response{
		Status:  "ok",
		Message: "success",
		Data:    userTokens,
	}
	c.JSON(http.StatusOK, resp)
}
// PutToken handles PUT /tokens. It makes a token string for the current user,
// stores it as a new Token record, and responds with a success status. Any
// failure is reported as a 500 error.
func PutToken(c *gin.Context) {
	currentUser := services.GetCurrentUser(c)

	signed, err := services.MakeToken(currentUser)
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	record := model.Token{
		Id:       bson.NewObjectId(),
		Token:    signed,
		UserId:   currentUser.Id,
		CreateTs: time.Now(),
		UpdateTs: time.Now(),
	}
	if addErr := record.Add(); addErr != nil {
		HandleError(http.StatusInternalServerError, c, addErr)
		return
	}

	resp := Response{
		Status:  "ok",
		Message: "success",
	}
	c.JSON(http.StatusOK, resp)
}
// DeleteToken handles DELETE /tokens/:id. It deletes the token with the given
// id and responds with a success status.
//
// A malformed id is rejected with 400; a lookup or delete failure is reported
// as a 500 error.
//
// NOTE(review): there is no check that the token belongs to the current user —
// confirm whether deleting another user's token by id is intended.
func DeleteToken(c *gin.Context) {
	id := c.Param("id")

	// bson.ObjectIdHex panics on anything that is not a valid hex ObjectId,
	// so validate the path parameter before converting it.
	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	if err := model.DeleteTokenById(bson.ObjectIdHex(id)); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}

View File

@@ -98,6 +98,11 @@ func PutUser(c *gin.Context) {
// UserId
uid := services.GetCurrentUserId(c)
// 空 UserId 处理
if uid == "" {
uid = bson.ObjectIdHex(constants.ObjectIdNull)
}
// 添加用户
if err := services.CreateNewUser(reqData.Username, reqData.Password, reqData.Role, reqData.Email, uid); err != nil {
HandleError(http.StatusInternalServerError, c, err)
@@ -237,6 +242,11 @@ func PostMe(c *gin.Context) {
user.Setting.WechatRobotWebhook = reqBody.Setting.WechatRobotWebhook
}
user.Setting.EnabledNotifications = reqBody.Setting.EnabledNotifications
user.Setting.ErrorRegexPattern = reqBody.Setting.ErrorRegexPattern
if reqBody.Setting.MaxErrorLog != 0 {
user.Setting.MaxErrorLog = reqBody.Setting.MaxErrorLog
}
user.Setting.LogExpireDuration = reqBody.Setting.LogExpireDuration
if user.UserId.Hex() == "" {
user.UserId = bson.ObjectIdHex(constants.ObjectIdNull)

View File

@@ -12,16 +12,16 @@ BASE_DIR=`dirname $0`
export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" # This loads nvm
# install Node.js v8.12
# install Node.js v10.19
export NVM_NODEJS_ORG_MIRROR=http://npm.taobao.org/mirrors/node
nvm install 8.12
nvm install 10.19
# create soft links
ln -s $HOME/.nvm/versions/node/v8.12.0/bin/npm /usr/local/bin/npm
ln -s $HOME/.nvm/versions/node/v8.12.0/bin/node /usr/local/bin/node
ln -s $HOME/.nvm/versions/node/v10.19.0/bin/npm /usr/local/bin/npm
ln -s $HOME/.nvm/versions/node/v10.19.0/bin/node /usr/local/bin/node
# environments manipulation
export NODE_PATH=$HOME.nvm/versions/node/v8.12.0/lib/node_modules
export NODE_PATH=$HOME/.nvm/versions/node/v10.19.0/lib/node_modules
export PATH=$NODE_PATH:$PATH
# install chromium

View File

@@ -7,7 +7,6 @@ import (
"crawlab/lib/cron"
"crawlab/model"
"crawlab/utils"
"encoding/json"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
@@ -22,41 +21,6 @@ import (
// 任务日志频道映射
var TaskLogChanMap = utils.NewChanMap()
// 获取远端日志
func GetRemoteLog(task model.Task) (logStr string, err error) {
// 序列化消息
msg := entity.NodeMessage{
Type: constants.MsgTypeGetLog,
LogPath: task.LogPath,
TaskId: task.Id,
}
msgBytes, err := json.Marshal(&msg)
if err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return "", err
}
// 发布获取日志消息
channel := "nodes:" + task.NodeId.Hex()
if _, err := database.RedisClient.Publish(channel, utils.BytesToString(msgBytes)); err != nil {
log.Errorf(err.Error())
return "", err
}
// 生成频道等待获取log
ch := TaskLogChanMap.ChanBlocked(task.Id)
select {
case logStr = <-ch:
log.Infof("get remote log")
case <-time.After(30 * time.Second):
logStr = "get remote log timeout"
}
return logStr, nil
}
// 定时删除日志
func DeleteLogPeriodically() {
logDir := viper.GetString("log.path")
@@ -168,10 +132,31 @@ func InitDeleteLogPeriodically() error {
func InitLogIndexes() error {
s, c := database.GetCol("logs")
defer s.Close()
se, ce := database.GetCol("error_logs")
defer s.Close()
defer se.Close()
_ = c.EnsureIndexKey("task_id")
_ = c.EnsureIndex(mgo.Index{
Key: []string{"$text:msg"},
Key: []string{"task_id", "seq"},
})
_ = c.EnsureIndex(mgo.Index{
Key: []string{"task_id", "msg"},
})
_ = c.EnsureIndex(mgo.Index{
Key: []string{"expire_ts"},
Sparse: true,
ExpireAfter: 0 * time.Second,
})
_ = ce.EnsureIndex(mgo.Index{
Key: []string{"task_id"},
})
_ = ce.EnsureIndex(mgo.Index{
Key: []string{"log_id"},
})
_ = ce.EnsureIndex(mgo.Index{
Key: []string{"expire_ts"},
Sparse: true,
ExpireAfter: 0 * time.Second,
})
return nil

View File

@@ -218,6 +218,9 @@ func PublishSpider(spider model.Spider) {
Spider: spider,
}
// 安装依赖
go spiderSync.InstallDeps()
//目录不存在,则直接下载
path := filepath.Join(viper.GetString("spider.path"), spider.Name)
if !utils.Exists(path) {
@@ -434,7 +437,9 @@ func CopySpider(spider model.Spider, newName string) error {
}
func UpdateSpiderDedup(spider model.Spider) error {
s, c := database.GetCol(spider.Col)
col := utils.GetSpiderCol(spider.Col, spider.Name)
s, c := database.GetCol(col)
defer s.Close()
if !spider.IsDedup {

View File

@@ -16,6 +16,8 @@ import (
"path"
"path/filepath"
"runtime/debug"
"strings"
"sync"
)
const (
@@ -183,3 +185,63 @@ func (s *SpiderSync) Download() {
_ = database.RedisClient.HDel("spider", key)
}
// locks for dependency installation
var installLockMap sync.Map
// InstallDeps installs the spider's dependencies for every language returned by
// utils.GetLangList that names a dependency file, has that file in the spider's
// source directory, and has its dependency executable available.
//
// A spider/language pair that is already being installed is skipped. The
// per-pair lock is always released when the attempt ends, including when the
// Node.js dependency list cannot be read. Failures are logged, not returned.
func (s *SpiderSync) InstallDeps() {
	for _, l := range utils.GetLangList() {
		// no dep file name is found, skip
		if l.DepFileName == "" {
			continue
		}

		// no dep file found, skip
		depFilePath := path.Join(s.Spider.Src, l.DepFileName)
		if !utils.Exists(depFilePath) {
			continue
		}

		// no dep install executable found, skip
		if !utils.Exists(l.DepExecutablePath) {
			continue
		}

		// Take the lock atomically; a separate Load followed by Store lets two
		// goroutines both see "unlocked" and run the same installation.
		key := s.Spider.Name + "|" + l.Name
		if _, locked := installLockMap.LoadOrStore(key, true); locked {
			continue
		}

		func() {
			// unlock on every exit path
			defer installLockMap.Delete(key)

			// Fields (unlike Split on " ") never yields empty arguments.
			args := strings.Fields(l.InstallDepArgs)

			// compatibility with node.js: pass the package names explicitly
			if l.ExecutableName == constants.Nodejs {
				deps, err := utils.GetPackageJsonDeps(depFilePath)
				if err != nil {
					log.Errorf("install dep error: %s", err.Error())
					return
				}
				if len(deps) == 0 {
					// nothing to install
					return
				}
				args = append(args, deps...)
			}

			// command to install dependencies, run from the spider's source directory
			cmd := exec.Command(l.DepExecutablePath, args...)
			cmd.Dir = s.Spider.Src

			// start executing command
			output, err := cmd.Output()
			if err != nil {
				log.Errorf("install dep error: %s", err.Error())
				log.Errorf("%s", string(output))
				debug.PrintStack()
			}
		}()
	}
}

View File

@@ -15,8 +15,10 @@ import (
"fmt"
"github.com/apex/log"
"github.com/globalsign/mgo/bson"
"github.com/imroc/req"
uuid "github.com/satori/go.uuid"
"github.com/spf13/viper"
"net/http"
"os"
"os/exec"
"path"
@@ -113,16 +115,19 @@ func SetEnv(cmd *exec.Cmd, envs []model.Env, task model.Task, spider model.Spide
// 默认把Node.js的全局node_modules加入环境变量
envPath := os.Getenv("PATH")
homePath := os.Getenv("HOME")
nodeVersion := "v8.12.0"
nodeVersion := "v10.19.0"
nodePath := path.Join(homePath, ".nvm/versions/node", nodeVersion, "lib/node_modules")
if !strings.Contains(envPath, nodePath) {
_ = os.Setenv("PATH", nodePath+":"+envPath)
}
_ = os.Setenv("NODE_PATH", nodePath)
// default results collection
col := utils.GetSpiderCol(spider.Col, spider.Name)
// 默认环境变量
cmd.Env = append(os.Environ(), "CRAWLAB_TASK_ID="+task.Id)
cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+spider.Col)
cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+col)
cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_HOST="+viper.GetString("mongo.host"))
cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_PORT="+viper.GetString("mongo.port"))
if viper.GetString("mongo.db") != "" {
@@ -161,16 +166,7 @@ func SetEnv(cmd *exec.Cmd, envs []model.Env, task model.Task, spider model.Spide
return cmd
}
func SetLogConfig(cmd *exec.Cmd, t model.Task) error {
//fLog, err := os.Create(path)
//if err != nil {
// log.Errorf("create task log file error: %s", path)
// debug.PrintStack()
// return err
//}
//cmd.Stdout = fLog
//cmd.Stderr = fLog
func SetLogConfig(cmd *exec.Cmd, t model.Task, u model.User) error {
// get stdout reader
stdout, err := cmd.StdoutPipe()
readerStdout := bufio.NewReader(stdout)
@@ -189,21 +185,49 @@ func SetLogConfig(cmd *exec.Cmd, t model.Task) error {
return err
}
var seq int64
var logs []model.LogItem
isStdoutFinished := false
isStderrFinished := false
// periodically (every 5 sec) insert log items
go func() {
for {
_ = model.AddLogItems(logs)
logs = []model.LogItem{}
if isStdoutFinished && isStderrFinished {
break
}
time.Sleep(5 * time.Second)
}
}()
// expire duration (in seconds)
expireDuration := u.Setting.LogExpireDuration
if expireDuration == 0 {
// by default not expire
expireDuration = constants.Infinite
}
// read stdout
go func() {
for {
line, err := readerStdout.ReadString('\n')
if err != nil {
isStdoutFinished = true
break
}
line = strings.Replace(line, "\n", "", -1)
_ = model.AddLogItem(model.LogItem{
Id: bson.NewObjectId(),
Message: line,
TaskId: t.Id,
IsError: false,
Ts: time.Now(),
})
seq++
l := model.LogItem{
Id: bson.NewObjectId(),
Seq: seq,
Message: line,
TaskId: t.Id,
Ts: time.Now(),
ExpireTs: time.Now().Add(time.Duration(expireDuration) * time.Second),
}
logs = append(logs, l)
}
}()
@@ -211,24 +235,28 @@ func SetLogConfig(cmd *exec.Cmd, t model.Task) error {
go func() {
for {
line, err := readerStderr.ReadString('\n')
line = strings.Replace(line, "\n", "", -1)
if err != nil {
isStderrFinished = true
break
}
_ = model.AddLogItem(model.LogItem{
Id: bson.NewObjectId(),
Message: line,
TaskId: t.Id,
IsError: true,
Ts: time.Now(),
})
line = strings.Replace(line, "\n", "", -1)
seq++
l := model.LogItem{
Id: bson.NewObjectId(),
Seq: seq,
Message: line,
TaskId: t.Id,
Ts: time.Now(),
ExpireTs: time.Now().Add(time.Duration(expireDuration) * time.Second),
}
logs = append(logs, l)
}
}()
return nil
}
func FinishOrCancelTask(ch chan string, cmd *exec.Cmd, t model.Task) {
func FinishOrCancelTask(ch chan string, cmd *exec.Cmd, s model.Spider, t model.Task) {
// 传入信号,此处阻塞
signal := <-ch
log.Infof("process received signal: %s", signal)
@@ -259,6 +287,8 @@ func FinishOrCancelTask(ch chan string, cmd *exec.Cmd, t model.Task) {
t.FinishTs = time.Now()
_ = t.Save()
go FinishUpTask(s, t)
}
func StartTaskProcess(cmd *exec.Cmd, t model.Task) error {
@@ -275,7 +305,7 @@ func StartTaskProcess(cmd *exec.Cmd, t model.Task) error {
return nil
}
func WaitTaskProcess(cmd *exec.Cmd, t model.Task) error {
func WaitTaskProcess(cmd *exec.Cmd, t model.Task, s model.Spider) error {
if err := cmd.Wait(); err != nil {
log.Errorf("wait process finish error: %s", err.Error())
debug.PrintStack()
@@ -291,16 +321,19 @@ func WaitTaskProcess(cmd *exec.Cmd, t model.Task) error {
t.FinishTs = time.Now()
t.Status = constants.StatusError
_ = t.Save()
FinishUpTask(s, t)
}
}
return err
}
return nil
}
// 执行shell命令
func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (err error) {
func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider, u model.User) (err error) {
log.Infof("cwd: %s", cwd)
log.Infof("cmd: %s", cmdStr)
@@ -316,7 +349,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
cmd.Dir = cwd
// 日志配置
if err := SetLogConfig(cmd, t); err != nil {
if err := SetLogConfig(cmd, t, u); err != nil {
return err
}
@@ -341,7 +374,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
// 起一个goroutine来监控进程
ch := utils.TaskExecChanMap.ChanBlocked(t.Id)
go FinishOrCancelTask(ch, cmd, t)
go FinishOrCancelTask(ch, cmd, s, t)
// kill的时候可以kill所有的子进程
if runtime.GOOS != constants.Windows {
@@ -354,7 +387,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
}
// 同步等待进程完成
if err := WaitTaskProcess(cmd, t); err != nil {
if err := WaitTaskProcess(cmd, t, s); err != nil {
return err
}
ch <- constants.TaskFinish
@@ -412,6 +445,22 @@ func SaveTaskResultCount(id string) func() {
}
}
// ScanErrorLogs returns a job that refreshes the error logs of task t: it loads
// the task's user, rebuilds the task's error logs using that user's
// ErrorRegexPattern setting, and then updates the task's error log count.
// The job stops silently at the first step that fails.
func ScanErrorLogs(t model.Task) func() {
	scan := func() {
		owner, err := model.GetUser(t.UserId)
		if err != nil {
			return
		}
		if model.UpdateTaskErrorLogs(t.Id, owner.Setting.ErrorRegexPattern) != nil {
			return
		}
		_ = model.UpdateErrorLogCount(t.Id)
	}
	return scan
}
// 执行任务
func ExecuteTask(id int) {
if flag, ok := LockList.Load(id); ok {
@@ -508,12 +557,22 @@ func ExecuteTask(id int) {
cmd += " " + t.Param
}
// 获得触发任务用户
user, err := model.GetUser(t.UserId)
if err != nil {
log.Errorf(GetWorkerPrefix(id) + err.Error())
return
}
// 任务赋值
t.NodeId = node.Id // 任务节点信息
t.StartTs = time.Now() // 任务开始时间
t.Status = constants.StatusRunning // 任务状态
t.WaitDuration = t.StartTs.Sub(t.CreateTs).Seconds() // 等待时长
// 发送 Web Hook 请求 (任务开始)
go SendWebHookRequest(user, t, spider)
// 文件检查
if err := SpiderFileCheck(t, spider); err != nil {
log.Errorf("spider file check error: %s", err.Error())
@@ -527,26 +586,29 @@ func ExecuteTask(id int) {
_ = t.Save()
// 起一个cron执行器来统计任务结果数
if spider.Col != "" {
cronExec := cron.New(cron.WithSeconds())
_, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id))
if err != nil {
log.Errorf(GetWorkerPrefix(id) + err.Error())
return
}
cronExec.Start()
defer cronExec.Stop()
}
// 获得触发任务用户
user, err := model.GetUser(t.UserId)
cronExec := cron.New(cron.WithSeconds())
_, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id))
if err != nil {
log.Errorf(GetWorkerPrefix(id) + err.Error())
debug.PrintStack()
return
}
cronExec.Start()
defer cronExec.Stop()
// 起一个cron来更新错误日志
cronExecErrLog := cron.New(cron.WithSeconds())
_, err = cronExecErrLog.AddFunc("*/30 * * * * *", ScanErrorLogs(t))
if err != nil {
log.Errorf(GetWorkerPrefix(id) + err.Error())
debug.PrintStack()
return
}
cronExecErrLog.Start()
defer cronExecErrLog.Stop()
// 执行Shell命令
if err := ExecuteShellCmd(cmd, cwd, t, spider); err != nil {
if err := ExecuteShellCmd(cmd, cwd, t, spider, user); err != nil {
log.Errorf(GetWorkerPrefix(id) + err.Error())
// 如果发生错误,则发送通知
@@ -554,16 +616,15 @@ func ExecuteTask(id int) {
if user.Setting.NotificationTrigger == constants.NotificationTriggerOnTaskEnd || user.Setting.NotificationTrigger == constants.NotificationTriggerOnTaskError {
SendNotifications(user, t, spider)
}
// send Web Hook request (task failed)
go SendWebHookRequest(user, t, spider)
return
}
// 更新任务结果数
if spider.Col != "" {
if err := model.UpdateTaskResultCount(t.Id); err != nil {
log.Errorf(GetWorkerPrefix(id) + err.Error())
return
}
}
// 完成任务收尾工作
go FinishUpTask(spider, t)
// 完成进程
t, err = model.GetTask(t.Id)
@@ -578,6 +639,9 @@ func ExecuteTask(id int) {
t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长
t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长
// 发送 Web Hook 请求 (任务结束)
go SendWebHookRequest(user, t, spider)
// 如果是任务结束时发送通知,则发送通知
if user.Setting.NotificationTrigger == constants.NotificationTriggerOnTaskEnd {
SendNotifications(user, t, spider)
@@ -598,6 +662,20 @@ func ExecuteTask(id int) {
log.Infof(GetWorkerPrefix(id) + "task (id:" + t.Id + ")" + " finished. elapsed:" + durationStr + " sec")
}
// FinishUpTask starts the wrap-up work for task t in two background goroutines:
// one updates the task's result count, the other scans its error logs.
// Errors from either step are ignored. The spider argument is not used.
func FinishUpTask(s model.Spider, t model.Task) {
	// update the task's result count
	go func() {
		_ = model.UpdateTaskResultCount(t.Id)
	}()

	// update the task's error logs
	go ScanErrorLogs(t)()
}
func SpiderFileCheck(t model.Task, spider model.Spider) error {
// 判断爬虫文件是否存在
gfFile := model.GetGridFs(spider.FileId)
@@ -622,60 +700,34 @@ func SpiderFileCheck(t model.Task, spider model.Spider) error {
return nil
}
func GetTaskLog(id string) (logItems []model.LogItem, err error) {
func GetTaskLog(id string, keyword string, page int, pageSize int) (logItems []model.LogItem, logTotal int, err error) {
task, err := model.GetTask(id)
if err != nil {
return
}
logItems, err = task.GetLogItems()
logItems, logTotal, err = task.GetLogItems(keyword, page, pageSize)
if err != nil {
return logItems, err
return logItems, logTotal, err
}
return logItems, nil
return logItems, logTotal, nil
}
//if IsMasterNode(task.NodeId.Hex()) {
// if !utils.Exists(task.LogPath) {
// fileDir, err := MakeLogDir(task)
//
// if err != nil {
// log.Errorf(err.Error())
// }
//
// fileP := GetLogFilePaths(fileDir, task)
//
// // 获取日志文件路径
// fLog, err := os.Create(fileP)
// defer fLog.Close()
// if err != nil {
// log.Errorf("create task log file error: %s", fileP)
// debug.PrintStack()
// }
// task.LogPath = fileP
// if err := task.Save(); err != nil {
// log.Errorf(err.Error())
// debug.PrintStack()
// }
//
// }
// // 若为主节点,获取本机日志
// logBytes, err := model.GetLocalLog(task.LogPath)
// if err != nil {
// log.Errorf(err.Error())
// logStr = err.Error()
// } else {
// logStr = utils.BytesToString(logBytes)
// }
// return logStr, err
//}
//// 若不为主节点,获取远端日志
//logStr, err = GetRemoteLog(task)
//if err != nil {
// log.Errorf(err.Error())
//
//}
//return logStr, err
// GetTaskErrorLog loads the task with the given id and returns its error log
// items as provided by task.GetErrorLogItems.
//
// n is passed to GetErrorLogItems as the item limit; a value of 0 is replaced
// by the default of 1000. If the task cannot be loaded, the returned slice is
// nil and the lookup error is returned.
func GetTaskErrorLog(id string, n int) (errLogItems []model.ErrorLogItem, err error) {
	// fall back to the default limit when the caller did not pick one
	limit := n
	if limit == 0 {
		limit = 1000
	}

	t, getErr := model.GetTask(id)
	if getErr != nil {
		return nil, getErr
	}

	items, listErr := t.GetErrorLogItems(limit)
	if listErr != nil {
		return items, listErr
	}
	return items, nil
}
func CancelTask(id string) (err error) {
@@ -953,6 +1005,44 @@ func SendNotifications(u model.User, t model.Task, s model.Spider) {
}
}
// SendWebHookRequest notifies the Web Hook endpoint of a spider about the
// current state of a task.
//
// Nothing is sent unless the spider has the Web Hook switched on
// (s.IsWebHook) and s.WebHookUrl is non-empty. Otherwise a JSON document
// holding the task status, the task, the spider and the user name is POSTed
// to s.WebHookUrl. The function returns nothing: failures are only logged.
func SendWebHookRequest(u model.User, t model.Task, s model.Spider) {
	// RequestBody is the JSON payload delivered to the Web Hook URL.
	type RequestBody struct {
		Status   string       `json:"status"`
		Task     model.Task   `json:"task"`
		Spider   model.Spider `json:"spider"`
		UserName string       `json:"user_name"`
	}

	if !s.IsWebHook || s.WebHookUrl == "" {
		return
	}

	// request header
	header := req.Header{
		"Content-Type": "application/json; charset=utf-8",
	}

	// request body
	reqBody := RequestBody{
		Status:   t.Status,
		UserName: u.Username,
		Task:     t,
		Spider:   s,
	}

	// make POST http request
	res, err := req.Post(s.WebHookUrl, header, req.BodyJSON(reqBody))
	if err != nil {
		// The error text is passed as an argument, never as the format
		// string, so a '%' inside it (e.g. from the URL) is printed verbatim.
		log.Errorf("sent web hook request with error: %s", err.Error())
		debug.PrintStack()
		return
	}

	// task details shared by the log messages below
	summary := fmt.Sprintf("task_id: %s, status: %s", t.Id, t.Status)

	// Every 2xx status counts as delivered; receivers commonly answer a
	// hook with 201/202/204 rather than exactly 200.
	code := res.Response().StatusCode
	if code < http.StatusOK || code >= http.StatusMultipleChoices {
		log.Errorf("sent web hook request with error http code: %d, %s", code, summary)
		debug.PrintStack()
		return
	}
	log.Infof("sent web hook request, %s", summary)
}
func InitTaskExecutor() error {
// 构造任务执行器
c := cron.New(cron.WithSeconds())

View File

@@ -114,6 +114,9 @@ func CreateNewUser(username string, password string, role string, email string,
// GetCurrentUser returns the user stored in the gin context under
// constants.ContextUser.
//
// The result is never nil: when the key is missing, or the stored value is
// not a non-nil *model.User, an empty model.User is returned instead.
func GetCurrentUser(c *gin.Context) *model.User {
	data, _ := c.Get(constants.ContextUser)

	// The comma-ok assertion covers a missing key (nil interface) and a value
	// of an unexpected type without panicking; the nil check covers a typed
	// nil pointer stored in the context.
	user, ok := data.(*model.User)
	if !ok || user == nil {
		return &model.User{}
	}
	return user
}

8
backend/utils/spider.go Normal file
View File

@@ -0,0 +1,8 @@
package utils
// GetSpiderCol returns the name of the results collection for a spider.
// An explicitly configured col is returned unchanged; when col is empty the
// default name "results_<name>" is built from the spider name.
func GetSpiderCol(col string, name string) string {
	if col != "" {
		return col
	}
	return "results_" + name
}

View File

@@ -1,6 +1,12 @@
package utils
import "crawlab/entity"
import (
"crawlab/entity"
"encoding/json"
"github.com/apex/log"
"io/ioutil"
"runtime/debug"
)
func GetLangList() []entity.Lang {
list := []entity.Lang{
@@ -10,6 +16,8 @@ func GetLangList() []entity.Lang {
ExecutablePaths: []string{"/usr/bin/python", "/usr/local/bin/python"},
DepExecutablePath: "/usr/local/bin/pip",
LockPath: "/tmp/install-python.lock",
DepFileName: "requirements.txt",
InstallDepArgs: "install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt",
},
{
Name: "Node.js",
@@ -18,6 +26,8 @@ func GetLangList() []entity.Lang {
DepExecutablePath: "/usr/local/bin/npm",
LockPath: "/tmp/install-nodejs.lock",
InstallScript: "install-nodejs.sh",
DepFileName: "package.json",
InstallDepArgs: "install -g --registry=https://registry.npm.taobao.org",
},
{
Name: "Java",
@@ -60,3 +70,24 @@ func GetLangFromLangNamePlain(name string) entity.Lang {
}
return entity.Lang{}
}
// GetPackageJsonDeps reads the file at filepath, decodes it as JSON into an
// entity.PackageJson and returns one "<name>@<version>" string per entry of
// its Dependencies map.
//
// The entries come from a map iteration, so their order is unspecified and
// may differ between calls. When the file cannot be read or is not valid
// JSON, the error is logged and returned together with a nil slice.
func GetPackageJsonDeps(filepath string) (deps []string, err error) {
	data, err := ioutil.ReadFile(filepath)
	if err != nil {
		// The error text is passed as an argument rather than spliced into
		// the format string, so a '%' in the path is logged verbatim.
		log.Errorf("get package.json deps error: %s", err.Error())
		debug.PrintStack()
		return deps, err
	}

	var packageJson entity.PackageJson
	if err := json.Unmarshal(data, &packageJson); err != nil {
		log.Errorf("get package.json deps error: %s", err.Error())
		debug.PrintStack()
		return deps, err
	}

	for name, version := range packageJson.Dependencies {
		deps = append(deps, name+"@"+version)
	}
	return deps, nil
}

View File

@@ -1,6 +1,6 @@
{
"name": "crawlab",
"version": "0.4.9",
"version": "0.4.10",
"private": true,
"scripts": {
"serve": "vue-cli-service serve --ip=0.0.0.0 --mode=development",

View File

@@ -48,6 +48,9 @@ export default {
// get latest version
await this.$store.dispatch('version/getLatestRelease')
// get user info
await this.$store.dispatch('user/getInfo')
// remove loading-placeholder
const elLoading = document.querySelector('#loading-placeholder')
elLoading.remove()

View File

@@ -45,10 +45,10 @@
/>
</el-form-item>
</template>
<el-form-item :label="$t('Results Collection')" prop="col" required>
<el-form-item :label="$t('Results Collection')" prop="col">
<el-input
v-model="spiderForm.col"
:placeholder="$t('Results Collection')"
:placeholder="$t('By default: ') + 'results_<spider_name>'"
:disabled="isView || isPublic"
/>
</el-form-item>
@@ -96,7 +96,7 @@
</el-form-item>
</el-col>
</el-row>
<el-form-item v-if="!isView && !isConfigurable" :label="$t('Is De-Duplicated')" prop="dedup_field"
<el-form-item v-if="!isView" :label="$t('De-Duplication')" prop="dedup_field"
:rules="dedupRules">
<div style="display: flex; align-items: center; height: 40px">
<el-switch
@@ -123,6 +123,21 @@
/>
</div>
</el-form-item>
<el-form-item v-if="!isView" label="Web Hook">
<div style="display: flex; align-items: center; height: 40px">
<el-switch
v-model="spiderForm.is_web_hook"
active-color="#13ce66"
:disabled="isView || isPublic"
/>
<el-input
v-if="spiderForm.is_web_hook"
v-model="spiderForm.web_hook_url"
:placeholder="$t('Please enter Web Hook URL')"
style="margin-left: 20px"
/>
</div>
</el-form-item>
<el-row>
<el-col :span="6">
<el-form-item v-if="!isView" :label="$t('Is Public')" prop="is_public">

View File

@@ -12,8 +12,8 @@
<el-form-item :label="$t('Status')">
<status-tag :status="taskForm.status"/>
<el-badge
v-if="errorLogData.length > 0"
:value="errorLogData.length"
v-if="taskForm.error_log_count > 0"
:value="taskForm.error_log_count"
style="margin-left:10px; cursor:pointer;"
>
<el-tag type="danger" @click="onClickLogWithErrors">
@@ -79,8 +79,7 @@
<script>
import {
mapState,
mapGetters
mapState
} from 'vuex'
import StatusTag from '../Status/StatusTag'
import dayjs from 'dayjs'
@@ -91,9 +90,7 @@ export default {
computed: {
...mapState('task', [
'taskForm',
'taskLog'
]),
...mapGetters('task', [
'taskLog',
'errorLogData'
]),
isRunning () {

View File

@@ -13,6 +13,10 @@
</template>
<script>
import {
mapGetters
} from 'vuex'
export default {
name: 'LogItem',
props: {
@@ -45,10 +49,19 @@ export default {
},
data () {
return {
errorRegex: this.$utils.log.errorRegex
}
},
computed: {
...mapGetters('user', [
'userInfo'
]),
errorRegex () {
if (!this.userInfo.setting.error_regex_pattern) {
return this.$utils.log.errorRegex
}
console.log(this.userInfo.setting.error_regex_pattern)
return new RegExp(this.userInfo.setting.error_regex_pattern, 'i')
},
dataHtml () {
let html = this.data.replace(this.errorRegex, ' <span style="font-weight: bolder; text-decoration: underline">$1</span> ')
if (!this.searchString) return html

View File

@@ -2,25 +2,45 @@
<div class="log-view-container">
<div class="filter-wrapper">
<div class="left">
<el-button
size="small"
type="primary"
icon="el-icon-download"
<el-switch
v-model="isLogAutoScroll"
:inactive-text="$t('Auto-Scroll')"
style="margin-right: 10px"
:disabled="isToBottom"
@click="onAutoScroll"
>
{{$t('Auto-Scroll')}}
</el-button>
</el-switch>
<!-- <el-switch-->
<!-- v-model="isLogAutoFetch"-->
<!-- :inactive-text="$t('Auto-Refresh')"-->
<!-- style="margin-right: 10px"-->
<!-- >-->
<!-- </el-switch>-->
<el-input
v-model="searchString"
v-model="logKeyword"
size="small"
suffix-icon="el-icon-search"
:placeholder="$t('Search Log')"
style="width: 240px; margin-right: 10px"
clearable
/>
<el-button
size="small"
type="primary"
icon="el-icon-search"
@click="onSearchLog"
>
{{$t('Search Log')}}
</el-button>
</div>
<div class="right">
<el-pagination
size="small"
:total="taskLogTotal"
:current-page.sync="taskLogPage"
:page-sizes="[1000, 2000, 5000, 10000]"
:page-size.sync="taskLogPageSize"
:pager-count="3"
layout="sizes, prev, pager, next"
/>
<el-badge
v-if="errorLogData.length > 0"
:value="errorLogData.length"
@@ -38,6 +58,7 @@
</div>
<div class="content">
<div
v-loading="isLogFetchLoading"
class="log-view-wrapper"
:class="isErrorsCollapsed ? 'errors-collapsed' : ''"
>
@@ -68,11 +89,8 @@
:class="currentLogIndex === item.index ? 'active' : ''"
@click="onClickError(item)"
>
<span class="line-no">
{{item.index}}
</span>
<span class="line-content">
{{item.data}}
{{item.msg}}
</span>
</li>
</ul>
@@ -108,7 +126,6 @@ export default {
return {
item: LogItem,
searchString: '',
isToBottom: false,
isScrolling: false,
isScrolling2nd: false,
errorRegex: this.$utils.log.errorRegex,
@@ -119,11 +136,14 @@ export default {
},
computed: {
...mapState('task', [
'taskForm'
'taskForm',
'taskLogTotal',
'logKeyword',
'isLogFetchLoading',
'errorLogData'
]),
...mapGetters('task', [
'logData',
'errorLogData'
'logData'
]),
currentLogIndex: {
get () {
@@ -133,6 +153,54 @@ export default {
this.$store.commit('task/SET_CURRENT_LOG_INDEX', value)
}
},
logKeyword: {
get () {
return this.$store.state.task.logKeyword
},
set (value) {
this.$store.commit('task/SET_LOG_KEYWORD', value)
}
},
taskLogPage: {
get () {
return this.$store.state.task.taskLogPage
},
set (value) {
this.$store.commit('task/SET_TASK_LOG_PAGE', value)
}
},
taskLogPageSize: {
get () {
return this.$store.state.task.taskLogPageSize
},
set (value) {
this.$store.commit('task/SET_TASK_LOG_PAGE_SIZE', value)
}
},
isLogAutoScroll: {
get () {
return this.$store.state.task.isLogAutoScroll
},
set (value) {
this.$store.commit('task/SET_IS_LOG_AUTO_SCROLL', value)
}
},
isLogAutoFetch: {
get () {
return this.$store.state.task.isLogAutoFetch
},
set (value) {
this.$store.commit('task/SET_IS_LOG_AUTO_FETCH', value)
}
},
isLogFetchLoading: {
get () {
return this.$store.state.task.isLogFetchLoading
},
set (value) {
this.$store.commit('task/SET_IS_LOG_FETCH_LOADING', value)
}
},
filteredLogData () {
return this.logData.filter(d => {
if (!this.searchString) return true
@@ -145,8 +213,26 @@ export default {
}
},
watch: {
searchString () {
this.$st.sendEv('任务详情', '日志', '搜索日志')
taskLogPage () {
this.$emit('search')
this.$st.sendEv('任务详情', '日志', '改变页数')
},
taskLogPageSize () {
this.$emit('search')
this.$st.sendEv('任务详情', '日志', '改变日志每页条数')
},
isLogAutoScroll () {
if (this.isLogAutoScroll) {
this.$store.dispatch('task/getTaskLog', {
id: this.$route.params.id,
keyword: this.logKeyword
}).then(() => {
this.toBottom()
})
this.$st.sendEv('任务详情', '日志', '点击自动滚动')
} else {
this.$st.sendEv('任务详情', '日志', '取消自动滚动')
}
}
},
methods: {
@@ -160,40 +246,18 @@ export default {
index: logItem.index,
logItem,
data: isAnsi ? convert.toHtml(logItem.data) : logItem.data,
searchString: this.searchString,
searchString: this.logKeyword,
active: logItem.active,
isAnsi
}
}
},
onToBottom () {
if (this.isScrolling) return
this.isToBottom = true
},
onScroll () {
if (this.isScrolling2nd) {
this.isToBottom = false
}
this.isScrolling = true
setTimeout(() => {
this.isScrolling2nd = true
setTimeout(() => {
this.isScrolling2nd = false
}, 50)
}, 50)
setTimeout(() => {
this.isScrolling = false
}, 100)
},
toBottom () {
this.$el.querySelector('.log-view').scrollTo({ top: 99999999 })
setTimeout(() => {
this.isToBottom = true
}, 50)
},
onAutoScroll () {
this.toBottom()
this.$st.sendEv('任务详情', '日志', '点击自动滚动')
},
toggleErrors () {
this.isErrorsCollapsed = !this.isErrorsCollapsed
@@ -202,21 +266,24 @@ export default {
this.isErrorCollapsing = false
}, 300)
},
onClickError (item) {
this.currentLogIndex = item.index
this.isToBottom = false
const handle = setInterval(() => {
this.isToBottom = false
}, 10)
setTimeout(() => {
clearInterval(handle)
}, 500)
async onClickError (item) {
const page = Math.ceil(item.seq / this.taskLogPageSize)
this.$store.commit('task/SET_LOG_KEYWORD', '')
this.$store.commit('task/SET_TASK_LOG_PAGE', page)
this.$store.commit('task/SET_IS_LOG_AUTO_SCROLL', false)
this.$store.commit('task/SET_ACTIVE_ERROR_LOG_ITEM', item)
this.$emit('search')
this.$st.sendEv('任务详情', '日志', '点击错误日志')
},
onSearchLog () {
this.$emit('search')
this.$st.sendEv('任务详情', '日志', '搜索日志')
}
},
mounted () {
this.currentLogIndex = 0
this.handle = setInterval(() => {
if (this.isToBottom) {
if (this.isLogAutoScroll) {
this.toBottom()
}
}, 200)
@@ -319,4 +386,13 @@ export default {
width: calc(100% - 70px);
padding-left: 10px;
}
.right {
display: flex;
align-items: center;
}
.right .el-pagination {
margin-right: 10px;
}
</style>

View File

@@ -5,20 +5,6 @@ You cannot add nodes directly on the web interface in Crawlab.
Adding a node is quite simple. The only thing you have to do is to run a Crawlab service on your target machine.
#### Docker Deployment
If you are running Crawlab using Docker, you can start a new \`worker\` container on the target machine, or add a \`worker\` service in the \`docker-compose.yml\`.
\`\`\`bash
docker run -d --restart always --name crawlab_worker \\
-e CRAWLAB_SERVER_MASTER=N \\
-e CRAWLAB_MONGO_HOST=xxx.xxx.xxx.xxx \\ # make sure you are connecting to the same MongoDB
-e CRAWLAB_REDIS_ADDRESS=xxx.xxx.xxx.xxx \\ # make sure you are connecting to the same Redis
tikazyq/crawlab:latest
\`\`\`
#### Direct Deploy
If you are deploying directly, the only thing you have to do is to run a backend service on the target machine, you can refer to [Direct Deploy](https://docs.crawlab.cn/Installation/Direct.html).
For more information, please refer to the [Official Documentation](https://docs.crawlab.cn).
For details, please refer to the [Multi-Node Deployment Documentation](https://docs.crawlab.cn/Installation/MultiNode.html).
`
}

View File

@@ -232,6 +232,7 @@ export default {
'Please enter de-duplicated field': '请输入去重字段',
'Overwrite': '覆盖',
'Ignore': '忽略',
'De-Duplication': '去重',
// 爬虫列表
'Name': '名称',
@@ -260,6 +261,7 @@ export default {
'Selected Nodes': '指定节点',
'Search Log': '搜索日志',
'Auto-Scroll': '自动滚动',
'Auto-Refresh': '自动刷新',
'Updating log...': '正在更新日志...',
'Error Count': '错误数',
'Log with errors': '日志错误',
@@ -485,6 +487,12 @@ export default {
'Allow Sending Statistics': '允许发送统计信息',
'General': '通用',
'Enable Tutorial': '启用教程',
'Error Regex Pattern': '异常正则表达式',
'By default: ': '默认: ',
'Max Error Logs Display': '最大异常日志展示',
'Log Errors': '日志错误',
'No Expire': '不过期',
'Log Expire Duration': '日志过期时间',
// 挑战
'Challenge': '挑战',
@@ -494,6 +502,22 @@ export default {
'Not Achieved': '未达成',
'Start Challenge': '开始挑战',
// 时间
'Second': '秒',
'Seconds': '秒',
'Minute': '分',
'Minutes': '分',
'Hour': '小时',
'Hours': '小时',
'Day': '天',
'Days': '天',
'Week': '周',
'Weeks': '周',
'Month': '月',
'Months': '月',
'Year': '年',
'Years': '年',
// 全局
'Related Documentation': '相关文档',
'Click to view related Documentation': '点击查看相关文档',
@@ -521,21 +545,7 @@ export default {
添加节点的方式非常简单,您只需要在目标机器上运行一个 Crawlab 服务就可以了。
#### Docker 部署
如果您是用 Docker 启动 Crawlab可以在目标机器上运行一个新的 \`worker\` 容器,或者在 \`docker-compose.yml\` 中添加 \`worker\` 服务。
\`\`\`bash
docker run -d --restart always --name crawlab_worker \\
-e CRAWLAB_SERVER_MASTER=N \\
-e CRAWLAB_MONGO_HOST=xxx.xxx.xxx.xxx \\ # 保证连接的是同一个 MongoDB
-e CRAWLAB_REDIS_ADDRESS=xxx.xxx.xxx.xxx \\ # 保证连接的是同一个 Redis
tikazyq/crawlab:latest
\`\`\`
#### 直接部署
如果您是用直接部署,只需要在目标机器上启动一个后端服务,请参考 [直接部署文档](https://docs.crawlab.cn/Installation/Direct.html)。
更多信息,请参考 [官方文档](https://docs.crawlab.cn)。
具体操作,请参照 [多节点部署文档](https://docs.crawlab.cn/Installation/MultiNode.html)。
`,
// 教程
@@ -624,6 +634,14 @@ docker run -d --restart always --name crawlab_worker \\
'Please enter your Wechat account': '请输入您的微信账号',
'Please enter your feedback content': '请输入您的反馈内容',
'No response from the server. Please make sure your server is running correctly. You can also refer to the documentation to solve this issue.': '服务器无响应请保证您的服务器正常运行您也可以参考文档来解决这个问题文档链接在下方',
'Are you sure to restart this task?': '确认重新运行该任务?',
'Are you sure to delete the project?': '确认删除该项目?',
'You have no projects created. You can create a project by clicking the "Add" button.': '您没有创建项目请点击 "添加项目" 按钮来创建一个新项目',
'Added API token successfully': '成功添加 API Token',
'Deleted API token successfully': '成功删除 API Token',
'Are you sure to add an API token?': '确认创建 API Token?',
'Are you sure to delete this API token?': '确认删除该 API Token?',
'Please enter Web Hook URL': '请输入 Web Hook URL',
// 其他
'Star crawlab-team/crawlab on GitHub': ' GitHub 上为 Crawlab 加星吧'

View File

@@ -285,7 +285,16 @@ const actions = {
async getScheduleList ({ state, commit }, payload) {
const { id } = payload
const res = await request.get(`/spiders/${id}/schedules`)
commit('schedule/SET_SCHEDULE_LIST', res.data.data, { root: true })
let data = res.data.data
if (data) {
data = data.map(d => {
const arr = d.cron.split(' ')
arr.splice(0, 1)
d.cron = arr.join(' ')
return d
})
}
commit('schedule/SET_SCHEDULE_LIST', data, { root: true })
},
async getFileTree ({ state, commit }, payload) {
const id = payload ? payload.id : state.spiderForm._id

View File

@@ -6,8 +6,6 @@ const state = {
taskList: [],
taskListTotalCount: 0,
taskForm: {},
taskLog: [],
currentLogIndex: 0,
taskResultsData: [],
taskResultsColumns: [],
taskResultsTotalCount: 0,
@@ -21,6 +19,18 @@ const state = {
// pagination
pageNum: 1,
pageSize: 10,
// log
currentLogIndex: 0,
logKeyword: '',
errorLogData: [],
isLogAutoScroll: false,
isLogAutoFetch: false,
isLogFetchLoading: false,
taskLog: [],
taskLogTotal: 0,
taskLogPage: 1,
taskLogPageSize: 5000,
activeErrorLogItem: {},
// results
resultsPageNum: 1,
resultsPageSize: 10
@@ -63,8 +73,11 @@ const getters = {
return data
},
errorLogData (state, getters) {
return getters.logData.filter(d => {
return d.data.match(utils.log.errorRegex)
const idxList = getters.logData.map(d => d._id)
return state.errorLogData.map(d => {
const idx = idxList.indexOf(d._id)
d.index = getters.logData[idx].index
return d
})
}
}
@@ -79,6 +92,9 @@ const mutations = {
SET_TASK_LOG (state, value) {
state.taskLog = value
},
SET_TASK_LOG_TOTAL (state, value) {
state.taskLogTotal = value
},
SET_CURRENT_LOG_INDEX (state, value) {
state.currentLogIndex = value
},
@@ -105,6 +121,30 @@ const mutations = {
},
SET_TASK_RESULTS_TOTAL_COUNT (state, value) {
state.taskResultsTotalCount = value
},
SET_LOG_KEYWORD (state, value) {
state.logKeyword = value
},
SET_ERROR_LOG_DATA (state, value) {
state.errorLogData = value
},
SET_TASK_LOG_PAGE (state, value) {
state.taskLogPage = value
},
SET_TASK_LOG_PAGE_SIZE (state, value) {
state.taskLogPageSize = value
},
SET_IS_LOG_AUTO_SCROLL (state, value) {
state.isLogAutoScroll = value
},
SET_IS_LOG_AUTO_FETCH (state, value) {
state.isLogAutoFetch = value
},
SET_IS_LOG_FETCH_LOADING (state, value) {
state.isLogFetchLoading = value
},
SET_ACTIVE_ERROR_LOG_ITEM (state, value) {
state.activeErrorLogItem = value
}
}
@@ -149,10 +189,26 @@ const actions = {
dispatch('getTaskList')
})
},
getTaskLog ({ state, commit }, id) {
return request.get(`/tasks/${id}/log`)
getTaskLog ({ state, commit }, { id, keyword }) {
return request.get(`/tasks/${id}/log`, {
keyword,
page_num: state.taskLogPage,
page_size: state.taskLogPageSize
})
.then(response => {
commit('SET_TASK_LOG', response.data.data || [])
commit('SET_TASK_LOG_TOTAL', response.data.total || 0)
// auto switch to next page if not reaching the end
if (state.isLogAutoScroll && state.taskLogTotal > (state.taskLogPage * state.taskLogPageSize)) {
commit('SET_TASK_LOG_PAGE', Math.ceil(state.taskLogTotal / state.taskLogPageSize))
}
})
},
getTaskErrorLog ({ state, commit }, id) {
return request.get(`/tasks/${id}/error-log`, {})
.then(response => {
commit('SET_ERROR_LOG_DATA', response.data.data || [])
})
},
getTaskResults ({ state, commit }, id) {

View File

@@ -87,6 +87,10 @@ const user = {
getInfo ({ commit, state }) {
return request.get('/me')
.then(response => {
// ensure compatibility
if (!response.data.data.setting.max_error_log) {
response.data.data.setting.max_error_log = 1000
}
commit('SET_USER_INFO', response.data.data)
window.localStorage.setItem('user_info', JSON.stringify(response.data.data))
})

View File

@@ -1,5 +1,9 @@
const regexToken = ' :,.'
export default {
errorRegex: new RegExp(`(?:[${regexToken}]|^)((?:error|exception|traceback)s?)(?:[${regexToken}]|$)`, 'gi')
// errorRegex: new RegExp(`(?:[${regexToken}]|^)((?:error|exception|traceback)s?)(?:[${regexToken}]|$)`, 'gi')
errorRegex: new RegExp(`(?:[${regexToken}]|^)((?:error|exception|traceback)s?)(?:[${regexToken}]|$)`, 'gi'),
errorWhitelist: [
'log_count/ERROR'
]
}

View File

@@ -93,6 +93,6 @@ export default {
.sidebar-container .sidebar-logo .version {
margin-left: 5px;
font-weight: normal;
font-size: 12px;
font-size: 11px;
}
</style>

View File

@@ -77,7 +77,7 @@
<ul v-else class="list">
<li
class="item"
v-for="(item, index) in projectList"
v-for="item in projectList.filter(d => d._id !== '000000000000000000000000')"
:key="item._id"
@click="onView(item)"
>
@@ -87,8 +87,7 @@
<i v-if="!isNoProject(item)" class="btn-edit fa fa-edit" @click="onEdit(item)"></i>
<i v-if="!isNoProject(item)" class="btn-close fa fa-trash-o" @click="onRemove(item)"></i>
<el-row>
<h4 v-if="index !== projectList.length - 1" class="title">{{ item.name }}</h4>
<h4 v-else class="title">{{ $t('No Project') }}</h4>
<h4 class="title">{{ item.name }}</h4>
</el-row>
<el-row>
<div style="display: flex; justify-content: space-between">

View File

@@ -184,6 +184,15 @@
</el-dialog>
<!--./cron generation popup-->
<!--crawl confirm dialog-->
<crawl-confirm-dialog
:visible="crawlConfirmDialogVisible"
:spider-id="scheduleForm.spider_id"
@close="() => crawlConfirmDialogVisible = false"
@confirm="() => crawlConfirmDialogVisible = false"
/>
<!--./crawl confirm dialog-->
<el-card style="border-radius: 0" class="schedule-list">
<!--filter-->
<div class="filter">
@@ -257,25 +266,31 @@
</template>
</el-table-column>
</template>
<el-table-column :label="$t('Action')" class="actions" align="left" width="130" fixed="right">
<el-table-column :label="$t('Action')" class="actions" align="left" width="170" fixed="right">
<template slot-scope="scope">
<!--编辑-->
<!--edit-->
<el-tooltip :content="$t('Edit')" placement="top">
<el-button type="warning" icon="el-icon-edit" size="mini" @click="onEdit(scope.row)"></el-button>
</el-tooltip>
<!--./编辑-->
<!--./edit-->
<!--删除-->
<!--delete-->
<el-tooltip :content="$t('Remove')" placement="top">
<el-button type="danger" icon="el-icon-delete" size="mini" @click="onRemove(scope.row)"></el-button>
</el-tooltip>
<!--./删除-->
<!--./delete-->
<!--查看任务-->
<!--view tasks-->
<el-tooltip :content="$t('View Tasks')" placement="top">
<el-button type="primary" icon="el-icon-search" size="mini" @click="onViewTasks(scope.row)"></el-button>
</el-tooltip>
<!--./查看任务-->
<!--./view tasks-->
<!--run-->
<el-tooltip :content="$t('Run')" placement="top">
<el-button type="success" icon="fa fa-bug" size="mini" @click="onRun(scope.row)"></el-button>
</el-tooltip>
<!--./run-->
</template>
</el-table-column>
</el-table>
@@ -292,10 +307,12 @@ import {
} from 'vuex'
import ParametersDialog from '../../components/Common/ParametersDialog'
import ScheduleTaskList from '../../components/Schedule/ScheduleTaskList'
import CrawlConfirmDialog from '../../components/Common/CrawlConfirmDialog'
export default {
name: 'ScheduleList',
components: {
CrawlConfirmDialog,
ScheduleTaskList,
VueCronLinux,
ParametersDialog
@@ -326,6 +343,7 @@ export default {
isLoading: false,
isParametersVisible: false,
isViewTasksDialogVisible: false,
crawlConfirmDialogVisible: false,
// tutorial
tourSteps: [
@@ -622,6 +640,11 @@ export default {
this.$refs['schedule-task-list'].update()
}, 100)
this.$st.sendEv('定时任务', '查看任务列表')
},
async onRun (row) {
this.crawlConfirmDialogVisible = true
this.$store.commit('schedule/SET_SCHEDULE_FORM', row)
this.$st.sendEv('定时任务', '点击运行任务')
}
},
created () {

View File

@@ -33,8 +33,10 @@
<!--./新增全局变量-->
<el-tabs v-model="activeName" @tab-click="tabActiveHandle" type="border-card">
<!--通用-->
<el-tab-pane :label="$t('General')" name="general">
<el-form :model="userInfo" class="setting-form" ref="setting-form" label-width="200px" :rules="rules"
<el-form :model="userInfo" class="setting-form" ref="setting-form" label-width="200px"
:rules="rulesNotification"
inline-message>
<el-form-item prop="username" :label="$t('Username')">
<el-input v-model="userInfo.username" disabled></el-input>
@@ -67,8 +69,12 @@
</el-form-item>
</el-form>
</el-tab-pane>
<!--./通用-->
<!--消息通知-->
<el-tab-pane :label="$t('Notifications')" name="notify">
<el-form :model="userInfo" class="setting-form" ref="setting-form" label-width="200px" :rules="rules"
<el-form :model="userInfo" class="setting-form" ref="setting-form" label-width="200px"
:rules="rulesNotification"
inline-message>
<el-form-item :label="$t('Notification Trigger Timing')">
<el-radio-group v-model="userInfo.setting.notification_trigger">
@@ -110,6 +116,119 @@
</el-form-item>
</el-form>
</el-tab-pane>
<!--./消息通知-->
<!--日志-->
<el-tab-pane :label="$t('Log')" name="log">
<el-form :model="userInfo" class="setting-form" ref="log-form" label-width="200px" :rules="rulesLog"
inline-message>
<el-form-item :label="$t('Error Regex Pattern')" prop="setting.error_regex_pattern">
<el-input
v-model="userInfo.setting.error_regex_pattern"
:placeholder="$t('By default: ') + $utils.log.errorRegex.source"
clearable
/>
</el-form-item>
<el-form-item :label="$t('Max Error Logs Display')" prop="setting.max_error_log">
<el-select
v-model="userInfo.setting.max_error_log"
clearable
>
<el-option :value="100" label="100"/>
<el-option :value="500" label="500"/>
<el-option :value="1000" label="1000"/>
<el-option :value="5000" label="5000"/>
<el-option :value="10000" label="10000"/>
</el-select>
</el-form-item>
<el-form-item :label="$t('Log Expire Duration')" prop="setting.log_expire_duration">
<el-select
v-model="userInfo.setting.log_expire_duration"
clearable
>
<el-option :value="0" :label="$t('No Expire')"/>
<el-option :value="3600" :label="'1 ' + $t('Hour')"/>
<el-option :value="3600 * 6" :label="'6 ' + $t('Hours')"/>
<el-option :value="3600 * 12" :label="'12 ' + $t('Hours')"/>
<el-option :value="3600 * 24" :label="'1 ' + $t('Day')"/>
<el-option :value="3600 * 24 * 7" :label="'7 ' + $t('Days')"/>
<el-option :value="3600 * 24 * 14" :label="'14 ' + $t('Days')"/>
<el-option :value="3600 * 24 * 30" :label="'30 ' + $t('Days')"/>
<el-option :value="3600 * 24 * 30 * 3" :label="'90 ' + $t('Days')"/>
<el-option :value="3600 * 24 * 30 * 6" :label="'180 ' + $t('Days')"/>
</el-select>
</el-form-item>
<el-form-item>
<div style="text-align: right">
<el-button type="success" size="small" @click="saveUserInfo">
{{$t('Save')}}
</el-button>
</div>
</el-form-item>
</el-form>
</el-tab-pane>
<!--./日志-->
<!--API Token-->
<el-tab-pane label="API Token" name="api-token">
<input id="clipboard">
<el-alert
type="primary"
>
</el-alert>
<div class="actions">
<el-button
size="small"
type="primary"
@click="onAddApiToken"
>
{{$t('Add')}}
</el-button>
</div>
<el-table
:data="apiTokens"
border
>
<el-table-column
label="Token"
>
<template slot-scope="scope">
{{scope.row.visible ? scope.row.token : getMaskValue(scope.row.token)}}
</template>
</el-table-column>
<el-table-column
:label="$t('Action')"
width="200px"
>
<template slot-scope="scope">
<el-button
type="warning"
size="mini"
icon="el-icon-view"
circle
@click="toggleTokenVisible(scope.row)"
/>
<el-button
type="primary"
size="mini"
icon="el-icon-document-copy"
@click="copyToken(scope.row.token)"
circle
/>
<el-button
type="danger"
size="mini"
icon="el-icon-delete"
@click="onDeleteToken(scope.row)"
circle
/>
</template>
</el-table-column>
</el-table>
</el-tab-pane>
<!--./API Token-->
<!--全局变量-->
<el-tab-pane :label="$t('Global Variable')" name="global-variable">
<div style="text-align: right;margin-bottom: 10px">
<el-button size="small" @click="addGlobalVariableHandle(true)"
@@ -131,6 +250,7 @@
</el-table-column>
</el-table>
</el-tab-pane>
<!--./全局变量-->
</el-tabs>
</div>
</template>
@@ -175,12 +295,13 @@ export default {
}
return {
userInfo: { setting: { enabled_notifications: [] } },
rules: {
rulesNotification: {
password: [{ trigger: 'blur', validator: validatePass }],
email: [{ trigger: 'blur', validator: validateEmail }],
'setting.ding_talk_robot_webhook': [{ trigger: 'blur', validator: validateDingTalkRobotWebhook }],
'setting.wechat_robot_webhook': [{ trigger: 'blur', validator: validateWechatRobotWebhook }]
},
rulesLog: {},
isShowDingTalkAppSecret: false,
activeName: 'general',
addDialogVisible: false,
@@ -223,7 +344,8 @@ export default {
}
},
isAllowSendingStatistics: localStorage.getItem('useStats') === '1',
isEnableTutorial: localStorage.getItem('enableTutorial') === '1'
isEnableTutorial: localStorage.getItem('enableTutorial') === '1',
apiTokens: []
}
},
computed: {
@@ -233,8 +355,9 @@ export default {
])
},
watch: {
userInfoStr () {
this.saveUserInfo()
async userInfoStr () {
await this.saveUserInfo()
await this.$store.dispatch('user/getInfo')
}
},
methods: {
@@ -305,12 +428,63 @@ export default {
onEnableTutorialChange (value) {
this.$message.success(this.$t('Saved successfully'))
localStorage.setItem('enableTutorial', value ? '1' : '0')
},
onAddApiToken () {
this.$confirm(this.$t('Are you sure to add an API token?'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),
type: 'warning'
}).then(async () => {
const res = await this.$request.put('/tokens')
if (!res.data.error) {
this.$message.success(this.$t('Added API token successfully'))
await this.getApiTokens()
}
})
},
onDeleteToken (row) {
this.$confirm(this.$t('Are you sure to delete this API token?'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),
type: 'warning'
}).then(async () => {
const res = await this.$request.delete(`/tokens/${row._id}`)
if (!res.data.error) {
this.$message.success(this.$t('Deleted API token successfully'))
await this.getApiTokens()
}
})
},
async addApiToken () {
await this.$request.put('/tokens')
},
async getApiTokens () {
const res = await this.$request.get('/tokens')
this.apiTokens = res.data.data
},
toggleTokenVisible (row) {
this.$set(row, 'visible', !row.visible)
},
getMaskValue (str) {
let s = ''
for (let i = 0; i < str.length; i++) {
s += '*'
}
return s
},
copyToken (str) {
const input = document.getElementById('clipboard')
input.value = str
input.select()
document.execCommand('copy')
this.$message.success(this.$t('Token copied'))
}
},
async created () {
await this.$store.dispatch('user/getInfo')
await this.$store.dispatch('user/getGlobalVariable')
this.getUserInfo()
await this.getApiTokens()
},
mounted () {
if (!this.$utils.tour.isFinishedTour('setting')) {
@@ -339,4 +513,16 @@ export default {
.setting-form >>> .el-form-item__label {
height: 40px;
}
.actions {
margin-bottom: 10px;
text-align: right;
}
#clipboard {
position: fixed;
z-index: -99999;
top: 9999px;
right: 9999px;
}
</style>

View File

@@ -52,8 +52,12 @@
:disabled="spiderForm.is_scrapy"
/>
</el-form-item>
<el-form-item :label="$t('Results')" prop="col" required>
<el-input id="col" v-model="spiderForm.col" :placeholder="$t('Results')"/>
<el-form-item :label="$t('Results')" prop="col">
<el-input
id="col"
v-model="spiderForm.col"
:placeholder="$t('By default: ') + 'results_<spider_name>'"
/>
</el-form-item>
<el-form-item :label="$t('Upload Zip File')" label-width="120px" name="site">
<el-upload
@@ -104,17 +108,17 @@
>
<p>{{$t('You can click "Add" to create an empty spider and upload files later.')}}</p>
<p>{{$t('OR, you can also click "Upload" and upload a zip file containing your spider project.')}}</p>
<p style="font-weight: bolder">
<p>
<i class="fa fa-exclamation-triangle"></i> {{$t('NOTE: When uploading a zip file, please zip your' +
' spider files from the ROOT DIRECTORY.')}}
</p>
<p>
<p style="font-weight: bolder">
<template v-if="lang === 'en'">
You can also upload spiders using <a href="https://docs.crawlab.cn/SDK/CLI.html" target="_blank"
Recommend uploading spiders using <a href="https://docs.crawlab.cn/SDK/CLI.html" target="_blank"
style="color: #409eff;font-weight: bolder">CLI Tool</a>.
</template>
<template v-else-if="lang === 'zh'">
您也可以利 <a href="https://docs.crawlab.cn/SDK/CLI.html" target="_blank"
推荐使 <a href="https://docs.crawlab.cn/SDK/CLI.html" target="_blank"
style="color: #409eff;font-weight: bolder">CLI 工具</a> 上传爬虫
</template>
</p>

View File

@@ -15,7 +15,7 @@
<task-overview @click-log="activeTabName = 'log'"/>
</el-tab-pane>
<el-tab-pane :label="$t('Log')" name="log">
<log-view/>
<log-view @search="getTaskLog(true)"/>
</el-tab-pane>
<el-tab-pane :label="$t('Results')" name="results">
<div class="button-group">
@@ -136,10 +136,15 @@ export default {
'taskForm',
'taskResultsData',
'taskResultsTotalCount',
'taskLog'
'taskLog',
'logKeyword',
'isLogAutoFetch',
'currentLogIndex',
'activeErrorLogItem'
]),
...mapGetters('task', [
'taskResultsColumns'
'taskResultsColumns',
'logData'
]),
...mapState('file', [
'currentPath'
@@ -163,6 +168,45 @@ export default {
this.$store.commit('task/SET_RESULTS_PAGE_SIZE', value)
}
},
// Two-way computed property backed by the store: reads
// `state.task.isLogAutoScroll` and writes through the
// `task/SET_IS_LOG_AUTO_SCROLL` mutation.
isLogAutoScroll: {
get () {
return this.$store.state.task.isLogAutoScroll
},
set (value) {
this.$store.commit('task/SET_IS_LOG_AUTO_SCROLL', value)
}
},
// Two-way computed property backed by the store: reads
// `state.task.isLogAutoFetch` and writes through the
// `task/SET_IS_LOG_AUTO_FETCH` mutation.
// NOTE(review): 'isLogAutoFetch' also appears in a list of mapped names earlier
// in this computed section — confirm the duplicate declaration is intended.
isLogAutoFetch: {
get () {
return this.$store.state.task.isLogAutoFetch
},
set (value) {
this.$store.commit('task/SET_IS_LOG_AUTO_FETCH', value)
}
},
// Two-way computed property backed by the store: reads
// `state.task.isLogFetchLoading` and writes through the
// `task/SET_IS_LOG_FETCH_LOADING` mutation.
isLogFetchLoading: {
get () {
return this.$store.state.task.isLogFetchLoading
},
set (value) {
this.$store.commit('task/SET_IS_LOG_FETCH_LOADING', value)
}
},
// Two-way computed property backed by the store: reads
// `state.task.currentLogIndex` and writes through the
// `task/SET_CURRENT_LOG_INDEX` mutation.
// NOTE(review): 'currentLogIndex' also appears in a list of mapped names earlier
// in this computed section — confirm the duplicate declaration is intended.
currentLogIndex: {
get () {
return this.$store.state.task.currentLogIndex
},
set (value) {
this.$store.commit('task/SET_CURRENT_LOG_INDEX', value)
}
},
logIndexMap () {
const map = new Map()
this.logData.forEach((d, index) => {
map.set(d._id, index)
})
return map
},
isRunning () {
return ['pending', 'running'].includes(this.taskForm.status)
}
@@ -184,20 +228,31 @@ export default {
this.$store.dispatch('task/getTaskResultExcel', this.$route.params.id)
this.$st.sendEv('任务详情', '结果', '下载CSV')
},
getTaskLog () {
this.$store.dispatch('task/getTaskLog', this.$route.params.id)
async getTaskLog (showLoading) {
if (showLoading) {
this.isLogFetchLoading = true
}
await this.$store.dispatch('task/getTaskLog', { id: this.$route.params.id, keyword: this.logKeyword })
this.currentLogIndex = (this.logIndexMap.get(this.activeErrorLogItem.log_id) + 1) || 0
this.isLogFetchLoading = false
await this.$store.dispatch('task/getTaskErrorLog', this.$route.params.id)
}
},
created () {
this.$store.dispatch('task/getTaskData', this.$route.params.id)
this.$store.dispatch('task/getTaskResults', this.$route.params.id)
async created () {
await this.$store.dispatch('task/getTaskData', this.$route.params.id)
this.isLogAutoFetch = !!this.isRunning
this.isLogAutoScroll = !!this.isRunning
await this.$store.dispatch('task/getTaskResults', this.$route.params.id)
this.getTaskLog()
this.handle = setInterval(() => {
if (!this.isRunning) return
this.$store.dispatch('task/getTaskData', this.$route.params.id)
this.$store.dispatch('task/getTaskResults', this.$route.params.id)
this.getTaskLog()
if (this.isLogAutoFetch) {
this.$store.dispatch('task/getTaskData', this.$route.params.id)
this.$store.dispatch('task/getTaskResults', this.$route.params.id)
this.getTaskLog()
}
}, 5000)
},
mounted () {

View File

@@ -21,7 +21,8 @@
</el-select>
</el-form-item>
<el-form-item prop="spider_id" :label="$t('Spider')">
<el-select v-model="filter.spider_id" size="small" :placeholder="$t('Spider')" @change="onFilterChange" :disabled="isFilterSpiderDisabled">
<el-select v-model="filter.spider_id" size="small" :placeholder="$t('Spider')" @change="onFilterChange"
:disabled="isFilterSpiderDisabled">
<el-option value="" :label="$t('All')"/>
<el-option v-for="spider in spiderList" :key="spider._id" :value="spider._id" :label="spider.name"/>
</el-select>
@@ -132,6 +133,19 @@
:width="col.width">
<template slot-scope="scope">
<status-tag :status="scope.row[col.name]"/>
<template
v-if="scope.row.error_log_count > 0"
>
<el-tooltip :content="$t('Log Errors') + ': ' + scope.row.error_log_count" placement="top">
<el-tag
type="danger"
style="margin-left: 10px"
>
<i class="el-icon-warning"></i>
<i class="el-icon-tickets"></i>
</el-tag>
</el-tooltip>
</template>
</template>
</el-table-column>
<el-table-column v-else
@@ -201,7 +215,7 @@ export default {
columns: [
{ name: 'node_name', label: 'Node', width: '120' },
{ name: 'spider_name', label: 'Spider', width: '120' },
{ name: 'status', label: 'Status', width: '120' },
{ name: 'status', label: 'Status', width: '180' },
{ name: 'param', label: 'Parameters', width: '120' },
// { name: 'create_ts', label: 'Create Time', width: '100' },
{ name: 'start_ts', label: 'Start Time', width: '100' },
@@ -499,4 +513,8 @@ export default {
.el-table >>> tr {
cursor: pointer;
}
.el-table >>> .el-badge .el-badge__content {
font-size: 7px;
}
</style>