diff --git a/CHANGELOG.md b/CHANGELOG.md index b4204f16..1cd54497 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +# 0.3.1 (2019-08-25) +### Features / Enhancement +- **Docker Image Optimization**. Split docker further into master, worker, frontend with alpine image. +- **Unit Tests**. Covered part of the backend code with unit tests. +- **Frontend Optimization**. Login page, button size, hints of upload UI optimization. +- **More Flexible Node Registration**. Allow users to pass a variable as key for node registration instead of MAC by default. + +### Bug Fixes +- **Uploading Large Spider Files Error**. Memory crash issue when uploading large spider files. [#150](https://github.com/crawlab-team/crawlab/issues/150) +- **Unable to Sync Spiders**. Fixes through increasing level of write permission when synchronizing spider files. [#114](https://github.com/crawlab-team/crawlab/issues/114) +- **Spider Page Issue**. Fixes through removing the field "Site". [#112](https://github.com/crawlab-team/crawlab/issues/112) +- **Node Display Issue**. Nodes do not display correctly when running docker containers on multiple machines. [#99](https://github.com/crawlab-team/crawlab/issues/99) + # 0.3.0 (2019-07-31) ### Features / Enhancement - **Golang Backend**: Refactored code from Python backend to Golang, much more stability and performance. diff --git a/Dockerfile b/Dockerfile index 893cf6fe..0809a0ba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,8 @@ ADD ./frontend /app WORKDIR /app # install frontend -RUN npm install -g yarn && yarn install +RUN npm config set unsafe-perm true +RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org RUN npm run build:prod @@ -56,4 +57,4 @@ EXPOSE 8080 EXPOSE 8000 # start backend -CMD ["/bin/sh", "/app/docker_init.sh"] \ No newline at end of file +CMD ["/bin/sh", "/app/docker_init.sh"] diff --git a/Jenkinsfile b/Jenkinsfile index 1188848d..b7441bea 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -48,8 +48,11 @@ pipeline { sh """ # 重启docker compose cd ./jenkins/${ENV:GIT_BRANCH} - docker-compose stop | true - docker-compose up -d + docker-compose stop master | true + docker-compose rm -f master | true + docker-compose stop worker | true + docker-compose rm -f worker | true + docker-compose up -d | true """ } } @@ -57,7 +60,7 @@ pipeline { steps { echo 'Cleanup...' sh """ - docker image prune -f + docker rmi -f `docker images | grep '' | grep -v IMAGE | awk '{ print \$3 }' | xargs` """ } } diff --git a/README-zh.md b/README-zh.md index 24fef25e..b4e2b469 100644 --- a/README-zh.md +++ b/README-zh.md @@ -1,14 +1,14 @@ # Crawlab ![](http://114.67.75.98:8082/buildStatus/icon?job=crawlab%2Fmaster) -![](https://img.shields.io/github/release/tikazyq/crawlab.svg) -![](https://img.shields.io/github/last-commit/tikazyq/crawlab.svg) -![](https://img.shields.io/github/issues/tikazyq/crawlab.svg) -![](https://img.shields.io/github/contributors/tikazyq/crawlab.svg) +![](https://img.shields.io/github/release/crawlab-team/crawlab.svg) +![](https://img.shields.io/github/last-commit/crawlab-team/crawlab.svg) +![](https://img.shields.io/github/issues/crawlab-team/crawlab.svg) +![](https://img.shields.io/github/contributors/crawlab-team/crawlab.svg) ![](https://img.shields.io/docker/pulls/tikazyq/crawlab) -![](https://img.shields.io/github/license/tikazyq/crawlab.svg) +![](https://img.shields.io/github/license/crawlab-team/crawlab.svg) -中文 | [English](https://github.com/tikazyq/crawlab) +中文 | [English](https://github.com/crawlab-team/crawlab) [安装](#安装) | [运行](#运行) | [截图](#截图) | [架构](#架构) | [集成](#与其他框架的集成) | [比较](#与其他框架比较) | [相关文章](#相关文章) | [社区&赞助](#社区--赞助) @@ -21,6 +21,7 @@ 三种方式: 1. [Docker](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html)(推荐) 2. [直接部署](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html)(了解内核) +3. [Kubernetes](https://mp.weixin.qq.com/s/3Q1BQATUIEE_WXcHPqhYbA) ### 要求(Docker) - Docker 18.03+ @@ -202,7 +203,7 @@ Crawlab使用起来很方便,也很通用,可以适用于几乎任何主流 |框架 | 类型 | 分布式 | 前端 | 依赖于Scrapyd | |:---:|:---:|:---:|:---:|:---:| -| [Crawlab](https://github.com/tikazyq/crawlab) | 管理平台 | Y | Y | N +| [Crawlab](https://github.com/crawlab-team/crawlab) | 管理平台 | Y | Y | N | [ScrapydWeb](https://github.com/my8100/scrapydweb) | 管理平台 | Y | Y | Y | [SpiderKeeper](https://github.com/DormyMo/SpiderKeeper) | 管理平台 | Y | Y | Y | [Gerapy](https://github.com/Gerapy/Gerapy) | 管理平台 | Y | Y | Y diff --git a/README.md b/README.md index ca0df99a..91a30b34 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ # Crawlab ![](http://114.67.75.98:8082/buildStatus/icon?job=crawlab%2Fmaster) -![](https://img.shields.io/github/release/tikazyq/crawlab.svg) -![](https://img.shields.io/github/last-commit/tikazyq/crawlab.svg) -![](https://img.shields.io/github/issues/tikazyq/crawlab.svg) -![](https://img.shields.io/github/contributors/tikazyq/crawlab.svg) +![](https://img.shields.io/github/release/crawlab-team/crawlab.svg) +![](https://img.shields.io/github/last-commit/crawlab-team/crawlab.svg) +![](https://img.shields.io/github/issues/crawlab-team/crawlab.svg) +![](https://img.shields.io/github/contributors/crawlab-team/crawlab.svg) ![](https://img.shields.io/docker/pulls/tikazyq/crawlab) -![](https://img.shields.io/github/license/tikazyq/crawlab.svg) +![](https://img.shields.io/github/license/crawlab-team/crawlab.svg) -[中文](https://github.com/tikazyq/crawlab/blob/master/README-zh.md) | English +[中文](https://github.com/crawlab-team/crawlab/blob/master/README-zh.md) | English [Installation](#installation) | [Run](#run) | [Screenshot](#screenshot) | [Architecture](#architecture) | [Integration](#integration-with-other-frameworks) | [Compare](#comparison-with-other-frameworks) | [Community & Sponsorship](#community--sponsorship) @@ -21,6 +21,7 @@ Golang-based distributed web crawler management platform, supporting various lan Two methods: 1. [Docker](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html) (Recommended) 2. [Direct Deploy](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html) (Check Internal Kernel) +3. [Kubernetes](https://mp.weixin.qq.com/s/3Q1BQATUIEE_WXcHPqhYbA) ### Pre-requisite (Docker) - Docker 18.03+ @@ -199,7 +200,7 @@ Crawlab is easy to use, general enough to adapt spiders in any language and any |Framework | Type | Distributed | Frontend | Scrapyd-Dependent | |:---:|:---:|:---:|:---:|:---:| -| [Crawlab](https://github.com/tikazyq/crawlab) | Admin Platform | Y | Y | N +| [Crawlab](https://github.com/crawlab-team/crawlab) | Admin Platform | Y | Y | N | [ScrapydWeb](https://github.com/my8100/scrapydweb) | Admin Platform | Y | Y | Y | [SpiderKeeper](https://github.com/DormyMo/SpiderKeeper) | Admin Platform | Y | Y | Y | [Gerapy](https://github.com/Gerapy/Gerapy) | Admin Platform | Y | Y | Y diff --git a/backend/conf/config.yml b/backend/conf/config.yml index f1042ca6..3805762a 100644 --- a/backend/conf/config.yml +++ b/backend/conf/config.yml @@ -15,6 +15,8 @@ redis: log: level: info path: "/var/logs/crawlab" + isDeletePeriodically: "Y" + deleteFrequency: "@hourly" server: host: 0.0.0.0 port: 8000 diff --git a/backend/config/config_test.go b/backend/config/config_test.go index ee966877..0068e6ad 100644 --- a/backend/config/config_test.go +++ b/backend/config/config_test.go @@ -7,7 +7,7 @@ import ( func TestInitConfig(t *testing.T) { Convey("Test InitConfig func", t, func() { - x := InitConfig("") + x := InitConfig("../conf/config.yml") Convey("The value should be nil", func() { So(x, ShouldEqual, nil) diff --git a/backend/constants/channels.go b/backend/constants/channels.go new file mode 100644 index 00000000..c38a5ac9 --- /dev/null +++ b/backend/constants/channels.go @@ -0,0 +1,9 @@ +package constants + +const ( + ChannelAllNode = "nodes:public" + + ChannelWorkerNode = "nodes:" + + ChannelMasterNode = "nodes:master" +) diff --git a/backend/constants/context.go b/backend/constants/context.go new file mode 100644 index 00000000..0759b54b --- /dev/null +++ b/backend/constants/context.go @@ -0,0 +1,5 @@ +package constants + +const ( + ContextUser = "currentUser" +) diff --git a/backend/constants/errors.go b/backend/constants/errors.go new file mode 100644 index 00000000..a273cb75 --- /dev/null +++ b/backend/constants/errors.go @@ -0,0 +1,13 @@ +package constants + +import ( + "crawlab/errors" + "net/http" +) + +var ( + ErrorMongoError = errors.NewSystemOPError(1001, "system error:[mongo]%s", http.StatusInternalServerError) + //users + ErrorUserNotFound = errors.NewBusinessError(10001, "user not found.", http.StatusUnauthorized) + ErrorUsernameOrPasswordInvalid = errors.NewBusinessError(11001, "username or password invalid", http.StatusUnauthorized) +) diff --git a/backend/constants/message.go b/backend/constants/message.go index 521a2019..72e5fab2 100644 --- a/backend/constants/message.go +++ b/backend/constants/message.go @@ -4,4 +4,6 @@ const ( MsgTypeGetLog = "get-log" MsgTypeGetSystemInfo = "get-sys-info" MsgTypeCancelTask = "cancel-task" + MsgTypeRemoveLog = "remove-log" + MsgTypeRemoveSpider = "remove-spider" ) diff --git a/backend/constants/task.go b/backend/constants/task.go index 5eeee967..b6fb615c 100644 --- a/backend/constants/task.go +++ b/backend/constants/task.go @@ -1,11 +1,18 @@ package constants const ( - StatusPending string = "pending" - StatusRunning string = "running" - StatusFinished string = "finished" - StatusError string = "error" + // 调度中 + StatusPending string = "pending" + // 运行中 + StatusRunning string = "running" + // 已完成 + StatusFinished string = "finished" + // 错误 + StatusError string = "error" + // 取消 StatusCancelled string = "cancelled" + // 节点重启导致的异常终止 + StatusAbnormal string = "abnormal" ) const ( diff --git a/backend/database/mongo.go b/backend/database/mongo.go index 6b155791..e72baeaa 100644 --- a/backend/database/mongo.go +++ b/backend/database/mongo.go @@ -3,6 +3,8 @@ package database import ( "github.com/globalsign/mgo" "github.com/spf13/viper" + "net" + "time" ) var Session *mgo.Session @@ -38,13 +40,28 @@ func InitMongo() error { var mongoAuth = viper.GetString("mongo.authSource") if Session == nil { - var uri string - if mongoUsername == "" { - uri = "mongodb://" + mongoHost + ":" + mongoPort + "/" + mongoDb - } else { - uri = "mongodb://" + mongoUsername + ":" + mongoPassword + "@" + mongoHost + ":" + mongoPort + "/" + mongoDb + "?authSource=" + mongoAuth + var dialInfo mgo.DialInfo + addr := net.JoinHostPort(mongoHost, mongoPort) + timeout := time.Second * 10 + dialInfo = mgo.DialInfo{ + Addrs: []string{addr}, + Timeout: timeout, + Database: mongoDb, + PoolLimit: 100, + PoolTimeout: timeout, + ReadTimeout: timeout, + WriteTimeout: timeout, + AppName: "crawlab", + FailFast: true, + MinPoolSize: 10, + MaxIdleTimeMS: 1000 * 30, } - sess, err := mgo.Dial(uri) + if mongoUsername != "" { + dialInfo.Username = mongoUsername + dialInfo.Password = mongoPassword + dialInfo.Source = mongoAuth + } + sess, err := mgo.DialWithInfo(&dialInfo) if err != nil { return err } diff --git a/backend/database/pubsub.go b/backend/database/pubsub.go index 4570e7b4..0eb8639b 100644 --- a/backend/database/pubsub.go +++ b/backend/database/pubsub.go @@ -1,72 +1,96 @@ package database import ( + "context" "fmt" "github.com/apex/log" "github.com/gomodule/redigo/redis" - "unsafe" + errors2 "github.com/pkg/errors" + "time" ) -type SubscribeCallback func(channel, message string) +type ConsumeFunc func(message redis.Message) error -type Subscriber struct { - client redis.PubSubConn - cbMap map[string]SubscribeCallback -} - -func (c *Subscriber) Connect() { - conn, err := GetRedisConn() - if err != nil { - log.Fatalf("redis dial failed.") - } - - c.client = redis.PubSubConn{Conn: conn} - c.cbMap = make(map[string]SubscribeCallback) - - go func() { - for { - log.Debug("wait...") - switch res := c.client.Receive().(type) { - case redis.Message: - channel := (*string)(unsafe.Pointer(&res.Channel)) - message := (*string)(unsafe.Pointer(&res.Data)) - c.cbMap[*channel](*channel, *message) - case redis.Subscription: - fmt.Printf("%s: %s %d\n", res.Channel, res.Kind, res.Count) - case error: - log.Error("error handle...") - continue - } - } - }() - -} - -func (c *Subscriber) Close() { - err := c.client.Close() +func (r *Redis) Close() { + err := r.pool.Close() if err != nil { log.Errorf("redis close error.") } } +func (r *Redis) subscribe(ctx context.Context, consume ConsumeFunc, channel ...string) error { + psc := redis.PubSubConn{Conn: r.pool.Get()} + if err := psc.Subscribe(redis.Args{}.AddFlat(channel)...); err != nil { + return err + } + done := make(chan error, 1) + tick := time.NewTicker(time.Second * 3) + defer tick.Stop() + go func() { + defer func() { _ = psc.Close() }() + for { + switch msg := psc.Receive().(type) { + case error: + done <- fmt.Errorf("redis pubsub receive err: %v", msg) + return + case redis.Message: + if err := consume(msg); err != nil { + fmt.Printf("redis pubsub consume message err: %v", err) + continue + } + case redis.Subscription: + fmt.Println(msg) -func (c *Subscriber) Subscribe(channel interface{}, cb SubscribeCallback) { - err := c.client.Subscribe(channel) - if err != nil { - log.Fatalf("redis Subscribe error.") + if msg.Count == 0 { + // all channels are unsubscribed + return + } + } + + } + }() + // start a new goroutine to receive message + for { + select { + case <-ctx.Done(): + if err := psc.Unsubscribe(); err != nil { + fmt.Printf("redis pubsub unsubscribe err: %v \n", err) + } + done <- nil + case <-tick.C: + //fmt.Printf("ping message \n") + if err := psc.Ping(""); err != nil { + done <- err + } + case err := <-done: + close(done) + return err + } } - c.cbMap[channel.(string)] = cb } +func (r *Redis) Subscribe(ctx context.Context, consume ConsumeFunc, channel ...string) error { + index := 0 + go func() { + for { + err := r.subscribe(ctx, consume, channel...) + fmt.Println(err) -func Publish(channel string, msg string) error { - c, err := GetRedisConn() - if err != nil { - return err - } - - if _, err := c.Do("PUBLISH", channel, msg); err != nil { - return err - } - + if err == nil { + break + } + time.Sleep(5 * time.Second) + index += 1 + fmt.Printf("try reconnect %d times \n", index) + } + }() return nil } +func (r *Redis) Publish(channel, message string) (n int, err error) { + conn := r.pool.Get() + defer func() { _ = conn.Close() }() + n, err = redis.Int(conn.Do("PUBLISH", channel, message)) + if err != nil { + return 0, errors2.Wrapf(err, "redis publish %s %s", channel, message) + } + return +} diff --git a/backend/database/redis.go b/backend/database/redis.go index ffebf776..ede229a2 100644 --- a/backend/database/redis.go +++ b/backend/database/redis.go @@ -4,21 +4,20 @@ import ( "github.com/gomodule/redigo/redis" "github.com/spf13/viper" "runtime/debug" + "time" ) -var RedisClient = Redis{} - -type ConsumeFunc func(channel string, message []byte) error +var RedisClient *Redis type Redis struct { + pool *redis.Pool } +func NewRedisClient() *Redis { + return &Redis{pool: NewRedisPool()} +} func (r *Redis) RPush(collection string, value interface{}) error { - c, err := GetRedisConn() - if err != nil { - debug.PrintStack() - return err - } + c := r.pool.Get() defer c.Close() if _, err := c.Do("RPUSH", collection, value); err != nil { @@ -29,11 +28,7 @@ func (r *Redis) RPush(collection string, value interface{}) error { } func (r *Redis) LPop(collection string) (string, error) { - c, err := GetRedisConn() - if err != nil { - debug.PrintStack() - return "", err - } + c := r.pool.Get() defer c.Close() value, err2 := redis.String(c.Do("LPOP", collection)) @@ -44,11 +39,7 @@ func (r *Redis) LPop(collection string) (string, error) { } func (r *Redis) HSet(collection string, key string, value string) error { - c, err := GetRedisConn() - if err != nil { - debug.PrintStack() - return err - } + c := r.pool.Get() defer c.Close() if _, err := c.Do("HSET", collection, key, value); err != nil { @@ -59,11 +50,7 @@ func (r *Redis) HSet(collection string, key string, value string) error { } func (r *Redis) HGet(collection string, key string) (string, error) { - c, err := GetRedisConn() - if err != nil { - debug.PrintStack() - return "", err - } + c := r.pool.Get() defer c.Close() value, err2 := redis.String(c.Do("HGET", collection, key)) @@ -74,11 +61,7 @@ func (r *Redis) HGet(collection string, key string) (string, error) { } func (r *Redis) HDel(collection string, key string) error { - c, err := GetRedisConn() - if err != nil { - debug.PrintStack() - return err - } + c := r.pool.Get() defer c.Close() if _, err := c.Do("HDEL", collection, key); err != nil { @@ -88,11 +71,7 @@ func (r *Redis) HDel(collection string, key string) error { } func (r *Redis) HKeys(collection string) ([]string, error) { - c, err := GetRedisConn() - if err != nil { - debug.PrintStack() - return []string{}, err - } + c := r.pool.Get() defer c.Close() value, err2 := redis.Strings(c.Do("HKeys", collection)) @@ -102,7 +81,7 @@ func (r *Redis) HKeys(collection string) ([]string, error) { return value, nil } -func GetRedisConn() (redis.Conn, error) { +func NewRedisPool() *redis.Pool { var address = viper.GetString("redis.address") var port = viper.GetString("redis.port") var database = viper.GetString("redis.database") @@ -114,14 +93,30 @@ func GetRedisConn() (redis.Conn, error) { } else { url = "redis://x:" + password + "@" + address + ":" + port + "/" + database } - c, err := redis.DialURL(url) - if err != nil { - debug.PrintStack() - return c, err + return &redis.Pool{ + Dial: func() (conn redis.Conn, e error) { + return redis.DialURL(url, + redis.DialConnectTimeout(time.Second*10), + redis.DialReadTimeout(time.Second*10), + redis.DialWriteTimeout(time.Second*10), + ) + }, + TestOnBorrow: func(c redis.Conn, t time.Time) error { + if time.Since(t) < time.Minute { + return nil + } + _, err := c.Do("PING") + return err + }, + MaxIdle: 10, + MaxActive: 0, + IdleTimeout: 300 * time.Second, + Wait: false, + MaxConnLifetime: 0, } - return c, nil } func InitRedis() error { + RedisClient = NewRedisClient() return nil } diff --git a/backend/entity/common.go b/backend/entity/common.go new file mode 100644 index 00000000..332cc494 --- /dev/null +++ b/backend/entity/common.go @@ -0,0 +1,17 @@ +package entity + +import "strconv" + +type Page struct { + Skip int + Limit int + PageNum int + PageSize int +} + +func (p *Page)GetPage(pageNum string, pageSize string) { + p.PageNum, _ = strconv.Atoi(pageNum) + p.PageSize, _ = strconv.Atoi(pageSize) + p.Skip = p.PageSize * (p.PageNum - 1) + p.Limit = p.PageSize +} \ No newline at end of file diff --git a/backend/entity/node.go b/backend/entity/node.go new file mode 100644 index 00000000..cf52fafb --- /dev/null +++ b/backend/entity/node.go @@ -0,0 +1,25 @@ +package entity + +type NodeMessage struct { + // 通信类别 + Type string `json:"type"` + + // 任务相关 + TaskId string `json:"task_id"` // 任务ID + + // 节点相关 + NodeId string `json:"node_id"` // 节点ID + + // 日志相关 + LogPath string `json:"log_path"` // 日志路径 + Log string `json:"log"` // 日志 + + // 系统信息 + SysInfo SystemInfo `json:"sys_info"` + + // 爬虫相关 + SpiderId string `json:"spider_id"` //爬虫ID + + // 错误相关 + Error string `json:"error"` +} diff --git a/backend/entity/spider.go b/backend/entity/spider.go new file mode 100644 index 00000000..7f5e02b4 --- /dev/null +++ b/backend/entity/spider.go @@ -0,0 +1,6 @@ +package entity + +type SpiderType struct { + Type string `json:"type" bson:"_id"` + Count int `json:"count" bson:"count"` +} diff --git a/backend/entity/system.go b/backend/entity/system.go new file mode 100644 index 00000000..dff637b7 --- /dev/null +++ b/backend/entity/system.go @@ -0,0 +1,15 @@ +package entity + +type SystemInfo struct { + ARCH string `json:"arch"` + OS string `json:"os"` + Hostname string `json:"host_name"` + NumCpu int `json:"num_cpu"` + Executables []Executable `json:"executables"` +} + +type Executable struct { + Path string `json:"path"` + FileName string `json:"file_name"` + DisplayName string `json:"display_name"` +} diff --git a/backend/errors/errors.go b/backend/errors/errors.go new file mode 100644 index 00000000..f191cd3e --- /dev/null +++ b/backend/errors/errors.go @@ -0,0 +1,55 @@ +package errors + +import ( + "fmt" + "net/http" +) + +type Scope int + +const ( + ScopeSystem Scope = 1 + ScopeBusiness Scope = 2 +) + +type OPError struct { + HttpCode int + Message string + Code int + Scope Scope +} + +func (O OPError) Error() string { + var scope string + switch O.Scope { + case ScopeSystem: + scope = "system" + break + case ScopeBusiness: + scope = "business" + } + return fmt.Sprintf("%s error: [%d]%s.", scope, O.Code, O.Message) +} + +func NewSystemOPError(code int, message string, httpCodes ...int) *OPError { + httpCode := http.StatusOK + if len(httpCodes) > 0 { + httpCode = httpCodes[0] + } + return NewOpError(code, message, ScopeSystem, httpCode) +} +func NewOpError(code int, message string, scope Scope, httpCode int) *OPError { + return &OPError{ + Message: message, + Code: code, + Scope: scope, + HttpCode: httpCode, + } +} +func NewBusinessError(code int, message string, httpCodes ...int) *OPError { + httpCode := http.StatusOK + if len(httpCodes) > 0 { + httpCode = httpCodes[0] + } + return NewOpError(code, message, ScopeBusiness, httpCode) +} diff --git a/backend/go.mod b/backend/go.mod index 5a575910..428c2fd3 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -8,9 +8,13 @@ require ( github.com/fsnotify/fsnotify v1.4.7 github.com/gin-gonic/gin v1.4.0 github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8 + github.com/go-playground/locales v0.12.1 // indirect + github.com/go-playground/universal-translator v0.16.0 // indirect github.com/gomodule/redigo v2.0.0+incompatible + github.com/leodido/go-urn v1.1.0 // indirect github.com/pkg/errors v0.8.1 github.com/satori/go.uuid v1.2.0 github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337 github.com/spf13/viper v1.4.0 + gopkg.in/go-playground/validator.v9 v9.29.1 ) diff --git a/backend/go.sum b/backend/go.sum index 910e18be..55a56852 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -39,6 +39,10 @@ github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0 github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-playground/locales v0.12.1 h1:2FITxuFt/xuCNP1Acdhv62OzaCiviiE4kotfhkmOqEc= +github.com/go-playground/locales v0.12.1/go.mod h1:IUMDtCfWo/w/mtMfIE/IG2K+Ey3ygWanZIBtBW0W2TM= +github.com/go-playground/universal-translator v0.16.0 h1:X++omBR/4cE2MNg91AoC3rmGrCjJ8eAeUP/K/EKx4DM= +github.com/go-playground/universal-translator v0.16.0/go.mod h1:1AnU7NaIRDWWzGEKwgtJRd2xk99HeFyHw3yid4rvQIY= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= @@ -53,6 +57,7 @@ github.com/gomodule/redigo v2.0.0+incompatible/go.mod h1:B4C85qUVwatsJoIUNIfCRsp github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= @@ -66,6 +71,7 @@ github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22 github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= github.com/json-iterator/go v1.1.6 h1:MrUvLMLTMxbqFJ9kzlvat/rYZqZnW3u4wkLzWTaFwKs= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= @@ -77,6 +83,8 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/leodido/go-urn v1.1.0 h1:Sm1gr51B1kKyfD2BlRcLSiEkffoG96g6TPv6eRoEiB8= +github.com/leodido/go-urn v1.1.0/go.mod h1:+cyI34gQWZcE1eQU7NVgKkkzdXDQHr1dBMtdAPozLkw= github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ= @@ -120,8 +128,10 @@ github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdh github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/assertions v1.0.0 h1:UVQPSSmc3qtTi+zPPkCXvZX9VvW/xT/NsRvKfwY81a8= github.com/smartystreets/assertions v1.0.0/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM= github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= +github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337 h1:WN9BUFbdyOsSH/XohnWpXOlq9NBD5sGAB2FciQMUEe8= github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/gunit v1.0.0/go.mod h1:qwPWnhz6pn0NnRBP++URONOVyNkPyr4SauJk4cUOwJs= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= @@ -202,6 +212,8 @@ gopkg.in/go-playground/assert.v1 v1.2.1 h1:xoYuJVE7KT85PYWrN730RguIQO0ePzVRfFMXa gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= gopkg.in/go-playground/validator.v8 v8.18.2 h1:lFB4DoMU6B626w8ny76MV7VX6W2VHct2GVOI3xgiMrQ= gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= +gopkg.in/go-playground/validator.v9 v9.29.1 h1:SvGtYmN60a5CVKTOzMSyfzWDeZRxRuGvRQyEAKbw1xc= +gopkg.in/go-playground/validator.v9 v9.29.1/go.mod h1:+c9/zcJMFNgbLvly1L1V+PpxWdVbfP1avr/N00E2vyQ= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= diff --git a/backend/lib/validate_bridge/validator.go b/backend/lib/validate_bridge/validator.go new file mode 100644 index 00000000..509dc475 --- /dev/null +++ b/backend/lib/validate_bridge/validator.go @@ -0,0 +1,54 @@ +package validate_bridge + +import ( + "reflect" + "sync" + + "github.com/gin-gonic/gin/binding" + "gopkg.in/go-playground/validator.v9" +) + +type DefaultValidator struct { + once sync.Once + validate *validator.Validate +} + +var _ binding.StructValidator = &DefaultValidator{validate: validator.New()} + +func (v *DefaultValidator) ValidateStruct(obj interface{}) error { + if kindOfData(obj) == reflect.Struct { + + v.lazyinit() + + if err := v.validate.Struct(obj); err != nil { + return err + } + } + + return nil +} + +func (v *DefaultValidator) Engine() interface{} { + v.lazyinit() + return v.validate +} + +func (v *DefaultValidator) lazyinit() { + v.once.Do(func() { + v.validate = validator.New() + v.validate.SetTagName("binding") + + // add any custom validations etc. here + }) +} + +func kindOfData(data interface{}) reflect.Kind { + + value := reflect.ValueOf(data) + valueType := value.Kind() + + if valueType == reflect.Ptr { + valueType = value.Elem().Kind() + } + return valueType +} diff --git a/backend/main.go b/backend/main.go index 489a17ce..5d95dbaf 100644 --- a/backend/main.go +++ b/backend/main.go @@ -3,16 +3,20 @@ package main import ( "crawlab/config" "crawlab/database" + "crawlab/lib/validate_bridge" "crawlab/middlewares" + "crawlab/model" "crawlab/routes" "crawlab/services" "github.com/apex/log" "github.com/gin-gonic/gin" + "github.com/gin-gonic/gin/binding" "github.com/spf13/viper" "runtime/debug" ) func main() { + binding.Validator = new(validate_bridge.DefaultValidator) app := gin.Default() // 初始化配置 @@ -29,6 +33,15 @@ func main() { } log.Info("初始化日志设置成功") + if viper.GetString("log.isDeletePeriodically") == "Y" { + err := services.InitDeleteLogPeriodically() + if err != nil { + log.Error("Init DeletePeriodically Failed") + panic(err) + } + log.Info("初始化定期清理日志配置成功") + } + // 初始化Mongodb数据库 if err := database.InitMongo(); err != nil { log.Error("init mongodb error:" + err.Error()) @@ -45,7 +58,7 @@ func main() { } log.Info("初始化Redis数据库成功") - if services.IsMaster() { + if model.IsMaster() { // 初始化定时任务 if err := services.InitScheduler(); err != nil { log.Error("init scheduler error:" + err.Error()) @@ -87,56 +100,64 @@ func main() { log.Info("初始化用户服务成功") // 以下为主节点服务 - if services.IsMaster() { + if model.IsMaster() { // 中间件 app.Use(middlewares.CORSMiddleware()) - app.Use(middlewares.AuthorizationMiddleware()) + //app.Use(middlewares.AuthorizationMiddleware()) + anonymousGroup := app.Group("/") + { + anonymousGroup.POST("/login", routes.Login) // 用户登录 + anonymousGroup.PUT("/users", routes.PutUser) // 添加用户 + + } + authGroup := app.Group("/", middlewares.AuthorizationMiddleware()) + { + // 路由 + // 节点 + authGroup.GET("/nodes", routes.GetNodeList) // 节点列表 + authGroup.GET("/nodes/:id", routes.GetNode) // 节点详情 + authGroup.POST("/nodes/:id", routes.PostNode) // 修改节点 + authGroup.GET("/nodes/:id/tasks", routes.GetNodeTaskList) // 节点任务列表 + authGroup.GET("/nodes/:id/system", routes.GetSystemInfo) // 节点任务列表 + authGroup.DELETE("/nodes/:id", routes.DeleteNode) // 删除节点 + // 爬虫 + authGroup.GET("/spiders", routes.GetSpiderList) // 爬虫列表 + authGroup.GET("/spiders/:id", routes.GetSpider) // 爬虫详情 + authGroup.POST("/spiders", routes.PutSpider) // 上传爬虫 + authGroup.POST("/spiders/:id", routes.PostSpider) // 修改爬虫 + authGroup.POST("/spiders/:id/publish", routes.PublishSpider) // 发布爬虫 + authGroup.DELETE("/spiders/:id", routes.DeleteSpider) // 删除爬虫 + authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks) // 爬虫任务列表 + authGroup.GET("/spiders/:id/file", routes.GetSpiderFile) // 爬虫文件读取 + authGroup.POST("/spiders/:id/file", routes.PostSpiderFile) // 爬虫目录写入 + authGroup.GET("/spiders/:id/dir", routes.GetSpiderDir) // 爬虫目录 + authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据 + authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型 + // 任务 + authGroup.GET("/tasks", routes.GetTaskList) // 任务列表 + authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情 + authGroup.PUT("/tasks", routes.PutTask) // 派发任务 + authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务 + authGroup.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务 + authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志 + authGroup.GET("/tasks/:id/results", routes.GetTaskResults) // 任务结果 + authGroup.GET("/tasks/:id/results/download", routes.DownloadTaskResultsCsv) // 下载任务结果 + // 定时任务 + authGroup.GET("/schedules", routes.GetScheduleList) // 定时任务列表 + authGroup.GET("/schedules/:id", routes.GetSchedule) // 定时任务详情 + authGroup.PUT("/schedules", routes.PutSchedule) // 创建定时任务 + authGroup.POST("/schedules/:id", routes.PostSchedule) // 修改定时任务 + authGroup.DELETE("/schedules/:id", routes.DeleteSchedule) // 删除定时任务 + // 统计数据 + authGroup.GET("/stats/home", routes.GetHomeStats) // 首页统计数据 + // 用户 + authGroup.GET("/users", routes.GetUserList) // 用户列表 + authGroup.GET("/users/:id", routes.GetUser) // 用户详情 + authGroup.POST("/users/:id", routes.PostUser) // 更改用户 + authGroup.DELETE("/users/:id", routes.DeleteUser) // 删除用户 + authGroup.GET("/me", routes.GetMe) // 获取自己账户 + } - // 路由 - // 节点 - app.GET("/nodes", routes.GetNodeList) // 节点列表 - app.GET("/nodes/:id", routes.GetNode) // 节点详情 - app.POST("/nodes/:id", routes.PostNode) // 修改节点 - app.GET("/nodes/:id/tasks", routes.GetNodeTaskList) // 节点任务列表 - app.GET("/nodes/:id/system", routes.GetSystemInfo) // 节点任务列表 - app.DELETE("/nodes/:id", routes.DeleteNode) // 删除节点 - // 爬虫 - app.GET("/spiders", routes.GetSpiderList) // 爬虫列表 - app.GET("/spiders/:id", routes.GetSpider) // 爬虫详情 - app.POST("/spiders", routes.PutSpider) // 上传爬虫 - app.POST("/spiders/:id", routes.PostSpider) // 修改爬虫 - app.POST("/spiders/:id/publish", routes.PublishSpider) // 发布爬虫 - app.DELETE("/spiders/:id", routes.DeleteSpider) // 删除爬虫 - app.GET("/spiders/:id/tasks", routes.GetSpiderTasks) // 爬虫任务列表 - app.GET("/spiders/:id/file", routes.GetSpiderFile) // 爬虫文件读取 - app.POST("/spiders/:id/file", routes.PostSpiderFile) // 爬虫目录写入 - app.GET("/spiders/:id/dir", routes.GetSpiderDir) // 爬虫目录 - app.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据 - // 任务 - app.GET("/tasks", routes.GetTaskList) // 任务列表 - app.GET("/tasks/:id", routes.GetTask) // 任务详情 - app.PUT("/tasks", routes.PutTask) // 派发任务 - app.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务 - app.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务 - app.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志 - app.GET("/tasks/:id/results", routes.GetTaskResults) // 任务结果 - app.GET("/tasks/:id/results/download", routes.DownloadTaskResultsCsv) // 下载任务结果 - // 定时任务 - app.GET("/schedules", routes.GetScheduleList) // 定时任务列表 - app.GET("/schedules/:id", routes.GetSchedule) // 定时任务详情 - app.PUT("/schedules", routes.PutSchedule) // 创建定时任务 - app.POST("/schedules/:id", routes.PostSchedule) // 修改定时任务 - app.DELETE("/schedules/:id", routes.DeleteSchedule) // 删除定时任务 - // 统计数据 - app.GET("/stats/home", routes.GetHomeStats) // 首页统计数据 - // 用户 - app.GET("/users", routes.GetUserList) // 用户列表 - app.GET("/users/:id", routes.GetUser) // 用户详情 - app.PUT("/users", routes.PutUser) // 添加用户 - app.POST("/users/:id", routes.PostUser) // 更改用户 - app.DELETE("/users/:id", routes.DeleteUser) // 删除用户 - app.POST("/login", routes.Login) // 用户登录 - app.GET("/me", routes.GetMe) // 获取自己账户 } // 路由ping diff --git a/backend/middlewares/auth.go b/backend/middlewares/auth.go index 977fea78..07249e82 100644 --- a/backend/middlewares/auth.go +++ b/backend/middlewares/auth.go @@ -12,12 +12,12 @@ import ( func AuthorizationMiddleware() gin.HandlerFunc { return func(c *gin.Context) { // 如果为登录或注册,不用校验 - if c.Request.URL.Path == "/login" || - (c.Request.URL.Path == "/users" && c.Request.Method == "PUT") || - strings.HasSuffix(c.Request.URL.Path, "download") { - c.Next() - return - } + //if c.Request.URL.Path == "/login" || + // (c.Request.URL.Path == "/users" && c.Request.Method == "PUT") || + // strings.HasSuffix(c.Request.URL.Path, "download") { + // c.Next() + // return + //} // 获取token string tokenStr := c.GetHeader("Authorization") @@ -46,6 +46,7 @@ func AuthorizationMiddleware() gin.HandlerFunc { return } } + c.Set(constants.ContextUser, &user) // 校验成功 c.Next() diff --git a/backend/mock/node.go b/backend/mock/node.go index 878dbcfa..789d0a9a 100644 --- a/backend/mock/node.go +++ b/backend/mock/node.go @@ -1,6 +1,7 @@ package mock import ( + "crawlab/entity" "crawlab/model" "crawlab/services" "github.com/apex/log" @@ -42,7 +43,7 @@ var NodeList = []model.Node{ var TaskList = []model.Task{ { Id: "1234", - SpiderId: bson.ObjectId("xx429e6c19f7abede924fee2"), + SpiderId: bson.ObjectId("5d429e6c19f7abede924fee2"), StartTs: time.Now(), FinishTs: time.Now(), Status: "进行中", @@ -61,7 +62,7 @@ var TaskList = []model.Task{ }, { Id: "5678", - SpiderId: bson.ObjectId("xx429e6c19f7abede924fddf"), + SpiderId: bson.ObjectId("5d429e6c19f7abede924fee2"), StartTs: time.Now(), FinishTs: time.Now(), Status: "进行中", @@ -97,14 +98,14 @@ var dataList = []services.Data{ }, } -var executeble = []model.Executable{ +var executeble = []entity.Executable{ { Path: "/test", FileName: "test.py", DisplayName: "test.py", }, } -var systemInfo = model.SystemInfo{ARCH: "x86", +var systemInfo = entity.SystemInfo{ARCH: "x86", OS: "linux", Hostname: "test", NumCpu: 4, diff --git a/backend/mock/node_test.go b/backend/mock/node_test.go index 9d7096b3..669cafc5 100644 --- a/backend/mock/node_test.go +++ b/backend/mock/node_test.go @@ -1,6 +1,7 @@ package mock import ( + "bytes" "crawlab/model" "encoding/json" "github.com/gin-gonic/gin" @@ -8,13 +9,12 @@ import ( . "github.com/smartystreets/goconvey/convey" "net/http" "net/http/httptest" - "strings" "testing" "time" - "ucloudBilling/ucloud/log" ) var app *gin.Engine + // 本测试依赖MongoDB的服务,所以在测试之前需要启动MongoDB及相关服务 func init() { app = gin.Default() @@ -29,12 +29,25 @@ func init() { app.GET("/nodes/:id/system", GetSystemInfo) // 节点任务列表 app.DELETE("/nodes/:id", DeleteNode) // 删除节点 //// 爬虫 + app.GET("/stats/home", GetHomeStats) // 首页统计数据 // 定时任务 - app.GET("/schedules", GetScheduleList) // 定时任务列表 - app.GET("/schedules/:id", GetSchedule) // 定时任务详情 - app.PUT("/schedules", PutSchedule) // 创建定时任务 - app.POST("/schedules/:id", PostSchedule) // 修改定时任务 - app.DELETE("/schedules/:id", DeleteSchedule) // 删除定时任务 + app.GET("/schedules", GetScheduleList) // 定时任务列表 + app.GET("/schedules/:id", GetSchedule) // 定时任务详情 + app.PUT("/schedules", PutSchedule) // 创建定时任务 + app.POST("/schedules/:id", PostSchedule) // 修改定时任务 + app.DELETE("/schedules/:id", DeleteSchedule) // 删除定时任务 + app.GET("/tasks", GetTaskList) // 任务列表 + app.GET("/tasks/:id", GetTask) // 任务详情 + app.PUT("/tasks", PutTask) // 派发任务 + app.DELETE("/tasks/:id", DeleteTask) // 删除任务 + app.GET("/tasks/:id/results", GetTaskResults) // 任务结果 + app.GET("/tasks/:id/results/download", DownloadTaskResultsCsv) // 下载任务结果 + app.GET("/spiders", GetSpiderList) // 爬虫列表 + app.GET("/spiders/:id", GetSpider) // 爬虫详情 + app.POST("/spiders/:id", PostSpider) // 修改爬虫 + app.DELETE("/spiders/:id",DeleteSpider) // 删除爬虫 + app.GET("/spiders/:id/tasks",GetSpiderTasks) // 爬虫任务列表 + app.GET("/spiders/:id/dir",GetSpiderDir) // 爬虫目录 } //mock test, test data in ./mock @@ -43,8 +56,7 @@ func TestGetNodeList(t *testing.T) { w := httptest.NewRecorder() req, _ := http.NewRequest("GET", "/nodes", nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) - t.Log(resp.Data) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } @@ -63,7 +75,7 @@ func TestGetNode(t *testing.T) { w := httptest.NewRecorder() req, _ := http.NewRequest("GET", "/nodes/"+mongoId, nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } @@ -82,7 +94,7 @@ func TestPing(t *testing.T) { w := httptest.NewRecorder() req, _ := http.NewRequest("GET", "/ping", nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } @@ -100,7 +112,7 @@ func TestGetNodeTaskList(t *testing.T) { w := httptest.NewRecorder() req, _ := http.NewRequest("GET", "nodes/"+mongoId+"/tasks", nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } @@ -119,7 +131,7 @@ func TestDeleteNode(t *testing.T) { w := httptest.NewRecorder() req, _ := http.NewRequest("DELETE", "nodes/"+mongoId, nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } @@ -148,14 +160,13 @@ func TestPostNode(t *testing.T) { var resp Response body, _ := json.Marshal(newItem) - log.Info(strings.NewReader(string(body))) var mongoId = "5d429e6c19f7abede924fee2" w := httptest.NewRecorder() - req, _ := http.NewRequest("POST", "nodes/"+mongoId, strings.NewReader(string(body))) + req, _ := http.NewRequest("POST", "nodes/"+mongoId, bytes.NewReader(body)) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) t.Log(resp) if err != nil { t.Fatal("Unmarshal resp failed") @@ -174,7 +185,7 @@ func TestGetSystemInfo(t *testing.T) { w := httptest.NewRecorder() req, _ := http.NewRequest("GET", "nodes/"+mongoId+"/system", nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } diff --git a/backend/mock/schedule.go b/backend/mock/schedule.go index ae982ca6..702e8754 100644 --- a/backend/mock/schedule.go +++ b/backend/mock/schedule.go @@ -113,7 +113,7 @@ func PutSchedule(c *gin.Context) { func DeleteSchedule(c *gin.Context) { id := bson.ObjectIdHex("5d429e6c19f7abede924fee2") for _, sch := range scheduleList { - if sch.Id == bson.ObjectId(id) { + if sch.Id == id { fmt.Println("delete a schedule") } } diff --git a/backend/mock/schedule_test.go b/backend/mock/schedule_test.go index d26a08d8..12843c75 100644 --- a/backend/mock/schedule_test.go +++ b/backend/mock/schedule_test.go @@ -1,7 +1,9 @@ package mock import ( + "bytes" "crawlab/model" + "crawlab/utils" "encoding/json" "github.com/globalsign/mgo/bson" . "github.com/smartystreets/goconvey/convey" @@ -10,7 +12,6 @@ import ( "strings" "testing" "time" - "ucloudBilling/ucloud/log" ) func TestGetScheduleList(t *testing.T) { @@ -18,7 +19,7 @@ func TestGetScheduleList(t *testing.T) { w := httptest.NewRecorder() req, _ := http.NewRequest("GET", "/schedules", nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } @@ -37,7 +38,7 @@ func TestGetSchedule(t *testing.T) { w := httptest.NewRecorder() req, _ := http.NewRequest("GET", "/schedules/"+mongoId, nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } @@ -57,8 +58,7 @@ func TestDeleteSchedule(t *testing.T) { req, _ := http.NewRequest("DELETE", "/schedules/"+mongoId, nil) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()), &resp) - log.Info(w.Body.String()) + err := json.Unmarshal(w.Body.Bytes(), &resp) if err != nil { t.Fatal("Unmarshal resp failed") } @@ -88,12 +88,12 @@ func TestPostSchedule(t *testing.T) { var resp Response var mongoId = "5d429e6c19f7abede924fee2" - body,_ := json.Marshal(newItem) - log.Info(strings.NewReader(string(body))) + body, _ := json.Marshal(newItem) w := httptest.NewRecorder() - req,_ := http.NewRequest("POST", "/schedules/"+mongoId,strings.NewReader(string(body))) + req, _ := http.NewRequest("POST", "/schedules/"+mongoId, strings.NewReader(utils.BytesToString(body))) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()),&resp) + + err := json.Unmarshal(w.Body.Bytes(), &resp) t.Log(resp) if err != nil { t.Fatal("unmarshal resp failed") @@ -124,12 +124,11 @@ func TestPutSchedule(t *testing.T) { } var resp Response - body,_ := json.Marshal(newItem) - log.Info(strings.NewReader(string(body))) + body, _ := json.Marshal(newItem) w := httptest.NewRecorder() - req,_ := http.NewRequest("PUT", "/schedules",strings.NewReader(string(body))) + req, _ := http.NewRequest("PUT", "/schedules", bytes.NewReader(body)) app.ServeHTTP(w, req) - err := json.Unmarshal([]byte(w.Body.String()),&resp) + err := json.Unmarshal(w.Body.Bytes(), &resp) t.Log(resp) if err != nil { t.Fatal("unmarshal resp failed") diff --git a/backend/mock/spider.go b/backend/mock/spider.go index c4807247..ef3e6104 100644 --- a/backend/mock/spider.go +++ b/backend/mock/spider.go @@ -1 +1,178 @@ -package mock \ No newline at end of file +package mock + +import ( + "crawlab/model" + "github.com/apex/log" + "github.com/gin-gonic/gin" + "github.com/globalsign/mgo/bson" + "io/ioutil" + "net/http" + "os" + "path/filepath" + "time" +) + +var SpiderList = []model.Spider{ + { + Id: bson.ObjectId("5d429e6c19f7abede924fee2"), + Name: "For test", + DisplayName: "test", + Type: "test", + Col: "test", + Site: "www.baidu.com", + Envs: nil, + Src: "../app/spiders", + Cmd: "scrapy crawl test", + LastRunTs: time.Now(), + CreateTs: time.Now(), + UpdateTs: time.Now(), + }, +} + +func GetSpiderList(c *gin.Context) { + + // mock get spider list from database + results := SpiderList + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: results, + }) +} + +func GetSpider(c *gin.Context) { + id := c.Param("id") + var result model.Spider + + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "invalid id") + } + + for _, spider := range SpiderList { + if spider.Id == bson.ObjectId(id) { + result = spider + } + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: result, + }) +} + +func PostSpider(c *gin.Context) { + id := c.Param("id") + + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "invalid id") + } + + var item model.Spider + if err := c.ShouldBindJSON(&item); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + + log.Info("modify the item") + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} +func GetSpiderDir(c *gin.Context) { + // 爬虫ID + id := c.Param("id") + + // 目录相对路径 + path := c.Query("path") + var spi model.Spider + + // 获取爬虫 + for _, spider := range SpiderList { + if spider.Id == bson.ObjectId(id) { + spi = spider + } + } + + // 获取目录下文件列表 + f, err := ioutil.ReadDir(filepath.Join(spi.Src, path)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 遍历文件列表 + var fileList []model.File + for _, file := range f { + fileList = append(fileList, model.File{ + Name: file.Name(), + IsDir: file.IsDir(), + Size: file.Size(), + Path: filepath.Join(path, file.Name()), + }) + } + + // 返回结果 + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: fileList, + }) +} + +func GetSpiderTasks(c *gin.Context) { + id := c.Param("id") + + var spider model.Spider + for _, spi := range SpiderList { + if spi.Id == bson.ObjectId(id) { + spider = spi + } + } + + var tasks model.Task + for _, task := range TaskList { + if task.SpiderId == spider.Id { + tasks = task + } + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: tasks, + }) +} + +func DeleteSpider(c *gin.Context) { + id := c.Param("id") + + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "invalid id") + return + } + + // 获取该爬虫,get this spider + var spider model.Spider + for _, spi := range SpiderList { + if spi.Id == bson.ObjectId(id) { + spider = spi + } + } + + // 删除爬虫文件目录,delete the spider dir + if err := os.RemoveAll(spider.Src); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 从数据库中删除该爬虫,delete this spider from database + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} diff --git a/backend/mock/spider_test.go b/backend/mock/spider_test.go new file mode 100644 index 00000000..f4dbea63 --- /dev/null +++ b/backend/mock/spider_test.go @@ -0,0 +1,137 @@ +package mock + +import ( + "bytes" + "crawlab/model" + "encoding/json" + "github.com/globalsign/mgo/bson" + . "github.com/smartystreets/goconvey/convey" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestGetSpiderList(t *testing.T) { + var resp Response + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/spiders", nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("unmarshal resp faild") + } + Convey("Test API GetSpiderList", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) +} + +func TestGetSpider(t *testing.T) { + var resp Response + var spiderId = "5d429e6c19f7abede924fee2" + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/spiders/"+spiderId, nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("unmarshal resp failed") + } + Convey("Test API GetSpider", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) +} + +func TestPostSpider(t *testing.T) { + var spider = model.Spider{ + Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), + Name: "For test", + DisplayName: "test", + Type: "test", + Col: "test", + Site: "www.baidu.com", + Envs: nil, + Src: "/app/spider", + Cmd: "scrapy crawl test", + LastRunTs: time.Now(), + CreateTs: time.Now(), + UpdateTs: time.Now(), + } + var resp Response + var spiderId = "5d429e6c19f7abede924fee2" + w := httptest.NewRecorder() + body, _ := json.Marshal(spider) + req, _ := http.NewRequest("POST", "/spiders/"+spiderId, bytes.NewReader(body)) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("unmarshal resp failed") + } + Convey("Test API PostSpider", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) + +} + +func TestGetSpiderDir(t *testing.T) { + var spiderId = "5d429e6c19f7abede924fee2" + var resp Response + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/spiders/"+spiderId+"/dir", nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("unmarshal resp failed") + } + Convey("Test API GetSpiderDir", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) + +} + +func TestGetSpiderTasks(t *testing.T) { + var spiderId = "5d429e6c19f7abede924fee2" + var resp Response + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/spiders/"+spiderId+"/tasks", nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("unmarshal resp failed") + } + Convey("Test API GetSpiderTasks", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) +} + +func TestDeleteSpider(t *testing.T) { + var spiderId = "5d429e6c19f7abede924fee2" + var resp Response + w := httptest.NewRecorder() + req, _ := http.NewRequest("DELETE", "/spiders/"+spiderId, nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("unmarshal resp failed") + } + Convey("Test API DeleteSpider", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) +} diff --git a/backend/mock/stats.go b/backend/mock/stats.go new file mode 100644 index 00000000..db2348c6 --- /dev/null +++ b/backend/mock/stats.go @@ -0,0 +1,64 @@ +package mock + +import ( + "crawlab/model" + "github.com/gin-gonic/gin" + "net/http" +) + + + +var taskDailyItems = []model.TaskDailyItem{ + { + Date: "2019/08/19", + TaskCount: 2, + AvgRuntimeDuration: 1000, + }, + { + Date: "2019/08/20", + TaskCount: 3, + AvgRuntimeDuration: 10130, + }, +} + +func GetHomeStats(c *gin.Context) { + type DataOverview struct { + TaskCount int `json:"task_count"` + SpiderCount int `json:"spider_count"` + ActiveNodeCount int `json:"active_node_count"` + ScheduleCount int `json:"schedule_count"` + } + + type Data struct { + Overview DataOverview `json:"overview"` + Daily []model.TaskDailyItem `json:"daily"` + } + + // 任务总数 + taskCount := 10 + + // 在线节点总数 + activeNodeCount := 4 + + // 爬虫总数 + spiderCount := 5 + // 定时任务数 + scheduleCount := 2 + + // 每日任务数 + items := taskDailyItems + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: Data{ + Overview: DataOverview{ + ActiveNodeCount: activeNodeCount, + TaskCount: taskCount, + SpiderCount: spiderCount, + ScheduleCount: scheduleCount, + }, + Daily: items, + }, + }) +} diff --git a/backend/mock/stats_test.go b/backend/mock/stats_test.go new file mode 100644 index 00000000..a94e52d4 --- /dev/null +++ b/backend/mock/stats_test.go @@ -0,0 +1,29 @@ +package mock + +import ( + "encoding/json" + "fmt" + . "github.com/smartystreets/goconvey/convey" + "net/http" + "net/http/httptest" + "testing" +) + +func TestGetHomeStats(t *testing.T) { + var resp Response + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/stats/home", nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + fmt.Println(resp.Data) + if err != nil { + t.Fatal("Unmarshal resp failed") + } + + Convey("Test API GetHomeStats", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) +} diff --git a/backend/mock/task.go b/backend/mock/task.go index c4807247..7b77d07e 100644 --- a/backend/mock/task.go +++ b/backend/mock/task.go @@ -1 +1,224 @@ -package mock \ No newline at end of file +package mock + +import ( + "bytes" + "crawlab/constants" + "crawlab/model" + "crawlab/utils" + "encoding/csv" + "fmt" + "github.com/gin-gonic/gin" + "github.com/globalsign/mgo/bson" + "github.com/satori/go.uuid" + "net/http" +) + +type TaskListRequestData struct { + PageNum int `form:"page_num"` + PageSize int `form:"page_size"` + NodeId string `form:"node_id"` + SpiderId string `form:"spider_id"` +} + +type TaskResultsRequestData struct { + PageNum int `form:"page_num"` + PageSize int `form:"page_size"` +} + +func GetTaskList(c *gin.Context) { + // 绑定数据 + data := TaskListRequestData{} + + if err := c.ShouldBindQuery(&data); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + if data.PageNum == 0 { + data.PageNum = 1 + } + if data.PageSize == 0 { + data.PageNum = 10 + } + + // 过滤条件 + query := bson.M{} + if data.NodeId != "" { + query["node_id"] = bson.ObjectIdHex(data.NodeId) + } + if data.SpiderId != "" { + query["spider_id"] = bson.ObjectIdHex(data.SpiderId) + } + + // 获取任务列表 + tasks := TaskList + + // 获取总任务数 + total := len(TaskList) + + c.JSON(http.StatusOK, ListResponse{ + Status: "ok", + Message: "success", + Total: total, + Data: tasks, + }) +} + +func GetTask(c *gin.Context) { + id := c.Param("id") + + var result model.Task + for _, task := range TaskList { + if task.Id == id { + result = task + } + } + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: result, + }) +} + +func PutTask(c *gin.Context) { + // 生成任务ID,generate task ID + id := uuid.NewV4() + + // 绑定数据 + var t model.Task + if err := c.ShouldBindJSON(&t); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + t.Id = id.String() + t.Status = constants.StatusPending + + // 如果没有传入node_id,则置为null + if t.NodeId.Hex() == "" { + t.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) + } + + // 将任务存入数据库,put the task into database + fmt.Println("put the task into database") + + // 加入任务队列, put the task into task queue + fmt.Println("put the task into task queue") + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func DeleteTask(c *gin.Context) { + id := c.Param("id") + + for _, task := range TaskList { + if task.Id == id { + fmt.Println("delete the task") + } + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func GetTaskResults(c *gin.Context) { + id := c.Param("id") + + // 绑定数据 + data := TaskResultsRequestData{} + if err := c.ShouldBindQuery(&data); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + + // 获取任务 + var task model.Task + for _, ta := range TaskList { + if ta.Id == id { + task = ta + } + } + + fmt.Println(task) + // 获取结果 + var results interface{} + total := len(TaskList) + + c.JSON(http.StatusOK, ListResponse{ + Status: "ok", + Message: "success", + Data: results, + Total: total, + }) +} + +func DownloadTaskResultsCsv(c *gin.Context) { + id := c.Param("id") + + // 获取任务 + var task model.Task + for _, ta := range TaskList { + if ta.Id == id { + task = ta + } + } + fmt.Println(task) + + // 获取结果 + var results []interface { + } + + // 字段列表 + var columns []string + if len(results) == 0 { + columns = []string{} + } else { + item := results[0].(bson.M) + for key := range item { + columns = append(columns, key) + } + } + + // 缓冲 + bytesBuffer := &bytes.Buffer{} + + // 写入UTF-8 BOM,避免使用Microsoft Excel打开乱码 + bytesBuffer.WriteString("\xEF\xBB\xBF") + + writer := csv.NewWriter(bytesBuffer) + + // 写入表头 + if err := writer.Write(columns); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 写入内容 + for _, result := range results { + // 将result转换为[]string + item := result.(bson.M) + var values []string + for _, col := range columns { + value := utils.InterfaceToString(item[col]) + values = append(values, value) + } + + // 写入数据 + if err := writer.Write(values); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + } + + // 此时才会将缓冲区数据写入 + writer.Flush() + + // 设置下载的文件名 + c.Writer.Header().Set("Content-Disposition", "attachment;filename=data.csv") + + // 设置文件类型以及输出数据 + c.Data(http.StatusOK, "text/csv", bytesBuffer.Bytes()) +} diff --git a/backend/mock/task_test.go b/backend/mock/task_test.go new file mode 100644 index 00000000..1cd4ccfa --- /dev/null +++ b/backend/mock/task_test.go @@ -0,0 +1,138 @@ +package mock + +import ( + "bytes" + "crawlab/model" + "encoding/json" + "github.com/globalsign/mgo/bson" + . "github.com/smartystreets/goconvey/convey" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func TestGetTaskList(t *testing.T) { + //var teskListRequestFrom = TaskListRequestData{ + // PageNum: 2, + // PageSize: 10, + // NodeId: "434221grfsf", + // SpiderId: "fdfewqrftea", + //} + + var resp ListResponse + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/tasks?PageNum=2&PageSize=10&NodeId=342dfsff&SpiderId=f8dsf", nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("Unmarshal resp failed") + } + + Convey("Test API GetNodeList", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + So(resp.Total, ShouldEqual, 2) + }) + }) +} + +func TestGetTask(t *testing.T) { + var resp Response + var taskId = "1234" + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/tasks/"+taskId, nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("Unmarshal resp failed") + } + Convey("Test API GetTask", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) +} + +func TestPutTask(t *testing.T) { + var newItem = model.Task{ + Id: "1234", + SpiderId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), + StartTs: time.Now(), + FinishTs: time.Now(), + Status: "online", + NodeId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), + LogPath: "./log", + Cmd: "scrapy crawl test", + Error: "", + ResultCount: 0, + WaitDuration: 10.0, + RuntimeDuration: 10, + TotalDuration: 20, + SpiderName: "test", + NodeName: "test", + CreateTs: time.Now(), + UpdateTs: time.Now(), + } + + var resp Response + body, _ := json.Marshal(&newItem) + w := httptest.NewRecorder() + req, _ := http.NewRequest("PUT", "/tasks", bytes.NewReader(body)) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("unmarshal resp failed") + } + Convey("Test API PutTask", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) +} + +func TestDeleteTask(t *testing.T) { + taskId := "1234" + var resp Response + w := httptest.NewRecorder() + req, _ := http.NewRequest("DELETE", "/tasks/"+taskId, nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("unmarshal resp failed") + } + Convey("Test API DeleteTask", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + }) + }) +} + +func TestGetTaskResults(t *testing.T) { + //var teskListResultFrom = TaskResultsRequestData{ + // PageNum: 2, + // PageSize: 1, + //} + taskId := "1234" + + var resp ListResponse + w := httptest.NewRecorder() + req, _ := http.NewRequest("GET", "/tasks/"+taskId+"/results?PageNum=2&PageSize=1", nil) + app.ServeHTTP(w, req) + err := json.Unmarshal(w.Body.Bytes(), &resp) + if err != nil { + t.Fatal("Unmarshal resp failed") + } + + Convey("Test API GetNodeList", t, func() { + Convey("Test response status", func() { + So(resp.Status, ShouldEqual, "ok") + So(resp.Message, ShouldEqual, "success") + So(resp.Total, ShouldEqual, 2) + }) + }) +} diff --git a/backend/model/file.go b/backend/model/file.go index f8963d06..fe3ece0e 100644 --- a/backend/model/file.go +++ b/backend/model/file.go @@ -1,8 +1,75 @@ package model +import ( + "crawlab/database" + "crawlab/utils" + "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "os" + "runtime/debug" + "time" +) + +type GridFs struct { + Id bson.ObjectId `json:"_id" bson:"_id"` + ChunkSize int32 `json:"chunk_size" bson:"chunkSize"` + UploadDate time.Time `json:"upload_date" bson:"uploadDate"` + Length int32 `json:"length" bson:"length"` + Md5 string `json:"md_5" bson:"md5"` + Filename string `json:"filename" bson:"filename"` +} + type File struct { Name string `json:"name"` Path string `json:"path"` IsDir bool `json:"is_dir"` Size int64 `json:"size"` } + +func (f *GridFs) Remove() { + s, gf := database.GetGridFs("files") + defer s.Close() + if err := gf.RemoveId(f.Id); err != nil { + log.Errorf("remove file id error: %s, id: %s", err.Error(), f.Id.Hex()) + debug.PrintStack() + } +} + +func GetAllGridFs() []*GridFs { + s, gf := database.GetGridFs("files") + defer s.Close() + + var files []*GridFs + if err := gf.Find(nil).All(&files); err != nil { + log.Errorf("get all files error: {}", err.Error()) + debug.PrintStack() + return nil + } + return files +} + +func GetGridFs(id bson.ObjectId) *GridFs { + s, gf := database.GetGridFs("files") + defer s.Close() + + var gfFile GridFs + err := gf.Find(bson.M{"_id": id}).One(&gfFile) + if err != nil { + log.Errorf("get gf file error: %s, file_id: %s", err.Error(), id.Hex()) + debug.PrintStack() + return nil + } + return &gfFile +} + +func RemoveFile(path string) error { + if !utils.Exists(path) { + log.Info("file not found: " + path) + debug.PrintStack() + return nil + } + if err := os.RemoveAll(path); err != nil { + return err + } + return nil +} diff --git a/backend/model/log.go b/backend/model/log.go new file mode 100644 index 00000000..ae6973b1 --- /dev/null +++ b/backend/model/log.go @@ -0,0 +1,43 @@ +package model + +import ( + "github.com/apex/log" + "os" + "runtime/debug" +) + +// 获取本地日志 +func GetLocalLog(logPath string) (fileBytes []byte, err error) { + + f, err := os.Open(logPath) + if err != nil { + log.Error(err.Error()) + debug.PrintStack() + return nil, err + } + fi, err := f.Stat() + if err != nil { + log.Error(err.Error()) + debug.PrintStack() + return nil, err + } + defer f.Close() + + const bufLen = 2 * 1024 * 1024 + logBuf := make([]byte, bufLen) + + off := int64(0) + if fi.Size() > int64(len(logBuf)) { + off = fi.Size() - int64(len(logBuf)) + } + n, err := f.ReadAt(logBuf, off) + + //到文件结尾会有EOF标识 + if err != nil && err.Error() != "EOF" { + log.Error(err.Error()) + debug.PrintStack() + return nil, err + } + logBuf = logBuf[:n] + return logBuf, nil +} diff --git a/backend/model/node.go b/backend/model/node.go index 61c20473..1a1ebce5 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -1,10 +1,13 @@ package model import ( + "crawlab/constants" "crawlab/database" + "crawlab/services/register" "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" + "github.com/spf13/viper" "runtime/debug" "time" ) @@ -28,6 +31,73 @@ type Node struct { UpdateTsUnix int64 `json:"update_ts_unix" bson:"update_ts_unix"` } +const ( + Yes = "Y" + No = "N" +) + +// 当前节点是否为主节点 +func IsMaster() bool { + return viper.GetString("server.master") == Yes +} + +// 获取本机节点 +func GetCurrentNode() (Node, error) { + // 获得注册的key值 + key, err := register.GetRegister().GetKey() + if err != nil { + return Node{}, err + } + + // 从数据库中获取当前节点 + var node Node + errNum := 0 + for { + // 如果错误次数超过10次 + if errNum >= 10 { + panic("cannot get current node") + } + + // 尝试获取节点 + node, err = GetNodeByKey(key) + // 如果获取失败 + if err != nil { + // 如果为主节点,表示为第一次注册,插入节点信息 + if IsMaster() { + // 获取本机信息 + ip, mac, key, err := GetNodeBaseInfo() + if err != nil { + debug.PrintStack() + return node, err + } + + // 生成节点 + node = Node{ + Key: key, + Id: bson.NewObjectId(), + Ip: ip, + Name: ip, + Mac: mac, + IsMaster: true, + } + if err := node.Add(); err != nil { + return node, err + } + return node, nil + } + // 增加错误次数 + errNum++ + + // 5秒后重试 + time.Sleep(5 * time.Second) + continue + } + // 跳出循环 + break + } + return node, nil +} + func (n *Node) Save() error { s, c := database.GetCol("nodes") defer s.Close() @@ -79,6 +149,7 @@ func GetNodeList(filter interface{}) ([]Node, error) { var results []Node if err := c.Find(filter).All(&results); err != nil { + log.Error("get node list error: " + err.Error()) debug.PrintStack() return results, err } @@ -86,10 +157,12 @@ func GetNodeList(filter interface{}) ([]Node, error) { } func GetNode(id bson.ObjectId) (Node, error) { + var node Node + if id.Hex() == "" { + return node, nil + } s, c := database.GetCol("nodes") defer s.Close() - - var node Node if err := c.FindId(id).One(&node); err != nil { if err != mgo.ErrNotFound { log.Errorf(err.Error()) @@ -153,3 +226,47 @@ func GetNodeCount(query interface{}) (int, error) { return count, nil } + +// 节点基本信息 +func GetNodeBaseInfo() (ip string, mac string, key string, error error) { + ip, err := register.GetRegister().GetIp() + if err != nil { + debug.PrintStack() + return "", "", "", err + } + + mac, err = register.GetRegister().GetMac() + if err != nil { + debug.PrintStack() + return "", "", "", err + } + + key, err = register.GetRegister().GetKey() + if err != nil { + debug.PrintStack() + return "", "", "", err + } + return ip, mac, key, nil +} + +// 根据redis的key值,重置node节点为offline +func ResetNodeStatusToOffline(list []string) { + nodes, _ := GetNodeList(nil) + for _, node := range nodes { + hasNode := false + for _, key := range list { + if key == node.Key { + hasNode = true + break + } + } + if !hasNode || node.Status == "" { + node.Status = constants.StatusOffline + if err := node.Save(); err != nil { + log.Errorf(err.Error()) + return + } + continue + } + } +} diff --git a/backend/model/node_test.go b/backend/model/node_test.go new file mode 100644 index 00000000..ba3f4aaa --- /dev/null +++ b/backend/model/node_test.go @@ -0,0 +1,50 @@ +package model + +import ( + "crawlab/config" + "crawlab/constants" + "crawlab/database" + "github.com/apex/log" + . "github.com/smartystreets/goconvey/convey" + "runtime/debug" + "testing" +) + +func TestAddNode(t *testing.T) { + Convey("Test AddNode", t, func() { + if err := config.InitConfig("../conf/config.yml"); err != nil { + log.Error("init config error:" + err.Error()) + panic(err) + } + log.Info("初始化配置成功") + + // 初始化Mongodb数据库 + if err := database.InitMongo(); err != nil { + log.Error("init mongodb error:" + err.Error()) + debug.PrintStack() + panic(err) + } + log.Info("初始化Mongodb数据库成功") + + // 初始化Redis数据库 + if err := database.InitRedis(); err != nil { + log.Error("init redis error:" + err.Error()) + debug.PrintStack() + panic(err) + } + + var node = Node{ + Key: "c4:b3:01:bd:b5:e7", + Name: "10.27.238.101", + Ip: "10.27.238.101", + Port: "8000", + Mac: "c4:b3:01:bd:b5:e7", + Status: constants.StatusOnline, + IsMaster: true, + } + if err := node.Add(); err != nil { + log.Error("add node error:" + err.Error()) + panic(err) + } + }) +} diff --git a/backend/model/schedule.go b/backend/model/schedule.go index 9f77c452..bcd051e3 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -16,8 +16,10 @@ type Schedule struct { Description string `json:"description" bson:"description"` SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"` NodeId bson.ObjectId `json:"node_id" bson:"node_id"` + NodeKey string `json:"node_key" bson:"node_key"` Cron string `json:"cron" bson:"cron"` EntryId cron.EntryID `json:"entry_id" bson:"entry_id"` + Param string `json:"param" bson:"param"` // 前端展示 SpiderName string `json:"spider_name" bson:"spider_name"` @@ -37,6 +39,12 @@ func (sch *Schedule) Save() error { return nil } +func (sch *Schedule) Delete() error { + s, c := database.GetCol("schedules") + defer s.Close() + return c.RemoveId(sch.Id) +} + func GetScheduleList(filter interface{}) ([]Schedule, error) { s, c := database.GetCol("schedules") defer s.Close() @@ -46,11 +54,12 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) { return schedules, err } - for i, schedule := range schedules { + var schs []Schedule + for _, schedule := range schedules { // 获取节点名称 if schedule.NodeId == bson.ObjectIdHex(constants.ObjectIdNull) { // 选择所有节点 - schedules[i].NodeName = "All Nodes" + schedule.NodeName = "All Nodes" } else { // 选择单一节点 node, err := GetNode(schedule.NodeId) @@ -58,18 +67,21 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) { log.Errorf(err.Error()) continue } - schedules[i].NodeName = node.Name + schedule.NodeName = node.Name } // 获取爬虫名称 spider, err := GetSpider(schedule.SpiderId) if err != nil { - log.Errorf(err.Error()) + log.Errorf("get spider by id: %s, error: %s", schedule.SpiderId.Hex(), err.Error()) + debug.PrintStack() + _ = schedule.Delete() continue } - schedules[i].SpiderName = spider.Name + schedule.SpiderName = spider.Name + schs = append(schs, schedule) } - return schedules, nil + return schs, nil } func GetSchedule(id bson.ObjectId) (Schedule, error) { @@ -92,6 +104,13 @@ func UpdateSchedule(id bson.ObjectId, item Schedule) error { return err } + node, err := GetNode(item.NodeId) + if err != nil { + log.Errorf("get node error: %s", err.Error()) + debug.PrintStack() + return nil + } + item.NodeKey = node.Key if err := item.Save(); err != nil { return err } @@ -102,9 +121,17 @@ func AddSchedule(item Schedule) error { s, c := database.GetCol("schedules") defer s.Close() + node, err := GetNode(item.NodeId) + if err != nil { + log.Errorf("get node error: %s", err.Error()) + debug.PrintStack() + return nil + } + item.Id = bson.NewObjectId() item.CreateTs = time.Now() item.UpdateTs = time.Now() + item.NodeKey = node.Key if err := c.Insert(&item); err != nil { debug.PrintStack() diff --git a/backend/model/spider.go b/backend/model/spider.go index c4c94edf..efd93c3d 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -2,6 +2,7 @@ package model import ( "crawlab/database" + "crawlab/entity" "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" @@ -18,18 +19,19 @@ type Spider struct { Id bson.ObjectId `json:"_id" bson:"_id"` // 爬虫ID Name string `json:"name" bson:"name"` // 爬虫名称(唯一) DisplayName string `json:"display_name" bson:"display_name"` // 爬虫显示名称 - Type string `json:"type"` // 爬虫类别 + Type string `json:"type" bson:"type"` // 爬虫类别 FileId bson.ObjectId `json:"file_id" bson:"file_id"` // GridFS文件ID - Col string `json:"col"` // 结果储存位置 - Site string `json:"site"` // 爬虫网站 + Col string `json:"col" bson:"col"` // 结果储存位置 + Site string `json:"site" bson:"site"` // 爬虫网站 Envs []Env `json:"envs" bson:"envs"` // 环境变量 - + Remark string `json:"remark" bson:"remark"` // 备注 // 自定义爬虫 Src string `json:"src" bson:"src"` // 源码位置 Cmd string `json:"cmd" bson:"cmd"` // 执行命令 // 前端展示 - LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间 + LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间 + LastStatus string `json:"last_status"` // 最后执行状态 // TODO: 可配置爬虫 //Fields []interface{} `json:"fields"` @@ -46,6 +48,7 @@ type Spider struct { UpdateTs time.Time `json:"update_ts" bson:"update_ts"` } +// 更新爬虫 func (spider *Spider) Save() error { s, c := database.GetCol("spiders") defer s.Close() @@ -59,6 +62,7 @@ func (spider *Spider) Save() error { return nil } +// 新增爬虫 func (spider *Spider) Add() error { s, c := database.GetCol("spiders") defer s.Close() @@ -73,6 +77,7 @@ func (spider *Spider) Add() error { return nil } +// 获取爬虫的任务 func (spider *Spider) GetTasks() ([]Task, error) { tasks, err := GetTaskList(bson.M{"spider_id": spider.Id}, 0, 10, "-create_ts") if err != nil { @@ -81,6 +86,7 @@ func (spider *Spider) GetTasks() ([]Task, error) { return tasks, nil } +// 爬虫最新的任务 func (spider *Spider) GetLastTask() (Task, error) { tasks, err := GetTaskList(bson.M{"spider_id": spider.Id}, 0, 1, "-create_ts") if err != nil { @@ -92,17 +98,22 @@ func (spider *Spider) GetLastTask() (Task, error) { return tasks[0], nil } +func (spider *Spider) Delete() error { + s, c := database.GetCol("spiders") + defer s.Close() + return c.RemoveId(spider.Id) +} - -func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) { +// 爬虫列表 +func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, error) { s, c := database.GetCol("spiders") defer s.Close() // 获取爬虫列表 spiders := []Spider{} - if err := c.Find(filter).Skip(skip).Limit(limit).All(&spiders); err != nil { + if err := c.Find(filter).Skip(skip).Limit(limit).Sort("+name").All(&spiders); err != nil { debug.PrintStack() - return spiders, err + return spiders, 0, err } // 遍历爬虫列表 @@ -117,11 +128,43 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) { // 赋值 spiders[i].LastRunTs = task.CreateTs + spiders[i].LastStatus = task.Status } - return spiders, nil + count, _ := c.Find(filter).Count() + + return spiders, count, nil } +// 获取爬虫 +func GetSpiderByFileId(fileId bson.ObjectId) *Spider { + s, c := database.GetCol("spiders") + defer s.Close() + + var result *Spider + if err := c.Find(bson.M{"file_id": fileId}).One(&result); err != nil { + log.Errorf("get spider error: %s, file_id: %s", err.Error(), fileId.Hex()) + debug.PrintStack() + return nil + } + return result +} + +// 获取爬虫 +func GetSpiderByName(name string) *Spider { + s, c := database.GetCol("spiders") + defer s.Close() + + var result *Spider + if err := c.Find(bson.M{"name": name}).One(&result); err != nil { + log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name) + debug.PrintStack() + return nil + } + return result +} + +// 获取爬虫 func GetSpider(id bson.ObjectId) (Spider, error) { s, c := database.GetCol("spiders") defer s.Close() @@ -129,6 +172,7 @@ func GetSpider(id bson.ObjectId) (Spider, error) { var result Spider if err := c.FindId(id).One(&result); err != nil { if err != mgo.ErrNotFound { + log.Errorf("get spider error: %s, id: %id", err.Error(), id.Hex()) debug.PrintStack() } return result, err @@ -136,6 +180,7 @@ func GetSpider(id bson.ObjectId) (Spider, error) { return result, nil } +// 更新爬虫 func UpdateSpider(id bson.ObjectId, item Spider) error { s, c := database.GetCol("spiders") defer s.Close() @@ -152,6 +197,7 @@ func UpdateSpider(id bson.ObjectId, item Spider) error { return nil } +// 删除爬虫 func RemoveSpider(id bson.ObjectId) error { s, c := database.GetCol("spiders") defer s.Close() @@ -162,12 +208,44 @@ func RemoveSpider(id bson.ObjectId) error { } if err := c.RemoveId(id); err != nil { + log.Errorf("remove spider error: %s, id:%s", err.Error(), id.Hex()) + debug.PrintStack() + return err + } + + // gf上的文件 + s, gf := database.GetGridFs("files") + defer s.Close() + + if err := gf.RemoveId(result.FileId); err != nil { + log.Error("remove file error, id:" + result.FileId.Hex()) + debug.PrintStack() return err } return nil } +// 删除所有爬虫 +func RemoveAllSpider() error { + s, c := database.GetCol("spiders") + defer s.Close() + + spiders := []Spider{} + err := c.Find(nil).All(&spiders) + if err != nil { + log.Error("get all spiders error:" + err.Error()) + return err + } + for _, spider := range spiders { + if err := RemoveSpider(spider.Id); err != nil { + log.Error("remove spider error:" + err.Error()) + } + } + return nil +} + +// 爬虫总数 func GetSpiderCount() (int, error) { s, c := database.GetCol("spiders") defer s.Close() @@ -176,6 +254,26 @@ func GetSpiderCount() (int, error) { if err != nil { return 0, err } - return count, nil } + +// 爬虫类型 +func GetSpiderTypes() ([]*entity.SpiderType, error) { + s, c := database.GetCol("spiders") + defer s.Close() + + group := bson.M{ + "$group": bson.M{ + "_id": "$type", + "count": bson.M{"$sum": 1}, + }, + } + var types []*entity.SpiderType + if err := c.Pipe([]bson.M{group}).All(&types); err != nil { + log.Errorf("get spider types error: %s", err.Error()) + debug.PrintStack() + return nil, err + } + + return types, nil +} diff --git a/backend/model/system.go b/backend/model/system.go index c4865a24..5c2f5997 100644 --- a/backend/model/system.go +++ b/backend/model/system.go @@ -1,15 +1,98 @@ package model -type SystemInfo struct { - ARCH string `json:"arch"` - OS string `json:"os"` - Hostname string `json:"host_name"` - NumCpu int `json:"num_cpu"` - Executables []Executable `json:"executables"` +import ( + "crawlab/entity" + "github.com/apex/log" + "io/ioutil" + "os" + "path/filepath" + "runtime" + "runtime/debug" + "strings" +) + +var executableNameMap = map[string]string{ + // python + "python": "Python", + "python2": "Python 2", + "python2.7": "Python 2.7", + "python3": "Python 3", + "python3.5": "Python 3.5", + "python3.6": "Python 3.6", + "python3.7": "Python 3.7", + "python3.8": "Python 3.8", + // java + "java": "Java", + // go + "go": "Go", + // node + "node": "NodeJS", + // php + "php": "PHP", + // windows command + "cmd": "Windows Command Prompt", + // linux shell + "sh": "Shell", + "bash": "bash", } -type Executable struct { - Path string `json:"path"` - FileName string `json:"file_name"` - DisplayName string `json:"display_name"` +func GetLocalSystemInfo() (sysInfo entity.SystemInfo, err error) { + executables, err := GetExecutables() + if err != nil { + return sysInfo, err + } + hostname, err := os.Hostname() + if err != nil { + debug.PrintStack() + return sysInfo, err + } + + return entity.SystemInfo{ + ARCH: runtime.GOARCH, + OS: runtime.GOOS, + NumCpu: runtime.GOMAXPROCS(0), + Hostname: hostname, + Executables: executables, + }, nil +} + +func GetSystemEnv(key string) string { + return os.Getenv(key) +} + +func GetPathValues() (paths []string) { + pathEnv := GetSystemEnv("PATH") + return strings.Split(pathEnv, ":") +} + +func GetExecutables() (executables []entity.Executable, err error) { + pathValues := GetPathValues() + + cache := map[string]string{} + + for _, path := range pathValues { + fileList, err := ioutil.ReadDir(path) + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + + for _, file := range fileList { + displayName := executableNameMap[file.Name()] + filePath := filepath.Join(path, file.Name()) + + if cache[filePath] == "" { + if displayName != "" { + executables = append(executables, entity.Executable{ + Path: filePath, + FileName: file.Name(), + DisplayName: displayName, + }) + } + cache[filePath] = filePath + } + } + } + return executables, nil } diff --git a/backend/model/task.go b/backend/model/task.go index 8ae782b5..df046ecc 100644 --- a/backend/model/task.go +++ b/backend/model/task.go @@ -4,7 +4,6 @@ import ( "crawlab/constants" "crawlab/database" "github.com/apex/log" - "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" "runtime/debug" "time" @@ -19,11 +18,13 @@ type Task struct { NodeId bson.ObjectId `json:"node_id" bson:"node_id"` LogPath string `json:"log_path" bson:"log_path"` Cmd string `json:"cmd" bson:"cmd"` + Param string `json:"param" bson:"param"` Error string `json:"error" bson:"error"` ResultCount int `json:"result_count" bson:"result_count"` WaitDuration float64 `json:"wait_duration" bson:"wait_duration"` RuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"` TotalDuration float64 `json:"total_duration" bson:"total_duration"` + Pid int `json:"pid" bson:"pid"` // 前端数据 SpiderName string `json:"spider_name"` @@ -116,20 +117,16 @@ func GetTaskList(filter interface{}, skip int, limit int, sortKey string) ([]Tas for i, task := range tasks { // 获取爬虫名称 spider, err := task.GetSpider() - if err == mgo.ErrNotFound { - // do nothing - } else if err != nil { - return tasks, err + if spider.Id.Hex() == "" || err != nil { + _ = spider.Delete() } else { tasks[i].SpiderName = spider.DisplayName } // 获取节点名称 node, err := task.GetNode() - if err == mgo.ErrNotFound { - // do nothing - } else if err != nil { - return tasks, err + if node.Id.Hex() == "" || err != nil { + _ = task.Delete() } else { tasks[i].NodeName = node.Name } @@ -190,6 +187,23 @@ func RemoveTask(id string) error { return nil } +// 删除task by spider_id +func RemoveTaskBySpiderId(id bson.ObjectId) error { + tasks, err := GetTaskList(bson.M{"spider_id": id}, 0, constants.Infinite, "-create_ts") + if err != nil { + log.Error("get tasks error:" + err.Error()) + } + + for _, task := range tasks { + if err := RemoveTask(task.Id); err != nil { + log.Error("remove task error:" + err.Error()) + continue + } + } + return nil +} + +// task 总数 func GetTaskCount(query interface{}) (int, error) { s, c := database.GetCol("tasks") defer s.Close() @@ -207,7 +221,7 @@ func GetDailyTaskStats(query bson.M) ([]TaskDailyItem, error) { defer s.Close() // 起始日期 - startDate := time.Now().Add(- 30 * 24 * time.Hour) + startDate := time.Now().Add(-30 * 24 * time.Hour) endDate := time.Now() // query @@ -292,6 +306,7 @@ func GetDailyTaskStats(query bson.M) ([]TaskDailyItem, error) { return dailyItems, nil } +// 更新task的结果数 func UpdateTaskResultCount(id string) (err error) { // 获取任务 task, err := GetTask(id) @@ -327,3 +342,25 @@ func UpdateTaskResultCount(id string) (err error) { } return nil } + +func UpdateTaskToAbnormal(nodeId bson.ObjectId) error { + s, c := database.GetCol("tasks") + defer s.Close() + + selector := bson.M{ + "node_id": nodeId, + "status": constants.StatusRunning, + } + update := bson.M{ + "$set": bson.M{ + "status": constants.StatusAbnormal, + }, + } + _, err := c.UpdateAll(selector, update) + if err != nil { + log.Errorf("update task to abnormal error: %s, node_id : %s", err.Error(), nodeId.Hex()) + debug.PrintStack() + return err + } + return nil +} diff --git a/backend/routes/file.go b/backend/routes/file.go index 435f1fba..eaf43ab5 100644 --- a/backend/routes/file.go +++ b/backend/routes/file.go @@ -1,6 +1,7 @@ package routes import ( + "crawlab/utils" "github.com/gin-gonic/gin" "io/ioutil" "net/http" @@ -15,6 +16,6 @@ func GetFile(c *gin.Context) { c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", - Data: string(fileBytes), + Data: utils.BytesToString(fileBytes), }) } diff --git a/backend/routes/node.go b/backend/routes/node.go index f86c152d..7d030773 100644 --- a/backend/routes/node.go +++ b/backend/routes/node.go @@ -15,9 +15,9 @@ func GetNodeList(c *gin.Context) { return } - for i, node := range nodes { - nodes[i].IsMaster = services.IsMasterNode(node.Id.Hex()) - } + //for i, node := range nodes { + // nodes[i].IsMaster = services.IsMasterNode(node.Id.Hex()) + //} c.JSON(http.StatusOK, Response{ Status: "ok", @@ -109,11 +109,11 @@ func GetSystemInfo(c *gin.Context) { }) } -func DeleteNode(c *gin.Context) { +func DeleteNode(c *gin.Context) { id := c.Param("id") node, err := model.GetNode(bson.ObjectIdHex(id)) if err != nil { - HandleError(http.StatusInternalServerError, c ,err) + HandleError(http.StatusInternalServerError, c, err) return } err = node.Delete() diff --git a/backend/routes/schedule.go b/backend/routes/schedule.go index b447abb5..24df0c0f 100644 --- a/backend/routes/schedule.go +++ b/backend/routes/schedule.go @@ -1,7 +1,6 @@ package routes import ( - "crawlab/constants" "crawlab/model" "crawlab/services" "github.com/gin-gonic/gin" @@ -49,9 +48,9 @@ func PostSchedule(c *gin.Context) { newItem.Id = bson.ObjectIdHex(id) // 如果node_id为空,则置为空ObjectId - if newItem.NodeId == "" { - newItem.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - } + //if newItem.NodeId == "" { + // newItem.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) + //} // 更新数据库 if err := model.UpdateSchedule(bson.ObjectIdHex(id), newItem); err != nil { @@ -81,9 +80,9 @@ func PutSchedule(c *gin.Context) { } // 如果node_id为空,则置为空ObjectId - if item.NodeId == "" { - item.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - } + //if item.NodeId == "" { + // item.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) + //} // 更新数据库 if err := model.AddSchedule(item); err != nil { diff --git a/backend/routes/spider.go b/backend/routes/spider.go index dceb2651..4c26fcee 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -3,6 +3,7 @@ package routes import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/model" "crawlab/services" "crawlab/utils" @@ -11,7 +12,7 @@ import ( "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" "github.com/pkg/errors" - uuid "github.com/satori/go.uuid" + "github.com/satori/go.uuid" "github.com/spf13/viper" "io/ioutil" "net/http" @@ -24,7 +25,22 @@ import ( ) func GetSpiderList(c *gin.Context) { - results, err := model.GetSpiderList(nil, 0, 0) + pageNum, _ := c.GetQuery("pageNum") + pageSize, _ := c.GetQuery("pageSize") + keyword, _ := c.GetQuery("keyword") + t, _ := c.GetQuery("type") + + filter := bson.M{ + "name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}}, + } + + if t != "" { + filter["type"] = t + } + + page := &entity.Page{} + page.GetPage(pageNum, pageSize) + results, count, err := model.GetSpiderList(filter, page.Skip, page.Limit) if err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -32,7 +48,7 @@ func GetSpiderList(c *gin.Context) { c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", - Data: results, + Data: bson.M{"list": results, "total": count}, }) } @@ -79,18 +95,6 @@ func PostSpider(c *gin.Context) { }) } -func PublishAllSpiders(c *gin.Context) { - if err := services.PublishAllSpiders(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - func PublishSpider(c *gin.Context) { id := c.Param("id") @@ -104,10 +108,7 @@ func PublishSpider(c *gin.Context) { return } - if err := services.PublishSpider(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } + services.PublishSpider(spider) c.JSON(http.StatusOK, Response{ Status: "ok", @@ -117,7 +118,7 @@ func PublishSpider(c *gin.Context) { func PutSpider(c *gin.Context) { // 从body中获取文件 - file, err := c.FormFile("file") + uploadFile, err := c.FormFile("file") if err != nil { debug.PrintStack() HandleError(http.StatusInternalServerError, c, err) @@ -125,7 +126,7 @@ func PutSpider(c *gin.Context) { } // 如果不为zip文件,返回错误 - if !strings.HasSuffix(file.Filename, ".zip") { + if !strings.HasSuffix(uploadFile.Filename, ".zip") { debug.PrintStack() HandleError(http.StatusBadRequest, c, errors.New("Not a valid zip file")) return @@ -134,7 +135,7 @@ func PutSpider(c *gin.Context) { // 以防tmp目录不存在 tmpPath := viper.GetString("other.tmppath") if !utils.Exists(tmpPath) { - if err := os.Mkdir(tmpPath, os.ModePerm); err != nil { + if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil { log.Error("mkdir other.tmppath dir error:" + err.Error()) debug.PrintStack() HandleError(http.StatusBadRequest, c, errors.New("Mkdir other.tmppath dir error")) @@ -145,57 +146,54 @@ func PutSpider(c *gin.Context) { // 保存到本地临时文件 randomId := uuid.NewV4() tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - if err := c.SaveUploadedFile(file, tmpFilePath); err != nil { + if err := c.SaveUploadedFile(uploadFile, tmpFilePath); err != nil { log.Error("save upload file error: " + err.Error()) debug.PrintStack() HandleError(http.StatusInternalServerError, c, err) return } - // 读取临时文件 - tmpFile, err := os.OpenFile(tmpFilePath, os.O_RDONLY, 0777) + s, gf := database.GetGridFs("files") + defer s.Close() + + // 判断文件是否已经存在 + var gfFile model.GridFs + if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil { + // 已经存在文件,则删除 + _ = gf.RemoveId(gfFile.Id) + } + + // 上传到GridFs + fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) if err != nil { + log.Errorf("upload to grid fs error: %s", err.Error()) debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - if err = tmpFile.Close(); err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) return } - // 目标目录 - dstPath := filepath.Join( - viper.GetString("spider.path"), - strings.Replace(file.Filename, ".zip", "", 1), - ) + idx := strings.LastIndex(uploadFile.Filename, "/") + targetFilename := uploadFile.Filename[idx+1:] - // 如果目标目录已存在,删除目标目录 - if utils.Exists(dstPath) { - if err := os.RemoveAll(dstPath); err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) + // 判断爬虫是否存在 + spiderName := strings.Replace(targetFilename, ".zip", "", 1) + spider := model.GetSpiderByName(spiderName) + if spider == nil { + // 保存爬虫信息 + srcPath := viper.GetString("spider.path") + spider := model.Spider{ + Name: spiderName, + DisplayName: spiderName, + Type: constants.Customized, + Src: filepath.Join(srcPath, spiderName), + FileId: fid, } + _ = spider.Add() + } else { + // 更新file_id + spider.FileId = fid + _ = spider.Save() } - // 将临时文件解压到爬虫目录 - if err := utils.DeCompress(tmpFile, dstPath); err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 删除临时文件 - if err = os.Remove(tmpFilePath); err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 更新爬虫 - services.UpdateSpiders() - c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", @@ -210,21 +208,7 @@ func DeleteSpider(c *gin.Context) { return } - // 获取该爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 删除爬虫文件目录 - if err := os.RemoveAll(spider.Src); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 从数据库中删除该爬虫 - if err := model.RemoveSpider(bson.ObjectIdHex(id)); err != nil { + if err := services.RemoveSpider(id); err != nil { HandleError(http.StatusInternalServerError, c, err) return } @@ -272,7 +256,8 @@ func GetSpiderDir(c *gin.Context) { } // 获取目录下文件列表 - f, err := ioutil.ReadDir(filepath.Join(spider.Src, path)) + spiderPath := viper.GetString("spider.path") + f, err := ioutil.ReadDir(filepath.Join(spiderPath, spider.Name, path)) if err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -321,7 +306,7 @@ func GetSpiderFile(c *gin.Context) { c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", - Data: string(fileBytes), + Data: utils.BytesToString(fileBytes), }) } @@ -361,6 +346,20 @@ func PostSpiderFile(c *gin.Context) { }) } +// 爬虫类型 +func GetSpiderTypes(c *gin.Context) { + types, err := model.GetSpiderTypes() + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: types, + }) +} + func GetSpiderStats(c *gin.Context) { type Overview struct { TaskCount int `json:"task_count" bson:"task_count"` diff --git a/backend/routes/task.go b/backend/routes/task.go index e5efa425..c84ea210 100644 --- a/backend/routes/task.go +++ b/backend/routes/task.go @@ -124,6 +124,13 @@ func PutTask(c *gin.Context) { func DeleteTask(c *gin.Context) { id := c.Param("id") + // 删除日志文件 + if err := services.RemoveLogByTaskId(id); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 删除task if err := model.RemoveTask(id); err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -215,7 +222,7 @@ func DownloadTaskResultsCsv(c *gin.Context) { bytesBuffer := &bytes.Buffer{} // 写入UTF-8 BOM,避免使用Microsoft Excel打开乱码 - bytesBuffer.Write([]byte("\xEF\xBB\xBF")) + bytesBuffer.WriteString("\xEF\xBB\xBF") writer := csv.NewWriter(bytesBuffer) diff --git a/backend/routes/user.go b/backend/routes/user.go index a3d5a431..a6d44cae 100644 --- a/backend/routes/user.go +++ b/backend/routes/user.go @@ -4,6 +4,7 @@ import ( "crawlab/constants" "crawlab/model" "crawlab/services" + "crawlab/services/context" "crawlab/utils" "github.com/gin-gonic/gin" "github.com/globalsign/mgo/bson" @@ -171,7 +172,7 @@ func Login(c *gin.Context) { } // 获取token - tokenStr, err := services.GetToken(user.Username) + tokenStr, err := services.MakeToken(&user) if err != nil { HandleError(http.StatusUnauthorized, c, errors.New("not authorized")) return @@ -185,20 +186,16 @@ func Login(c *gin.Context) { } func GetMe(c *gin.Context) { - // 获取token string - tokenStr := c.GetHeader("Authorization") - - // 校验token - user, err := services.CheckToken(tokenStr) - if err != nil { - HandleError(http.StatusUnauthorized, c, errors.New("not authorized")) + ctx := context.WithGinContext(c) + user := ctx.User() + if user == nil { + ctx.FailedWithError(constants.ErrorUserNotFound, http.StatusUnauthorized) return } - user.Password = "" - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: user, - }) + ctx.Success(struct { + *model.User + Password string `json:"password,omitempty"` + }{ + User: user, + }, nil) } diff --git a/backend/routes/utils.go b/backend/routes/utils.go index 14c5853e..38ca35bb 100644 --- a/backend/routes/utils.go +++ b/backend/routes/utils.go @@ -1,13 +1,15 @@ package routes import ( + "github.com/apex/log" "github.com/gin-gonic/gin" "runtime/debug" ) func HandleError(statusCode int, c *gin.Context, err error) { + log.Errorf("handle error:" + err.Error()) debug.PrintStack() - c.JSON(statusCode, Response{ + c.AbortWithStatusJSON(statusCode, Response{ Status: "ok", Message: "error", Error: err.Error(), @@ -16,7 +18,7 @@ func HandleError(statusCode int, c *gin.Context, err error) { func HandleErrorF(statusCode int, c *gin.Context, err string) { debug.PrintStack() - c.JSON(statusCode, Response{ + c.AbortWithStatusJSON(statusCode, Response{ Status: "ok", Message: "error", Error: err, diff --git a/backend/services/context/context.go b/backend/services/context/context.go new file mode 100644 index 00000000..ce8eb72e --- /dev/null +++ b/backend/services/context/context.go @@ -0,0 +1,100 @@ +package context + +import ( + "crawlab/constants" + "crawlab/errors" + "crawlab/model" + "fmt" + "github.com/apex/log" + "github.com/gin-gonic/gin" + errors2 "github.com/pkg/errors" + "gopkg.in/go-playground/validator.v9" + "net/http" + "runtime/debug" +) + +type Context struct { + *gin.Context +} + +func (c *Context) User() *model.User { + userIfe, exists := c.Get(constants.ContextUser) + if !exists { + return nil + } + user, ok := userIfe.(*model.User) + if !ok { + return nil + } + return user +} +func (c *Context) Success(data interface{}, metas ...interface{}) { + var meta interface{} + if len(metas) == 0 { + meta = gin.H{} + } else { + meta = metas[0] + } + if data == nil { + data = gin.H{} + } + c.JSON(http.StatusOK, gin.H{ + "status": "ok", + "message": "success", + "data": data, + "meta": meta, + "error": "", + }) +} +func (c *Context) Failed(err error, variables ...interface{}) { + c.failed(err, http.StatusOK, variables...) +} +func (c *Context) failed(err error, httpCode int, variables ...interface{}) { + errStr := err.Error() + if len(variables) > 0 { + errStr = fmt.Sprintf(errStr, variables...) + } + log.Errorf("handle error:" + errStr) + debug.PrintStack() + causeError := errors2.Cause(err) + switch causeError.(type) { + case errors.OPError: + opError := causeError.(errors.OPError) + + c.AbortWithStatusJSON(opError.HttpCode, gin.H{ + "status": "ok", + "message": "error", + "error": errStr, + }) + break + case validator.ValidationErrors: + validatorErrors := causeError.(validator.ValidationErrors) + //firstError := validatorErrors[0].(validator.FieldError) + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{ + "status": "ok", + "message": "error", + "error": validatorErrors.Error(), + }) + break + default: + fmt.Println("deprecated....") + c.AbortWithStatusJSON(httpCode, gin.H{ + "status": "ok", + "message": "error", + "error": errStr, + }) + } +} +func (c *Context) FailedWithError(err error, httpCode ...int) { + + var code = 200 + if len(httpCode) > 0 { + code = httpCode[0] + } + c.failed(err, code) + +} + +func WithGinContext(context *gin.Context) *Context { + return &Context{Context: context} +} diff --git a/backend/services/log.go b/backend/services/log.go index d59e463e..81140c0a 100644 --- a/backend/services/log.go +++ b/backend/services/log.go @@ -3,32 +3,28 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" + "crawlab/lib/cron" "crawlab/model" "crawlab/utils" "encoding/json" "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "github.com/spf13/viper" "io/ioutil" + "os" + "path/filepath" "runtime/debug" + "time" ) // 任务日志频道映射 var TaskLogChanMap = utils.NewChanMap() -// 获取本地日志 -func GetLocalLog(logPath string) (fileBytes []byte, err error) { - fileBytes, err = ioutil.ReadFile(logPath) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return fileBytes, err - } - return fileBytes, nil -} - // 获取远端日志 func GetRemoteLog(task model.Task) (logStr string, err error) { // 序列化消息 - msg := NodeMessage{ + msg := entity.NodeMessage{ Type: constants.MsgTypeGetLog, LogPath: task.LogPath, TaskId: task.Id, @@ -42,7 +38,7 @@ func GetRemoteLog(task model.Task) (logStr string, err error) { // 发布获取日志消息 channel := "nodes:" + task.NodeId.Hex() - if err := database.Publish(channel, string(msgBytes)); err != nil { + if _, err := database.RedisClient.Publish(channel, utils.BytesToString(msgBytes)); err != nil { log.Errorf(err.Error()) return "", err } @@ -50,8 +46,111 @@ func GetRemoteLog(task model.Task) (logStr string, err error) { // 生成频道,等待获取log ch := TaskLogChanMap.ChanBlocked(task.Id) - // 此处阻塞,等待结果 - logStr = <-ch + select { + case logStr = <-ch: + log.Infof("get remote log") + break + case <-time.After(5 * time.Second): + logStr = "get remote log timeout" + break + } return logStr, nil } + +// 定时删除日志 +func DeleteLogPeriodically() { + logDir := viper.GetString("log.path") + if !utils.Exists(logDir) { + log.Error("Can Not Set Delete Logs Periodically,No Log Dir") + return + } + rd, err := ioutil.ReadDir(logDir) + if err != nil { + log.Error("Read Log Dir Failed") + return + } + + for _, fi := range rd { + if fi.IsDir() { + log.Info(filepath.Join(logDir, fi.Name())) + os.RemoveAll(filepath.Join(logDir, fi.Name())) + log.Info("Delete Log File Success") + } + } + +} + +// 删除本地日志 +func RemoveLocalLog(path string) error { + if err := model.RemoveFile(path); err != nil { + log.Error("remove local file error: " + err.Error()) + return err + } + return nil +} + +// 删除远程日志 +func RemoveRemoteLog(task model.Task) error { + msg := entity.NodeMessage{ + Type: constants.MsgTypeRemoveLog, + LogPath: task.LogPath, + TaskId: task.Id, + } + // 发布获取日志消息 + channel := "nodes:" + task.NodeId.Hex() + if _, err := database.RedisClient.Publish(channel, utils.GetJson(msg)); err != nil { + log.Errorf("publish redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} + +// 删除日志文件 +func RemoveLogByTaskId(id string) error { + t, err := model.GetTask(id) + if err != nil { + log.Error("get task error:" + err.Error()) + return err + } + removeLog(t) + + return nil +} + +func removeLog(t model.Task) { + if err := RemoveLocalLog(t.LogPath); err != nil { + log.Errorf("remove local log error: %s", err.Error()) + debug.PrintStack() + } + if err := RemoveRemoteLog(t); err != nil { + log.Errorf("remove remote log error: %s", err.Error()) + debug.PrintStack() + } +} + +// 删除日志文件 +func RemoveLogBySpiderId(id bson.ObjectId) error { + tasks, err := model.GetTaskList(bson.M{"spider_id": id}, 0, constants.Infinite, "-create_ts") + if err != nil { + log.Errorf("get tasks error: %s", err.Error()) + debug.PrintStack() + } + for _, task := range tasks { + removeLog(task) + } + return nil +} + +// 初始化定时删除日志 +func InitDeleteLogPeriodically() error { + c := cron.New(cron.WithSeconds()) + if _, err := c.AddFunc(viper.GetString("log.deleteFrequency"), DeleteLogPeriodically); err != nil { + return err + } + + c.Start() + return nil + +} diff --git a/backend/services/log_test.go b/backend/services/log_test.go new file mode 100644 index 00000000..1e9a21c7 --- /dev/null +++ b/backend/services/log_test.go @@ -0,0 +1,51 @@ +package services + +import ( + "crawlab/config" + "crawlab/utils" + "fmt" + "github.com/apex/log" + . "github.com/smartystreets/goconvey/convey" + "github.com/spf13/viper" + "os" + "testing" +) + +func TestDeleteLogPeriodically(t *testing.T) { + Convey("Test DeleteLogPeriodically", t, func() { + if err := config.InitConfig("../conf/config.yml"); err != nil { + log.Error("init config error:" + err.Error()) + panic(err) + } + log.Info("初始化配置成功") + logDir := viper.GetString("log.path") + log.Info(logDir) + DeleteLogPeriodically() + }) +} + +func TestGetLocalLog(t *testing.T) { + //create a log file for test + logPath := "../logs/crawlab/test.log" + f, err := os.Create(logPath) + defer f.Close() + if err != nil { + fmt.Println(err.Error()) + + } else { + _, err = f.WriteString("This is for test") + } + + Convey("Test GetLocalLog", t, func() { + Convey("Test response", func() { + logStr, err := GetLocalLog(logPath) + log.Info(utils.BytesToString(logStr)) + fmt.Println(err) + So(err, ShouldEqual, nil) + + }) + }) + //delete the test log file + os.Remove(logPath) + +} diff --git a/backend/services/msg_handler/handler.go b/backend/services/msg_handler/handler.go new file mode 100644 index 00000000..b8b8e231 --- /dev/null +++ b/backend/services/msg_handler/handler.go @@ -0,0 +1,37 @@ +package msg_handler + +import ( + "crawlab/constants" + "crawlab/entity" + "github.com/apex/log" +) + +type Handler interface { + Handle() error +} + +func GetMsgHandler(msg entity.NodeMessage) Handler { + log.Infof("received msg , type is : %s", msg.Type) + if msg.Type == constants.MsgTypeGetLog || msg.Type == constants.MsgTypeRemoveLog { + // 日志相关 + return &Log{ + msg: msg, + } + } else if msg.Type == constants.MsgTypeCancelTask { + // 任务相关 + return &Task{ + msg: msg, + } + } else if msg.Type == constants.MsgTypeGetSystemInfo { + // 系统信息相关 + return &SystemInfo{ + msg: msg, + } + } else if msg.Type == constants.MsgTypeRemoveSpider { + // 爬虫相关 + return &Spider{ + SpiderId: msg.SpiderId, + } + } + return nil +} diff --git a/backend/services/msg_handler/msg_log.go b/backend/services/msg_handler/msg_log.go new file mode 100644 index 00000000..b865f4e3 --- /dev/null +++ b/backend/services/msg_handler/msg_log.go @@ -0,0 +1,53 @@ +package msg_handler + +import ( + "crawlab/constants" + "crawlab/entity" + "crawlab/model" + "crawlab/utils" + "github.com/apex/log" + "runtime/debug" +) + +type Log struct { + msg entity.NodeMessage +} + +func (g *Log) Handle() error { + if g.msg.Type == constants.MsgTypeGetLog { + return g.get() + } else if g.msg.Type == constants.MsgTypeRemoveLog { + return g.remove() + } + return nil +} + +func (g *Log) get() error { + // 发出的消息 + msgSd := entity.NodeMessage{ + Type: constants.MsgTypeGetLog, + TaskId: g.msg.TaskId, + } + // 获取本地日志 + logStr, err := model.GetLocalLog(g.msg.LogPath) + if err != nil { + log.Errorf("get node local log error: %s", err.Error()) + debug.PrintStack() + msgSd.Error = err.Error() + msgSd.Log = err.Error() + } else { + msgSd.Log = utils.BytesToString(logStr) + } + // 发布消息给主节点 + if err := utils.Pub(constants.ChannelMasterNode, msgSd); err != nil { + log.Errorf("pub log to master node error: %s", err.Error()) + debug.PrintStack() + return err + } + log.Infof(msgSd.Log) + return nil +} + +func (g *Log) remove() error { + return model.RemoveFile(g.msg.LogPath) +} diff --git a/backend/services/msg_handler/msg_spider.go b/backend/services/msg_handler/msg_spider.go new file mode 100644 index 00000000..dcd6ce06 --- /dev/null +++ b/backend/services/msg_handler/msg_spider.go @@ -0,0 +1,24 @@ +package msg_handler + +import ( + "crawlab/model" + "crawlab/utils" + "github.com/globalsign/mgo/bson" + "github.com/spf13/viper" + "path/filepath" +) + +type Spider struct { + SpiderId string +} + +func (s *Spider) Handle() error { + // 移除本地的爬虫目录 + spider, err := model.GetSpider(bson.ObjectIdHex(s.SpiderId)) + if err != nil { + return err + } + path := filepath.Join(viper.GetString("spider.path"), spider.Name) + utils.RemoveFiles(path) + return nil +} diff --git a/backend/services/msg_handler/msg_system_info.go b/backend/services/msg_handler/msg_system_info.go new file mode 100644 index 00000000..6b88e2cf --- /dev/null +++ b/backend/services/msg_handler/msg_system_info.go @@ -0,0 +1,29 @@ +package msg_handler + +import ( + "crawlab/constants" + "crawlab/entity" + "crawlab/model" + "crawlab/utils" +) + +type SystemInfo struct { + msg entity.NodeMessage +} + +func (s *SystemInfo) Handle() error { + // 获取环境信息 + sysInfo, err := model.GetLocalSystemInfo() + if err != nil { + return err + } + msgSd := entity.NodeMessage{ + Type: constants.MsgTypeGetSystemInfo, + NodeId: s.msg.NodeId, + SysInfo: sysInfo, + } + if err := utils.Pub(constants.ChannelMasterNode, msgSd); err != nil { + return err + } + return nil +} diff --git a/backend/services/msg_handler/msg_task.go b/backend/services/msg_handler/msg_task.go new file mode 100644 index 00000000..21b95430 --- /dev/null +++ b/backend/services/msg_handler/msg_task.go @@ -0,0 +1,40 @@ +package msg_handler + +import ( + "crawlab/constants" + "crawlab/entity" + "crawlab/model" + "crawlab/utils" + "github.com/apex/log" + "runtime/debug" + "time" +) + +type Task struct { + msg entity.NodeMessage +} + +func (t *Task) Handle() error { + log.Infof("received cancel task msg, task_id: %s", t.msg.TaskId) + // 取消任务 + ch := utils.TaskExecChanMap.ChanBlocked(t.msg.TaskId) + if ch != nil { + ch <- constants.TaskCancel + } else { + log.Infof("chan is empty, update status to abnormal") + // 节点可能被重启,找不到chan + task, err := model.GetTask(t.msg.TaskId) + if err != nil { + log.Errorf("task not found, task_id: %s", t.msg.TaskId) + debug.PrintStack() + return err + } + task.Status = constants.StatusAbnormal + task.FinishTs = time.Now() + if err := task.Save(); err != nil { + debug.PrintStack() + log.Infof("cancel task error: %s", err.Error()) + } + } + return nil +} diff --git a/backend/services/node.go b/backend/services/node.go index 1fa2370c..04cbc0ef 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -3,14 +3,17 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/lib/cron" "crawlab/model" + "crawlab/services/msg_handler" "crawlab/services/register" + "crawlab/utils" "encoding/json" "fmt" "github.com/apex/log" "github.com/globalsign/mgo/bson" - "github.com/spf13/viper" + "github.com/gomodule/redigo/redis" "runtime/debug" "time" ) @@ -24,109 +27,10 @@ type Data struct { UpdateTsUnix int64 `json:"update_ts_unix"` } -type NodeMessage struct { - // 通信类别 - Type string `json:"type"` - - // 任务相关 - TaskId string `json:"task_id"` // 任务ID - - // 节点相关 - NodeId string `json:"node_id"` // 节点ID - - // 日志相关 - LogPath string `json:"log_path"` // 日志路径 - Log string `json:"log"` // 日志 - - // 系统信息 - SysInfo model.SystemInfo `json:"sys_info"` - - // 错误相关 - Error string `json:"error"` -} - -const ( - Yes = "Y" - No = "N" -) - -// 获取本机节点 -func GetCurrentNode() (model.Node, error) { - // 获得注册的key值 - key, err := register.GetRegister().GetKey() - if err != nil { - return model.Node{}, err - } - - // 从数据库中获取当前节点 - var node model.Node - errNum := 0 - for { - // 如果错误次数超过10次 - if errNum >= 10 { - panic("cannot get current node") - } - - // 尝试获取节点 - node, err = model.GetNodeByKey(key) - // 如果获取失败 - if err != nil { - // 如果为主节点,表示为第一次注册,插入节点信息 - if IsMaster() { - // 获取本机IP地址 - ip, err := register.GetRegister().GetIp() - if err != nil { - debug.PrintStack() - return model.Node{}, err - } - - mac, err := register.GetRegister().GetMac() - if err != nil { - debug.PrintStack() - return model.Node{}, err - } - - key, err := register.GetRegister().GetKey() - if err != nil { - debug.PrintStack() - return model.Node{}, err - } - - // 生成节点 - node = model.Node{ - Key: key, - Id: bson.NewObjectId(), - Ip: ip, - Name: key, - Mac: mac, - IsMaster: true, - } - if err := node.Add(); err != nil { - return node, err - } - return node, nil - } - // 增加错误次数 - errNum++ - - // 5秒后重试 - time.Sleep(5 * time.Second) - continue - } - // 跳出循环 - break - } - return node, nil -} - -// 当前节点是否为主节点 -func IsMaster() bool { - return viper.GetString("server.master") == Yes -} - +// 所有调用IsMasterNode的方法,都永远会在master节点执行,所以GetCurrentNode方法返回永远是master节点 // 该ID的节点是否为主节点 func IsMasterNode(id string) bool { - curNode, _ := GetCurrentNode() + curNode, _ := model.GetCurrentNode() node, _ := model.GetNode(bson.ObjectIdHex(id)) return curNode.Id == node.Id } @@ -176,72 +80,60 @@ func UpdateNodeStatus() { // 在Redis中删除该节点 if err := database.RedisClient.HDel("nodes", data.Key); err != nil { log.Errorf(err.Error()) - return - } - - // 在MongoDB中该节点设置状态为离线 - s, c := database.GetCol("nodes") - defer s.Close() - var node model.Node - if err := c.Find(bson.M{"key": key}).One(&node); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - node.Status = constants.StatusOffline - if err := node.Save(); err != nil { - log.Errorf(err.Error()) - return } continue } - // 更新节点信息到数据库 - s, c := database.GetCol("nodes") - defer s.Close() - var node model.Node - if err := c.Find(bson.M{"key": key}).One(&node); err != nil { - // 数据库不存在该节点 - node = model.Node{ - Key: key, - Name: key, - Ip: data.Ip, - Port: "8000", - Mac: data.Mac, - Status: constants.StatusOnline, - IsMaster: data.Master, - } - if err := node.Add(); err != nil { - log.Errorf(err.Error()) - return - } - } else { - // 数据库存在该节点 - node.Status = constants.StatusOnline - if err := node.Save(); err != nil { - log.Errorf(err.Error()) - return - } - } + // 处理node信息 + handleNodeInfo(key, data) } - // 遍历数据库中的节点列表 - nodes, err := model.GetNodeList(nil) - for _, node := range nodes { - hasNode := false - for _, key := range list { - if key == node.Key { - hasNode = true - break - } + // 重新获取list + list, _ = database.RedisClient.HKeys("nodes") + // 重置不在redis的key为offline + model.ResetNodeStatusToOffline(list) +} + +func handleNodeInfo(key string, data Data) { + // 更新节点信息到数据库 + s, c := database.GetCol("nodes") + defer s.Close() + + // 同个key可能因为并发,被注册多次 + //var nodes []model.Node + //_ = c.Find(bson.M{"key": key}).All(&nodes) + //if nodes != nil && len(nodes) > 1 { + // for _, node := range nodes { + // _ = c.RemoveId(node.Id) + // } + //} + + var node model.Node + if err := c.Find(bson.M{"key": key}).One(&node); err != nil { + // 数据库不存在该节点 + node = model.Node{ + Key: key, + Name: data.Ip, + Ip: data.Ip, + Port: "8000", + Mac: data.Mac, + Status: constants.StatusOnline, + IsMaster: data.Master, + UpdateTs: time.Now(), + UpdateTsUnix: time.Now().Unix(), } - if !hasNode { - node.Status = constants.StatusOffline - if err := node.Save(); err != nil { - log.Errorf(err.Error()) - return - } - continue + if err := node.Add(); err != nil { + log.Errorf(err.Error()) + return + } + } else { + // 数据库存在该节点 + node.Status = constants.StatusOnline + node.UpdateTs = time.Now() + node.UpdateTsUnix = time.Now().Unix() + if err := node.Save(); err != nil { + log.Errorf(err.Error()) + return } } } @@ -269,7 +161,7 @@ func UpdateNodeData() { Key: key, Mac: mac, Ip: ip, - Master: IsMaster(), + Master: model.IsMaster(), UpdateTs: time.Now(), UpdateTsUnix: time.Now().Unix(), } @@ -281,24 +173,22 @@ func UpdateNodeData() { debug.PrintStack() return } - if err := database.RedisClient.HSet("nodes", key, string(dataBytes)); err != nil { + if err := database.RedisClient.HSet("nodes", key, utils.BytesToString(dataBytes)); err != nil { log.Errorf(err.Error()) return } } -func MasterNodeCallback(channel string, msgStr string) { +func MasterNodeCallback(message redis.Message) (err error) { // 反序列化 - var msg NodeMessage - if err := json.Unmarshal([]byte(msgStr), &msg); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return + var msg entity.NodeMessage + if err := json.Unmarshal(message.Data, &msg); err != nil { + + return err } if msg.Type == constants.MsgTypeGetLog { // 获取日志 - fmt.Println(msg) time.Sleep(10 * time.Millisecond) ch := TaskLogChanMap.ChanBlocked(msg.TaskId) ch <- msg.Log @@ -308,80 +198,20 @@ func MasterNodeCallback(channel string, msgStr string) { time.Sleep(10 * time.Millisecond) ch := SystemInfoChanMap.ChanBlocked(msg.NodeId) sysInfoBytes, _ := json.Marshal(&msg.SysInfo) - ch <- string(sysInfoBytes) + ch <- utils.BytesToString(sysInfoBytes) } + return nil } -func WorkerNodeCallback(channel string, msgStr string) { +func WorkerNodeCallback(message redis.Message) (err error) { // 反序列化 - msg := NodeMessage{} - fmt.Println(msgStr) - if err := json.Unmarshal([]byte(msgStr), &msg); err != nil { - log.Errorf(err.Error()) + msg := utils.GetMessage(message) + if err := msg_handler.GetMsgHandler(*msg).Handle(); err != nil { + log.Errorf("msg handler error: %s", err.Error()) debug.PrintStack() - return - } - - if msg.Type == constants.MsgTypeGetLog { - // 消息类型为获取日志 - - // 发出的消息 - msgSd := NodeMessage{ - Type: constants.MsgTypeGetLog, - TaskId: msg.TaskId, - } - - // 获取本地日志 - logStr, err := GetLocalLog(msg.LogPath) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - msgSd.Error = err.Error() - } - msgSd.Log = string(logStr) - - // 序列化 - msgSdBytes, err := json.Marshal(&msgSd) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 发布消息给主节点 - fmt.Println(msgSd) - if err := database.Publish("nodes:master", string(msgSdBytes)); err != nil { - log.Errorf(err.Error()) - return - } - } else if msg.Type == constants.MsgTypeCancelTask { - // 取消任务 - ch := TaskExecChanMap.ChanBlocked(msg.TaskId) - ch <- constants.TaskCancel - } else if msg.Type == constants.MsgTypeGetSystemInfo { - // 获取环境信息 - sysInfo, err := GetLocalSystemInfo() - if err != nil { - log.Errorf(err.Error()) - return - } - msgSd := NodeMessage{ - Type: constants.MsgTypeGetSystemInfo, - NodeId: msg.NodeId, - SysInfo: sysInfo, - } - msgSdBytes, err := json.Marshal(&msgSd) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - fmt.Println(msgSd) - if err := database.Publish("nodes:master", string(msgSdBytes)); err != nil { - log.Errorf(err.Error()) - return - } + return err } + return nil } // 初始化节点服务 @@ -397,31 +227,35 @@ func InitNodeService() error { } // 首次更新节点数据(注册到Redis) - UpdateNodeData() - - // 消息订阅 - var sub database.Subscriber - sub.Connect() + // UpdateNodeData() // 获取当前节点 - node, err := GetCurrentNode() + node, err := model.GetCurrentNode() if err != nil { log.Errorf(err.Error()) return err } - if IsMaster() { + if model.IsMaster() { // 如果为主节点,订阅主节点通信频道 - channel := "nodes:master" - sub.Subscribe(channel, MasterNodeCallback) + if err := utils.Sub(constants.ChannelMasterNode, MasterNodeCallback); err != nil { + return err + } } else { // 若为工作节点,订阅单独指定通信频道 - channel := "nodes:" + node.Id.Hex() - sub.Subscribe(channel, WorkerNodeCallback) + channel := constants.ChannelWorkerNode + node.Id.Hex() + if err := utils.Sub(channel, WorkerNodeCallback); err != nil { + return err + } + } + + // 订阅全通道 + if err := utils.Sub(constants.ChannelAllNode, WorkerNodeCallback); err != nil { + return err } // 如果为主节点,每30秒刷新所有节点信息 - if IsMaster() { + if model.IsMaster() { spec := "*/10 * * * * *" if _, err := c.AddFunc(spec, UpdateNodeStatus); err != nil { debug.PrintStack() @@ -429,6 +263,12 @@ func InitNodeService() error { } } + // 更新在当前节点执行中的任务状态为:abnormal + if err := model.UpdateTaskToAbnormal(node.Id); err != nil { + debug.PrintStack() + return err + } + c.Start() return nil } diff --git a/backend/services/schedule.go b/backend/services/schedule.go index 916e42d0..f011f02a 100644 --- a/backend/services/schedule.go +++ b/backend/services/schedule.go @@ -17,7 +17,19 @@ type Scheduler struct { func AddTask(s model.Schedule) func() { return func() { - nodeId := s.NodeId + node, err := model.GetNodeByKey(s.NodeKey) + if err != nil || node.Id.Hex() == "" { + log.Errorf("get node by key error: %s", err.Error()) + debug.PrintStack() + return + } + + spider := model.GetSpiderByName(s.SpiderName) + if spider == nil || spider.Id.Hex() == "" { + log.Errorf("get spider by name error: %s", err.Error()) + debug.PrintStack() + return + } // 生成任务ID id := uuid.NewV4() @@ -25,9 +37,10 @@ func AddTask(s model.Schedule) func() { // 生成任务模型 t := model.Task{ Id: id.String(), - SpiderId: s.SpiderId, - NodeId: nodeId, + SpiderId: spider.Id, + NodeId: node.Id, Status: constants.StatusPending, + Param: s.Param, } // 将任务存入数据库 @@ -61,12 +74,16 @@ func (s *Scheduler) Start() error { // 更新任务列表 if err := s.Update(); err != nil { + log.Errorf("update scheduler error: %s", err.Error()) + debug.PrintStack() return err } // 每30秒更新一次任务列表 spec := "*/30 * * * * *" if _, err := exec.AddFunc(spec, UpdateSchedules); err != nil { + log.Errorf("add func update schedulers error: %s", err.Error()) + debug.PrintStack() return err } @@ -79,12 +96,16 @@ func (s *Scheduler) AddJob(job model.Schedule) error { // 添加任务 eid, err := s.cron.AddFunc(spec, AddTask(job)) if err != nil { + log.Errorf("add func task error: %s", err.Error()) + debug.PrintStack() return err } // 更新EntryID job.EntryId = eid if err := job.Save(); err != nil { + log.Errorf("job save error: %s", err.Error()) + debug.PrintStack() return err } @@ -105,6 +126,8 @@ func (s *Scheduler) Update() error { // 获取所有定时任务 sList, err := model.GetScheduleList(nil) if err != nil { + log.Errorf("get scheduler list error: %s", err.Error()) + debug.PrintStack() return err } @@ -115,6 +138,8 @@ func (s *Scheduler) Update() error { // 添加到定时任务 if err := s.AddJob(job); err != nil { + log.Errorf("add job error: %s, job: %s, cron: %s", err.Error(), job.Name, job.Cron) + debug.PrintStack() return err } } @@ -127,6 +152,8 @@ func InitScheduler() error { cron: cron.New(cron.WithSeconds()), } if err := Sched.Start(); err != nil { + log.Errorf("start scheduler error: %s", err.Error()) + debug.PrintStack() return err } return nil diff --git a/backend/services/spider.go b/backend/services/spider.go index f4f856e6..7aea456f 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -3,19 +3,16 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/lib/cron" "crawlab/model" + "crawlab/services/spider_handler" "crawlab/utils" - "encoding/json" "fmt" "github.com/apex/log" "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" - "github.com/pkg/errors" - "github.com/satori/go.uuid" "github.com/spf13/viper" - "io" - "io/ioutil" "os" "path/filepath" "runtime/debug" @@ -30,151 +27,17 @@ type SpiderFileData struct { type SpiderUploadMessage struct { FileId string FileName string -} - -// 从项目目录中获取爬虫列表 -func GetSpidersFromDir() ([]model.Spider, error) { - // 爬虫项目目录路径 - srcPath := viper.GetString("spider.path") - - // 如果爬虫项目目录不存在,则创建一个 - if !utils.Exists(srcPath) { - if err := os.MkdirAll(srcPath, 0666); err != nil { - debug.PrintStack() - return []model.Spider{}, err - } - } - - // 获取爬虫项目目录下的所有子项 - items, err := ioutil.ReadDir(srcPath) - if err != nil { - debug.PrintStack() - return []model.Spider{}, err - } - - // 定义爬虫列表 - spiders := make([]model.Spider, 0) - - // 遍历所有子项 - for _, item := range items { - // 忽略不为目录的子项 - if !item.IsDir() { - continue - } - - // 忽略隐藏目录 - if strings.HasPrefix(item.Name(), ".") { - continue - } - - // 构造爬虫 - spider := model.Spider{ - Name: item.Name(), - DisplayName: item.Name(), - Type: constants.Customized, - Src: filepath.Join(srcPath, item.Name()), - FileId: bson.ObjectIdHex(constants.ObjectIdNull), - } - - // 将爬虫加入列表 - spiders = append(spiders, spider) - } - - return spiders, nil -} - -// 将爬虫保存到数据库 -func SaveSpiders(spiders []model.Spider) error { - // 遍历爬虫列表 - for _, spider := range spiders { - // 忽略非自定义爬虫 - if spider.Type != constants.Customized { - continue - } - - // 如果该爬虫不存在于数据库,则保存爬虫到数据库 - s, c := database.GetCol("spiders") - defer s.Close() - var spider_ *model.Spider - if err := c.Find(bson.M{"src": spider.Src}).One(&spider_); err != nil { - // 不存在 - if err := spider.Add(); err != nil { - debug.PrintStack() - return err - } - } else { - // 存在 - } - } - - return nil -} - -// 更新爬虫 -func UpdateSpiders() { - // 从项目目录获取爬虫列表 - spiders, err := GetSpidersFromDir() - if err != nil { - log.Errorf(err.Error()) - return - } - - // 储存爬虫 - if err := SaveSpiders(spiders); err != nil { - log.Errorf(err.Error()) - return - } -} - -// 打包爬虫目录为zip文件 -func ZipSpider(spider model.Spider) (filePath string, err error) { - // 如果源文件夹不存在,抛错 - if !utils.Exists(spider.Src) { - debug.PrintStack() - return "", errors.New("source path does not exist") - } - - // 临时文件路径 - randomId := uuid.NewV4() - if err != nil { - debug.PrintStack() - return "", err - } - filePath = filepath.Join( - viper.GetString("other.tmppath"), - randomId.String()+".zip", - ) - - // 将源文件夹打包为zip文件 - d, err := os.Open(spider.Src) - if err != nil { - debug.PrintStack() - return filePath, err - } - var files []*os.File - files = append(files, d) - if err := utils.Compress(files, filePath); err != nil { - return filePath, err - } - - return filePath, nil + SpiderId string } // 上传zip文件到GridFS -func UploadToGridFs(spider model.Spider, fileName string, filePath string) (fid bson.ObjectId, err error) { +func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) { fid = "" // 获取MongoDB GridFS连接 s, gf := database.GetGridFs("files") defer s.Close() - // 如果存在FileId删除GridFS上的老文件 - if !utils.IsObjectIdNull(spider.FileId) { - if err = gf.RemoveId(spider.FileId); err != nil { - debug.PrintStack() - } - } - // 创建一个新GridFS文件 f, err := gf.Create(fileName) if err != nil { @@ -204,6 +67,7 @@ func UploadToGridFs(spider model.Spider, fileName string, filePath string) (fid return fid, nil } +// 写入grid fs func WriteToGridFS(content []byte, f *mgo.GridFile) { if _, err := f.Write(content); err != nil { debug.PrintStack() @@ -223,7 +87,7 @@ func ReadFileByStep(filePath string, handle func([]byte, *mgo.GridFile), fileCre for { switch nr, err := f.Read(s[:]); true { case nr < 0: - fmt.Fprintf(os.Stderr, "cat: error reading: %s\n", err.Error()) + _, _ = fmt.Fprintf(os.Stderr, "cat: error reading: %s\n", err.Error()) debug.PrintStack() case nr == 0: // EOF return nil @@ -231,174 +95,114 @@ func ReadFileByStep(filePath string, handle func([]byte, *mgo.GridFile), fileCre handle(s[0:nr], fileCreate) } } - return nil } // 发布所有爬虫 -func PublishAllSpiders() error { +func PublishAllSpiders() { // 获取爬虫列表 - spiders, err := model.GetSpiderList(nil, 0, constants.Infinite) - if err != nil { - log.Errorf(err.Error()) - return err + spiders, _, _ := model.GetSpiderList(nil, 0, constants.Infinite) + if len(spiders) == 0 { + return } - + log.Infof("start sync spider to local, total: %d", len(spiders)) // 遍历爬虫列表 for _, spider := range spiders { - // 发布爬虫 - if err := PublishSpider(spider); err != nil { - log.Errorf(err.Error()) - return err - } - } - - return nil -} - -func PublishAllSpidersJob() { - if err := PublishAllSpiders(); err != nil { - log.Errorf(err.Error()) + // 异步发布爬虫 + go func(s model.Spider) { + PublishSpider(s) + }(spider) } } // 发布爬虫 -// 1. 将源文件夹打包为zip文件 -// 2. 上传zip文件到GridFS -// 3. 发布消息给工作节点 -func PublishSpider(spider model.Spider) (err error) { - // 将源文件夹打包为zip文件 - filePath, err := ZipSpider(spider) - if err != nil { - return err - } - - // 上传zip文件到GridFS - fileName := filepath.Base(spider.Src) + ".zip" - fid, err := UploadToGridFs(spider, fileName, filePath) - if err != nil { - return err - } - - // 保存FileId - spider.FileId = fid - if err := spider.Save(); err != nil { - return err - } - - // 发布消息给工作节点 - msg := SpiderUploadMessage{ - FileId: fid.Hex(), - FileName: fileName, - } - msgStr, err := json.Marshal(msg) - if err != nil { +func PublishSpider(spider model.Spider) { + // 查询gf file,不存在则删除 + gfFile := model.GetGridFs(spider.FileId) + if gfFile == nil { + _ = model.RemoveSpider(spider.Id) return } - channel := "files:upload" - if err = database.Publish(channel, string(msgStr)); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return + spiderSync := spider_handler.SpiderSync{ + Spider: spider, } - return + //目录不存在,则直接下载 + path := filepath.Join(viper.GetString("spider.path"), spider.Name) + if !utils.Exists(path) { + log.Infof("path not found: %s", path) + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) + return + } + // md5文件不存在,则下载 + md5 := filepath.Join(path, spider_handler.Md5File) + if !utils.Exists(md5) { + log.Infof("md5 file not found: %s", md5) + spiderSync.RemoveSpiderFile() + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) + return + } + // md5值不一样,则下载 + md5Str := utils.ReadFileOneLine(md5) + // 去掉空格以及换行符 + md5Str = strings.Replace(md5Str, " ", "", -1) + md5Str = strings.Replace(md5Str, "\n", "", -1) + if gfFile.Md5 != md5Str { + log.Infof("md5 is different, gf-md5:%s, file-md5:%s", gfFile.Md5, md5Str) + spiderSync.RemoveSpiderFile() + spiderSync.Download() + spiderSync.CreateMd5File(gfFile.Md5) + return + } } -// 上传爬虫回调 -func OnFileUpload(channel string, msgStr string) { - s, gf := database.GetGridFs("files") - defer s.Close() - - // 反序列化消息 - var msg SpiderUploadMessage - if err := json.Unmarshal([]byte(msgStr), &msg); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 从GridFS获取该文件 - f, err := gf.OpenId(bson.ObjectIdHex(msg.FileId)) +func RemoveSpider(id string) error { + // 获取该爬虫 + spider, err := model.GetSpider(bson.ObjectIdHex(id)) if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - defer f.Close() - - // 生成唯一ID - randomId := uuid.NewV4() - - // 创建临时文件 - tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), randomId.String()+".zip") - tmpFile, err := os.OpenFile(tmpFilePath, os.O_CREATE|os.O_WRONLY, os.ModePerm) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - defer tmpFile.Close() - - // 将该文件写入临时文件 - if _, err := io.Copy(tmpFile, f); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return + return err } - // 解压缩临时文件到目标文件夹 - dstPath := filepath.Join( - viper.GetString("spider.path"), - //strings.Replace(msg.FileName, ".zip", "", -1), - ) - if err := utils.DeCompress(tmpFile, dstPath); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return + // 删除爬虫文件目录 + path := filepath.Join(viper.GetString("spider.path"), spider.Name) + utils.RemoveFiles(path) + + // 删除其他节点的爬虫目录 + msg := entity.NodeMessage{ + Type: constants.MsgTypeRemoveSpider, + SpiderId: id, + } + if err := utils.Pub(constants.ChannelAllNode, msg); err != nil { + return err } - // 关闭临时文件 - if err := tmpFile.Close(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return + // 从数据库中删除该爬虫 + if err := model.RemoveSpider(bson.ObjectIdHex(id)); err != nil { + return err } - // 删除临时文件 - if err := os.Remove(tmpFilePath); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return + // 删除日志文件 + if err := RemoveLogBySpiderId(spider.Id); err != nil { + return err } + + // 删除爬虫对应的task任务 + if err := model.RemoveTaskBySpiderId(spider.Id); err != nil { + return err + } + + // TODO 定时任务如何处理 + return nil } // 启动爬虫服务 func InitSpiderService() error { // 构造定时任务执行器 c := cron.New(cron.WithSeconds()) - - if IsMaster() { - // 主节点 - - // 每5秒更新一次爬虫信息 - if _, err := c.AddFunc("*/5 * * * * *", UpdateSpiders); err != nil { - return err - } - - // 每60秒同步爬虫给工作节点 - if _, err := c.AddFunc("0 * * * * *", PublishAllSpidersJob); err != nil { - return err - } - } else { - // 非主节点 - - // 订阅文件上传 - channel := "files:upload" - var sub database.Subscriber - sub.Connect() - sub.Subscribe(channel, OnFileUpload) + if _, err := c.AddFunc("0 * * * * *", PublishAllSpiders); err != nil { + return err } - // 启动定时任务 c.Start() diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go new file mode 100644 index 00000000..53c83b9a --- /dev/null +++ b/backend/services/spider_handler/spider.go @@ -0,0 +1,137 @@ +package spider_handler + +import ( + "crawlab/database" + "crawlab/model" + "crawlab/utils" + "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "github.com/satori/go.uuid" + "github.com/spf13/viper" + "io" + "os" + "path/filepath" + "runtime/debug" +) + +const ( + Md5File = "md5.txt" +) + +type SpiderSync struct { + Spider model.Spider +} + +func (s *SpiderSync) CreateMd5File(md5 string) { + path := filepath.Join(viper.GetString("spider.path"), s.Spider.Name) + utils.CreateFilePath(path) + + fileName := filepath.Join(path, Md5File) + file := utils.OpenFile(fileName) + defer file.Close() + if file != nil { + if _, err := file.WriteString(md5 + "\n"); err != nil { + log.Errorf("file write string error: %s", err.Error()) + debug.PrintStack() + } + } +} + +// 获得下载锁的key +func (s *SpiderSync) GetLockDownloadKey(spiderId string) string { + node, _ := model.GetCurrentNode() + return node.Id.Hex() + "#" + spiderId +} + +// 删除本地文件 +func (s *SpiderSync) RemoveSpiderFile() { + path := filepath.Join( + viper.GetString("spider.path"), + s.Spider.Name, + ) + //爬虫文件有变化,先删除本地文件 + if err := os.RemoveAll(path); err != nil { + log.Errorf("remove spider files error: %s, path: %s", err.Error(), path) + debug.PrintStack() + } +} + +// 检测是否已经下载中 +func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, string) { + key := s.GetLockDownloadKey(spiderId) + if _, err := database.RedisClient.HGet("spider", key); err == nil { + return true, key + } + return false, key +} + +// 下载爬虫 +func (s *SpiderSync) Download() { + spiderId := s.Spider.Id.Hex() + fileId := s.Spider.FileId.Hex() + isDownloading, key := s.CheckDownLoading(spiderId, fileId) + if isDownloading { + return + } else { + _ = database.RedisClient.HSet("spider", key, key) + } + + session, gf := database.GetGridFs("files") + defer session.Close() + + f, err := gf.OpenId(bson.ObjectIdHex(fileId)) + defer f.Close() + if err != nil { + log.Errorf("open file id: " + fileId + ", spider id:" + spiderId + ", error: " + err.Error()) + debug.PrintStack() + return + } + + // 生成唯一ID + randomId := uuid.NewV4() + tmpPath := viper.GetString("other.tmppath") + if !utils.Exists(tmpPath) { + if err := os.MkdirAll(tmpPath, 0777); err != nil { + log.Errorf("mkdir other.tmppath error: %v", err.Error()) + return + } + } + // 创建临时文件 + tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") + tmpFile := utils.OpenFile(tmpFilePath) + defer tmpFile.Close() + + // 将该文件写入临时文件 + if _, err := io.Copy(tmpFile, f); err != nil { + log.Errorf("copy file error: %s, file_id: %s", err.Error(), f.Id()) + debug.PrintStack() + return + } + + // 解压缩临时文件到目标文件夹 + dstPath := filepath.Join( + viper.GetString("spider.path"), + s.Spider.Name, + ) + if err := utils.DeCompress(tmpFile, dstPath); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + + // 关闭临时文件 + if err := tmpFile.Close(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + + // 删除临时文件 + if err := os.Remove(tmpFilePath); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + + _ = database.RedisClient.HDel("spider", key) +} diff --git a/backend/services/spider_handler/spider_test.go b/backend/services/spider_handler/spider_test.go new file mode 100644 index 00000000..66d47455 --- /dev/null +++ b/backend/services/spider_handler/spider_test.go @@ -0,0 +1,53 @@ +package spider_handler + +import ( + "crawlab/config" + "crawlab/database" + "crawlab/model" + "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "runtime/debug" + "testing" +) + +var s SpiderSync + +func init() { + if err := config.InitConfig("../../conf/config.yml"); err != nil { + log.Fatal("Init config failed") + } + log.Infof("初始化配置成功") + + // 初始化Mongodb数据库 + if err := database.InitMongo(); err != nil { + log.Error("init mongodb error:" + err.Error()) + debug.PrintStack() + panic(err) + } + log.Info("初始化Mongodb数据库成功") + + // 初始化Redis数据库 + if err := database.InitRedis(); err != nil { + log.Error("init redis error:" + err.Error()) + debug.PrintStack() + panic(err) + } + log.Info("初始化Redis数据库成功") + + s = SpiderSync{ + Spider: model.Spider{ + Id: bson.ObjectIdHex("5d8d8326bc3c4f000186e5df"), + Name: "scrapy-pre_sale", + FileId: bson.ObjectIdHex("5d8d8326bc3c4f000186e5db"), + Src: "/opt/crawlab/spiders/scrapy-pre_sale", + }, + } +} + +func TestSpiderSync_CreateMd5File(t *testing.T) { + s.CreateMd5File("this is md5") +} + +func TestSpiderSync_Download(t *testing.T) { + s.Download() +} diff --git a/backend/services/system.go b/backend/services/system.go index 5f50dec9..92f9cf96 100644 --- a/backend/services/system.go +++ b/backend/services/system.go @@ -3,117 +3,25 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/model" "crawlab/utils" "encoding/json" - "github.com/apex/log" - "io/ioutil" - "os" - "path/filepath" - "runtime" - "runtime/debug" - "strings" ) var SystemInfoChanMap = utils.NewChanMap() -var executableNameMap = map[string]string{ - // python - "python": "Python", - "python2": "Python 2", - "python2.7": "Python 2.7", - "python3": "Python 3", - "python3.5": "Python 3.5", - "python3.6": "Python 3.6", - "python3.7": "Python 3.7", - "python3.8": "Python 3.8", - // java - "java": "Java", - // go - "go": "Go", - // node - "node": "NodeJS", - // php - "php": "PHP", - // windows command - "cmd": "Windows Command Prompt", - // linux shell - "sh": "Shell", - "bash": "bash", -} - -func GetSystemEnv(key string) string { - return os.Getenv(key) -} - -func GetPathValues() (paths []string) { - pathEnv := GetSystemEnv("PATH") - return strings.Split(pathEnv, ":") -} - -func GetExecutables() (executables []model.Executable, err error) { - pathValues := GetPathValues() - - cache := map[string]string{} - - for _, path := range pathValues { - fileList, err := ioutil.ReadDir(path) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - - for _, file := range fileList { - displayName := executableNameMap[file.Name()] - filePath := filepath.Join(path, file.Name()) - - if cache[filePath] == "" { - if displayName != "" { - executables = append(executables, model.Executable{ - Path: filePath, - FileName: file.Name(), - DisplayName: displayName, - }) - } - cache[filePath] = filePath - } - } - } - return executables, nil -} - -func GetLocalSystemInfo() (sysInfo model.SystemInfo, err error) { - executables, err := GetExecutables() - if err != nil { - return sysInfo, err - } - hostname, err := os.Hostname() - if err != nil { - debug.PrintStack() - return sysInfo, err - } - - return model.SystemInfo{ - ARCH: runtime.GOARCH, - OS: runtime.GOOS, - NumCpu: runtime.GOMAXPROCS(0), - Hostname: hostname, - Executables: executables, - }, nil -} - -func GetRemoteSystemInfo(id string) (sysInfo model.SystemInfo, err error) { +func GetRemoteSystemInfo(id string) (sysInfo entity.SystemInfo, err error) { // 发送消息 - msg := NodeMessage{ + msg := entity.NodeMessage{ Type: constants.MsgTypeGetSystemInfo, NodeId: id, } // 序列化 msgBytes, _ := json.Marshal(&msg) - if err := database.Publish("nodes:"+id, string(msgBytes)); err != nil { - return model.SystemInfo{}, err + if _, err := database.RedisClient.Publish("nodes:"+id, utils.BytesToString(msgBytes)); err != nil { + return entity.SystemInfo{}, err } // 通道 @@ -130,9 +38,9 @@ func GetRemoteSystemInfo(id string) (sysInfo model.SystemInfo, err error) { return sysInfo, nil } -func GetSystemInfo(id string) (sysInfo model.SystemInfo, err error) { +func GetSystemInfo(id string) (sysInfo entity.SystemInfo, err error) { if IsMasterNode(id) { - sysInfo, err = GetLocalSystemInfo() + sysInfo, err = model.GetLocalSystemInfo() } else { sysInfo, err = GetRemoteSystemInfo(id) } diff --git a/backend/services/task.go b/backend/services/task.go index 8c0ff8a1..12f0330e 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -3,6 +3,7 @@ package services import ( "crawlab/constants" "crawlab/database" + "crawlab/entity" "crawlab/lib/cron" "crawlab/model" "crawlab/utils" @@ -16,13 +17,16 @@ import ( "runtime" "runtime/debug" "strconv" + "sync" + "syscall" "time" ) var Exec *Executor // 任务执行锁 -var LockList []bool +//Added by cloud: 2019/09/04,solve data race +var LockList sync.Map // 任务消息 type TaskMessage struct { @@ -36,7 +40,7 @@ func (m *TaskMessage) ToString() (string, error) { if err != nil { return "", err } - return string(data), err + return utils.BytesToString(data), err } // 任务执行器 @@ -56,7 +60,7 @@ func (ex *Executor) Start() error { id := i // 初始化任务锁 - LockList = append(LockList, false) + LockList.Store(id, false) // 加入定时任务 _, err := ex.Cron.AddFunc(spec, GetExecuteTaskFunc(id)) @@ -68,8 +72,6 @@ func (ex *Executor) Start() error { return nil } -var TaskExecChanMap = utils.NewChanMap() - // 派发任务 func AssignTask(task model.Task) error { // 生成任务信息 @@ -134,37 +136,67 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e } // 起一个goroutine来监控进程 - ch := TaskExecChanMap.ChanBlocked(t.Id) + ch := utils.TaskExecChanMap.ChanBlocked(t.Id) go func() { // 传入信号,此处阻塞 signal := <-ch - - if signal == constants.TaskCancel { + log.Infof("cancel process signal: %s", signal) + if signal == constants.TaskCancel && cmd.Process != nil { // 取消进程 - if err := cmd.Process.Kill(); err != nil { - log.Errorf(err.Error()) + if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil { + log.Errorf("process kill error: %s", err.Error()) debug.PrintStack() - return } t.Status = constants.StatusCancelled + t.Error = "user kill the process ..." + } else { + // 保存任务 + t.Status = constants.StatusFinished } - - // 保存任务 t.FinishTs = time.Now() if err := t.Save(); err != nil { - log.Infof(err.Error()) + log.Infof("save task error: %s", err.Error()) debug.PrintStack() return } }() - // 开始执行 - if err := cmd.Run(); err != nil { - HandleTaskError(t, err) + // 在选择所有节点执行的时候,实际就是随机一个节点执行的, + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + + // 异步启动进程 + if err := cmd.Start(); err != nil { + log.Errorf("start spider error:{}", err.Error()) + debug.PrintStack() + return err + } + + // 保存pid到task + t.Pid = cmd.Process.Pid + if err := t.Save(); err != nil { + log.Errorf("save task pid error: %s", err.Error()) + debug.PrintStack() + return err + } + // 同步等待进程完成 + if err := cmd.Wait(); err != nil { + log.Errorf("wait process finish error: %s", err.Error()) + debug.PrintStack() + if exitError, ok := err.(*exec.ExitError); ok { + exitCode := exitError.ExitCode() + log.Errorf("exit error, exit code: %d", exitCode) + // 非kill 的错误类型 + if exitCode != -1 { + // 非手动kill保存为错误状态 + t.Error = err.Error() + t.FinishTs = time.Now() + t.Status = constants.StatusError + _ = t.Save() + } + } return err } ch <- constants.TaskFinish - return nil } @@ -220,24 +252,25 @@ func SaveTaskResultCount(id string) func() { // 执行任务 func ExecuteTask(id int) { - if LockList[id] { + if flag, _ := LockList.Load(id); flag.(bool) { log.Debugf(GetWorkerPrefix(id) + "正在执行任务...") return } // 上锁 - LockList[id] = true + LockList.Store(id, true) // 解锁(延迟执行) defer func() { - LockList[id] = false + LockList.Delete(id) + LockList.Store(id, false) }() // 开始计时 tic := time.Now() // 获取当前节点 - node, err := GetCurrentNode() + node, err := model.GetCurrentNode() if err != nil { log.Errorf(GetWorkerPrefix(id) + err.Error()) return @@ -252,6 +285,9 @@ func ExecuteTask(id int) { // 节点队列任务 var msg string msg, err = database.RedisClient.LPop(queueCur) + if msg != "" { + log.Infof("queue cur: %s", msg) + } if err != nil { if msg == "" { // 节点队列没有任务,获取公共队列任务 @@ -323,8 +359,10 @@ func ExecuteTask(id int) { // 执行命令 cmd := spider.Cmd - if t.Cmd != "" { - cmd = t.Cmd + + // 加入参数 + if t.Param != "" { + cmd += " " + t.Param } // 任务赋值 @@ -404,13 +442,16 @@ func GetTaskLog(id string) (logStr string, err error) { logStr = "" if IsMasterNode(task.NodeId.Hex()) { // 若为主节点,获取本机日志 - logBytes, err := GetLocalLog(task.LogPath) - logStr = string(logBytes) + logBytes, err := model.GetLocalLog(task.LogPath) + logStr = utils.BytesToString(logBytes) if err != nil { log.Errorf(err.Error()) - return "", err + logStr = err.Error() + // return "", err + } else { + logStr = utils.BytesToString(logBytes) } - logStr = string(logBytes) + } else { // 若不为主节点,获取远端日志 logStr, err = GetRemoteLog(task) @@ -427,6 +468,8 @@ func CancelTask(id string) (err error) { // 获取任务 task, err := model.GetTask(id) if err != nil { + log.Errorf("task not found, task id : %s, error: %s", id, err.Error()) + debug.PrintStack() return err } @@ -436,24 +479,36 @@ func CancelTask(id string) (err error) { } // 获取当前节点(默认当前节点为主节点) - node, err := GetCurrentNode() + node, err := model.GetCurrentNode() if err != nil { + log.Errorf("get current node error: %s", err.Error()) + debug.PrintStack() return err } + log.Infof("current node id is: %s", node.Id.Hex()) + log.Infof("task node id is: %s", task.NodeId.Hex()) + if node.Id == task.NodeId { // 任务节点为主节点 // 获取任务执行频道 - ch := TaskExecChanMap.ChanBlocked(id) - - // 发出取消进程信号 - ch <- constants.TaskCancel + ch := utils.TaskExecChanMap.ChanBlocked(id) + if ch != nil { + // 发出取消进程信号 + ch <- constants.TaskCancel + } else { + if err := model.UpdateTaskToAbnormal(node.Id); err != nil { + log.Errorf("update task to abnormal : {}", err.Error()) + debug.PrintStack() + return err + } + } } else { // 任务节点为工作节点 // 序列化消息 - msg := NodeMessage{ + msg := entity.NodeMessage{ Type: constants.MsgTypeCancelTask, TaskId: id, } @@ -463,7 +518,7 @@ func CancelTask(id string) (err error) { } // 发布消息 - if err := database.Publish("nodes:"+task.NodeId.Hex(), string(msgBytes)); err != nil { + if _, err := database.RedisClient.Publish("nodes:"+task.NodeId.Hex(), utils.BytesToString(msgBytes)); err != nil { return err } } @@ -472,6 +527,7 @@ func CancelTask(id string) (err error) { } func HandleTaskError(t model.Task, err error) { + log.Error("handle task error:" + err.Error()) t.Status = constants.StatusError t.Error = err.Error() t.FinishTs = time.Now() diff --git a/backend/services/user.go b/backend/services/user.go index fb688fd1..4811f767 100644 --- a/backend/services/user.go +++ b/backend/services/user.go @@ -5,11 +5,9 @@ import ( "crawlab/model" "crawlab/utils" "errors" - "github.com/apex/log" "github.com/dgrijalva/jwt-go" "github.com/globalsign/mgo/bson" "github.com/spf13/viper" - "runtime/debug" "time" ) @@ -24,28 +22,38 @@ func InitUserService() error { } return nil } - -func GetToken(username string) (tokenStr string, err error) { - user, err := model.GetUserByUsername(username) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - +func MakeToken(user *model.User) (tokenStr string, err error) { token := jwt.NewWithClaims(jwt.SigningMethodHS256, jwt.MapClaims{ "id": user.Id, "username": user.Username, "nbf": time.Now().Unix(), }) - tokenStr, err = token.SignedString([]byte(viper.GetString("server.secret"))) - if err != nil { - return - } - return + return token.SignedString([]byte(viper.GetString("server.secret"))) + } +//func GetToken(username string) (tokenStr string, err error) { +// user, err := model.GetUserByUsername(username) +// if err != nil { +// log.Errorf(err.Error()) +// debug.PrintStack() +// return +// } +// +// token := jwt.NewWithClaims(jwt.SigningMethodHS256, jwt.MapClaims{ +// "id": user.Id, +// "username": user.Username, +// "nbf": time.Now().Unix(), +// }) +// +// tokenStr, err = token.SignedString([]byte(viper.GetString("server.secret"))) +// if err != nil { +// return +// } +// return +//} + func SecretFunc() jwt.Keyfunc { return func(token *jwt.Token) (interface{}, error) { return []byte(viper.GetString("server.secret")), nil diff --git a/backend/utils/chan.go b/backend/utils/chan.go index 3e9fde61..7b63ac0f 100644 --- a/backend/utils/chan.go +++ b/backend/utils/chan.go @@ -1,5 +1,7 @@ package utils +var TaskExecChanMap = NewChanMap() + type ChanMap struct { m map[string]chan string } diff --git a/backend/utils/file.go b/backend/utils/file.go index 9a4300a1..d65e7ab1 100644 --- a/backend/utils/file.go +++ b/backend/utils/file.go @@ -2,6 +2,7 @@ package utils import ( "archive/zip" + "bufio" "github.com/apex/log" "io" "os" @@ -9,6 +10,49 @@ import ( "runtime/debug" ) +// 删除文件 +func RemoveFiles(path string) { + if err := os.RemoveAll(path); err != nil { + log.Errorf("remove files error: %s, path: %s", err.Error(), path) + debug.PrintStack() + } +} + +// 读取文件一行 +func ReadFileOneLine(fileName string) string { + file := OpenFile(fileName) + defer file.Close() + buf := bufio.NewReader(file) + line, err := buf.ReadString('\n') + if err != nil { + log.Errorf("read file error: %s", err.Error()) + return "" + } + return line + +} + +// 创建文件 +func OpenFile(fileName string) *os.File { + file, err := os.OpenFile(fileName, os.O_CREATE|os.O_RDWR, os.ModePerm) + if err != nil { + log.Errorf("create file error: %s, file_name: %s", err.Error(), fileName) + debug.PrintStack() + return nil + } + return file +} + +// 创建文件夹 +func CreateFilePath(filePath string) { + if !Exists(filePath) { + if err := os.MkdirAll(filePath, os.ModePerm); err != nil { + log.Errorf("create file error: %s, file_path: %s", err.Error(), filePath) + debug.PrintStack() + } + } +} + // 判断所给路径文件/文件夹是否存在 func Exists(path string) bool { _, err := os.Stat(path) //os.Stat获取文件信息 @@ -179,11 +223,11 @@ func _Compress(file *os.File, prefix string, zw *zip.Writer) error { } } else { header, err := zip.FileInfoHeader(info) - header.Name = prefix + "/" + header.Name if err != nil { debug.PrintStack() return err } + header.Name = prefix + "/" + header.Name writer, err := zw.CreateHeader(header) if err != nil { debug.PrintStack() diff --git a/backend/utils/file_test.go b/backend/utils/file_test.go index 484366f5..64f2df6d 100644 --- a/backend/utils/file_test.go +++ b/backend/utils/file_test.go @@ -1,8 +1,12 @@ package utils import ( + "archive/zip" . "github.com/smartystreets/goconvey/convey" + "io" + "log" "os" + "runtime/debug" "testing" ) @@ -38,9 +42,13 @@ func TestIsDir(t *testing.T) { } func TestCompress(t *testing.T) { - var pathString = "../utils" + err := os.Mkdir("testCompress", os.ModePerm) + if err != nil { + t.Error("create testCompress failed") + } + var pathString = "testCompress" var files []*os.File - var disPath = "../utils/test" + var disPath = "testCompress" file, err := os.Open(pathString) if err != nil { t.Error("open source path failed") @@ -52,15 +60,60 @@ func TestCompress(t *testing.T) { So(er, ShouldEqual, nil) }) }) + os.RemoveAll("testCompress") } - -// 测试之前需存在有效的test(.zip)文件 -func TestDeCompress(t *testing.T) { - var tmpFilePath = "./test" - tmpFile, err := os.OpenFile(tmpFilePath, os.O_RDONLY, 0777) +func Zip(zipFile string, fileList []string) error { + // 创建 zip 包文件 + fw, err := os.Create(zipFile) if err != nil { - t.Fatal("open zip file failed") + log.Fatal() + } + defer fw.Close() + + // 实例化新的 zip.Writer + zw := zip.NewWriter(fw) + defer func() { + // 检测一下是否成功关闭 + if err := zw.Close(); err != nil { + log.Fatalln(err) + } + }() + + for _, fileName := range fileList { + fr, err := os.Open(fileName) + if err != nil { + return err + } + fi, err := fr.Stat() + if err != nil { + return err + } + // 写入文件的头信息 + fh, err := zip.FileInfoHeader(fi) + w, err := zw.CreateHeader(fh) + if err != nil { + return err + } + // 写入文件内容 + _, err = io.Copy(w, fr) + if err != nil { + return err + } + } + return nil +} + +func TestDeCompress(t *testing.T) { + err := os.Mkdir("testDeCompress", os.ModePerm) + err = Zip("demo.zip", []string{}) + if err != nil { + t.Error("create zip file failed") + } + tmpFile, err := os.OpenFile("demo.zip", os.O_RDONLY, 0777) + if err != nil { + debug.PrintStack() + t.Error("open demo.zip failed") } var dstPath = "./testDeCompress" Convey("Test DeCopmress func", t, func() { @@ -68,5 +121,7 @@ func TestDeCompress(t *testing.T) { err := DeCompress(tmpFile, dstPath) So(err, ShouldEqual, nil) }) + os.RemoveAll("testDeCompress") + os.Remove("demo.zip") } diff --git a/backend/utils/helpers.go b/backend/utils/helpers.go new file mode 100644 index 00000000..edc6200e --- /dev/null +++ b/backend/utils/helpers.go @@ -0,0 +1,55 @@ +package utils + +import ( + "context" + "crawlab/database" + "crawlab/entity" + "encoding/json" + "github.com/apex/log" + "github.com/gomodule/redigo/redis" + "runtime/debug" + "unsafe" +) + +func BytesToString(b []byte) string { + return *(*string)(unsafe.Pointer(&b)) +} + +func GetJson(message entity.NodeMessage) string { + msgBytes, err := json.Marshal(&message) + if err != nil { + log.Errorf("node message to json error: %s", err.Error()) + debug.PrintStack() + return "" + } + return BytesToString(msgBytes) +} + +func GetMessage(message redis.Message) *entity.NodeMessage { + msg := entity.NodeMessage{} + if err := json.Unmarshal(message.Data, &msg); err != nil { + log.Errorf("message byte to object error: %s", err.Error()) + debug.PrintStack() + return nil + } + return &msg +} + +func Pub(channel string, msg entity.NodeMessage) error { + if _, err := database.RedisClient.Publish(channel, GetJson(msg)); err != nil { + log.Errorf("publish redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} + +func Sub(channel string, consume database.ConsumeFunc) error { + ctx := context.Background() + if err := database.RedisClient.Subscribe(ctx, consume, channel); err != nil { + log.Errorf("subscribe redis error: %s", err.Error()) + debug.PrintStack() + return err + } + return nil +} diff --git a/backend/vendor/github.com/dgrijalva/jwt-go/request/doc.go b/backend/vendor/github.com/dgrijalva/jwt-go/request/doc.go deleted file mode 100644 index c01069c9..00000000 --- a/backend/vendor/github.com/dgrijalva/jwt-go/request/doc.go +++ /dev/null @@ -1,7 +0,0 @@ -// Utility package for extracting JWT tokens from -// HTTP requests. -// -// The main function is ParseFromRequest and it's WithClaims variant. -// See examples for how to use the various Extractor implementations -// or roll your own. -package request diff --git a/backend/vendor/github.com/dgrijalva/jwt-go/request/extractor.go b/backend/vendor/github.com/dgrijalva/jwt-go/request/extractor.go deleted file mode 100644 index 14414fe2..00000000 --- a/backend/vendor/github.com/dgrijalva/jwt-go/request/extractor.go +++ /dev/null @@ -1,81 +0,0 @@ -package request - -import ( - "errors" - "net/http" -) - -// Errors -var ( - ErrNoTokenInRequest = errors.New("no token present in request") -) - -// Interface for extracting a token from an HTTP request. -// The ExtractToken method should return a token string or an error. -// If no token is present, you must return ErrNoTokenInRequest. -type Extractor interface { - ExtractToken(*http.Request) (string, error) -} - -// Extractor for finding a token in a header. Looks at each specified -// header in order until there's a match -type HeaderExtractor []string - -func (e HeaderExtractor) ExtractToken(req *http.Request) (string, error) { - // loop over header names and return the first one that contains data - for _, header := range e { - if ah := req.Header.Get(header); ah != "" { - return ah, nil - } - } - return "", ErrNoTokenInRequest -} - -// Extract token from request arguments. This includes a POSTed form or -// GET URL arguments. Argument names are tried in order until there's a match. -// This extractor calls `ParseMultipartForm` on the request -type ArgumentExtractor []string - -func (e ArgumentExtractor) ExtractToken(req *http.Request) (string, error) { - // Make sure form is parsed - req.ParseMultipartForm(10e6) - - // loop over arg names and return the first one that contains data - for _, arg := range e { - if ah := req.Form.Get(arg); ah != "" { - return ah, nil - } - } - - return "", ErrNoTokenInRequest -} - -// Tries Extractors in order until one returns a token string or an error occurs -type MultiExtractor []Extractor - -func (e MultiExtractor) ExtractToken(req *http.Request) (string, error) { - // loop over header names and return the first one that contains data - for _, extractor := range e { - if tok, err := extractor.ExtractToken(req); tok != "" { - return tok, nil - } else if err != ErrNoTokenInRequest { - return "", err - } - } - return "", ErrNoTokenInRequest -} - -// Wrap an Extractor in this to post-process the value before it's handed off. -// See AuthorizationHeaderExtractor for an example -type PostExtractionFilter struct { - Extractor - Filter func(string) (string, error) -} - -func (e *PostExtractionFilter) ExtractToken(req *http.Request) (string, error) { - if tok, err := e.Extractor.ExtractToken(req); tok != "" { - return e.Filter(tok) - } else { - return "", err - } -} diff --git a/backend/vendor/github.com/dgrijalva/jwt-go/request/oauth2.go b/backend/vendor/github.com/dgrijalva/jwt-go/request/oauth2.go deleted file mode 100644 index 5948694a..00000000 --- a/backend/vendor/github.com/dgrijalva/jwt-go/request/oauth2.go +++ /dev/null @@ -1,28 +0,0 @@ -package request - -import ( - "strings" -) - -// Strips 'Bearer ' prefix from bearer token string -func stripBearerPrefixFromTokenString(tok string) (string, error) { - // Should be a bearer token - if len(tok) > 6 && strings.ToUpper(tok[0:7]) == "BEARER " { - return tok[7:], nil - } - return tok, nil -} - -// Extract bearer token from Authorization header -// Uses PostExtractionFilter to strip "Bearer " prefix from header -var AuthorizationHeaderExtractor = &PostExtractionFilter{ - HeaderExtractor{"Authorization"}, - stripBearerPrefixFromTokenString, -} - -// Extractor for OAuth2 access tokens. Looks in 'Authorization' -// header then 'access_token' argument for a token. -var OAuth2Extractor = &MultiExtractor{ - AuthorizationHeaderExtractor, - ArgumentExtractor{"access_token"}, -} diff --git a/backend/vendor/github.com/dgrijalva/jwt-go/request/request.go b/backend/vendor/github.com/dgrijalva/jwt-go/request/request.go deleted file mode 100644 index 70525cfa..00000000 --- a/backend/vendor/github.com/dgrijalva/jwt-go/request/request.go +++ /dev/null @@ -1,68 +0,0 @@ -package request - -import ( - "github.com/dgrijalva/jwt-go" - "net/http" -) - -// Extract and parse a JWT token from an HTTP request. -// This behaves the same as Parse, but accepts a request and an extractor -// instead of a token string. The Extractor interface allows you to define -// the logic for extracting a token. Several useful implementations are provided. -// -// You can provide options to modify parsing behavior -func ParseFromRequest(req *http.Request, extractor Extractor, keyFunc jwt.Keyfunc, options ...ParseFromRequestOption) (token *jwt.Token, err error) { - // Create basic parser struct - p := &fromRequestParser{req, extractor, nil, nil} - - // Handle options - for _, option := range options { - option(p) - } - - // Set defaults - if p.claims == nil { - p.claims = jwt.MapClaims{} - } - if p.parser == nil { - p.parser = &jwt.Parser{} - } - - // perform extract - tokenString, err := p.extractor.ExtractToken(req) - if err != nil { - return nil, err - } - - // perform parse - return p.parser.ParseWithClaims(tokenString, p.claims, keyFunc) -} - -// ParseFromRequest but with custom Claims type -// DEPRECATED: use ParseFromRequest and the WithClaims option -func ParseFromRequestWithClaims(req *http.Request, extractor Extractor, claims jwt.Claims, keyFunc jwt.Keyfunc) (token *jwt.Token, err error) { - return ParseFromRequest(req, extractor, keyFunc, WithClaims(claims)) -} - -type fromRequestParser struct { - req *http.Request - extractor Extractor - claims jwt.Claims - parser *jwt.Parser -} - -type ParseFromRequestOption func(*fromRequestParser) - -// Parse with custom claims -func WithClaims(claims jwt.Claims) ParseFromRequestOption { - return func(p *fromRequestParser) { - p.claims = claims - } -} - -// Parse using a custom parser -func WithParser(parser *jwt.Parser) ParseFromRequestOption { - return func(p *fromRequestParser) { - p.parser = parser - } -} diff --git a/backend/vendor/modules.txt b/backend/vendor/modules.txt index 57c7d3f1..634c337b 100644 --- a/backend/vendor/modules.txt +++ b/backend/vendor/modules.txt @@ -2,7 +2,6 @@ github.com/apex/log # github.com/dgrijalva/jwt-go v3.2.0+incompatible github.com/dgrijalva/jwt-go -github.com/dgrijalva/jwt-go/request # github.com/fsnotify/fsnotify v1.4.7 github.com/fsnotify/fsnotify # github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3 @@ -18,11 +17,18 @@ github.com/globalsign/mgo/bson github.com/globalsign/mgo/internal/sasl github.com/globalsign/mgo/internal/scram github.com/globalsign/mgo/internal/json +# github.com/go-playground/locales v0.12.1 +github.com/go-playground/locales +github.com/go-playground/locales/currency +# github.com/go-playground/universal-translator v0.16.0 +github.com/go-playground/universal-translator # github.com/golang/protobuf v1.3.1 github.com/golang/protobuf/proto # github.com/gomodule/redigo v2.0.0+incompatible github.com/gomodule/redigo/redis github.com/gomodule/redigo/internal +# github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 +github.com/gopherjs/gopherjs/js # github.com/hashicorp/hcl v1.0.0 github.com/hashicorp/hcl github.com/hashicorp/hcl/hcl/printer @@ -36,6 +42,10 @@ github.com/hashicorp/hcl/json/scanner github.com/hashicorp/hcl/json/token # github.com/json-iterator/go v1.1.6 github.com/json-iterator/go +# github.com/jtolds/gls v4.20.0+incompatible +github.com/jtolds/gls +# github.com/leodido/go-urn v1.1.0 +github.com/leodido/go-urn # github.com/magiconair/properties v1.8.0 github.com/magiconair/properties # github.com/mattn/go-isatty v0.0.8 @@ -52,6 +62,15 @@ github.com/pelletier/go-toml github.com/pkg/errors # github.com/satori/go.uuid v1.2.0 github.com/satori/go.uuid +# github.com/smartystreets/assertions v1.0.0 +github.com/smartystreets/assertions +github.com/smartystreets/assertions/internal/go-diff/diffmatchpatch +github.com/smartystreets/assertions/internal/go-render/render +github.com/smartystreets/assertions/internal/oglematchers +# github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337 +github.com/smartystreets/goconvey/convey +github.com/smartystreets/goconvey/convey/reporting +github.com/smartystreets/goconvey/convey/gotest # github.com/spf13/afero v1.1.2 github.com/spf13/afero github.com/spf13/afero/mem @@ -72,5 +91,7 @@ golang.org/x/text/transform golang.org/x/text/unicode/norm # gopkg.in/go-playground/validator.v8 v8.18.2 gopkg.in/go-playground/validator.v8 +# gopkg.in/go-playground/validator.v9 v9.29.1 +gopkg.in/go-playground/validator.v9 # gopkg.in/yaml.v2 v2.2.2 gopkg.in/yaml.v2 diff --git a/docker/Dockerfile.frontend.alpine b/docker/Dockerfile.frontend.alpine index d313cdcb..f809ee7c 100644 --- a/docker/Dockerfile.frontend.alpine +++ b/docker/Dockerfile.frontend.alpine @@ -1,10 +1,20 @@ +FROM node:8.16.0 AS frontend-build + +ADD ./frontend /app +WORKDIR /app + +# install frontend +RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org + +RUN npm run build:prod + FROM alpine:latest # 安装nginx RUN mkdir /run/nginx && apk add nginx # 拷贝编译文件 -COPY dist /app/dist +COPY --from=frontend-build /app/dist /app/dist # 拷贝nginx代理文件 COPY crawlab.conf /etc/nginx/conf.d @@ -12,9 +22,6 @@ COPY crawlab.conf /etc/nginx/conf.d # 拷贝执行脚本 COPY docker_init.sh /app/docker_init.sh -# 定义后端API脚本 -ENV CRAWLAB_API_ADDRESS http://localhost:8000 - EXPOSE 8080 CMD ["/bin/sh", "/app/docker_init.sh"] \ No newline at end of file diff --git a/docker/Dockerfile.master.alpine b/docker/Dockerfile.master.alpine index 6979861b..b9dbb742 100644 --- a/docker/Dockerfile.master.alpine +++ b/docker/Dockerfile.master.alpine @@ -75,7 +75,7 @@ RUN sed -i 's/#rc_sys=""/rc_sys="lxc"/g' /etc/rc.conf && \ # working directory WORKDIR /app/backend - +ENV PYTHONIOENCODING utf-8 # frontend port EXPOSE 8080 diff --git a/docker/Dockerfile.worker.alpine b/docker/Dockerfile.worker.alpine index e7a66776..388125a2 100644 --- a/docker/Dockerfile.worker.alpine +++ b/docker/Dockerfile.worker.alpine @@ -35,7 +35,7 @@ RUN apk del .build-deps # working directory WORKDIR /app/backend - +ENV PYTHONIOENCODING utf-8 # backend port EXPOSE 8000 diff --git a/frontend/package.json b/frontend/package.json index 139297d3..60ac5cc8 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,11 +1,12 @@ { "name": "crawlab", - "version": "0.2.3", + "version": "0.3.4", "private": true, "scripts": { - "serve": "vue-cli-service serve --ip=0.0.0.0", + "serve": "vue-cli-service serve --ip=0.0.0.0 --mode=development", "serve:prod": "vue-cli-service serve --mode=production --ip=0.0.0.0", "config": "vue ui", + "build:dev": "vue-cli-service build --mode development", "build:prod": "vue-cli-service build --mode production", "lint": "vue-cli-service lint", "test:unit": "vue-cli-service test:unit" diff --git a/frontend/src/api/request.js b/frontend/src/api/request.js index 38734c46..22707159 100644 --- a/frontend/src/api/request.js +++ b/frontend/src/api/request.js @@ -3,28 +3,51 @@ import router from '../router' let baseUrl = process.env.VUE_APP_BASE_URL ? process.env.VUE_APP_BASE_URL : 'http://localhost:8000' -const request = (method, path, params, data) => { - return new Promise((resolve, reject) => { +const request = async (method, path, params, data, others = {}) => { + try { const url = baseUrl + path const headers = { 'Authorization': window.localStorage.getItem('token') } - axios({ + const response = await axios({ method, url, params, data, - headers + headers, + ...others }) - .then(resolve) - .catch(error => { - console.log(error) - if (error.response.status === 401) { - router.push('/login') - } - reject(error) - }) - }) + // console.log(response) + return response + } catch (e) { + if (e.response.status === 401 && router.currentRoute.path !== '/login') { + router.push('/login') + } + await Promise.reject(e) + } + + // return new Promise((resolve, reject) => { + // const url = baseUrl + path + // const headers = { + // 'Authorization': window.localStorage.getItem('token') + // } + // axios({ + // method, + // url, + // params, + // data, + // headers, + // ...others + // }) + // .then(resolve) + // .catch(error => { + // console.log(error) + // if (error.response.status === 401) { + // router.push('/login') + // } + // reject(error) + // }) + // }) } const get = (path, params) => { diff --git a/frontend/src/assets/logo.svg b/frontend/src/assets/logo.svg new file mode 100644 index 00000000..b0e23910 --- /dev/null +++ b/frontend/src/assets/logo.svg @@ -0,0 +1,14 @@ + + + + + + + + + + diff --git a/frontend/src/components/Common/CrawlConfirmDialog.vue b/frontend/src/components/Common/CrawlConfirmDialog.vue index 06ef1dba..266ef2eb 100644 --- a/frontend/src/components/Common/CrawlConfirmDialog.vue +++ b/frontend/src/components/Common/CrawlConfirmDialog.vue @@ -19,6 +19,9 @@ /> + + + diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue index 661e4757..381b253c 100644 --- a/frontend/src/components/InfoView/SpiderInfoView.vue +++ b/frontend/src/components/InfoView/SpiderInfoView.vue @@ -18,10 +18,10 @@ - + - + @@ -39,10 +39,14 @@ - - - - + + + + + + + + @@ -99,16 +103,7 @@ export default { 'spiderForm' ]), isShowRun () { - if (this.isCustomized) { - // customized spider - return !!this.spiderForm.cmd - } else { - // configurable spider - return !!this.spiderForm.fields - } - }, - isCustomized () { - return this.spiderForm.type === 'customized' + return !!this.spiderForm.cmd } }, methods: { diff --git a/frontend/src/components/InfoView/TaskInfoView.vue b/frontend/src/components/InfoView/TaskInfoView.vue index bfe6419a..80aaa770 100644 --- a/frontend/src/components/InfoView/TaskInfoView.vue +++ b/frontend/src/components/InfoView/TaskInfoView.vue @@ -15,6 +15,9 @@ + + + @@ -86,15 +89,15 @@ export default { return dayjs(str).format('YYYY-MM-DD HH:mm:ss') }, getWaitDuration (row) { - if (row.start_ts.match('^0001')) return 'NA' + if (!row.start_ts || row.start_ts.match('^0001')) return 'NA' return dayjs(row.start_ts).diff(row.create_ts, 'second') }, getRuntimeDuration (row) { - if (row.finish_ts.match('^0001')) return 'NA' + if (!row.finish_ts || row.finish_ts.match('^0001')) return 'NA' return dayjs(row.finish_ts).diff(row.start_ts, 'second') }, getTotalDuration (row) { - if (row.finish_ts.match('^0001')) return 'NA' + if (!row.finish_ts || row.finish_ts.match('^0001')) return 'NA' return dayjs(row.finish_ts).diff(row.create_ts, 'second') } } diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index c8573bd4..c56959c9 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -154,6 +154,8 @@ export default { 'Last Run': '上次运行', 'Action': '操作', 'No command line': '没有执行命令', + 'Last Status': '上次运行状态', + 'Remark': '备注', // 任务 'Task Info': '任务信息', @@ -214,6 +216,7 @@ export default { // 下拉框 User: '用户', Logout: '退出登录', + Documentation: '文档', // 选择 'Yes': '是', @@ -244,7 +247,8 @@ export default { 'username already exists': '用户名已存在', 'Deleted successfully': '成功删除', 'Saved successfully': '成功保存', - + 'Please zip your spider files from the root directory': '爬虫文件请从根目录下开始压缩。', + 'English': 'English', // 登录 'Sign in': '登录', 'Sign-in': '登录', @@ -263,5 +267,20 @@ export default { 'admin': '管理用户', 'Role': '角色', 'Edit User': '更改用户', - 'Users': '用户' + 'Users': '用户', + tagsView: { + closeOthers: '关闭其他', + close: '关闭', + refresh: '刷新', + closeAll: '关闭所有' + }, + nodeList: { + type: '节点类型' + }, + schedules: { + cron: 'Cron', + add_cron: '生成Cron', + // Cron Format: [second] [minute] [hour] [day of month] [month] [day of week] + cron_format: 'Cron 格式: [秒] [分] [小时] [日] [月] [周]' + } } diff --git a/frontend/src/router/index.js b/frontend/src/router/index.js index 9a238d08..84c96cd3 100644 --- a/frontend/src/router/index.js +++ b/frontend/src/router/index.js @@ -46,7 +46,6 @@ export const constantRouterMap = [ ] }, { - name: 'Node', path: '/nodes', component: Layout, meta: { @@ -76,7 +75,6 @@ export const constantRouterMap = [ ] }, { - name: 'Spider', path: '/spiders', component: Layout, meta: { @@ -106,7 +104,6 @@ export const constantRouterMap = [ ] }, { - name: 'Task', path: '/tasks', component: Layout, meta: { @@ -136,7 +133,6 @@ export const constantRouterMap = [ ] }, { - name: 'Schedule', path: '/schedules', component: Layout, meta: { @@ -157,7 +153,6 @@ export const constantRouterMap = [ ] }, { - name: 'Site', path: '/sites', component: Layout, hidden: true, @@ -178,7 +173,6 @@ export const constantRouterMap = [ ] }, { - name: 'User', path: '/users', component: Layout, meta: { diff --git a/frontend/src/store/modules/node.js b/frontend/src/store/modules/node.js index 266beb3e..5e21a222 100644 --- a/frontend/src/store/modules/node.js +++ b/frontend/src/store/modules/node.js @@ -25,15 +25,7 @@ const mutations = { const { id, systemInfo } = payload for (let i = 0; i < state.nodeList.length; i++) { if (state.nodeList[i]._id === id) { - // Vue.set(state.nodeList[i], 'systemInfo', {}) state.nodeList[i].systemInfo = systemInfo - // for (const key in systemInfo) { - // if (systemInfo.hasOwnProperty(key)) { - // console.log(key) - // state.nodeList[i].systemInfo[key] = systemInfo[key] - // // Vue.set(state.nodeList[i].systemInfo, key, systemInfo[key]) - // } - // } break } } @@ -76,10 +68,12 @@ const actions = { getTaskList ({ state, commit }, id) { return request.get(`/nodes/${id}/tasks`) .then(response => { - commit('task/SET_TASK_LIST', - response.data.data.map(d => d) - .sort((a, b) => a.create_ts < b.create_ts ? 1 : -1), - { root: true }) + if (response.data.data) { + commit('task/SET_TASK_LIST', + response.data.data.map(d => d) + .sort((a, b) => a.create_ts < b.create_ts ? 1 : -1), + { root: true }) + } }) }, getNodeSystemInfo ({ state, commit }, id) { diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index 2ab37838..07a0bac3 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -4,6 +4,8 @@ const state = { // list of spiders spiderList: [], + spiderTotal: 0, + // active spider data spiderForm: {}, @@ -38,6 +40,9 @@ const state = { const getters = {} const mutations = { + SET_SPIDER_TOTAL (state, value) { + state.spiderTotal = value + }, SET_SPIDER_FORM (state, value) { state.spiderForm = value }, @@ -71,14 +76,11 @@ const mutations = { } const actions = { - getSpiderList ({ state, commit }) { - let params = {} - if (state.filterSite) { - params.site = state.filterSite - } + getSpiderList ({ state, commit }, params = {}) { return request.get('/spiders', params) .then(response => { - commit('SET_SPIDER_LIST', response.data.data) + commit('SET_SPIDER_LIST', response.data.data.list) + commit('SET_SPIDER_TOTAL', response.data.data.total) }) }, editSpider ({ state, dispatch }) { @@ -101,10 +103,11 @@ const actions = { }) }, crawlSpider ({ state, dispatch }, payload) { - const { id, nodeId } = payload + const { id, nodeId, param } = payload return request.put(`/tasks`, { spider_id: id, - node_id: nodeId + node_id: nodeId, + param: param }) }, getTaskList ({ state, commit }, id) { diff --git a/frontend/src/store/modules/task.js b/frontend/src/store/modules/task.js index 545a169b..bb182706 100644 --- a/frontend/src/store/modules/task.js +++ b/frontend/src/store/modules/task.js @@ -120,10 +120,26 @@ const actions = { commit('SET_TASK_RESULTS_TOTAL_COUNT', response.data.total) }) }, + async getTaskResultExcel ({ state, commit }, id) { + const { data } = await request.request('GET', '/tasks/' + id + '/results/download', {}, { + responseType: 'blob' // important + }) + const downloadUrl = window.URL.createObjectURL(new Blob([data])) + + const link = document.createElement('a') + + link.href = downloadUrl + + link.setAttribute('download', 'data.csv') // any other extension + + document.body.appendChild(link) + link.click() + link.remove() + }, cancelTask ({ state, dispatch }, id) { return request.post(`/tasks/${id}/cancel`) .then(() => { - dispatch('getTaskData') + dispatch('getTaskData', id) }) } } diff --git a/frontend/src/views/layout/components/Navbar.vue b/frontend/src/views/layout/components/Navbar.vue index 976d98d9..e294ad0c 100644 --- a/frontend/src/views/layout/components/Navbar.vue +++ b/frontend/src/views/layout/components/Navbar.vue @@ -8,6 +8,9 @@ + + v0.3.4 + {{$t('Logout')}} @@ -30,8 +33,9 @@ - 文档 + {{$t('Documentation')}} + diff --git a/frontend/src/views/login/index.vue b/frontend/src/views/login/index.vue index 195ae1de..a21c0f42 100644 --- a/frontend/src/views/login/index.vue +++ b/frontend/src/views/login/index.vue @@ -4,7 +4,7 @@ diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue index 28ca4961..4d283966 100644 --- a/frontend/src/views/schedule/ScheduleList.vue +++ b/frontend/src/views/schedule/ScheduleList.vue @@ -16,7 +16,7 @@ - + - + + :placeholder="$t('schedules.cron')"> - {{$t('生成Cron')}} + {{$t('schedules.add_cron')}} - - + @@ -111,7 +111,7 @@ - + - + - +