code cleanup

This commit is contained in:
marvzhang
2020-12-04 15:33:15 +08:00
parent c83b331101
commit 00cf719ecc
287 changed files with 63 additions and 39029 deletions

View File

@@ -60,8 +60,4 @@ notification:
senderIdentity: ''
smtp:
user: ''
password: ''
repo:
apiUrl: "https://center.crawlab.cn/api"
# apiUrl: "http://localhost:8002"
ossUrl: "https://repo.crawlab.cn"
password: ''

View File

@@ -1,57 +0,0 @@
package config
import (
"github.com/fsnotify/fsnotify"
"github.com/spf13/viper"
"log"
"strings"
)
type Config struct {
Name string
}
// Watch the config file for changes and hot-reload
func (c *Config) WatchConfig() {
viper.WatchConfig()
viper.OnConfigChange(func(e fsnotify.Event) {
log.Printf("Config file changed: %s", e.Name)
})
}
func (c *Config) Init() error {
if c.Name != "" {
viper.SetConfigFile(c.Name) // if a config file is specified, parse that file
} else {
viper.AddConfigPath("./conf") // otherwise, parse the default config file
viper.SetConfigName("config")
}
viper.SetConfigType("yaml") // set the config file format to YAML
viper.AutomaticEnv() // read matching environment variables
viper.SetEnvPrefix("CRAWLAB") // environment variables are prefixed with CRAWLAB
replacer := strings.NewReplacer(".", "_")
viper.SetEnvKeyReplacer(replacer)
if err := viper.ReadInConfig(); err != nil { // parse the config file with viper
return err
}
return nil
}
func InitConfig(cfg string) error {
c := Config{
Name: cfg,
}
// initialize the config
if err := c.Init(); err != nil {
return err
}
// watch the config file for changes and hot-reload
c.WatchConfig()
return nil
}
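
For context, a minimal usage sketch of the config package above. The import path follows the `module crawlab` declaration in go.mod; the main function here is illustrative only and not part of the commit.

package main

import (
	"log"

	"crawlab/config"
	"github.com/spf13/viper"
)

func main() {
	// Passing an empty string falls back to ./conf/config.{yml,yaml};
	// the file is then watched for hot-reload.
	if err := config.InitConfig(""); err != nil {
		log.Fatalf("init config failed: %v", err)
	}
	// With SetEnvPrefix("CRAWLAB") and the "." -> "_" replacer, the environment
	// variable CRAWLAB_MONGO_HOST overrides the "mongo.host" key from the YAML file.
	log.Println(viper.GetString("mongo.host"))
}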

View File

@@ -1,16 +0,0 @@
package config
import (
. "github.com/smartystreets/goconvey/convey"
"testing"
)
func TestInitConfig(t *testing.T) {
Convey("Test InitConfig func", t, func() {
x := InitConfig("../conf/config.yml")
Convey("The value should be nil", func() {
So(x, ShouldEqual, nil)
})
})
}

View File

@@ -1,8 +0,0 @@
package constants
const (
ActionTypeVisit = "visit"
ActionTypeInstallDep = "install_dep"
ActionTypeInstallLang = "install_lang"
ActionTypeViewDisclaimer = "view_disclaimer"
)

View File

@@ -1,8 +0,0 @@
package constants
const (
AnchorStartStage = "START_STAGE"
AnchorStartUrl = "START_URL"
AnchorItems = "ITEMS"
AnchorParsers = "PARSERS"
)

View File

@@ -1,7 +0,0 @@
package constants
const (
OwnerTypeAll = "all"
OwnerTypeMe = "me"
OwnerTypePublic = "public"
)

View File

@@ -1,20 +0,0 @@
package constants
const (
ChallengeLogin7d = "login_7d"
ChallengeLogin30d = "login_30d"
ChallengeLogin90d = "login_90d"
ChallengeLogin180d = "login_180d"
ChallengeCreateCustomizedSpider = "create_customized_spider"
ChallengeCreateConfigurableSpider = "create_configurable_spider"
ChallengeCreateSchedule = "create_schedule"
ChallengeCreateNodes = "create_nodes"
ChallengeCreateUser = "create_user"
ChallengeRunRandom = "run_random"
ChallengeScrape1k = "scrape_1k"
ChallengeScrape10k = "scrape_10k"
ChallengeScrape100k = "scrape_100k"
ChallengeInstallDep = "install_dep"
ChallengeInstallLang = "install_lang"
ChallengeViewDisclaimer = "view_disclaimer"
)

View File

@@ -1,9 +0,0 @@
package constants
const (
ChannelAllNode = "nodes:public"
ChannelWorkerNode = "nodes:"
ChannelMasterNode = "nodes:master"
)

View File

@@ -1,6 +0,0 @@
package constants
const (
ASCENDING = "ascending"
DESCENDING = "descending"
)

View File

@@ -1,6 +0,0 @@
package constants
const (
EngineScrapy = "scrapy"
EngineColly = "colly"
)

View File

@@ -1,5 +0,0 @@
package constants
const (
ContextUser = "currentUser"
)

View File

@@ -1,13 +0,0 @@
package constants
import (
"crawlab/errors"
"net/http"
)
var (
ErrorMongoError = errors.NewSystemOPError(1001, "system error:[mongo]%s", http.StatusInternalServerError)
//users
ErrorUserNotFound = errors.NewBusinessError(10001, "user not found.", http.StatusUnauthorized)
ErrorUsernameOrPasswordInvalid = errors.NewBusinessError(11001, "username or password invalid", http.StatusUnauthorized)
)

View File

@@ -1,5 +0,0 @@
package constants
const (
ErrorRegexPattern = "(?:[ :,.]|^)((?:error|exception|traceback)s?)(?:[ :,.]|$)"
)

View File

@@ -1,9 +0,0 @@
package constants
const (
MsgTypeGetLog = "get-log"
MsgTypeGetSystemInfo = "get-sys-info"
MsgTypeCancelTask = "cancel-task"
MsgTypeRemoveLog = "remove-log"
MsgTypeRemoveSpider = "remove-spider"
)

View File

@@ -1,6 +0,0 @@
package constants
const (
ObjectIdNull = "000000000000000000000000"
Infinite = 999999999
)

View File

@@ -1,6 +0,0 @@
package constants
const (
StatusOnline = "online"
StatusOffline = "offline"
)

View File

@@ -1,13 +0,0 @@
package constants
const (
NotificationTriggerOnTaskEnd = "notification_trigger_on_task_end"
NotificationTriggerOnTaskError = "notification_trigger_on_task_error"
NotificationTriggerNever = "notification_trigger_never"
)
const (
NotificationTypeMail = "notification_type_mail"
NotificationTypeDingTalk = "notification_type_ding_talk"
NotificationTypeWechat = "notification_type_wechat"
)

View File

@@ -1,8 +0,0 @@
package constants
const (
RegisterTypeMac = "mac"
RegisterTypeIp = "ip"
RegisterTypeHostname = "hostname"
RegisterTypeCustomName = "customName"
)

View File

@@ -1,12 +0,0 @@
package constants
const (
RpcInstallLang = "install_lang"
RpcInstallDep = "install_dep"
RpcUninstallDep = "uninstall_dep"
RpcGetInstalledDepList = "get_installed_dep_list"
RpcGetLang = "get_lang"
RpcCancelTask = "cancel_task"
RpcGetSystemInfoService = "get_system_info"
RpcRemoveSpider = "remove_spider"
)

View File

@@ -1,10 +0,0 @@
package constants
const (
ScheduleStatusStop = "stopped"
ScheduleStatusRunning = "running"
ScheduleStatusError = "error"
ScheduleStatusErrorNotFoundNode = "Not Found Node"
ScheduleStatusErrorNotFoundSpider = "Not Found Spider"
)

View File

@@ -1,5 +0,0 @@
package constants
const ScrapyProtectedStageNames = ""
const ScrapyProtectedFieldNames = "_id,task_id,ts"

View File

@@ -1,7 +0,0 @@
package constants
const (
Customized = "customized"
Configurable = "configurable"
Plugin = "plugin"
)

View File

@@ -1,25 +0,0 @@
package constants
const (
Windows = "windows"
Linux = "linux"
Darwin = "darwin"
)
const (
Python = "python"
Nodejs = "node"
Java = "java"
)
const (
InstallStatusNotInstalled = "not-installed"
InstallStatusInstalling = "installing"
InstallStatusInstallingOther = "installing-other"
InstallStatusInstalled = "installed"
)
const (
LangTypeLang = "lang"
LangTypeWebDriver = "webdriver"
)

View File

@@ -1,32 +0,0 @@
package constants
const (
// pending: scheduled, waiting to run
StatusPending string = "pending"
// running
StatusRunning string = "running"
// finished
StatusFinished string = "finished"
// error
StatusError string = "error"
// cancelled
StatusCancelled string = "cancelled"
// abnormal termination caused by a node restart
StatusAbnormal string = "abnormal"
)
const (
TaskFinish string = "finish"
TaskCancel string = "cancel"
)
const (
RunTypeAllNodes string = "all-nodes"
RunTypeRandom string = "random"
RunTypeSelectedNodes string = "selected-nodes"
)
const (
TaskTypeSpider string = "spider"
TaskTypeSystem string = "system"
)

View File

@@ -1,6 +0,0 @@
package constants
const (
RoleAdmin = "admin"
RoleNormal = "normal"
)

View File

@@ -1,9 +0,0 @@
package constants
const (
String = "string"
Number = "number"
Boolean = "boolean"
Array = "array"
Object = "object"
)

View File

@@ -1,142 +0,0 @@
[
{
"name": "login_7d",
"title_cn": "连续登录 7 天",
"title_en": "Logged-in for 7 days",
"description_cn": "连续 7 天登录 Crawlab即可完成挑战",
"description_en": "Logged-in for consecutive 7 days to complete the challenge",
"difficulty": 1
},
{
"name": "login_30d",
"title_cn": "连续登录 30 天",
"title_en": "Logged-in for 30 days",
"description_cn": "连续 30 天登录 Crawlab即可完成挑战",
"description_en": "Logged-in for consecutive 30 days to complete the challenge",
"difficulty": 2
},
{
"name": "login_90d",
"title_cn": "连续登录 90 天",
"title_en": "Logged-in for 90 days",
"description_cn": "连续 90 天登录 Crawlab即可完成挑战",
"description_en": "Logged-in for consecutive 90 days to complete the challenge",
"difficulty": 3
},
{
"name": "login_180d",
"title_cn": "连续登录 180 天",
"title_en": "Logged-in for 180 days",
"description_cn": "连续 180 天登录 Crawlab即可完成挑战",
"description_en": "Logged-in for consecutive 180 days to complete the challenge",
"difficulty": 4
},
{
"name": "create_customized_spider",
"title_cn": "创建 1 个自定义爬虫",
"title_en": "Create a customized spider",
"description_cn": "在爬虫列表中,点击 '添加爬虫',选择 '自定义爬虫',输入相应的参数,点击添加,即可完成挑战!",
"description_en": "In Spider List page, click 'Add Spider', select 'Customized Spider', enter params, click 'Add' to finish the challenge.",
"difficulty": 1,
"path": "/spiders"
},
{
"name": "create_configurable_spider",
"title_cn": "创建 1 个可配置爬虫",
"title_en": "Create a configurable spider",
"description_cn": "在爬虫列表中,点击 '添加爬虫',选择 '可配置爬虫',输入相应的参数,点击添加,即可完成挑战!",
"description_en": "In Spider List page, click 'Add Spider', select 'Configurable Spider', enter params, click 'Add' to finish the challenge.",
"difficulty": 1,
"path": "/spiders"
},
{
"name": "run_random",
"title_cn": "用随机模式成功运行爬虫",
"title_en": "Run a spider in random mode successfully",
"description_cn": "在您创建好的爬虫中,导航到其对应的详情页(爬虫列表中点击爬虫),选择随机模式运行一个爬虫,并能运行成功。",
"description_en": "In your created spiders, navigate to corresponding detail page (click spider in Spider List page), run a spider in random mode successfully.",
"difficulty": 1,
"path": "/spiders"
},
{
"name": "scrape_1k",
"title_cn": "抓取 1 千条数据",
"title_en": "Scrape 1k records",
"description_cn": "运行您创建好的爬虫,抓取 1 千条及以上的结果数据,即可完成挑战!",
"description_en": "Run your created spiders, scrape 1k and more results to finish the challenge.",
"difficulty": 2,
"path": "/spiders"
},
{
"name": "scrape_10k",
"title_cn": "抓取 1 万条数据",
"title_en": "Scrape 10k records",
"description_cn": "运行您创建好的爬虫,抓取 1 万条及以上的结果数据,即可完成挑战!",
"description_en": "Run your created spiders, scrape 10k and more results to finish the challenge.",
"difficulty": 3,
"path": "/spiders"
},
{
"name": "scrape_100k",
"title_cn": "抓取 10 万条数据",
"title_en": "Scrape 100k records",
"description_cn": "运行您创建好的爬虫,抓取 10 万条及以上的结果数据,即可完成挑战!",
"description_en": "Run your created spiders, scrape 100k and more results to finish the challenge.",
"difficulty": 4,
"path": "/spiders"
},
{
"name": "create_schedule",
"title_cn": "创建 1 个定时任务",
"title_en": "Create a schedule",
"description_cn": "在定时任务列表中,创建一个定时任务,正确设置好 Cron 表达式,即可完成挑战!",
"description_en": "In Schedule List page, create a schedule and configure cron expression to finish the task.",
"difficulty": 1,
"path": "/schedules"
},
{
"name": "create_nodes",
"title_cn": "创建 1 个节点集群",
"title_en": "Create a node cluster",
"description_cn": "按照文档的部署指南,部署含有 3 个节点的集群,即可完成挑战!",
"description_en": "Deploy a 3-node cluster according to the deployment guidance in documentation to finish the task.",
"difficulty": 3,
"path": "/nodes"
},
{
"name": "install_dep",
"title_cn": "安装 1 个依赖",
"title_en": "Install a dependency successfully",
"description_cn": "在 '节点列表->安装' 或 '节点详情->安装' 中,搜索并安装所需的 1 个依赖,即可完成挑战!",
"description_en": "In 'Node List -> Installation' or 'Node Detail -> Installation', search and install a dependency.",
"difficulty": 3,
"path": "/nodes"
},
{
"name": "install_lang",
"title_cn": "安装 1 个语言环境",
"title_en": "Install a language successfully",
"description_cn": "在 '节点列表->安装' 或 '节点详情->安装' 中,点击安装所需的 1 个语言环境,即可完成挑战!",
"description_en": "In 'Node List -> Installation' or 'Node Detail -> Installation', install a language.",
"difficulty": 3,
"path": "/nodes"
},
{
"name": "view_disclaimer",
"title_cn": "阅读免责声明",
"title_en": "View disclaimer",
"description_cn": "在左侧菜单栏,点击 '免责声明' 查看其内容,即可完成挑战!",
"description_en": "In the left side menu, click 'Disclaimer' and view its content to finish the challenge.",
"difficulty": 1,
"path": "/disclaimer"
},
{
"name": "create_user",
"title_cn": "创建 1 个用户",
"title_en": "Create a user",
"description_cn": "在用户管理页面中创建一个新用户,即可完成挑战!",
"description_en": "In User Admin page, create a new user to finish the challenge.",
"difficulty": 1,
"path": "/users"
}
]

View File

@@ -1,44 +0,0 @@
package database
import (
"context"
"github.com/apex/log"
"github.com/olivere/elastic/v7"
"github.com/satori/go.uuid"
"github.com/spf13/viper"
"sync"
"time"
)
var doOnce sync.Once
var ctx context.Context
var ESClient *elastic.Client
func InitEsClient() {
esClientStr := viper.GetString("setting.esClient")
ctx = context.Background()
ESClient, _ = elastic.NewClient(elastic.SetURL(esClientStr), elastic.SetSniff(false))
}
// WriteMsgToES reads messages from the channel and writes them into Elasticsearch
func WriteMsgToES(when time.Time, msg chan string, index string) {
doOnce.Do(InitEsClient)
vals := make(map[string]interface{})
vals["@timestamp"] = when.Format(time.RFC3339)
for {
select {
case vals["@msg"] = <-msg:
uid := uuid.NewV4().String()
_, err := ESClient.Index().Index(index).Id(uid).BodyJson(vals).Refresh("wait_for").Do(ctx)
if err != nil {
log.Error(err.Error())
log.Error("send msg log to es error")
return
}
case <-time.After(6 * time.Second):
return
}
}
}

View File

@@ -1,112 +0,0 @@
package database
import (
"crawlab/constants"
"github.com/apex/log"
"github.com/cenkalti/backoff/v4"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"github.com/spf13/viper"
"net"
"reflect"
"time"
)
var Session *mgo.Session
func GetSession() *mgo.Session {
return Session.Copy()
}
func GetDb() (*mgo.Session, *mgo.Database) {
s := GetSession()
return s, s.DB(viper.GetString("mongo.db"))
}
func GetCol(collectionName string) (*mgo.Session, *mgo.Collection) {
s := GetSession()
db := s.DB(viper.GetString("mongo.db"))
col := db.C(collectionName)
return s, col
}
func GetGridFs(prefix string) (*mgo.Session, *mgo.GridFS) {
s, db := GetDb()
gf := db.GridFS(prefix)
return s, gf
}
func FillNullObjectId(doc interface{}) {
t := reflect.TypeOf(doc)
if t.Kind() == reflect.Ptr {
t = t.Elem()
}
if t.Kind() != reflect.Struct {
return
}
v := reflect.ValueOf(doc)
for i := 0; i < t.NumField(); i++ {
ft := t.Field(i)
fv := v.Elem().Field(i)
val := fv.Interface()
switch val.(type) {
case bson.ObjectId:
if !val.(bson.ObjectId).Valid() {
v.FieldByName(ft.Name).Set(reflect.ValueOf(bson.ObjectIdHex(constants.ObjectIdNull)))
}
}
}
}
func InitMongo() error {
var mongoHost = viper.GetString("mongo.host")
var mongoPort = viper.GetString("mongo.port")
var mongoDb = viper.GetString("mongo.db")
var mongoUsername = viper.GetString("mongo.username")
var mongoPassword = viper.GetString("mongo.password")
var mongoAuth = viper.GetString("mongo.authSource")
if Session == nil {
var dialInfo mgo.DialInfo
addr := net.JoinHostPort(mongoHost, mongoPort)
timeout := time.Second * 10
dialInfo = mgo.DialInfo{
Addrs: []string{addr},
Timeout: timeout,
Database: mongoDb,
PoolLimit: 100,
PoolTimeout: timeout,
ReadTimeout: timeout,
WriteTimeout: timeout,
AppName: "crawlab",
FailFast: true,
MinPoolSize: 10,
MaxIdleTimeMS: 1000 * 30,
}
if mongoUsername != "" {
dialInfo.Username = mongoUsername
dialInfo.Password = mongoPassword
dialInfo.Source = mongoAuth
}
bp := backoff.NewExponentialBackOff()
var err error
err = backoff.Retry(func() error {
Session, err = mgo.DialWithInfo(&dialInfo)
if err != nil {
log.WithError(err).Warnf("waiting to connect to mongo database, will try again after %f seconds.", bp.NextBackOff().Seconds())
}
return err
}, bp)
}
//Add Unique index for 'key'
keyIndex := mgo.Index{
Key: []string{"key"},
Unique: true,
}
s, c := GetCol("nodes")
defer s.Close()
c.EnsureIndex(keyIndex)
return nil
}

View File

@@ -1,70 +0,0 @@
package database
import (
"crawlab/config"
"github.com/apex/log"
"github.com/globalsign/mgo"
. "github.com/smartystreets/goconvey/convey"
"github.com/spf13/viper"
"reflect"
"testing"
)
func init() {
if err := config.InitConfig("../conf/config.yml"); err != nil {
log.Fatal("Init config failed")
}
log.Infof("initialized config successfully")
err := InitMongo()
if err != nil {
log.Fatal("Init mongodb failed")
}
}
func TestGetDb(t *testing.T) {
Convey("Test GetDb", t, func() {
if err := config.InitConfig("../conf/config.yml"); err != nil {
t.Fatal("Init config failed")
}
t.Log("initialized config successfully")
err := InitMongo()
if err != nil {
t.Fatal("Init mongodb failed")
}
s, db := GetDb()
Convey("The value should be Session.Copy", func() {
So(s, ShouldResemble, Session.Copy())
})
Convey("The value should be reference of database", func() {
So(db, ShouldResemble, s.DB(viper.GetString("mongo.db")))
})
})
}
func TestGetCol(t *testing.T) {
var c = "nodes"
var colActual *mgo.Collection
Convey("Test GetCol", t, func() {
s, col := GetCol(c)
Convey("s should resemble Session.Copy", func() {
So(s, ShouldResemble, Session.Copy())
So(reflect.TypeOf(col), ShouldResemble, reflect.TypeOf(colActual))
})
})
}
func TestGetGridFs(t *testing.T) {
var prefix = "files"
var gfActual *mgo.GridFS
Convey("Test GetGridFs", t, func() {
s, gf := GetGridFs(prefix)
Convey("s should be session.copy", func() {
So(s, ShouldResemble, Session.Copy())
})
Convey("gf should be *mgo.GridFS", func() {
So(reflect.TypeOf(gf), ShouldResemble, reflect.TypeOf(gfActual))
})
})
}

View File

@@ -1,96 +0,0 @@
package database
import (
"context"
"crawlab/utils"
"fmt"
"github.com/apex/log"
"github.com/gomodule/redigo/redis"
errors2 "github.com/pkg/errors"
"time"
)
type ConsumeFunc func(message redis.Message) error
func (r *Redis) Close() {
err := r.pool.Close()
if err != nil {
log.Errorf("redis close error.")
}
}
func (r *Redis) subscribe(ctx context.Context, consume ConsumeFunc, channel ...string) error {
psc := redis.PubSubConn{Conn: r.pool.Get()}
if err := psc.Subscribe(redis.Args{}.AddFlat(channel)...); err != nil {
return err
}
done := make(chan error, 1)
tick := time.NewTicker(time.Second * 3)
defer tick.Stop()
go func() {
defer utils.Close(psc)
for {
switch msg := psc.Receive().(type) {
case error:
done <- fmt.Errorf("redis pubsub receive err: %v", msg)
return
case redis.Message:
if err := consume(msg); err != nil {
fmt.Printf("redis pubsub consume message err: %v", err)
continue
}
case redis.Subscription:
if msg.Count == 0 {
// all channels are unsubscribed
return
}
}
}
}()
// main loop: send keepalive pings, and exit on context cancellation or receiver error
for {
select {
case <-ctx.Done():
if err := psc.Unsubscribe(); err != nil {
fmt.Printf("redis pubsub unsubscribe err: %v \n", err)
}
done <- nil
case <-tick.C:
if err := psc.Ping(""); err != nil {
fmt.Printf("ping message error: %s \n", err)
//done <- err
}
case err := <-done:
close(done)
return err
}
}
}
func (r *Redis) Subscribe(ctx context.Context, consume ConsumeFunc, channel ...string) error {
index := 0
go func() {
for {
err := r.subscribe(ctx, consume, channel...)
fmt.Println(err)
if err == nil {
index = 0
break
}
time.Sleep(5 * time.Second)
index += 1
fmt.Printf("try reconnect %d times \n", index)
}
}()
return nil
}
func (r *Redis) Publish(channel, message string) (n int, err error) {
conn := r.pool.Get()
defer utils.Close(conn)
n, err = redis.Int(conn.Do("PUBLISH", channel, message))
if err != nil {
return 0, errors2.Wrapf(err, "redis publish %s %s", channel, message)
}
return
}

View File

@@ -1,289 +0,0 @@
package database
import (
"context"
"crawlab/entity"
"crawlab/utils"
"errors"
"github.com/apex/log"
"github.com/cenkalti/backoff/v4"
"github.com/gomodule/redigo/redis"
"github.com/spf13/viper"
"runtime/debug"
"strings"
"time"
)
var RedisClient *Redis
type Redis struct {
pool *redis.Pool
}
type Mutex struct {
Name string
expiry time.Duration
tries int
delay time.Duration
value string
}
func NewRedisClient() *Redis {
return &Redis{pool: NewRedisPool()}
}
func (r *Redis) RPush(collection string, value interface{}) error {
c := r.pool.Get()
defer utils.Close(c)
if _, err := c.Do("RPUSH", collection, value); err != nil {
log.Error(err.Error())
debug.PrintStack()
return err
}
return nil
}
func (r *Redis) LPush(collection string, value interface{}) error {
c := r.pool.Get()
defer utils.Close(c)
if _, err := c.Do("LPUSH", collection, value); err != nil {
log.Error(err.Error())
debug.PrintStack()
return err
}
return nil
}
func (r *Redis) LPop(collection string) (string, error) {
c := r.pool.Get()
defer utils.Close(c)
value, err2 := redis.String(c.Do("LPOP", collection))
if err2 != nil {
return value, err2
}
return value, nil
}
func (r *Redis) HSet(collection string, key string, value string) error {
c := r.pool.Get()
defer utils.Close(c)
if _, err := c.Do("HSET", collection, key, value); err != nil {
log.Error(err.Error())
debug.PrintStack()
return err
}
return nil
}
func (r *Redis) Ping() error {
c := r.pool.Get()
defer utils.Close(c)
_, err2 := redis.String(c.Do("PING"))
return err2
}
func (r *Redis) HGet(collection string, key string) (string, error) {
c := r.pool.Get()
defer utils.Close(c)
value, err2 := redis.String(c.Do("HGET", collection, key))
if err2 != nil && err2 != redis.ErrNil {
log.Error(err2.Error())
debug.PrintStack()
return value, err2
}
return value, nil
}
func (r *Redis) HDel(collection string, key string) error {
c := r.pool.Get()
defer utils.Close(c)
if _, err := c.Do("HDEL", collection, key); err != nil {
log.Error(err.Error())
debug.PrintStack()
return err
}
return nil
}
func (r *Redis) HScan(collection string) (results []string, err error) {
c := r.pool.Get()
defer utils.Close(c)
var (
cursor int64
items []string
)
for {
values, err := redis.Values(c.Do("HSCAN", collection, cursor))
if err != nil {
return results, err
}
values, err = redis.Scan(values, &cursor, &items)
if err != nil {
return results, err
}
for i := 0; i < len(items); i += 2 {
cur := items[i+1]
results = append(results, cur)
}
if cursor == 0 {
break
}
}
return results, err
}
func (r *Redis) HKeys(collection string) ([]string, error) {
c := r.pool.Get()
defer utils.Close(c)
value, err2 := redis.Strings(c.Do("HKEYS", collection))
if err2 != nil {
log.Error(err2.Error())
debug.PrintStack()
return []string{}, err2
}
return value, nil
}
func (r *Redis) BRPop(collection string, timeout int) (string, error) {
if timeout <= 0 {
timeout = 60
}
c := r.pool.Get()
defer utils.Close(c)
values, err := redis.Strings(c.Do("BRPOP", collection, timeout))
if err != nil {
return "", err
}
return values[1], nil
}
func NewRedisPool() *redis.Pool {
var address = viper.GetString("redis.address")
var port = viper.GetString("redis.port")
var database = viper.GetString("redis.database")
var password = viper.GetString("redis.password")
var url string
if password == "" {
url = "redis://" + address + ":" + port + "/" + database
} else {
url = "redis://x:" + password + "@" + address + ":" + port + "/" + database
}
return &redis.Pool{
Dial: func() (conn redis.Conn, e error) {
return redis.DialURL(url,
redis.DialConnectTimeout(time.Second*10),
redis.DialReadTimeout(time.Second*600),
redis.DialWriteTimeout(time.Second*10),
)
},
TestOnBorrow: func(c redis.Conn, t time.Time) error {
if time.Since(t) < time.Minute {
return nil
}
_, err := c.Do("PING")
return err
},
MaxIdle: 10,
MaxActive: 0,
IdleTimeout: 300 * time.Second,
Wait: false,
MaxConnLifetime: 0,
}
}
func InitRedis() error {
RedisClient = NewRedisClient()
b := backoff.NewExponentialBackOff()
b.MaxInterval = 20 * time.Second
err := backoff.Retry(func() error {
err := RedisClient.Ping()
if err != nil {
log.WithError(err).Warnf("waiting for an active redis pool connection, will try again after %f seconds.", b.NextBackOff().Seconds())
}
return err
}, b)
return err
}
func Pub(channel string, msg entity.NodeMessage) error {
if _, err := RedisClient.Publish(channel, utils.GetJson(msg)); err != nil {
log.Errorf("publish redis error: %s", err.Error())
debug.PrintStack()
return err
}
return nil
}
func Sub(channel string, consume ConsumeFunc) error {
ctx := context.Background()
if err := RedisClient.Subscribe(ctx, consume, channel); err != nil {
log.Errorf("subscribe redis error: %s", err.Error())
debug.PrintStack()
return err
}
return nil
}
// build the distributed lock key
func (r *Redis) getLockKey(lockKey string) string {
lockKey = strings.ReplaceAll(lockKey, ":", "-")
return "nodes:lock:" + lockKey
}
// acquire the lock
func (r *Redis) Lock(lockKey string) (int64, error) {
c := r.pool.Get()
defer utils.Close(c)
lockKey = r.getLockKey(lockKey)
ts := time.Now().Unix()
ok, err := c.Do("SET", lockKey, ts, "NX", "PX", 30000)
if err != nil {
log.Errorf("get lock fail with error: %s", err.Error())
debug.PrintStack()
return 0, err
}
if ok == nil {
log.Errorf("the lockKey is locked: key=%s", lockKey)
return 0, errors.New("the lockKey is locked")
}
return ts, nil
}
func (r *Redis) UnLock(lockKey string, value int64) {
c := r.pool.Get()
defer utils.Close(c)
lockKey = r.getLockKey(lockKey)
getValue, err := redis.Int64(c.Do("GET", lockKey))
if err != nil {
log.Errorf("get lockKey error: %s", err.Error())
debug.PrintStack()
return
}
if getValue != value {
log.Errorf("the lockKey value diff: %d, %d", value, getValue)
return
}
v, err := redis.Int64(c.Do("DEL", lockKey))
if err != nil {
log.Errorf("unlock failed, error: %s", err.Error())
debug.PrintStack()
return
}
if v == 0 {
log.Errorf("unlock failed: key=%s", lockKey)
return
}
}
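
A usage sketch for the lock helpers above. It assumes InitRedis() succeeds against a configured Redis instance; withNodeLock is a hypothetical wrapper written for illustration, not part of the commit.

package main

import (
	"log"

	"crawlab/database"
)

// withNodeLock runs fn while holding the Redis lock for key.
func withNodeLock(key string, fn func()) {
	// Lock stores a timestamp under a namespaced "nodes:lock:" key; UnLock
	// compares that value before deleting, so the same ts must be passed back.
	ts, err := database.RedisClient.Lock(key)
	if err != nil {
		log.Printf("could not acquire lock %s: %v", key, err)
		return
	}
	defer database.RedisClient.UnLock(key, ts)
	fn()
}

func main() {
	if err := database.InitRedis(); err != nil {
		log.Fatalf("init redis failed: %v", err)
	}
	withNodeLock("task:assign", func() {
		log.Println("doing work under the lock")
	})
}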

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

Binary file not shown.

View File

@@ -1,17 +0,0 @@
package entity
import "strconv"
type Page struct {
Skip int
Limit int
PageNum int
PageSize int
}
func (p *Page) GetPage(pageNum string, pageSize string) {
p.PageNum, _ = strconv.Atoi(pageNum)
p.PageSize, _ = strconv.Atoi(pageSize)
p.Skip = p.PageSize * (p.PageNum - 1)
p.Limit = p.PageSize
}
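
A quick sketch of the pagination math in GetPage above; the import path follows the crawlab module and the page values are illustrative.

package main

import (
	"fmt"

	"crawlab/entity"
)

func main() {
	var p entity.Page
	// e.g. from a query string like ?pageNum=3&pageSize=20
	p.GetPage("3", "20")
	fmt.Println(p.Skip, p.Limit) // 40 20: skip the first two pages, return up to 20 docs
}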

View File

@@ -1,40 +0,0 @@
package entity
type ConfigSpiderData struct {
// general fields
Name string `yaml:"name" json:"name"`
DisplayName string `yaml:"display_name" json:"display_name"`
Col string `yaml:"col" json:"col"`
Remark string `yaml:"remark" json:"remark"`
Type string `yaml:"type" bson:"type"`
// configurable spider fields
Engine string `yaml:"engine" json:"engine"`
StartUrl string `yaml:"start_url" json:"start_url"`
StartStage string `yaml:"start_stage" json:"start_stage"`
Stages []Stage `yaml:"stages" json:"stages"`
Settings map[string]string `yaml:"settings" json:"settings"`
// customized spider fields
Cmd string `yaml:"cmd" json:"cmd"`
}
type Stage struct {
Name string `yaml:"name" json:"name"`
IsList bool `yaml:"is_list" json:"is_list"`
ListCss string `yaml:"list_css" json:"list_css"`
ListXpath string `yaml:"list_xpath" json:"list_xpath"`
PageCss string `yaml:"page_css" json:"page_css"`
PageXpath string `yaml:"page_xpath" json:"page_xpath"`
PageAttr string `yaml:"page_attr" json:"page_attr"`
Fields []Field `yaml:"fields" json:"fields"`
}
type Field struct {
Name string `yaml:"name" json:"name"`
Css string `yaml:"css" json:"css"`
Xpath string `yaml:"xpath" json:"xpath"`
Attr string `yaml:"attr" json:"attr"`
NextStage string `yaml:"next_stage" json:"next_stage"`
Remark string `yaml:"remark" json:"remark"`
}

View File

@@ -1,8 +0,0 @@
package entity
type DocItem struct {
Title string `json:"title"`
Url string `json:"url"`
Path string `json:"path"`
Children []DocItem `json:"children"`
}

View File

@@ -1,28 +0,0 @@
package entity
type NodeMessage struct {
// message type
Type string `json:"type"`
// task-related
TaskId string `json:"task_id"` // task ID
// node-related
NodeId string `json:"node_id"` // node ID
// log-related
LogPath string `json:"log_path"` // log path
Log string `json:"log"` // log content
// system info
SysInfo SystemInfo `json:"sys_info"`
// spider-related
SpiderId string `json:"spider_id"` // spider ID
// language-related
Lang Lang `json:"lang"`
// error-related
Error string `json:"error"`
}

View File

@@ -1,11 +0,0 @@
package entity
type RpcMessage struct {
Id string `json:"id"` // message ID
Method string `json:"method"` // message method
NodeId string `json:"node_id"` // node ID
Params map[string]string `json:"params"` // parameters
Timeout int `json:"timeout"` // timeout
Result string `json:"result"` // result
Error string `json:"error"` // error
}

View File

@@ -1,17 +0,0 @@
package entity
type SpiderType struct {
Type string `json:"type" bson:"_id"`
Count int `json:"count" bson:"count"`
}
type ScrapySettingParam struct {
Key string `json:"key"`
Value interface{} `json:"value"`
Type string `json:"type"`
}
type ScrapyItem struct {
Name string `json:"name"`
Fields []string `json:"fields"`
}

View File

@@ -1,39 +0,0 @@
package entity
type SystemInfo struct {
ARCH string `json:"arch"`
OS string `json:"os"`
Hostname string `json:"host_name"`
NumCpu int `json:"num_cpu"`
Executables []Executable `json:"executables"`
}
type Executable struct {
Path string `json:"path"`
FileName string `json:"file_name"`
DisplayName string `json:"display_name"`
}
type Lang struct {
Name string `json:"name"`
ExecutableName string `json:"executable_name"`
ExecutablePaths []string `json:"executable_paths"`
DepExecutablePath string `json:"dep_executable_path"`
LockPath string `json:"lock_path"`
InstallScript string `json:"install_script"`
InstallStatus string `json:"install_status"`
DepFileName string `json:"dep_file_name"`
InstallDepArgs string `json:"install_dep_cmd"`
Type string `json:"type"`
}
type Dependency struct {
Name string `json:"name"`
Version string `json:"version"`
Description string `json:"description"`
Installed bool `json:"installed"`
}
type PackageJson struct {
Dependencies map[string]string `json:"dependencies"`
}

View File

@@ -1,23 +0,0 @@
package entity
type Release struct {
Name string `json:"name"`
Draft bool `json:"draft"`
PreRelease bool `json:"pre_release"`
PublishedAt string `json:"published_at"`
Body string `json:"body"`
}
type ReleaseSlices []Release
func (r ReleaseSlices) Len() int {
return len(r)
}
func (r ReleaseSlices) Less(i, j int) bool {
return r[i].PublishedAt < r[j].PublishedAt
}
func (r ReleaseSlices) Swap(i, j int) {
r[i], r[j] = r[j], r[i]
}

View File

@@ -1,54 +0,0 @@
package errors
import (
"fmt"
"net/http"
)
type Scope int
const (
ScopeSystem Scope = 1
ScopeBusiness Scope = 2
)
type OPError struct {
HttpCode int
Message string
Code int
Scope Scope
}
func (O OPError) Error() string {
var scope string
switch O.Scope {
case ScopeSystem:
scope = "system"
case ScopeBusiness:
scope = "business"
}
return fmt.Sprintf("%s error: [%d]%s.", scope, O.Code, O.Message)
}
func NewSystemOPError(code int, message string, httpCodes ...int) *OPError {
httpCode := http.StatusOK
if len(httpCodes) > 0 {
httpCode = httpCodes[0]
}
return NewOpError(code, message, ScopeSystem, httpCode)
}
func NewOpError(code int, message string, scope Scope, httpCode int) *OPError {
return &OPError{
Message: message,
Code: code,
Scope: scope,
HttpCode: httpCode,
}
}
func NewBusinessError(code int, message string, httpCodes ...int) *OPError {
httpCode := http.StatusOK
if len(httpCodes) > 0 {
httpCode = httpCodes[0]
}
return NewOpError(code, message, ScopeBusiness, httpCode)
}
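
A small sketch of how the OPError helpers above render, mirroring the ErrorUsernameOrPasswordInvalid constant defined earlier in this commit; the main function is illustrative only.

package main

import (
	"fmt"
	"net/http"

	"crawlab/errors"
)

func main() {
	e := errors.NewBusinessError(11001, "username or password invalid", http.StatusUnauthorized)
	fmt.Println(e.Error())  // business error: [11001]username or password invalid.
	fmt.Println(e.HttpCode) // 401
}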

View File

@@ -1,43 +1,47 @@
module crawlab
go 1.12
go 1.15
replace (
github.com/crawlab-team/crawlab-core => /Users/marvzhang/projects/crawlab-team/crawlab-core
github.com/crawlab-team/crawlab-db => /Users/marvzhang/projects/crawlab-team/crawlab-db
)
require (
github.com/Masterminds/semver v1.4.2 // indirect
github.com/Masterminds/sprig v2.16.0+incompatible // indirect
github.com/Unknwon/goconfig v0.0.0-20191126170842-860a72fb44fd
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751
github.com/Unknwon/goconfig v0.0.0-20191126170842-860a72fb44fd // indirect
github.com/aokoli/goutils v1.0.1 // indirect
github.com/apex/log v1.1.4
github.com/cenkalti/backoff/v4 v4.0.2
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/fsnotify/fsnotify v1.4.9
github.com/apex/log v1.9.0
github.com/cenkalti/backoff/v4 v4.1.0 // indirect
github.com/crawlab-team/crawlab-core v0.0.0-00010101000000-000000000000
github.com/crawlab-team/crawlab-db v0.0.2
github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect
github.com/fsnotify/fsnotify v1.4.9 // indirect
github.com/gin-gonic/gin v1.6.3
github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8
github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8 // indirect
github.com/go-playground/validator/v10 v10.3.0
github.com/gomodule/redigo v2.0.0+incompatible
github.com/hashicorp/go-sockaddr v1.0.0
github.com/gomodule/redigo v2.0.0+incompatible // indirect
github.com/hashicorp/go-sockaddr v1.0.0 // indirect
github.com/huandu/xstrings v1.2.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/imroc/req v0.3.0
github.com/imroc/req v0.3.0 // indirect
github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0 // indirect
github.com/matcornic/hermes v1.2.0
github.com/matcornic/hermes v1.2.0 // indirect
github.com/mattn/go-runewidth v0.0.3 // indirect
github.com/olekukonko/tablewriter v0.0.1 // indirect
github.com/olivere/elastic/v7 v7.0.15
github.com/pkg/errors v0.9.1
github.com/satori/go.uuid v1.2.0
github.com/smartystreets/goconvey v1.6.4
github.com/spf13/viper v1.7.0
github.com/pkg/errors v0.9.1 // indirect
github.com/satori/go.uuid v1.2.0 // indirect
github.com/smartystreets/goconvey v1.6.4 // indirect
github.com/spf13/viper v1.7.1
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect
github.com/swaggo/gin-swagger v1.2.0
github.com/swaggo/swag v1.6.6
go.uber.org/atomic v1.6.0
go.uber.org/atomic v1.6.0 // indirect
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f // indirect
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df
gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df // indirect
gopkg.in/russross/blackfriday.v2 v2.0.0 // indirect
gopkg.in/src-d/go-git.v4 v4.13.1
gopkg.in/yaml.v2 v2.3.0
gopkg.in/src-d/go-git.v4 v4.13.1 // indirect
gopkg.in/yaml.v2 v2.3.0 // indirect
)

View File

@@ -42,7 +42,10 @@ github.com/aokoli/goutils v1.0.1 h1:7fpzNGoJ3VA8qcrm++XEE1QUe0mIwNeLa02Nwq7RDkg=
github.com/aokoli/goutils v1.0.1/go.mod h1:SijmP0QR8LtwsmDs8Yii5Z/S4trXFGFC2oO5g9DP+DQ=
github.com/apex/log v1.1.4 h1:3Zk+boorIQAAGBrHn0JUtAau4ihMamT4WdnfdnXM1zQ=
github.com/apex/log v1.1.4/go.mod h1:AlpoD9aScyQfJDVHmLMEcx4oU6LqzkWp4Mg9GdAcEvQ=
github.com/apex/log v1.9.0 h1:FHtw/xuaM8AgmvDDTI9fiwoAL25Sq2cxojnZICUU8l0=
github.com/apex/log v1.9.0/go.mod h1:m82fZlWIuiWzWP04XCTXmnX0xRkYYbCdYn8jbJeLBEA=
github.com/apex/logs v0.0.4/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo=
github.com/apex/logs v1.0.0/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo=
github.com/aphistic/golf v0.0.0-20180712155816-02c07f170c5a/go.mod h1:3NqKYiepwy8kCu4PNA+aP7WUV72eXWJeP9/r3/K9aLE=
github.com/aphistic/sweet v0.2.0/go.mod h1:fWDlIh/isSE9n6EPsRmC0det+whmX6dJid3stzu0Xys=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
@@ -59,6 +62,8 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs=
github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg=
github.com/cenkalti/backoff/v4 v4.1.0 h1:c8LkOFQTzuO0WBM/ae5HdGQuZPfPxp7lqBRwQRm4fSc=
github.com/cenkalti/backoff/v4 v4.1.0/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
@@ -137,6 +142,7 @@ github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1
github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI=
github.com/go-playground/validator/v10 v10.3.0 h1:nZU+7q+yJoFmwvNgv/LnPUkwPal62+b2xXj0AU1Es7o=
github.com/go-playground/validator/v10 v10.3.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI=
github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
@@ -213,6 +219,8 @@ github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA=
github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0=
github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
@@ -232,6 +240,7 @@ github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxv
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA=
github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw=
@@ -239,6 +248,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y=
github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
@@ -261,6 +272,7 @@ github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHX
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-runewidth v0.0.3 h1:a+kO+98RDGEfo6asOGMmpodZq4FNtnGP54yps8BzLR4=
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
@@ -343,6 +355,8 @@ github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.7.0 h1:xVKxvI7ouOI5I+U9s2eeiUfMaWBVoXA3AWskkrqK0VM=
github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg=
github.com/spf13/viper v1.7.1 h1:pM5oEahlgWv/WnHXpgbKz7iLIxRf65tye2Ci+XFK5sk=
github.com/spf13/viper v1.7.1/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg=
github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4=
github.com/src-d/gcfg v1.4.0/go.mod h1:p/UMsR43ujA89BJY9duynAwIpvqEujIH/jFlfL7jWoI=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
@@ -356,6 +370,7 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14/go.mod h1:gxQT6pBGRuIGunNf/+tSOB5OHvguWi8Tbt82WOkf35E=
@@ -366,6 +381,8 @@ github.com/swaggo/swag v1.5.1/go.mod h1:1Bl9F/ZBpVWh22nY0zmYyASPO1lI/zIwRDrpZU+t
github.com/swaggo/swag v1.6.6 h1:3YX5hmuUyCMT/OqqnjW92gULAfHg3hVjpcPm53N64RY=
github.com/swaggo/swag v1.6.6/go.mod h1:xDhTyuFIujYiN3DKWC/H/83xcfHp+UE/IzWWampG7Zc=
github.com/tj/assert v0.0.0-20171129193455-018094318fb0/go.mod h1:mZ9/Rh9oLWpLLDRpvE+3b7gP/C2YyLFYxNmcLnPTMe0=
github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk=
github.com/tj/go-buffer v1.1.0/go.mod h1:iyiJpfFcR2B9sXu7KvjbT9fpM4mOelRSDTbntVj52Uc=
github.com/tj/go-elastic v0.0.0-20171221160941-36157cbbebc2/go.mod h1:WjeM0Oo1eNAjXGDx2yma7uG2XoyRZTq1uv3M/o7imD0=
github.com/tj/go-kinesis v0.0.0-20171128231115-08b17f58cb1b/go.mod h1:/yhzCV0xPfx6jb1bBgRFjl5lytqVqZXEaeqWP8lTEao=
github.com/tj/go-spin v1.1.0/go.mod h1:Mg1mzmePZm4dva8Qz60H2lHwmJ2loum4VIrLgVnKwh4=
@@ -580,6 +597,8 @@ gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=

View File

@@ -1,22 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe

View File

@@ -1 +0,0 @@
language: go

View File

@@ -1,21 +0,0 @@
Copyright (C) 2012 Rob Figueiredo
All Rights Reserved.
MIT LICENSE
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -1,125 +0,0 @@
[![GoDoc](http://godoc.org/github.com/robfig/cron?status.png)](http://godoc.org/github.com/robfig/cron)
[![Build Status](https://travis-ci.org/robfig/cron.svg?branch=master)](https://travis-ci.org/robfig/cron)
# cron
Cron V3 has been released!
To download the specific tagged release, run:
go get github.com/robfig/cron/v3@v3.0.0
Import it in your program as:
import "github.com/robfig/cron/v3"
It requires Go 1.11 or later due to usage of Go Modules.
Refer to the documentation here:
http://godoc.org/github.com/robfig/cron
The rest of this document describes the advances in v3 and a list of
breaking changes for users that wish to upgrade from an earlier version.
## Upgrading to v3 (June 2019)
cron v3 is a major upgrade to the library that addresses all outstanding bugs,
feature requests, and rough edges. It is based on a merge of master which
contains various fixes to issues found over the years and the v2 branch which
contains some backwards-incompatible features like the ability to remove cron
jobs. In addition, v3 adds support for Go Modules, cleans up rough edges like
the timezone support, and fixes a number of bugs.
New features:
- Support for Go modules. Callers must now import this library as
`github.com/robfig/cron/v3`, instead of `gopkg.in/...`
- Fixed bugs:
- 0f01e6b parser: fix combining of Dow and Dom (#70)
- dbf3220 adjust times when rolling the clock forward to handle non-existent midnight (#157)
- eeecf15 spec_test.go: ensure an error is returned on 0 increment (#144)
- 70971dc cron.Entries(): update request for snapshot to include a reply channel (#97)
- 1cba5e6 cron: fix: removing a job causes the next scheduled job to run too late (#206)
- Standard cron spec parsing by default (first field is "minute"), with an easy
way to opt into the seconds field (quartz-compatible). Note, however, that the
year field (optional in Quartz) is not supported.
- Extensible, key/value logging via an interface that complies with
the https://github.com/go-logr/logr project.
- The new Chain & JobWrapper types allow you to install "interceptors" to add
cross-cutting behavior like the following:
- Recover any panics from jobs
- Delay a job's execution if the previous run hasn't completed yet
- Skip a job's execution if the previous run hasn't completed yet
- Log each job's invocations
- Notification when jobs are completed
It is backwards incompatible with both v1 and v2. These updates are required:
- The v1 branch accepted an optional seconds field at the beginning of the cron
spec. This is non-standard and has led to a lot of confusion. The new default
parser conforms to the standard as described by [the Cron wikipedia page].
UPDATING: To retain the old behavior, construct your Cron with a custom
parser:
// Seconds field, required
cron.New(cron.WithSeconds())
// Seconds field, optional
cron.New(
cron.WithParser(
cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor))
- The Cron type now accepts functional options on construction rather than the
previous ad-hoc behavior modification mechanisms (setting a field, calling a setter).
UPDATING: Code that sets Cron.ErrorLogger or calls Cron.SetLocation must be
updated to provide those values on construction.
- CRON_TZ is now the recommended way to specify the timezone of a single
schedule, which is sanctioned by the specification. The legacy "TZ=" prefix
will continue to be supported since it is unambiguous and easy to do so.
UPDATING: No update is required.
- By default, cron will no longer recover panics in jobs that it runs.
Recovering can be surprising (see issue #192) and seems to be at odds with
typical behavior of libraries. Relatedly, the `cron.WithPanicLogger` option
has been removed to accommodate the more general JobWrapper type.
UPDATING: To opt into panic recovery and configure the panic logger:
cron.New(cron.WithChain(
cron.Recover(logger), // or use cron.DefaultLogger
))
- In adding support for https://github.com/go-logr/logr, `cron.WithVerboseLogger` was
removed, since it is duplicative with the leveled logging.
UPDATING: Callers should use `WithLogger` and specify a logger that does not
discard `Info` logs. For convenience, one is provided that wraps `*log.Logger`:
cron.New(
cron.WithLogger(cron.VerbosePrintfLogger(logger)))
### Background - Cron spec format
There are two cron spec formats in common usage:
- The "standard" cron format, described on [the Cron wikipedia page] and used by
the cron Linux system utility.
- The cron format used by [the Quartz Scheduler], commonly used for scheduled
jobs in Java software
[the Cron wikipedia page]: https://en.wikipedia.org/wiki/Cron
[the Quartz Scheduler]: http://www.quartz-scheduler.org/documentation/quartz-2.x/tutorials/crontrigger.html
The original version of this package included an optional "seconds" field, which
made it incompatible with both of these formats. Now, the "standard" format is
the default format accepted, and the Quartz format is opt-in.

View File

@@ -1,92 +0,0 @@
package cron
import (
"fmt"
"runtime"
"sync"
"time"
)
// JobWrapper decorates the given Job with some behavior.
type JobWrapper func(Job) Job
// Chain is a sequence of JobWrappers that decorates submitted jobs with
// cross-cutting behaviors like logging or synchronization.
type Chain struct {
wrappers []JobWrapper
}
// NewChain returns a Chain consisting of the given JobWrappers.
func NewChain(c ...JobWrapper) Chain {
return Chain{c}
}
// Then decorates the given job with all JobWrappers in the chain.
//
// This:
// NewChain(m1, m2, m3).Then(job)
// is equivalent to:
// m1(m2(m3(job)))
func (c Chain) Then(j Job) Job {
for i := range c.wrappers {
j = c.wrappers[len(c.wrappers)-i-1](j)
}
return j
}
// Recover panics in wrapped jobs and log them with the provided logger.
func Recover(logger Logger) JobWrapper {
return func(j Job) Job {
return FuncJob(func() {
defer func() {
if r := recover(); r != nil {
const size = 64 << 10
buf := make([]byte, size)
buf = buf[:runtime.Stack(buf, false)]
err, ok := r.(error)
if !ok {
err = fmt.Errorf("%v", r)
}
logger.Error(err, "panic", "stack", "...\n"+string(buf))
}
}()
j.Run()
})
}
}
// DelayIfStillRunning serializes jobs, delaying subsequent runs until the
// previous one is complete. Jobs running after a delay of more than a minute
// have the delay logged at Info.
func DelayIfStillRunning(logger Logger) JobWrapper {
return func(j Job) Job {
var mu sync.Mutex
return FuncJob(func() {
start := time.Now()
mu.Lock()
defer mu.Unlock()
if dur := time.Since(start); dur > time.Minute {
logger.Info("delay", "duration", dur)
}
j.Run()
})
}
}
// SkipIfStillRunning skips an invocation of the Job if a previous invocation is
// still running. It logs skips to the given logger at Info level.
func SkipIfStillRunning(logger Logger) JobWrapper {
var ch = make(chan struct{}, 1)
ch <- struct{}{}
return func(j Job) Job {
return FuncJob(func() {
select {
case v := <-ch:
j.Run()
ch <- v
default:
logger.Info("skip")
}
})
}
}

View File

@@ -1,221 +0,0 @@
package cron
import (
"io/ioutil"
"log"
"reflect"
"sync"
"testing"
"time"
)
func appendingJob(slice *[]int, value int) Job {
var m sync.Mutex
return FuncJob(func() {
m.Lock()
*slice = append(*slice, value)
m.Unlock()
})
}
func appendingWrapper(slice *[]int, value int) JobWrapper {
return func(j Job) Job {
return FuncJob(func() {
appendingJob(slice, value).Run()
j.Run()
})
}
}
func TestChain(t *testing.T) {
var nums []int
var (
append1 = appendingWrapper(&nums, 1)
append2 = appendingWrapper(&nums, 2)
append3 = appendingWrapper(&nums, 3)
append4 = appendingJob(&nums, 4)
)
NewChain(append1, append2, append3).Then(append4).Run()
if !reflect.DeepEqual(nums, []int{1, 2, 3, 4}) {
t.Error("unexpected order of calls:", nums)
}
}
func TestChainRecover(t *testing.T) {
panickingJob := FuncJob(func() {
panic("panickingJob panics")
})
t.Run("panic exits job by default", func(t *testing.T) {
defer func() {
if err := recover(); err == nil {
t.Errorf("panic expected, but none received")
}
}()
NewChain().Then(panickingJob).
Run()
})
t.Run("Recovering JobWrapper recovers", func(t *testing.T) {
NewChain(Recover(PrintfLogger(log.New(ioutil.Discard, "", 0)))).
Then(panickingJob).
Run()
})
t.Run("composed with the *IfStillRunning wrappers", func(t *testing.T) {
NewChain(Recover(PrintfLogger(log.New(ioutil.Discard, "", 0)))).
Then(panickingJob).
Run()
})
}
type countJob struct {
m sync.Mutex
started int
done int
delay time.Duration
}
func (j *countJob) Run() {
j.m.Lock()
j.started++
j.m.Unlock()
time.Sleep(j.delay)
j.m.Lock()
j.done++
j.m.Unlock()
}
func (j *countJob) Started() int {
defer j.m.Unlock()
j.m.Lock()
return j.started
}
func (j *countJob) Done() int {
defer j.m.Unlock()
j.m.Lock()
return j.done
}
func TestChainDelayIfStillRunning(t *testing.T) {
t.Run("runs immediately", func(t *testing.T) {
var j countJob
wrappedJob := NewChain(DelayIfStillRunning(DiscardLogger)).Then(&j)
go wrappedJob.Run()
time.Sleep(2 * time.Millisecond) // Give the job 2ms to complete.
if c := j.Done(); c != 1 {
t.Errorf("expected job run once, immediately, got %d", c)
}
})
t.Run("second run immediate if first done", func(t *testing.T) {
var j countJob
wrappedJob := NewChain(DelayIfStillRunning(DiscardLogger)).Then(&j)
go func() {
go wrappedJob.Run()
time.Sleep(time.Millisecond)
go wrappedJob.Run()
}()
time.Sleep(3 * time.Millisecond) // Give both jobs 3ms to complete.
if c := j.Done(); c != 2 {
t.Errorf("expected job run twice, immediately, got %d", c)
}
})
t.Run("second run delayed if first not done", func(t *testing.T) {
var j countJob
j.delay = 10 * time.Millisecond
wrappedJob := NewChain(DelayIfStillRunning(DiscardLogger)).Then(&j)
go func() {
go wrappedJob.Run()
time.Sleep(time.Millisecond)
go wrappedJob.Run()
}()
// After 5ms, the first job is still in progress, and the second job was
// run but should be waiting for it to finish.
time.Sleep(5 * time.Millisecond)
started, done := j.Started(), j.Done()
if started != 1 || done != 0 {
t.Error("expected first job started, but not finished, got", started, done)
}
// Verify that the second job completes.
time.Sleep(25 * time.Millisecond)
started, done = j.Started(), j.Done()
if started != 2 || done != 2 {
t.Error("expected both jobs done, got", started, done)
}
})
}
func TestChainSkipIfStillRunning(t *testing.T) {
t.Run("runs immediately", func(t *testing.T) {
var j countJob
wrappedJob := NewChain(SkipIfStillRunning(DiscardLogger)).Then(&j)
go wrappedJob.Run()
time.Sleep(2 * time.Millisecond) // Give the job 2ms to complete.
if c := j.Done(); c != 1 {
t.Errorf("expected job run once, immediately, got %d", c)
}
})
t.Run("second run immediate if first done", func(t *testing.T) {
var j countJob
wrappedJob := NewChain(SkipIfStillRunning(DiscardLogger)).Then(&j)
go func() {
go wrappedJob.Run()
time.Sleep(time.Millisecond)
go wrappedJob.Run()
}()
time.Sleep(3 * time.Millisecond) // Give both jobs 3ms to complete.
if c := j.Done(); c != 2 {
t.Errorf("expected job run twice, immediately, got %d", c)
}
})
t.Run("second run skipped if first not done", func(t *testing.T) {
var j countJob
j.delay = 10 * time.Millisecond
wrappedJob := NewChain(SkipIfStillRunning(DiscardLogger)).Then(&j)
go func() {
go wrappedJob.Run()
time.Sleep(time.Millisecond)
go wrappedJob.Run()
}()
// After 5ms, the first job is still in progress, and the second job was
// already skipped.
time.Sleep(5 * time.Millisecond)
started, done := j.Started(), j.Done()
if started != 1 || done != 0 {
t.Error("expected first job started, but not finished, got", started, done)
}
// Verify that the first job completes and second does not run.
time.Sleep(25 * time.Millisecond)
started, done = j.Started(), j.Done()
if started != 1 || done != 1 {
t.Error("expected second job skipped, got", started, done)
}
})
t.Run("skip 10 jobs on rapid fire", func(t *testing.T) {
var j countJob
j.delay = 10 * time.Millisecond
wrappedJob := NewChain(SkipIfStillRunning(DiscardLogger)).Then(&j)
for i := 0; i < 11; i++ {
go wrappedJob.Run()
}
time.Sleep(200 * time.Millisecond)
done := j.Done()
if done != 1 {
t.Error("expected 1 jobs executed, 10 jobs dropped, got", done)
}
})
}

View File

@@ -1,27 +0,0 @@
package cron
import "time"
// ConstantDelaySchedule represents a simple recurring duty cycle, e.g. "Every 5 minutes".
// It does not support jobs more frequent than once a second.
type ConstantDelaySchedule struct {
Delay time.Duration
}
// Every returns a crontab Schedule that activates once every duration.
// Delays of less than a second are not supported (will round up to 1 second).
// Any fields less than a Second are truncated.
func Every(duration time.Duration) ConstantDelaySchedule {
if duration < time.Second {
duration = time.Second
}
return ConstantDelaySchedule{
Delay: duration - time.Duration(duration.Nanoseconds())%time.Second,
}
}
// Next returns the next time this should be run.
// This rounds so that the next activation time will be on the second.
func (schedule ConstantDelaySchedule) Next(t time.Time) time.Time {
return t.Add(schedule.Delay - time.Duration(t.Nanosecond())*time.Nanosecond)
}
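
A brief sketch of the truncation behavior documented above, assuming the v3 import path (Every and the Delay field are exported); the durations are illustrative.

package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3"
)

func main() {
	// Sub-second precision is dropped: 90.5s becomes a 1m30s delay.
	fmt.Println(cron.Every(90*time.Second + 500*time.Millisecond).Delay)
	// Anything under a second is rounded up to the 1s minimum.
	fmt.Println(cron.Every(300 * time.Millisecond).Delay)
}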

View File

@@ -1,54 +0,0 @@
package cron
import (
"testing"
"time"
)
func TestConstantDelayNext(t *testing.T) {
tests := []struct {
time string
delay time.Duration
expected string
}{
// Simple cases
{"Mon Jul 9 14:45 2012", 15*time.Minute + 50*time.Nanosecond, "Mon Jul 9 15:00 2012"},
{"Mon Jul 9 14:59 2012", 15 * time.Minute, "Mon Jul 9 15:14 2012"},
{"Mon Jul 9 14:59:59 2012", 15 * time.Minute, "Mon Jul 9 15:14:59 2012"},
// Wrap around hours
{"Mon Jul 9 15:45 2012", 35 * time.Minute, "Mon Jul 9 16:20 2012"},
// Wrap around days
{"Mon Jul 9 23:46 2012", 14 * time.Minute, "Tue Jul 10 00:00 2012"},
{"Mon Jul 9 23:45 2012", 35 * time.Minute, "Tue Jul 10 00:20 2012"},
{"Mon Jul 9 23:35:51 2012", 44*time.Minute + 24*time.Second, "Tue Jul 10 00:20:15 2012"},
{"Mon Jul 9 23:35:51 2012", 25*time.Hour + 44*time.Minute + 24*time.Second, "Thu Jul 11 01:20:15 2012"},
// Wrap around months
{"Mon Jul 9 23:35 2012", 91*24*time.Hour + 25*time.Minute, "Thu Oct 9 00:00 2012"},
// Wrap around minute, hour, day, month, and year
{"Mon Dec 31 23:59:45 2012", 15 * time.Second, "Tue Jan 1 00:00:00 2013"},
// Round to nearest second on the delay
{"Mon Jul 9 14:45 2012", 15*time.Minute + 50*time.Nanosecond, "Mon Jul 9 15:00 2012"},
// Round up to 1 second if the duration is less.
{"Mon Jul 9 14:45:00 2012", 15 * time.Millisecond, "Mon Jul 9 14:45:01 2012"},
// Round to nearest second when calculating the next time.
{"Mon Jul 9 14:45:00.005 2012", 15 * time.Minute, "Mon Jul 9 15:00 2012"},
// Round to nearest second for both.
{"Mon Jul 9 14:45:00.005 2012", 15*time.Minute + 50*time.Nanosecond, "Mon Jul 9 15:00 2012"},
}
for _, c := range tests {
actual := Every(c.delay).Next(getTime(c.time))
expected := getTime(c.expected)
if actual != expected {
t.Errorf("%s, \"%s\": (expected) %v != %v (actual)", c.time, c.delay, expected, actual)
}
}
}

View File

@@ -1,350 +0,0 @@
package cron
import (
"context"
"sort"
"sync"
"time"
)
// Cron keeps track of any number of entries, invoking the associated func as
// specified by the schedule. It may be started, stopped, and the entries may
// be inspected while running.
type Cron struct {
entries []*Entry
chain Chain
stop chan struct{}
add chan *Entry
remove chan EntryID
snapshot chan chan []Entry
running bool
logger Logger
runningMu sync.Mutex
location *time.Location
parser Parser
nextID EntryID
jobWaiter sync.WaitGroup
}
// Job is an interface for submitted cron jobs.
type Job interface {
Run()
}
// Schedule describes a job's duty cycle.
type Schedule interface {
// Next returns the next activation time, later than the given time.
// Next is invoked initially, and then each time the job is run.
Next(time.Time) time.Time
}
// EntryID identifies an entry within a Cron instance
type EntryID int
// Entry consists of a schedule and the func to execute on that schedule.
type Entry struct {
// ID is the cron-assigned ID of this entry, which may be used to look up a
// snapshot or remove it.
ID EntryID
// Schedule on which this job should be run.
Schedule Schedule
// Next time the job will run, or the zero time if Cron has not been
// started or this entry's schedule is unsatisfiable
Next time.Time
// Prev is the last time this job was run, or the zero time if never.
Prev time.Time
// WrappedJob is the thing to run when the Schedule is activated.
WrappedJob Job
// Job is the thing that was submitted to cron.
// It is kept around so that user code that needs to get at the job later,
// e.g. via Entries() can do so.
Job Job
}
// Valid returns true if this is not the zero entry.
func (e Entry) Valid() bool { return e.ID != 0 }
// byTime is a wrapper for sorting the entry array by time
// (with zero time at the end).
type byTime []*Entry
func (s byTime) Len() int { return len(s) }
func (s byTime) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s byTime) Less(i, j int) bool {
// Two zero times should return false.
// Otherwise, zero is "greater" than any other time.
// (To sort it at the end of the list.)
if s[i].Next.IsZero() {
return false
}
if s[j].Next.IsZero() {
return true
}
return s[i].Next.Before(s[j].Next)
}
// New returns a new Cron job runner, modified by the given options.
//
// Available Settings
//
// Time Zone
// Description: The time zone in which schedules are interpreted
// Default: time.Local
//
// Parser
// Description: Parser converts cron spec strings into cron.Schedules.
// Default: Accepts this spec: https://en.wikipedia.org/wiki/Cron
//
// Chain
// Description: Wrap submitted jobs to customize behavior.
// Default: A chain that recovers panics and logs them to stderr.
//
// See "cron.With*" to modify the default behavior.
func New(opts ...Option) *Cron {
c := &Cron{
entries: nil,
chain: NewChain(),
add: make(chan *Entry),
stop: make(chan struct{}),
snapshot: make(chan chan []Entry),
remove: make(chan EntryID),
running: false,
runningMu: sync.Mutex{},
logger: DefaultLogger,
location: time.Local,
parser: standardParser,
}
for _, opt := range opts {
opt(c)
}
return c
}
// FuncJob is a wrapper that turns a func() into a cron.Job
type FuncJob func()
func (f FuncJob) Run() { f() }
// AddFunc adds a func to the Cron to be run on the given schedule.
// The spec is parsed using the time zone of this Cron instance as the default.
// An opaque ID is returned that can be used to later remove it.
func (c *Cron) AddFunc(spec string, cmd func()) (EntryID, error) {
return c.AddJob(spec, FuncJob(cmd))
}
// AddJob adds a Job to the Cron to be run on the given schedule.
// The spec is parsed using the time zone of this Cron instance as the default.
// An opaque ID is returned that can be used to later remove it.
func (c *Cron) AddJob(spec string, cmd Job) (EntryID, error) {
schedule, err := c.parser.Parse(spec)
if err != nil {
return 0, err
}
return c.Schedule(schedule, cmd), nil
}
// Schedule adds a Job to the Cron to be run on the given schedule.
// The job is wrapped with the configured Chain.
func (c *Cron) Schedule(schedule Schedule, cmd Job) EntryID {
c.runningMu.Lock()
defer c.runningMu.Unlock()
c.nextID++
entry := &Entry{
ID: c.nextID,
Schedule: schedule,
WrappedJob: c.chain.Then(cmd),
Job: cmd,
}
if !c.running {
c.entries = append(c.entries, entry)
} else {
c.add <- entry
}
return entry.ID
}
// Entries returns a snapshot of the cron entries.
func (c *Cron) Entries() []Entry {
c.runningMu.Lock()
defer c.runningMu.Unlock()
if c.running {
replyChan := make(chan []Entry, 1)
c.snapshot <- replyChan
return <-replyChan
}
return c.entrySnapshot()
}
// Location gets the time zone location
func (c *Cron) Location() *time.Location {
return c.location
}
// Entry returns a snapshot of the given entry, or nil if it couldn't be found.
func (c *Cron) Entry(id EntryID) Entry {
for _, entry := range c.Entries() {
if id == entry.ID {
return entry
}
}
return Entry{}
}
// Remove an entry from being run in the future.
func (c *Cron) Remove(id EntryID) {
c.runningMu.Lock()
defer c.runningMu.Unlock()
if c.running {
c.remove <- id
} else {
c.removeEntry(id)
}
}
// Start the cron scheduler in its own goroutine, or no-op if already started.
func (c *Cron) Start() {
c.runningMu.Lock()
defer c.runningMu.Unlock()
if c.running {
return
}
c.running = true
go c.run()
}
// Run the cron scheduler, or no-op if already running.
func (c *Cron) Run() {
c.runningMu.Lock()
if c.running {
c.runningMu.Unlock()
return
}
c.running = true
c.runningMu.Unlock()
c.run()
}
// run the scheduler. This is private just due to the need to synchronize
// access to the 'running' state variable.
func (c *Cron) run() {
c.logger.Info("start")
// Figure out the next activation times for each entry.
now := c.now()
for _, entry := range c.entries {
entry.Next = entry.Schedule.Next(now)
c.logger.Info("schedule", "now", now, "entry", entry.ID, "next", entry.Next)
}
for {
// Determine the next entry to run.
sort.Sort(byTime(c.entries))
var timer *time.Timer
if len(c.entries) == 0 || c.entries[0].Next.IsZero() {
// If there are no entries yet, just sleep - it still handles new entries
// and stop requests.
timer = time.NewTimer(100000 * time.Hour)
} else {
timer = time.NewTimer(c.entries[0].Next.Sub(now))
}
for {
select {
case now = <-timer.C:
now = now.In(c.location)
c.logger.Info("wake", "now", now)
// Run every entry whose next time was less than now
for _, e := range c.entries {
if e.Next.After(now) || e.Next.IsZero() {
break
}
c.startJob(e.WrappedJob)
e.Prev = e.Next
e.Next = e.Schedule.Next(now)
c.logger.Info("run", "now", now, "entry", e.ID, "next", e.Next)
}
case newEntry := <-c.add:
timer.Stop()
now = c.now()
newEntry.Next = newEntry.Schedule.Next(now)
c.entries = append(c.entries, newEntry)
c.logger.Info("added", "now", now, "entry", newEntry.ID, "next", newEntry.Next)
case replyChan := <-c.snapshot:
replyChan <- c.entrySnapshot()
continue
case <-c.stop:
timer.Stop()
c.logger.Info("stop")
return
case id := <-c.remove:
timer.Stop()
now = c.now()
c.removeEntry(id)
c.logger.Info("removed", "entry", id)
}
break
}
}
}
// startJob runs the given job in a new goroutine.
func (c *Cron) startJob(j Job) {
c.jobWaiter.Add(1)
go func() {
defer c.jobWaiter.Done()
j.Run()
}()
}
// now returns current time in c location
func (c *Cron) now() time.Time {
return time.Now().In(c.location)
}
// Stop stops the cron scheduler if it is running; otherwise it does nothing.
// A context is returned so the caller can wait for running jobs to complete.
func (c *Cron) Stop() context.Context {
c.runningMu.Lock()
defer c.runningMu.Unlock()
if c.running {
c.stop <- struct{}{}
c.running = false
}
ctx, cancel := context.WithCancel(context.Background())
go func() {
c.jobWaiter.Wait()
cancel()
}()
return ctx
}
// entrySnapshot returns a copy of the current cron entry list.
func (c *Cron) entrySnapshot() []Entry {
var entries = make([]Entry, len(c.entries))
for i, e := range c.entries {
entries[i] = *e
}
return entries
}
func (c *Cron) removeEntry(id EntryID) {
var entries []*Entry
for _, e := range c.entries {
if e.ID != id {
entries = append(entries, e)
}
}
c.entries = entries
}
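
For orientation, a minimal end-to-end sketch of the Cron lifecycle defined in this file (New, AddFunc, Start, Entries, Remove, Stop); the import path is assumed:
package main

import (
	"fmt"
	"log"

	"github.com/robfig/cron/v3" // assumed import path
)

func main() {
	c := cron.New() // standard 5-field parser, local time zone
	id, err := c.AddFunc("*/5 * * * *", func() { fmt.Println("every five minutes") })
	if err != nil {
		log.Fatal(err)
	}
	c.Start()

	// Entries returns a snapshot that is safe to inspect while the scheduler runs.
	for _, e := range c.Entries() {
		fmt.Println("entry", e.ID, "next run at", e.Next)
	}

	c.Remove(id) // the entry will not fire again

	// Stop returns a context that is done once any running jobs have finished.
	<-c.Stop().Done()
}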

View File

@@ -1,699 +0,0 @@
package cron
import (
"bytes"
"fmt"
"log"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// Many tests schedule a job for every second, and then wait at most a second
// for it to run. This amount is just slightly larger than 1 second to
// compensate for a few milliseconds of runtime.
const OneSecond = 1*time.Second + 50*time.Millisecond
type syncWriter struct {
wr bytes.Buffer
m sync.Mutex
}
func (sw *syncWriter) Write(data []byte) (n int, err error) {
sw.m.Lock()
n, err = sw.wr.Write(data)
sw.m.Unlock()
return
}
func (sw *syncWriter) String() string {
sw.m.Lock()
defer sw.m.Unlock()
return sw.wr.String()
}
func newBufLogger(sw *syncWriter) Logger {
return PrintfLogger(log.New(sw, "", log.LstdFlags))
}
func TestFuncPanicRecovery(t *testing.T) {
var buf syncWriter
cron := New(WithParser(secondParser),
WithChain(Recover(newBufLogger(&buf))))
cron.Start()
defer cron.Stop()
_, _ = cron.AddFunc("* * * * * ?", func() {
panic("YOLO")
})
<-time.After(OneSecond)
if !strings.Contains(buf.String(), "YOLO") {
t.Error("expected a panic to be logged, got none")
}
}
type DummyJob struct{}
func (d DummyJob) Run() {
panic("YOLO")
}
func TestJobPanicRecovery(t *testing.T) {
var job DummyJob
var buf syncWriter
cron := New(WithParser(secondParser),
WithChain(Recover(newBufLogger(&buf))))
cron.Start()
defer cron.Stop()
_, _ = cron.AddJob("* * * * * ?", job)
select {
case <-time.After(OneSecond):
if !strings.Contains(buf.String(), "YOLO") {
t.Error("expected a panic to be logged, got none")
}
return
}
}
// Start and stop cron with no entries.
func TestNoEntries(t *testing.T) {
cron := newWithSeconds()
cron.Start()
select {
case <-time.After(OneSecond):
t.Fatal("expected cron will be stopped immediately")
case <-stop(cron):
}
}
// Start, stop, then add an entry. Verify entry doesn't run.
func TestStopCausesJobsToNotRun(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
cron := newWithSeconds()
cron.Start()
cron.Stop()
_, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() })
select {
case <-time.After(OneSecond):
// No job ran!
case <-wait(wg):
t.Fatal("expected stopped cron does not run any job")
}
}
// Add a job, start cron, expect it runs.
func TestAddBeforeRunning(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
cron := newWithSeconds()
_, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() })
cron.Start()
defer cron.Stop()
// Give cron up to a second to run our job (which is always activated).
select {
case <-time.After(OneSecond):
t.Fatal("expected job runs")
case <-wait(wg):
}
}
// Start cron, add a job, expect it runs.
func TestAddWhileRunning(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
cron := newWithSeconds()
cron.Start()
defer cron.Stop()
_, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() })
select {
case <-time.After(OneSecond):
t.Fatal("expected job runs")
case <-wait(wg):
}
}
// Test for #34. Adding a job after calling start results in multiple job invocations
func TestAddWhileRunningWithDelay(t *testing.T) {
cron := newWithSeconds()
cron.Start()
defer cron.Stop()
time.Sleep(5 * time.Second)
var calls int64
_, _ = cron.AddFunc("* * * * * *", func() { atomic.AddInt64(&calls, 1) })
<-time.After(OneSecond)
if atomic.LoadInt64(&calls) != 1 {
t.Errorf("called %d times, expected 1\n", calls)
}
}
// Add a job, remove a job, start cron, expect nothing runs.
func TestRemoveBeforeRunning(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
cron := newWithSeconds()
id, _ := cron.AddFunc("* * * * * ?", func() { wg.Done() })
cron.Remove(id)
cron.Start()
defer cron.Stop()
select {
case <-time.After(OneSecond):
// Success, shouldn't run
case <-wait(wg):
t.FailNow()
}
}
// Start cron, add a job, remove it, expect it doesn't run.
func TestRemoveWhileRunning(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
cron := newWithSeconds()
cron.Start()
defer cron.Stop()
id, _ := cron.AddFunc("* * * * * ?", func() { wg.Done() })
cron.Remove(id)
select {
case <-time.After(OneSecond):
case <-wait(wg):
t.FailNow()
}
}
// Test timing with Entries.
func TestSnapshotEntries(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
cron := New()
_, _ = cron.AddFunc("@every 2s", func() { wg.Done() })
cron.Start()
defer cron.Stop()
// Cron should fire in 2 seconds. After 1 second, call Entries.
select {
case <-time.After(OneSecond):
cron.Entries()
}
// Even though Entries was called, the cron should fire at the 2 second mark.
select {
case <-time.After(OneSecond):
t.Error("expected job runs at 2 second mark")
case <-wait(wg):
}
}
// Test that the entries are correctly sorted.
// Add a bunch of long-in-the-future entries, and an immediate entry, and ensure
// that the immediate entry runs immediately.
// Also: Test that multiple jobs run in the same instant.
func TestMultipleEntries(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(2)
cron := newWithSeconds()
_, _ = cron.AddFunc("0 0 0 1 1 ?", func() {})
_, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() })
id1, _ := cron.AddFunc("* * * * * ?", func() { t.Fatal() })
id2, _ := cron.AddFunc("* * * * * ?", func() { t.Fatal() })
_, _ = cron.AddFunc("0 0 0 31 12 ?", func() {})
_, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() })
cron.Remove(id1)
cron.Start()
cron.Remove(id2)
defer cron.Stop()
select {
case <-time.After(OneSecond):
t.Error("expected job run in proper order")
case <-wait(wg):
}
}
// Test running the same job twice.
func TestRunningJobTwice(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(2)
cron := newWithSeconds()
_, _ = cron.AddFunc("0 0 0 1 1 ?", func() {})
_, _ = cron.AddFunc("0 0 0 31 12 ?", func() {})
_, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() })
cron.Start()
defer cron.Stop()
select {
case <-time.After(2 * OneSecond):
t.Error("expected job fires 2 times")
case <-wait(wg):
}
}
func TestRunningMultipleSchedules(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(2)
cron := newWithSeconds()
_, _ = cron.AddFunc("0 0 0 1 1 ?", func() {})
_, _ = cron.AddFunc("0 0 0 31 12 ?", func() {})
_, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() })
cron.Schedule(Every(time.Minute), FuncJob(func() {}))
cron.Schedule(Every(time.Second), FuncJob(func() { wg.Done() }))
cron.Schedule(Every(time.Hour), FuncJob(func() {}))
cron.Start()
defer cron.Stop()
select {
case <-time.After(2 * OneSecond):
t.Error("expected job fires 2 times")
case <-wait(wg):
}
}
// Test that the cron is run in the local time zone (as opposed to UTC).
func TestLocalTimezone(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(2)
now := time.Now()
// FIX: Issue #205
// This calculation doesn't work in seconds 58 or 59.
// Take the easy way out and sleep.
if now.Second() >= 58 {
time.Sleep(2 * time.Second)
now = time.Now()
}
spec := fmt.Sprintf("%d,%d %d %d %d %d ?",
now.Second()+1, now.Second()+2, now.Minute(), now.Hour(), now.Day(), now.Month())
cron := newWithSeconds()
_, _ = cron.AddFunc(spec, func() { wg.Done() })
cron.Start()
defer cron.Stop()
select {
case <-time.After(OneSecond * 2):
t.Error("expected job fires 2 times")
case <-wait(wg):
}
}
// Test that the cron is run in the given time zone (as opposed to local).
func TestNonLocalTimezone(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(2)
loc, err := time.LoadLocation("Atlantic/Cape_Verde")
if err != nil {
fmt.Printf("Failed to load time zone Atlantic/Cape_Verde: %+v", err)
t.Fail()
}
now := time.Now().In(loc)
// FIX: Issue #205
// This calculation doesn't work in seconds 58 or 59.
// Take the easy way out and sleep.
if now.Second() >= 58 {
time.Sleep(2 * time.Second)
now = time.Now().In(loc)
}
spec := fmt.Sprintf("%d,%d %d %d %d %d ?",
now.Second()+1, now.Second()+2, now.Minute(), now.Hour(), now.Day(), now.Month())
cron := New(WithLocation(loc), WithParser(secondParser))
_, _ = cron.AddFunc(spec, func() { wg.Done() })
cron.Start()
defer cron.Stop()
select {
case <-time.After(OneSecond * 2):
t.Error("expected job fires 2 times")
case <-wait(wg):
}
}
// Test that calling stop before start silently returns without
// blocking the stop channel.
func TestStopWithoutStart(t *testing.T) {
cron := New()
cron.Stop()
}
type testJob struct {
wg *sync.WaitGroup
name string
}
func (t testJob) Run() {
t.wg.Done()
}
// Test that adding an invalid job spec returns an error
func TestInvalidJobSpec(t *testing.T) {
cron := New()
_, err := cron.AddJob("this will not parse", nil)
if err == nil {
t.Errorf("expected an error with invalid spec, got nil")
}
}
// Test blocking run method behaves as Start()
func TestBlockingRun(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
cron := newWithSeconds()
_, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() })
var unblockChan = make(chan struct{})
go func() {
cron.Run()
close(unblockChan)
}()
defer cron.Stop()
select {
case <-time.After(OneSecond):
t.Error("expected job fires")
case <-unblockChan:
t.Error("expected that Run() blocks")
case <-wait(wg):
}
}
// Test that double-running is a no-op
func TestStartNoop(t *testing.T) {
var tickChan = make(chan struct{}, 2)
cron := newWithSeconds()
_, _ = cron.AddFunc("* * * * * ?", func() {
tickChan <- struct{}{}
})
cron.Start()
defer cron.Stop()
// Wait for the first firing to ensure the runner is going
<-tickChan
cron.Start()
<-tickChan
// Fail if this job fires again in a short period, indicating a double-run
select {
case <-time.After(time.Millisecond):
case <-tickChan:
t.Error("expected job fires exactly twice")
}
}
// Simple test using Runnables.
func TestJob(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
cron := newWithSeconds()
_, _ = cron.AddJob("0 0 0 30 Feb ?", testJob{wg, "job0"})
_, _ = cron.AddJob("0 0 0 1 1 ?", testJob{wg, "job1"})
job2, _ := cron.AddJob("* * * * * ?", testJob{wg, "job2"})
_, _ = cron.AddJob("1 0 0 1 1 ?", testJob{wg, "job3"})
cron.Schedule(Every(5*time.Second+5*time.Nanosecond), testJob{wg, "job4"})
job5 := cron.Schedule(Every(5*time.Minute), testJob{wg, "job5"})
// Test getting an Entry pre-Start.
if actualName := cron.Entry(job2).Job.(testJob).name; actualName != "job2" {
t.Error("wrong job retrieved:", actualName)
}
if actualName := cron.Entry(job5).Job.(testJob).name; actualName != "job5" {
t.Error("wrong job retrieved:", actualName)
}
cron.Start()
defer cron.Stop()
select {
case <-time.After(OneSecond):
t.FailNow()
case <-wait(wg):
}
// Ensure the entries are in the right order.
expecteds := []string{"job2", "job4", "job5", "job1", "job3", "job0"}
var actuals = make([]string, 0, len(cron.Entries()))
for _, entry := range cron.Entries() {
actuals = append(actuals, entry.Job.(testJob).name)
}
for i, expected := range expecteds {
if actuals[i] != expected {
t.Fatalf("Jobs not in the right order. (expected) %s != %s (actual)", expecteds, actuals)
}
}
// Test getting Entries.
if actualName := cron.Entry(job2).Job.(testJob).name; actualName != "job2" {
t.Error("wrong job retrieved:", actualName)
}
if actualName := cron.Entry(job5).Job.(testJob).name; actualName != "job5" {
t.Error("wrong job retrieved:", actualName)
}
}
// Issue #206
// Ensure that the next run of a job after removing an entry is accurate.
func TestScheduleAfterRemoval(t *testing.T) {
var wg1 sync.WaitGroup
var wg2 sync.WaitGroup
wg1.Add(1)
wg2.Add(1)
// The first time this job is run, set a timer and remove the other job
// 750ms later. Correct behavior would be to still run the job again in
// 250ms, but the bug would cause it to run instead 1s later.
var calls int
var mu sync.Mutex
cron := newWithSeconds()
hourJob := cron.Schedule(Every(time.Hour), FuncJob(func() {}))
cron.Schedule(Every(time.Second), FuncJob(func() {
mu.Lock()
defer mu.Unlock()
switch calls {
case 0:
wg1.Done()
calls++
case 1:
time.Sleep(750 * time.Millisecond)
cron.Remove(hourJob)
calls++
case 2:
calls++
wg2.Done()
case 3:
panic("unexpected 3rd call")
}
}))
cron.Start()
defer cron.Stop()
// The first run might happen anywhere from 0 to 1s in, since the schedule
// rounds to the second. Wait for the first run to true up.
wg1.Wait()
select {
case <-time.After(2 * OneSecond):
t.Error("expected job fires 2 times")
case <-wait(&wg2):
}
}
type ZeroSchedule struct{}
func (*ZeroSchedule) Next(time.Time) time.Time {
return time.Time{}
}
// Tests that job without time does not run
func TestJobWithZeroTimeDoesNotRun(t *testing.T) {
cron := newWithSeconds()
var calls int64
_, _ = cron.AddFunc("* * * * * *", func() { atomic.AddInt64(&calls, 1) })
cron.Schedule(new(ZeroSchedule), FuncJob(func() { t.Error("expected zero task will not run") }))
cron.Start()
defer cron.Stop()
<-time.After(OneSecond)
if atomic.LoadInt64(&calls) != 1 {
t.Errorf("called %d times, expected 1\n", calls)
}
}
func TestStopAndWait(t *testing.T) {
t.Run("nothing running, returns immediately", func(t *testing.T) {
cron := newWithSeconds()
cron.Start()
ctx := cron.Stop()
select {
case <-ctx.Done():
case <-time.After(time.Millisecond):
t.Error("context was not done immediately")
}
})
t.Run("repeated calls to Stop", func(t *testing.T) {
cron := newWithSeconds()
cron.Start()
_ = cron.Stop()
time.Sleep(time.Millisecond)
ctx := cron.Stop()
select {
case <-ctx.Done():
case <-time.After(time.Millisecond):
t.Error("context was not done immediately")
}
})
t.Run("a couple fast jobs added, still returns immediately", func(t *testing.T) {
cron := newWithSeconds()
_, _ = cron.AddFunc("* * * * * *", func() {})
cron.Start()
_, _ = cron.AddFunc("* * * * * *", func() {})
_, _ = cron.AddFunc("* * * * * *", func() {})
_, _ = cron.AddFunc("* * * * * *", func() {})
time.Sleep(time.Second)
ctx := cron.Stop()
select {
case <-ctx.Done():
case <-time.After(time.Millisecond):
t.Error("context was not done immediately")
}
})
t.Run("a couple fast jobs and a slow job added, waits for slow job", func(t *testing.T) {
cron := newWithSeconds()
_, _ = cron.AddFunc("* * * * * *", func() {})
cron.Start()
_, _ = cron.AddFunc("* * * * * *", func() { time.Sleep(2 * time.Second) })
_, _ = cron.AddFunc("* * * * * *", func() {})
time.Sleep(time.Second)
ctx := cron.Stop()
// Verify that it is not done for at least 750ms
select {
case <-ctx.Done():
t.Error("context was done too quickly immediately")
case <-time.After(750 * time.Millisecond):
// expected, because the job sleeping for 2 seconds is still running
}
// Verify that it IS done within the next 1500ms
select {
case <-ctx.Done():
// expected
case <-time.After(1500 * time.Millisecond):
t.Error("context not done after job should have completed")
}
})
t.Run("repeated calls to stop, waiting for completion and after", func(t *testing.T) {
cron := newWithSeconds()
_, _ = cron.AddFunc("* * * * * *", func() {})
_, _ = cron.AddFunc("* * * * * *", func() { time.Sleep(2 * time.Second) })
cron.Start()
_, _ = cron.AddFunc("* * * * * *", func() {})
time.Sleep(time.Second)
ctx := cron.Stop()
ctx2 := cron.Stop()
// Verify that it is not done for at least 1500ms
select {
case <-ctx.Done():
t.Error("context was done too quickly immediately")
case <-ctx2.Done():
t.Error("context2 was done too quickly immediately")
case <-time.After(1500 * time.Millisecond):
// expected, because the job sleeping for 2 seconds is still running
}
// Verify that it IS done in the next 1s (giving 500ms buffer)
select {
case <-ctx.Done():
// expected
case <-time.After(time.Second):
t.Error("context not done after job should have completed")
}
// Verify that ctx2 is also done.
select {
case <-ctx2.Done():
// expected
case <-time.After(time.Millisecond):
t.Error("context2 not done even though context1 is")
}
// Verify that a new context retrieved from stop is immediately done.
ctx3 := cron.Stop()
select {
case <-ctx3.Done():
// expected
case <-time.After(time.Millisecond):
t.Error("context not done even when cron Stop is completed")
}
})
}
func TestMultiThreadedStartAndStop(t *testing.T) {
cron := New()
go cron.Run()
time.Sleep(2 * time.Millisecond)
cron.Stop()
}
func wait(wg *sync.WaitGroup) chan bool {
ch := make(chan bool)
go func() {
wg.Wait()
ch <- true
}()
return ch
}
func stop(cron *Cron) chan bool {
ch := make(chan bool)
go func() {
cron.Stop()
ch <- true
}()
return ch
}
// newWithSeconds returns a Cron with the seconds field enabled.
func newWithSeconds() *Cron {
return New(WithParser(secondParser), WithChain())
}
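
The panic-recovery tests above combine the Recover wrapper with a buffered logger; here is a compact sketch of the same setup outside a test, with the import path assumed:
package main

import (
	"log"
	"os"
	"time"

	"github.com/robfig/cron/v3" // assumed import path
)

func main() {
	logger := cron.PrintfLogger(log.New(os.Stderr, "cron: ", log.LstdFlags))

	// Recover turns a panicking job into a logged error instead of crashing the process.
	c := cron.New(
		cron.WithSeconds(),
		cron.WithChain(cron.Recover(logger)),
	)
	_, _ = c.AddFunc("* * * * * ?", func() { panic("boom") })
	c.Start()
	time.Sleep(2 * time.Second) // the panic is logged each second; the scheduler keeps running
	<-c.Stop().Done()
}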

View File

@@ -1,212 +0,0 @@
/*
Package cron implements a cron spec parser and job runner.
Usage
Callers may register Funcs to be invoked on a given schedule. Cron will run
them in their own goroutines.
c := cron.New()
c.AddFunc("30 * * * *", func() { fmt.Println("Every hour on the half hour") })
c.AddFunc("30 3-6,20-23 * * *", func() { fmt.Println(".. in the range 3-6am, 8-11pm") })
c.AddFunc("CRON_TZ=Asia/Tokyo 30 04 * * * *", func() { fmt.Println("Runs at 04:30 Tokyo time every day") })
c.AddFunc("@hourly", func() { fmt.Println("Every hour, starting an hour from now") })
c.AddFunc("@every 1h30m", func() { fmt.Println("Every hour thirty, starting an hour thirty from now") })
c.Start()
..
// Funcs are invoked in their own goroutine, asynchronously.
...
// Funcs may also be added to a running Cron
c.AddFunc("@daily", func() { fmt.Println("Every day") })
..
// Inspect the cron job entries' next and previous run times.
inspect(c.Entries())
..
c.Stop() // Stop the scheduler (does not stop any jobs already running).
CRON Expression Format
A cron expression represents a set of times, using 5 space-separated fields.
Field name | Mandatory? | Allowed values | Allowed special characters
---------- | ---------- | -------------- | --------------------------
Minutes | Yes | 0-59 | * / , -
Hours | Yes | 0-23 | * / , -
Day of month | Yes | 1-31 | * / , - ?
Month | Yes | 1-12 or JAN-DEC | * / , -
Day of week | Yes | 0-6 or SUN-SAT | * / , - ?
Month and Day-of-week field values are case insensitive. "SUN", "Sun", and
"sun" are equally accepted.
The specific interpretation of the format is based on the Cron Wikipedia page:
https://en.wikipedia.org/wiki/Cron
Alternative Formats
Alternative Cron expression formats support other fields like seconds. You can
implement that by creating a custom Parser as follows.
cron.New(
cron.WithParser(
cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor))
The most popular alternative Cron expression format is Quartz:
http://www.quartz-scheduler.org/documentation/quartz-2.x/tutorials/crontrigger.html
Special Characters
Asterisk ( * )
The asterisk indicates that the cron expression will match for all values of the
field; e.g., using an asterisk in the 4th field (month) would indicate every
month.
Slash ( / )
Slashes are used to describe increments of ranges. For example 3-59/15 in the
1st field (minutes) would indicate the 3rd minute of the hour and every 15
minutes thereafter. The form "*\/..." is equivalent to the form "first-last/...",
that is, an increment over the largest possible range of the field. The form
"N/..." is accepted as meaning "N-MAX/...", that is, starting at N, use the
increment until the end of that specific range. It does not wrap around.
Comma ( , )
Commas are used to separate items of a list. For example, using "MON,WED,FRI" in
the 5th field (day of week) would mean Mondays, Wednesdays and Fridays.
Hyphen ( - )
Hyphens are used to define ranges. For example, 9-17 would indicate every
hour between 9am and 5pm inclusive.
Question mark ( ? )
Question mark may be used instead of '*' for leaving either day-of-month or
day-of-week blank.
Predefined schedules
You may use one of several pre-defined schedules in place of a cron expression.
Entry | Description | Equivalent To
----- | ----------- | -------------
@yearly (or @annually) | Run once a year, midnight, Jan. 1st | 0 0 1 1 *
@monthly | Run once a month, midnight, first of month | 0 0 1 * *
@weekly | Run once a week, midnight between Sat/Sun | 0 0 * * 0
@daily (or @midnight) | Run once a day, midnight | 0 0 * * *
@hourly | Run once an hour, beginning of hour | 0 * * * *
Intervals
You may also schedule a job to execute at fixed intervals, starting at the time it's added
or cron is run. This is supported by formatting the cron spec like this:
@every <duration>
where "duration" is a string accepted by time.ParseDuration
(http://golang.org/pkg/time/#ParseDuration).
For example, "@every 1h30m10s" would indicate a schedule that activates after
1 hour, 30 minutes, 10 seconds, and then every interval after that.
Note: The interval does not take the job runtime into account. For example,
if a job takes 3 minutes to run, and it is scheduled to run every 5 minutes,
it will have only 2 minutes of idle time between each run.
Time zones
By default, all interpretation and scheduling is done in the machine's local
time zone (time.Local). You can specify a different time zone on construction:
cron.New(
cron.WithLocation(time.UTC))
Individual cron schedules may also override the time zone they are to be
interpreted in by providing an additional space-separated field at the beginning
of the cron spec, of the form "CRON_TZ=Asia/Tokyo".
For example:
# Runs at 6am in time.Local
cron.New().AddFunc("0 6 * * ?", ...)
# Runs at 6am in America/New_York
nyc, _ := time.LoadLocation("America/New_York")
c := cron.New(cron.WithLocation(nyc))
c.AddFunc("0 6 * * ?", ...)
# Runs at 6am in Asia/Tokyo
cron.New().AddFunc("CRON_TZ=Asia/Tokyo 0 6 * * ?", ...)
# Runs at 6am in Asia/Tokyo
c := cron.New(cron.WithLocation(nyc))
c.AddFunc("CRON_TZ=Asia/Tokyo 0 6 * * ?", ...)
The prefix "TZ=(TIME ZONE)" is also supported for legacy compatibility.
Be aware that jobs scheduled during daylight-savings leap-ahead transitions will
not be run!
Job Wrappers / Chain
A Cron runner may be configured with a chain of job wrappers to add
cross-cutting functionality to all submitted jobs. For example, they may be used
to achieve the following effects:
- Recover any panics from jobs (activated by default)
- Delay a job's execution if the previous run hasn't completed yet
- Skip a job's execution if the previous run hasn't completed yet
- Log each job's invocations
Install wrappers for all jobs added to a cron using the `cron.WithChain` option:
cron.New(cron.WithChain(
cron.SkipIfStillRunning(logger),
))
Install wrappers for individual jobs by explicitly wrapping them:
job = cron.NewChain(
cron.SkipIfStillRunning(logger),
).Then(job)
Thread safety
Since the Cron service runs concurrently with the calling code, some amount of
care must be taken to ensure proper synchronization.
All cron methods are designed to be correctly synchronized as long as the caller
ensures that invocations have a clear happens-before ordering between them.
Logging
Cron defines a Logger interface that is a subset of the one defined in
github.com/go-logr/logr. It has two logging levels (Info and Error), and
parameters are key/value pairs. This makes it possible for cron logging to plug
into structured logging systems. An adapter, [Verbose]PrintfLogger, is provided
to wrap the standard library *log.Logger.
For additional insight into Cron operations, verbose logging may be activated
which will record job runs, scheduling decisions, and added or removed jobs.
Activate it with a one-off logger as follows:
cron.New(
cron.WithLogger(
cron.VerbosePrintfLogger(log.New(os.Stdout, "cron: ", log.LstdFlags))))
Implementation
Cron entries are stored in an array, sorted by their next activation time. Cron
sleeps until the next job is due to be run.
Upon waking:
- it runs each entry that is active on that second
- it calculates the next run times for the jobs that were run
- it re-sorts the array of entries by next activation time.
- it goes to sleep until the soonest job.
*/
package cron
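
The time-zone rules described above can be exercised directly through ParseStandard as well as through a Cron instance; a brief sketch (import path assumed):
package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3" // assumed import path
)

func main() {
	// A schedule can carry its own time zone via the CRON_TZ prefix.
	sched, err := cron.ParseStandard("CRON_TZ=Asia/Tokyo 30 4 * * *")
	if err != nil {
		panic(err)
	}
	fmt.Println("next 04:30 Tokyo time:", sched.Next(time.Now()))

	// Alternatively, give every schedule in a Cron a default location.
	nyc, err := time.LoadLocation("America/New_York")
	if err != nil {
		panic(err)
	}
	c := cron.New(cron.WithLocation(nyc))
	_, _ = c.AddFunc("0 6 * * *", func() { fmt.Println("6am in New York") })
	c.Start()
	<-c.Stop().Done()
}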

View File

@@ -1,86 +0,0 @@
package cron
import (
"io/ioutil"
"log"
"os"
"strings"
"time"
)
// DefaultLogger is used by Cron if none is specified.
var DefaultLogger = PrintfLogger(log.New(os.Stdout, "cron: ", log.LstdFlags))
// DiscardLogger can be used by callers to discard all log messages.
var DiscardLogger = PrintfLogger(log.New(ioutil.Discard, "", 0))
// Logger is the interface used in this package for logging, so that any backend
// can be plugged in. It is a subset of the github.com/go-logr/logr interface.
type Logger interface {
// Info logs routine messages about cron's operation.
Info(msg string, keysAndValues ...interface{})
// Error logs an error condition.
Error(err error, msg string, keysAndValues ...interface{})
}
// PrintfLogger wraps a Printf-based logger (such as the standard library "log")
// into an implementation of the Logger interface which logs errors only.
func PrintfLogger(l interface{ Printf(string, ...interface{}) }) Logger {
return printfLogger{l, false}
}
// VerbosePrintfLogger wraps a Printf-based logger (such as the standard library
// "log") into an implementation of the Logger interface which logs everything.
func VerbosePrintfLogger(l interface{ Printf(string, ...interface{}) }) Logger {
return printfLogger{l, true}
}
type printfLogger struct {
logger interface{ Printf(string, ...interface{}) }
logInfo bool
}
func (pl printfLogger) Info(msg string, keysAndValues ...interface{}) {
if pl.logInfo {
keysAndValues = formatTimes(keysAndValues)
pl.logger.Printf(
formatString(len(keysAndValues)),
append([]interface{}{msg}, keysAndValues...)...)
}
}
func (pl printfLogger) Error(err error, msg string, keysAndValues ...interface{}) {
keysAndValues = formatTimes(keysAndValues)
pl.logger.Printf(
formatString(len(keysAndValues)+2),
append([]interface{}{msg, "error", err}, keysAndValues...)...)
}
// formatString returns a logfmt-like format string for the number of
// key/values.
func formatString(numKeysAndValues int) string {
var sb strings.Builder
sb.WriteString("%s")
if numKeysAndValues > 0 {
sb.WriteString(", ")
}
for i := 0; i < numKeysAndValues/2; i++ {
if i > 0 {
sb.WriteString(", ")
}
sb.WriteString("%v=%v")
}
return sb.String()
}
// formatTimes formats any time.Time values as RFC3339.
func formatTimes(keysAndValues []interface{}) []interface{} {
var formattedArgs []interface{}
for _, arg := range keysAndValues {
if t, ok := arg.(time.Time); ok {
arg = t.Format(time.RFC3339)
}
formattedArgs = append(formattedArgs, arg)
}
return formattedArgs
}
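
Any backend with the two methods above can be plugged in via WithLogger; a minimal adapter sketch follows (import path assumed, and the kvLogger type is hypothetical, not part of the package):
package main

import (
	"log"
	"os"
	"time"

	"github.com/robfig/cron/v3" // assumed import path
)

// kvLogger is a hypothetical adapter that satisfies the Logger interface by
// printing the message followed by its key/value pairs.
type kvLogger struct{ l *log.Logger }

func (k kvLogger) Info(msg string, kv ...interface{}) {
	k.l.Println(append([]interface{}{"INFO", msg}, kv...)...)
}

func (k kvLogger) Error(err error, msg string, kv ...interface{}) {
	k.l.Println(append([]interface{}{"ERROR", msg, "error", err}, kv...)...)
}

func main() {
	c := cron.New(cron.WithLogger(kvLogger{log.New(os.Stdout, "cron: ", log.LstdFlags)}))
	_, _ = c.AddFunc("@every 1s", func() {})
	c.Start()
	time.Sleep(3 * time.Second)
	<-c.Stop().Done()
}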

View File

@@ -1,45 +0,0 @@
package cron
import (
"time"
)
// Option represents a modification to the default behavior of a Cron.
type Option func(*Cron)
// WithLocation overrides the timezone of the cron instance.
func WithLocation(loc *time.Location) Option {
return func(c *Cron) {
c.location = loc
}
}
// WithSeconds overrides the parser used for interpreting job schedules to
// include a seconds field as the first one.
func WithSeconds() Option {
return WithParser(NewParser(
Second | Minute | Hour | Dom | Month | Dow | Descriptor,
))
}
// WithParser overrides the parser used for interpreting job schedules.
func WithParser(p Parser) Option {
return func(c *Cron) {
c.parser = p
}
}
// WithChain specifies Job wrappers to apply to all jobs added to this cron.
// Refer to the Chain* functions in this package for provided wrappers.
func WithChain(wrappers ...JobWrapper) Option {
return func(c *Cron) {
c.chain = NewChain(wrappers...)
}
}
// WithLogger uses the provided logger.
func WithLogger(logger Logger) Option {
return func(c *Cron) {
c.logger = logger
}
}
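
These options compose freely; a sketch combining several of them (import path assumed):
package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3" // assumed import path
)

func main() {
	// Seconds-granularity specs, UTC interpretation, and a chain that skips
	// overlapping runs, applied to every job added to this Cron.
	c := cron.New(
		cron.WithSeconds(),
		cron.WithLocation(time.UTC),
		cron.WithChain(cron.SkipIfStillRunning(cron.DiscardLogger)),
	)
	_, _ = c.AddFunc("*/10 * * * * *", func() { fmt.Println("every 10 seconds, in UTC") })
	c.Start()
	time.Sleep(time.Minute)
	<-c.Stop().Done()
}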

View File

@@ -1,42 +0,0 @@
package cron
import (
"log"
"strings"
"testing"
"time"
)
func TestWithLocation(t *testing.T) {
c := New(WithLocation(time.UTC))
if c.location != time.UTC {
t.Errorf("expected UTC, got %v", c.location)
}
}
func TestWithParser(t *testing.T) {
var parser = NewParser(Dow)
c := New(WithParser(parser))
if c.parser != parser {
t.Error("expected provided parser")
}
}
func TestWithVerboseLogger(t *testing.T) {
var buf syncWriter
var logger = log.New(&buf, "", log.LstdFlags)
c := New(WithLogger(VerbosePrintfLogger(logger)))
if c.logger.(printfLogger).logger != logger {
t.Error("expected provided logger")
}
_, _ = c.AddFunc("@every 1s", func() {})
c.Start()
time.Sleep(OneSecond)
c.Stop()
out := buf.String()
if !strings.Contains(out, "schedule,") ||
!strings.Contains(out, "run,") {
t.Error("expected to see some actions, got:", out)
}
}

View File

@@ -1,434 +0,0 @@
package cron
import (
"fmt"
"math"
"strconv"
"strings"
"time"
)
// Configuration options for creating a parser. Most options specify which
// fields should be included, while others enable features. If a field is not
// included the parser will assume a default value. These options do not change
the order fields are parsed in.
type ParseOption int
const (
Second ParseOption = 1 << iota // Seconds field, default 0
SecondOptional // Optional seconds field, default 0
Minute // Minutes field, default 0
Hour // Hours field, default 0
Dom // Day of month field, default *
Month // Month field, default *
Dow // Day of week field, default *
DowOptional // Optional day of week field, default *
Descriptor // Allow descriptors such as @monthly, @weekly, etc.
)
var places = []ParseOption{
Second,
Minute,
Hour,
Dom,
Month,
Dow,
}
var defaults = []string{
"0",
"0",
"0",
"*",
"*",
"*",
}
// A custom Parser that can be configured.
type Parser struct {
options ParseOption
}
// NewParser creates a Parser with custom options.
//
// It panics if more than one Optional is given, since it would be impossible to
// correctly infer which optional is provided or missing in general.
//
// Examples
//
// // Standard parser without descriptors
// specParser := NewParser(Minute | Hour | Dom | Month | Dow)
// sched, err := specParser.Parse("0 0 15 */3 *")
//
// // Same as above, just excludes time fields
// subsParser := NewParser(Dom | Month | Dow)
// sched, err := specParser.Parse("15 */3 *")
//
// // Same as above, just makes Dow optional
// subsParser := NewParser(Dom | Month | DowOptional)
// sched, err := specParser.Parse("15 */3")
//
func NewParser(options ParseOption) Parser {
optionals := 0
if options&DowOptional > 0 {
optionals++
}
if options&SecondOptional > 0 {
optionals++
}
if optionals > 1 {
panic("multiple optionals may not be configured")
}
return Parser{options}
}
// Parse returns a new crontab schedule representing the given spec.
// It returns a descriptive error if the spec is not valid.
// It accepts crontab specs and features configured by NewParser.
func (p Parser) Parse(spec string) (Schedule, error) {
if len(spec) == 0 {
return nil, fmt.Errorf("empty spec string")
}
// Extract timezone if present
var loc = time.Local
if strings.HasPrefix(spec, "TZ=") || strings.HasPrefix(spec, "CRON_TZ=") {
var err error
i := strings.Index(spec, " ")
eq := strings.Index(spec, "=")
if loc, err = time.LoadLocation(spec[eq+1 : i]); err != nil {
return nil, fmt.Errorf("provided bad location %s: %v", spec[eq+1:i], err)
}
spec = strings.TrimSpace(spec[i:])
}
// Handle named schedules (descriptors), if configured
if strings.HasPrefix(spec, "@") {
if p.options&Descriptor == 0 {
return nil, fmt.Errorf("parser does not accept descriptors: %v", spec)
}
return parseDescriptor(spec, loc)
}
// Split on whitespace.
fields := strings.Fields(spec)
// Validate & fill in any omitted or optional fields
var err error
fields, err = normalizeFields(fields, p.options)
if err != nil {
return nil, err
}
field := func(field string, r bounds) uint64 {
if err != nil {
return 0
}
var bits uint64
bits, err = getField(field, r)
return bits
}
var (
second = field(fields[0], seconds)
minute = field(fields[1], minutes)
hour = field(fields[2], hours)
dayofmonth = field(fields[3], dom)
month = field(fields[4], months)
dayofweek = field(fields[5], dow)
)
if err != nil {
return nil, err
}
return &SpecSchedule{
Second: second,
Minute: minute,
Hour: hour,
Dom: dayofmonth,
Month: month,
Dow: dayofweek,
Location: loc,
}, nil
}
// normalizeFields takes a subset of the time fields and returns the full set
// with defaults (zeroes) populated for unset fields.
//
// As part of performing this function, it also validates that the provided
// fields are compatible with the configured options.
func normalizeFields(fields []string, options ParseOption) ([]string, error) {
// Validate optionals & add their field to options
optionals := 0
if options&SecondOptional > 0 {
options |= Second
optionals++
}
if options&DowOptional > 0 {
options |= Dow
optionals++
}
if optionals > 1 {
return nil, fmt.Errorf("multiple optionals may not be configured")
}
// Figure out how many fields we need
max := 0
for _, place := range places {
if options&place > 0 {
max++
}
}
min := max - optionals
// Validate number of fields
if count := len(fields); count < min || count > max {
if min == max {
return nil, fmt.Errorf("expected exactly %d fields, found %d: %s", min, count, fields)
}
return nil, fmt.Errorf("expected %d to %d fields, found %d: %s", min, max, count, fields)
}
// Populate the optional field if not provided
if min < max && len(fields) == min {
switch {
case options&DowOptional > 0:
fields = append(fields, defaults[5]) // TODO: improve access to default
case options&SecondOptional > 0:
fields = append([]string{defaults[0]}, fields...)
default:
return nil, fmt.Errorf("unknown optional field")
}
}
// Populate all fields not part of options with their defaults
n := 0
expandedFields := make([]string, len(places))
copy(expandedFields, defaults)
for i, place := range places {
if options&place > 0 {
expandedFields[i] = fields[n]
n++
}
}
return expandedFields, nil
}
var standardParser = NewParser(
Minute | Hour | Dom | Month | Dow | Descriptor,
)
// ParseStandard returns a new crontab schedule representing the given
// standardSpec (https://en.wikipedia.org/wiki/Cron). It requires 5 entries
// representing: minute, hour, day of month, month and day of week, in that
// order. It returns a descriptive error if the spec is not valid.
//
// It accepts
// - Standard crontab specs, e.g. "* * * * ?"
// - Descriptors, e.g. "@midnight", "@every 1h30m"
func ParseStandard(standardSpec string) (Schedule, error) {
return standardParser.Parse(standardSpec)
}
// getField returns a uint64 with bits set for all of the times that the field
// represents, or an error if the field value cannot be parsed. A "field" is a
// comma-separated list of "ranges".
func getField(field string, r bounds) (uint64, error) {
var bits uint64
ranges := strings.FieldsFunc(field, func(r rune) bool { return r == ',' })
for _, expr := range ranges {
bit, err := getRange(expr, r)
if err != nil {
return bits, err
}
bits |= bit
}
return bits, nil
}
// getRange returns the bits indicated by the given expression:
// number | number "-" number [ "/" number ]
// or error parsing range.
func getRange(expr string, r bounds) (uint64, error) {
var (
start, end, step uint
rangeAndStep = strings.Split(expr, "/")
lowAndHigh = strings.Split(rangeAndStep[0], "-")
singleDigit = len(lowAndHigh) == 1
err error
)
var extra uint64
if lowAndHigh[0] == "*" || lowAndHigh[0] == "?" {
start = r.min
end = r.max
extra = starBit
} else {
start, err = parseIntOrName(lowAndHigh[0], r.names)
if err != nil {
return 0, err
}
switch len(lowAndHigh) {
case 1:
end = start
case 2:
end, err = parseIntOrName(lowAndHigh[1], r.names)
if err != nil {
return 0, err
}
default:
return 0, fmt.Errorf("too many hyphens: %s", expr)
}
}
switch len(rangeAndStep) {
case 1:
step = 1
case 2:
step, err = mustParseInt(rangeAndStep[1])
if err != nil {
return 0, err
}
// Special handling: "N/step" means "N-max/step".
if singleDigit {
end = r.max
}
if step > 1 {
extra = 0
}
default:
return 0, fmt.Errorf("too many slashes: %s", expr)
}
if start < r.min {
return 0, fmt.Errorf("beginning of range (%d) below minimum (%d): %s", start, r.min, expr)
}
if end > r.max {
return 0, fmt.Errorf("end of range (%d) above maximum (%d): %s", end, r.max, expr)
}
if start > end {
return 0, fmt.Errorf("beginning of range (%d) beyond end of range (%d): %s", start, end, expr)
}
if step == 0 {
return 0, fmt.Errorf("step of range should be a positive number: %s", expr)
}
return getBits(start, end, step) | extra, nil
}
// parseIntOrName returns the (possibly-named) integer contained in expr.
func parseIntOrName(expr string, names map[string]uint) (uint, error) {
if names != nil {
if namedInt, ok := names[strings.ToLower(expr)]; ok {
return namedInt, nil
}
}
return mustParseInt(expr)
}
// mustParseInt parses the given expression as an int or returns an error.
func mustParseInt(expr string) (uint, error) {
num, err := strconv.Atoi(expr)
if err != nil {
return 0, fmt.Errorf("failed to parse int from %s: %s", expr, err)
}
if num < 0 {
return 0, fmt.Errorf("negative number (%d) not allowed: %s", num, expr)
}
return uint(num), nil
}
// getBits sets all bits in the range [min, max], modulo the given step size.
func getBits(min, max, step uint) uint64 {
var bits uint64
// If step is 1, use shifts.
if step == 1 {
return ^(math.MaxUint64 << (max + 1)) & (math.MaxUint64 << min)
}
// Else, use a simple loop.
for i := min; i <= max; i += step {
bits |= 1 << i
}
return bits
}
// all returns all bits within the given bounds. (plus the star bit)
func all(r bounds) uint64 {
return getBits(r.min, r.max, 1) | starBit
}
// parseDescriptor returns a predefined schedule for the expression, or error if none matches.
func parseDescriptor(descriptor string, loc *time.Location) (Schedule, error) {
switch descriptor {
case "@yearly", "@annually":
return &SpecSchedule{
Second: 1 << seconds.min,
Minute: 1 << minutes.min,
Hour: 1 << hours.min,
Dom: 1 << dom.min,
Month: 1 << months.min,
Dow: all(dow),
Location: loc,
}, nil
case "@monthly":
return &SpecSchedule{
Second: 1 << seconds.min,
Minute: 1 << minutes.min,
Hour: 1 << hours.min,
Dom: 1 << dom.min,
Month: all(months),
Dow: all(dow),
Location: loc,
}, nil
case "@weekly":
return &SpecSchedule{
Second: 1 << seconds.min,
Minute: 1 << minutes.min,
Hour: 1 << hours.min,
Dom: all(dom),
Month: all(months),
Dow: 1 << dow.min,
Location: loc,
}, nil
case "@daily", "@midnight":
return &SpecSchedule{
Second: 1 << seconds.min,
Minute: 1 << minutes.min,
Hour: 1 << hours.min,
Dom: all(dom),
Month: all(months),
Dow: all(dow),
Location: loc,
}, nil
case "@hourly":
return &SpecSchedule{
Second: 1 << seconds.min,
Minute: 1 << minutes.min,
Hour: all(hours),
Dom: all(dom),
Month: all(months),
Dow: all(dow),
Location: loc,
}, nil
}
const every = "@every "
if strings.HasPrefix(descriptor, every) {
duration, err := time.ParseDuration(descriptor[len(every):])
if err != nil {
return nil, fmt.Errorf("failed to parse duration %s: %s", descriptor, err)
}
return Every(duration), nil
}
return nil, fmt.Errorf("unrecognized descriptor: %s", descriptor)
}
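
A sketch of building a custom parser with an optional seconds field, mirroring the NewParser documentation above (import path assumed):
package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3" // assumed import path
)

func main() {
	// Accepts 5- or 6-field specs (optional leading seconds) plus descriptors.
	p := cron.NewParser(
		cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor,
	)

	for _, spec := range []string{
		"30 8 * * 1-5",    // 5 fields: 08:30 on weekdays, seconds default to 0
		"15 30 8 * * 1-5", // 6 fields: 08:30:15 on weekdays
		"@hourly",         // descriptor
	} {
		sched, err := p.Parse(spec)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%-16s -> next: %v\n", spec, sched.Next(time.Now()))
	}
}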

View File

@@ -1,384 +0,0 @@
package cron
import (
"reflect"
"strings"
"testing"
"time"
)
var secondParser = NewParser(Second | Minute | Hour | Dom | Month | DowOptional | Descriptor)
func TestRange(t *testing.T) {
zero := uint64(0)
ranges := []struct {
expr string
min, max uint
expected uint64
err string
}{
{"5", 0, 7, 1 << 5, ""},
{"0", 0, 7, 1 << 0, ""},
{"7", 0, 7, 1 << 7, ""},
{"5-5", 0, 7, 1 << 5, ""},
{"5-6", 0, 7, 1<<5 | 1<<6, ""},
{"5-7", 0, 7, 1<<5 | 1<<6 | 1<<7, ""},
{"5-6/2", 0, 7, 1 << 5, ""},
{"5-7/2", 0, 7, 1<<5 | 1<<7, ""},
{"5-7/1", 0, 7, 1<<5 | 1<<6 | 1<<7, ""},
{"*", 1, 3, 1<<1 | 1<<2 | 1<<3 | starBit, ""},
{"*/2", 1, 3, 1<<1 | 1<<3, ""},
{"5--5", 0, 0, zero, "too many hyphens"},
{"jan-x", 0, 0, zero, "failed to parse int from"},
{"2-x", 1, 5, zero, "failed to parse int from"},
{"*/-12", 0, 0, zero, "negative number"},
{"*//2", 0, 0, zero, "too many slashes"},
{"1", 3, 5, zero, "below minimum"},
{"6", 3, 5, zero, "above maximum"},
{"5-3", 3, 5, zero, "beyond end of range"},
{"*/0", 0, 0, zero, "should be a positive number"},
}
for _, c := range ranges {
actual, err := getRange(c.expr, bounds{c.min, c.max, nil})
if len(c.err) != 0 && (err == nil || !strings.Contains(err.Error(), c.err)) {
t.Errorf("%s => expected %v, got %v", c.expr, c.err, err)
}
if len(c.err) == 0 && err != nil {
t.Errorf("%s => unexpected error %v", c.expr, err)
}
if actual != c.expected {
t.Errorf("%s => expected %d, got %d", c.expr, c.expected, actual)
}
}
}
func TestField(t *testing.T) {
fields := []struct {
expr string
min, max uint
expected uint64
}{
{"5", 1, 7, 1 << 5},
{"5,6", 1, 7, 1<<5 | 1<<6},
{"5,6,7", 1, 7, 1<<5 | 1<<6 | 1<<7},
{"1,5-7/2,3", 1, 7, 1<<1 | 1<<5 | 1<<7 | 1<<3},
}
for _, c := range fields {
actual, _ := getField(c.expr, bounds{c.min, c.max, nil})
if actual != c.expected {
t.Errorf("%s => expected %d, got %d", c.expr, c.expected, actual)
}
}
}
func TestAll(t *testing.T) {
allBits := []struct {
r bounds
expected uint64
}{
{minutes, 0xfffffffffffffff}, // 0-59: 60 ones
{hours, 0xffffff}, // 0-23: 24 ones
{dom, 0xfffffffe}, // 1-31: 31 ones, 1 zero
{months, 0x1ffe}, // 1-12: 12 ones, 1 zero
{dow, 0x7f}, // 0-6: 7 ones
}
for _, c := range allBits {
actual := all(c.r) // all() adds the starBit, so compensate for that..
if c.expected|starBit != actual {
t.Errorf("%d-%d/%d => expected %b, got %b",
c.r.min, c.r.max, 1, c.expected|starBit, actual)
}
}
}
func TestBits(t *testing.T) {
bits := []struct {
min, max, step uint
expected uint64
}{
{0, 0, 1, 0x1},
{1, 1, 1, 0x2},
{1, 5, 2, 0x2a}, // 101010
{1, 4, 2, 0xa}, // 1010
}
for _, c := range bits {
actual := getBits(c.min, c.max, c.step)
if c.expected != actual {
t.Errorf("%d-%d/%d => expected %b, got %b",
c.min, c.max, c.step, c.expected, actual)
}
}
}
func TestParseScheduleErrors(t *testing.T) {
var tests = []struct{ expr, err string }{
{"* 5 j * * *", "failed to parse int from"},
{"@every Xm", "failed to parse duration"},
{"@unrecognized", "unrecognized descriptor"},
{"* * * *", "expected 5 to 6 fields"},
{"", "empty spec string"},
}
for _, c := range tests {
actual, err := secondParser.Parse(c.expr)
if err == nil || !strings.Contains(err.Error(), c.err) {
t.Errorf("%s => expected %v, got %v", c.expr, c.err, err)
}
if actual != nil {
t.Errorf("expected nil schedule on error, got %v", actual)
}
}
}
func TestParseSchedule(t *testing.T) {
tokyo, _ := time.LoadLocation("Asia/Tokyo")
entries := []struct {
parser Parser
expr string
expected Schedule
}{
{secondParser, "0 5 * * * *", every5min(time.Local)},
{standardParser, "5 * * * *", every5min(time.Local)},
{secondParser, "CRON_TZ=UTC 0 5 * * * *", every5min(time.UTC)},
{standardParser, "CRON_TZ=UTC 5 * * * *", every5min(time.UTC)},
{secondParser, "CRON_TZ=Asia/Tokyo 0 5 * * * *", every5min(tokyo)},
{secondParser, "@every 5m", ConstantDelaySchedule{5 * time.Minute}},
{secondParser, "@midnight", midnight(time.Local)},
{secondParser, "TZ=UTC @midnight", midnight(time.UTC)},
{secondParser, "TZ=Asia/Tokyo @midnight", midnight(tokyo)},
{secondParser, "@yearly", annual(time.Local)},
{secondParser, "@annually", annual(time.Local)},
{
parser: secondParser,
expr: "* 5 * * * *",
expected: &SpecSchedule{
Second: all(seconds),
Minute: 1 << 5,
Hour: all(hours),
Dom: all(dom),
Month: all(months),
Dow: all(dow),
Location: time.Local,
},
},
}
for _, c := range entries {
actual, err := c.parser.Parse(c.expr)
if err != nil {
t.Errorf("%s => unexpected error %v", c.expr, err)
}
if !reflect.DeepEqual(actual, c.expected) {
t.Errorf("%s => expected %b, got %b", c.expr, c.expected, actual)
}
}
}
func TestOptionalSecondSchedule(t *testing.T) {
parser := NewParser(SecondOptional | Minute | Hour | Dom | Month | Dow | Descriptor)
entries := []struct {
expr string
expected Schedule
}{
{"0 5 * * * *", every5min(time.Local)},
{"5 5 * * * *", every5min5s(time.Local)},
{"5 * * * *", every5min(time.Local)},
}
for _, c := range entries {
actual, err := parser.Parse(c.expr)
if err != nil {
t.Errorf("%s => unexpected error %v", c.expr, err)
}
if !reflect.DeepEqual(actual, c.expected) {
t.Errorf("%s => expected %b, got %b", c.expr, c.expected, actual)
}
}
}
func TestNormalizeFields(t *testing.T) {
tests := []struct {
name string
input []string
options ParseOption
expected []string
}{
{
"AllFields_NoOptional",
[]string{"0", "5", "*", "*", "*", "*"},
Second | Minute | Hour | Dom | Month | Dow | Descriptor,
[]string{"0", "5", "*", "*", "*", "*"},
},
{
"AllFields_SecondOptional_Provided",
[]string{"0", "5", "*", "*", "*", "*"},
SecondOptional | Minute | Hour | Dom | Month | Dow | Descriptor,
[]string{"0", "5", "*", "*", "*", "*"},
},
{
"AllFields_SecondOptional_NotProvided",
[]string{"5", "*", "*", "*", "*"},
SecondOptional | Minute | Hour | Dom | Month | Dow | Descriptor,
[]string{"0", "5", "*", "*", "*", "*"},
},
{
"SubsetFields_NoOptional",
[]string{"5", "15", "*"},
Hour | Dom | Month,
[]string{"0", "0", "5", "15", "*", "*"},
},
{
"SubsetFields_DowOptional_Provided",
[]string{"5", "15", "*", "4"},
Hour | Dom | Month | DowOptional,
[]string{"0", "0", "5", "15", "*", "4"},
},
{
"SubsetFields_DowOptional_NotProvided",
[]string{"5", "15", "*"},
Hour | Dom | Month | DowOptional,
[]string{"0", "0", "5", "15", "*", "*"},
},
{
"SubsetFields_SecondOptional_NotProvided",
[]string{"5", "15", "*"},
SecondOptional | Hour | Dom | Month,
[]string{"0", "0", "5", "15", "*", "*"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
actual, err := normalizeFields(test.input, test.options)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(actual, test.expected) {
t.Errorf("expected %v, got %v", test.expected, actual)
}
})
}
}
func TestNormalizeFields_Errors(t *testing.T) {
tests := []struct {
name string
input []string
options ParseOption
err string
}{
{
"TwoOptionals",
[]string{"0", "5", "*", "*", "*", "*"},
SecondOptional | Minute | Hour | Dom | Month | DowOptional,
"",
},
{
"TooManyFields",
[]string{"0", "5", "*", "*"},
SecondOptional | Minute | Hour,
"",
},
{
"NoFields",
[]string{},
SecondOptional | Minute | Hour,
"",
},
{
"TooFewFields",
[]string{"*"},
SecondOptional | Minute | Hour,
"",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
actual, err := normalizeFields(test.input, test.options)
if err == nil {
t.Errorf("expected an error, got none. results: %v", actual)
return
}
if !strings.Contains(err.Error(), test.err) {
t.Errorf("expected error %q, got %q", test.err, err.Error())
}
})
}
}
func TestStandardSpecSchedule(t *testing.T) {
entries := []struct {
expr string
expected Schedule
err string
}{
{
expr: "5 * * * *",
expected: &SpecSchedule{1 << seconds.min, 1 << 5, all(hours), all(dom), all(months), all(dow), time.Local},
},
{
expr: "@every 5m",
expected: ConstantDelaySchedule{time.Duration(5) * time.Minute},
},
{
expr: "5 j * * *",
err: "failed to parse int from",
},
{
expr: "* * * *",
err: "expected exactly 5 fields",
},
}
for _, c := range entries {
actual, err := ParseStandard(c.expr)
if len(c.err) != 0 && (err == nil || !strings.Contains(err.Error(), c.err)) {
t.Errorf("%s => expected %v, got %v", c.expr, c.err, err)
}
if len(c.err) == 0 && err != nil {
t.Errorf("%s => unexpected error %v", c.expr, err)
}
if !reflect.DeepEqual(actual, c.expected) {
t.Errorf("%s => expected %b, got %b", c.expr, c.expected, actual)
}
}
}
func TestNoDescriptorParser(t *testing.T) {
parser := NewParser(Minute | Hour)
_, err := parser.Parse("@every 1m")
if err == nil {
t.Error("expected an error, got none")
}
}
func every5min(loc *time.Location) *SpecSchedule {
return &SpecSchedule{1 << 0, 1 << 5, all(hours), all(dom), all(months), all(dow), loc}
}
func every5min5s(loc *time.Location) *SpecSchedule {
return &SpecSchedule{1 << 5, 1 << 5, all(hours), all(dom), all(months), all(dow), loc}
}
func midnight(loc *time.Location) *SpecSchedule {
return &SpecSchedule{1, 1, 1, all(dom), all(months), all(dow), loc}
}
func annual(loc *time.Location) *SpecSchedule {
return &SpecSchedule{
Second: 1 << seconds.min,
Minute: 1 << minutes.min,
Hour: 1 << hours.min,
Dom: 1 << dom.min,
Month: 1 << months.min,
Dow: all(dow),
Location: loc,
}
}
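As a rough usage sketch of the optional-field behaviour exercised by the tests above (assuming the upstream github.com/robfig/cron/v3 import path, since this appears to be a vendored copy of that package):
package main

import (
    "fmt"
    "time"

    "github.com/robfig/cron/v3"
)

func main() {
    // With SecondOptional, "5 * * * *" is treated as a minute-level spec and the
    // seconds field defaults to 0, while "30 5 * * * *" would pin seconds explicitly.
    p := cron.NewParser(cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow)
    sched, err := p.Parse("5 * * * *")
    if err != nil {
        panic(err)
    }
    fmt.Println(sched.Next(time.Now())) // next hh:05:00
}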

View File

@@ -1,188 +0,0 @@
package cron
import "time"
// SpecSchedule specifies a duty cycle (to the second granularity), based on a
// traditional crontab specification. It is computed initially and stored as bit sets.
type SpecSchedule struct {
Second, Minute, Hour, Dom, Month, Dow uint64
// Override location for this schedule.
Location *time.Location
}
// bounds provides a range of acceptable values (plus a map of name to value).
type bounds struct {
min, max uint
names map[string]uint
}
// The bounds for each field.
var (
seconds = bounds{0, 59, nil}
minutes = bounds{0, 59, nil}
hours = bounds{0, 23, nil}
dom = bounds{1, 31, nil}
months = bounds{1, 12, map[string]uint{
"jan": 1,
"feb": 2,
"mar": 3,
"apr": 4,
"may": 5,
"jun": 6,
"jul": 7,
"aug": 8,
"sep": 9,
"oct": 10,
"nov": 11,
"dec": 12,
}}
dow = bounds{0, 6, map[string]uint{
"sun": 0,
"mon": 1,
"tue": 2,
"wed": 3,
"thu": 4,
"fri": 5,
"sat": 6,
}}
)
const (
// Set the top bit if a star was included in the expression.
starBit = 1 << 63
)
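// Illustrative note (not part of the original source): under this encoding a
// concrete value sets a single bit -- minute "5" is stored as 1<<5 -- while "*"
// sets every bit within the field's bounds plus starBit, so later logic (see
// dayMatches below) can tell a wildcard apart from an explicitly listed range.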
// Next returns the next time this schedule is activated, greater than the given
// time. If no time can be found to satisfy the schedule, return the zero time.
func (s *SpecSchedule) Next(t time.Time) time.Time {
// General approach
//
// For Month, Day, Hour, Minute, Second:
// Check if the time value matches. If yes, continue to the next field.
// If the field doesn't match the schedule, then increment the field until it matches.
// While incrementing the field, a wrap-around brings it back to the beginning
// of the field list (since it is necessary to re-verify previous field
// values)
// Convert the given time into the schedule's timezone, if one is specified.
// Save the original timezone so we can convert back after we find a time.
// Note that schedules without a time zone specified (time.Local) are treated
// as local to the time provided.
origLocation := t.Location()
loc := s.Location
if loc == time.Local {
loc = t.Location()
}
if s.Location != time.Local {
t = t.In(s.Location)
}
// Start at the earliest possible time (the upcoming second).
t = t.Add(1*time.Second - time.Duration(t.Nanosecond())*time.Nanosecond)
// This flag indicates whether a field has been incremented.
added := false
// If no time is found within five years, return zero.
yearLimit := t.Year() + 5
WRAP:
if t.Year() > yearLimit {
return time.Time{}
}
// Find the first applicable month.
// If it's this month, then do nothing.
for 1<<uint(t.Month())&s.Month == 0 {
// If we have to add a month, reset the other parts to 0.
if !added {
added = true
// Otherwise, set the date at the beginning (since the current time is irrelevant).
t = time.Date(t.Year(), t.Month(), 1, 0, 0, 0, 0, loc)
}
t = t.AddDate(0, 1, 0)
// Wrapped around.
if t.Month() == time.January {
goto WRAP
}
}
// Now get a day in that month.
//
// NOTE: This causes issues for daylight savings regimes where midnight does
// not exist. For example: Sao Paulo has DST that transforms midnight on
// 11/3 into 1am. Handle that by noticing when the Hour ends up != 0.
for !dayMatches(s, t) {
if !added {
added = true
t = time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, loc)
}
t = t.AddDate(0, 0, 1)
// Notice if the hour is no longer midnight due to DST.
// Add an hour if it's 23, subtract an hour if it's 1.
if t.Hour() != 0 {
if t.Hour() > 12 {
t = t.Add(time.Duration(24-t.Hour()) * time.Hour)
} else {
t = t.Add(time.Duration(-t.Hour()) * time.Hour)
}
}
if t.Day() == 1 {
goto WRAP
}
}
for 1<<uint(t.Hour())&s.Hour == 0 {
if !added {
added = true
t = time.Date(t.Year(), t.Month(), t.Day(), t.Hour(), 0, 0, 0, loc)
}
t = t.Add(1 * time.Hour)
if t.Hour() == 0 {
goto WRAP
}
}
for 1<<uint(t.Minute())&s.Minute == 0 {
if !added {
added = true
t = t.Truncate(time.Minute)
}
t = t.Add(1 * time.Minute)
if t.Minute() == 0 {
goto WRAP
}
}
for 1<<uint(t.Second())&s.Second == 0 {
if !added {
added = true
t = t.Truncate(time.Second)
}
t = t.Add(1 * time.Second)
if t.Second() == 0 {
goto WRAP
}
}
return t.In(origLocation)
}
// dayMatches returns true if the schedule's day-of-week and day-of-month
// restrictions are satisfied by the given time.
func dayMatches(s *SpecSchedule, t time.Time) bool {
var (
domMatch = 1<<uint(t.Day())&s.Dom > 0
dowMatch = 1<<uint(t.Weekday())&s.Dow > 0
)
if s.Dom&starBit > 0 || s.Dow&starBit > 0 {
return domMatch && dowMatch
}
return domMatch || dowMatch
}
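A small standalone illustration of the day-of-month / day-of-week rule that dayMatches implements (assuming the upstream github.com/robfig/cron/v3 import path; the dates are picked arbitrarily):
package main

import (
    "fmt"
    "time"

    "github.com/robfig/cron/v3"
)

func main() {
    both, _ := cron.ParseStandard("0 0 1 * MON")  // both day fields restricted: the 1st of the month OR any Monday
    domOnly, _ := cron.ParseStandard("0 0 1 * *") // day-of-week starred: only the 1st of the month matches

    from := time.Date(2012, time.July, 10, 12, 0, 0, 0, time.UTC) // a Tuesday
    fmt.Println(both.Next(from))    // 2012-07-16 00:00:00 +0000 UTC -- the next Monday wins
    fmt.Println(domOnly.Next(from)) // 2012-08-01 00:00:00 +0000 UTC -- waits for the 1st
}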

View File

@@ -1,300 +0,0 @@
package cron
import (
"strings"
"testing"
"time"
)
func TestActivation(t *testing.T) {
tests := []struct {
time, spec string
expected bool
}{
// Every fifteen minutes.
{"Mon Jul 9 15:00 2012", "0/15 * * * *", true},
{"Mon Jul 9 15:45 2012", "0/15 * * * *", true},
{"Mon Jul 9 15:40 2012", "0/15 * * * *", false},
// Every fifteen minutes, starting at 5 minutes.
{"Mon Jul 9 15:05 2012", "5/15 * * * *", true},
{"Mon Jul 9 15:20 2012", "5/15 * * * *", true},
{"Mon Jul 9 15:50 2012", "5/15 * * * *", true},
// Named months
{"Sun Jul 15 15:00 2012", "0/15 * * Jul *", true},
{"Sun Jul 15 15:00 2012", "0/15 * * Jun *", false},
// Everything set.
{"Sun Jul 15 08:30 2012", "30 08 ? Jul Sun", true},
{"Sun Jul 15 08:30 2012", "30 08 15 Jul ?", true},
{"Mon Jul 16 08:30 2012", "30 08 ? Jul Sun", false},
{"Mon Jul 16 08:30 2012", "30 08 15 Jul ?", false},
// Predefined schedules
{"Mon Jul 9 15:00 2012", "@hourly", true},
{"Mon Jul 9 15:04 2012", "@hourly", false},
{"Mon Jul 9 15:00 2012", "@daily", false},
{"Mon Jul 9 00:00 2012", "@daily", true},
{"Mon Jul 9 00:00 2012", "@weekly", false},
{"Sun Jul 8 00:00 2012", "@weekly", true},
{"Sun Jul 8 01:00 2012", "@weekly", false},
{"Sun Jul 8 00:00 2012", "@monthly", false},
{"Sun Jul 1 00:00 2012", "@monthly", true},
// Test interaction of DOW and DOM.
// If both are restricted, then only one needs to match.
{"Sun Jul 15 00:00 2012", "* * 1,15 * Sun", true},
{"Fri Jun 15 00:00 2012", "* * 1,15 * Sun", true},
{"Wed Aug 1 00:00 2012", "* * 1,15 * Sun", true},
{"Sun Jul 15 00:00 2012", "* * */10 * Sun", true}, // verifies #70
// However, if one has a star, then both need to match.
{"Sun Jul 15 00:00 2012", "* * * * Mon", false},
{"Mon Jul 9 00:00 2012", "* * 1,15 * *", false},
{"Sun Jul 15 00:00 2012", "* * 1,15 * *", true},
{"Sun Jul 15 00:00 2012", "* * */2 * Sun", true},
}
for _, test := range tests {
sched, err := ParseStandard(test.spec)
if err != nil {
t.Error(err)
continue
}
actual := sched.Next(getTime(test.time).Add(-1 * time.Second))
expected := getTime(test.time)
if test.expected && expected != actual || !test.expected && expected == actual {
t.Errorf("Fail evaluating %s on %s: (expected) %s != %s (actual)",
test.spec, test.time, expected, actual)
}
}
}
func TestNext(t *testing.T) {
runs := []struct {
time, spec string
expected string
}{
// Simple cases
{"Mon Jul 9 14:45 2012", "0 0/15 * * * *", "Mon Jul 9 15:00 2012"},
{"Mon Jul 9 14:59 2012", "0 0/15 * * * *", "Mon Jul 9 15:00 2012"},
{"Mon Jul 9 14:59:59 2012", "0 0/15 * * * *", "Mon Jul 9 15:00 2012"},
// Wrap around hours
{"Mon Jul 9 15:45 2012", "0 20-35/15 * * * *", "Mon Jul 9 16:20 2012"},
// Wrap around days
{"Mon Jul 9 23:46 2012", "0 */15 * * * *", "Tue Jul 10 00:00 2012"},
{"Mon Jul 9 23:45 2012", "0 20-35/15 * * * *", "Tue Jul 10 00:20 2012"},
{"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 * * * *", "Tue Jul 10 00:20:15 2012"},
{"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 1/2 * * *", "Tue Jul 10 01:20:15 2012"},
{"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 10-12 * * *", "Tue Jul 10 10:20:15 2012"},
{"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 1/2 */2 * *", "Thu Jul 11 01:20:15 2012"},
{"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 * 9-20 * *", "Wed Jul 10 00:20:15 2012"},
{"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 * 9-20 Jul *", "Wed Jul 10 00:20:15 2012"},
// Wrap around months
{"Mon Jul 9 23:35 2012", "0 0 0 9 Apr-Oct ?", "Thu Aug 9 00:00 2012"},
{"Mon Jul 9 23:35 2012", "0 0 0 */5 Apr,Aug,Oct Mon", "Tue Aug 1 00:00 2012"},
{"Mon Jul 9 23:35 2012", "0 0 0 */5 Oct Mon", "Mon Oct 1 00:00 2012"},
// Wrap around years
{"Mon Jul 9 23:35 2012", "0 0 0 * Feb Mon", "Mon Feb 4 00:00 2013"},
{"Mon Jul 9 23:35 2012", "0 0 0 * Feb Mon/2", "Fri Feb 1 00:00 2013"},
// Wrap around minute, hour, day, month, and year
{"Mon Dec 31 23:59:45 2012", "0 * * * * *", "Tue Jan 1 00:00:00 2013"},
// Leap year
{"Mon Jul 9 23:35 2012", "0 0 0 29 Feb ?", "Mon Feb 29 00:00 2016"},
// Daylight savings time 2am EST (-5) -> 3am EDT (-4)
{"2012-03-11T00:00:00-0500", "TZ=America/New_York 0 30 2 11 Mar ?", "2013-03-11T02:30:00-0400"},
// hourly job
{"2012-03-11T00:00:00-0500", "TZ=America/New_York 0 0 * * * ?", "2012-03-11T01:00:00-0500"},
{"2012-03-11T01:00:00-0500", "TZ=America/New_York 0 0 * * * ?", "2012-03-11T03:00:00-0400"},
{"2012-03-11T03:00:00-0400", "TZ=America/New_York 0 0 * * * ?", "2012-03-11T04:00:00-0400"},
{"2012-03-11T04:00:00-0400", "TZ=America/New_York 0 0 * * * ?", "2012-03-11T05:00:00-0400"},
// hourly job using CRON_TZ
{"2012-03-11T00:00:00-0500", "CRON_TZ=America/New_York 0 0 * * * ?", "2012-03-11T01:00:00-0500"},
{"2012-03-11T01:00:00-0500", "CRON_TZ=America/New_York 0 0 * * * ?", "2012-03-11T03:00:00-0400"},
{"2012-03-11T03:00:00-0400", "CRON_TZ=America/New_York 0 0 * * * ?", "2012-03-11T04:00:00-0400"},
{"2012-03-11T04:00:00-0400", "CRON_TZ=America/New_York 0 0 * * * ?", "2012-03-11T05:00:00-0400"},
// 1am nightly job
{"2012-03-11T00:00:00-0500", "TZ=America/New_York 0 0 1 * * ?", "2012-03-11T01:00:00-0500"},
{"2012-03-11T01:00:00-0500", "TZ=America/New_York 0 0 1 * * ?", "2012-03-12T01:00:00-0400"},
// 2am nightly job (skipped)
{"2012-03-11T00:00:00-0500", "TZ=America/New_York 0 0 2 * * ?", "2012-03-12T02:00:00-0400"},
// Daylight savings time 2am EDT (-4) => 1am EST (-5)
{"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 30 2 04 Nov ?", "2012-11-04T02:30:00-0500"},
{"2012-11-04T01:45:00-0400", "TZ=America/New_York 0 30 1 04 Nov ?", "2012-11-04T01:30:00-0500"},
// hourly job
{"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 0 * * * ?", "2012-11-04T01:00:00-0400"},
{"2012-11-04T01:00:00-0400", "TZ=America/New_York 0 0 * * * ?", "2012-11-04T01:00:00-0500"},
{"2012-11-04T01:00:00-0500", "TZ=America/New_York 0 0 * * * ?", "2012-11-04T02:00:00-0500"},
// 1am nightly job (runs twice)
{"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 0 1 * * ?", "2012-11-04T01:00:00-0400"},
{"2012-11-04T01:00:00-0400", "TZ=America/New_York 0 0 1 * * ?", "2012-11-04T01:00:00-0500"},
{"2012-11-04T01:00:00-0500", "TZ=America/New_York 0 0 1 * * ?", "2012-11-05T01:00:00-0500"},
// 2am nightly job
{"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 0 2 * * ?", "2012-11-04T02:00:00-0500"},
{"2012-11-04T02:00:00-0500", "TZ=America/New_York 0 0 2 * * ?", "2012-11-05T02:00:00-0500"},
// 3am nightly job
{"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 0 3 * * ?", "2012-11-04T03:00:00-0500"},
{"2012-11-04T03:00:00-0500", "TZ=America/New_York 0 0 3 * * ?", "2012-11-05T03:00:00-0500"},
// hourly job
{"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 * * * ?", "2012-11-04T01:00:00-0400"},
{"TZ=America/New_York 2012-11-04T01:00:00-0400", "0 0 * * * ?", "2012-11-04T01:00:00-0500"},
{"TZ=America/New_York 2012-11-04T01:00:00-0500", "0 0 * * * ?", "2012-11-04T02:00:00-0500"},
// 1am nightly job (runs twice)
{"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 1 * * ?", "2012-11-04T01:00:00-0400"},
{"TZ=America/New_York 2012-11-04T01:00:00-0400", "0 0 1 * * ?", "2012-11-04T01:00:00-0500"},
{"TZ=America/New_York 2012-11-04T01:00:00-0500", "0 0 1 * * ?", "2012-11-05T01:00:00-0500"},
// 2am nightly job
{"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 2 * * ?", "2012-11-04T02:00:00-0500"},
{"TZ=America/New_York 2012-11-04T02:00:00-0500", "0 0 2 * * ?", "2012-11-05T02:00:00-0500"},
// 3am nightly job
{"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 3 * * ?", "2012-11-04T03:00:00-0500"},
{"TZ=America/New_York 2012-11-04T03:00:00-0500", "0 0 3 * * ?", "2012-11-05T03:00:00-0500"},
// Unsatisfiable
{"Mon Jul 9 23:35 2012", "0 0 0 30 Feb ?", ""},
{"Mon Jul 9 23:35 2012", "0 0 0 31 Apr ?", ""},
// Monthly job
{"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 3 3 * ?", "2012-12-03T03:00:00-0500"},
// Test the scenario of DST resulting in midnight not being a valid time.
// https://github.com/robfig/cron/issues/157
{"2018-10-17T05:00:00-0400", "TZ=America/Sao_Paulo 0 0 9 10 * ?", "2018-11-10T06:00:00-0500"},
{"2018-02-14T05:00:00-0500", "TZ=America/Sao_Paulo 0 0 9 22 * ?", "2018-02-22T07:00:00-0500"},
}
for _, c := range runs {
sched, err := secondParser.Parse(c.spec)
if err != nil {
t.Error(err)
continue
}
actual := sched.Next(getTime(c.time))
expected := getTime(c.expected)
if !actual.Equal(expected) {
t.Errorf("%s, \"%s\": (expected) %v != %v (actual)", c.time, c.spec, expected, actual)
}
}
}
func TestErrors(t *testing.T) {
invalidSpecs := []string{
"xyz",
"60 0 * * *",
"0 60 * * *",
"0 0 * * XYZ",
}
for _, spec := range invalidSpecs {
_, err := ParseStandard(spec)
if err == nil {
t.Error("expected an error parsing: ", spec)
}
}
}
func getTime(value string) time.Time {
if value == "" {
return time.Time{}
}
var location = time.Local
if strings.HasPrefix(value, "TZ=") {
parts := strings.Fields(value)
loc, err := time.LoadLocation(parts[0][len("TZ="):])
if err != nil {
panic("could not parse location:" + err.Error())
}
location = loc
value = parts[1]
}
var layouts = []string{
"Mon Jan 2 15:04 2006",
"Mon Jan 2 15:04:05 2006",
}
for _, layout := range layouts {
if t, err := time.ParseInLocation(layout, value, location); err == nil {
return t
}
}
if t, err := time.ParseInLocation("2006-01-02T15:04:05-0700", value, location); err == nil {
return t
}
panic("could not parse time value " + value)
}
func TestNextWithTz(t *testing.T) {
runs := []struct {
time, spec string
expected string
}{
// Failing tests
{"2016-01-03T13:09:03+0530", "14 14 * * *", "2016-01-03T14:14:00+0530"},
{"2016-01-03T04:09:03+0530", "14 14 * * ?", "2016-01-03T14:14:00+0530"},
// Passing tests
{"2016-01-03T14:09:03+0530", "14 14 * * *", "2016-01-03T14:14:00+0530"},
{"2016-01-03T14:00:00+0530", "14 14 * * ?", "2016-01-03T14:14:00+0530"},
}
for _, c := range runs {
sched, err := ParseStandard(c.spec)
if err != nil {
t.Error(err)
continue
}
actual := sched.Next(getTimeTZ(c.time))
expected := getTimeTZ(c.expected)
if !actual.Equal(expected) {
t.Errorf("%s, \"%s\": (expected) %v != %v (actual)", c.time, c.spec, expected, actual)
}
}
}
func getTimeTZ(value string) time.Time {
if value == "" {
return time.Time{}
}
t, err := time.Parse("Mon Jan 2 15:04 2006", value)
if err != nil {
t, err = time.Parse("Mon Jan 2 15:04:05 2006", value)
if err != nil {
t, err = time.Parse("2006-01-02T15:04:05-0700", value)
if err != nil {
panic(err)
}
}
}
return t
}
// https://github.com/robfig/cron/issues/144
func TestSlash0NoHang(t *testing.T) {
schedule := "TZ=America/New_York 15/0 * * * *"
_, err := ParseStandard(schedule)
if err == nil {
t.Error("expected an error on 0 increment")
}
}
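A quick usage sketch of the TZ= prefix handling these tests rely on (again assuming the upstream github.com/robfig/cron/v3 import path):
package main

import (
    "fmt"
    "time"

    "github.com/robfig/cron/v3"
)

func main() {
    // Evaluated in America/New_York regardless of the location of the time passed
    // to Next; on 2012-03-11 the clock jumps from 2:00 to 3:00, so that night's
    // 2am run is skipped entirely.
    sched, err := cron.ParseStandard("TZ=America/New_York 0 2 * * *")
    if err != nil {
        panic(err)
    }
    from := time.Date(2012, time.March, 11, 5, 0, 0, 0, time.UTC) // midnight EST
    fmt.Println(sched.Next(from)) // 2012-03-12 06:00:00 +0000 UTC, i.e. 2am EDT on the 12th
}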

View File

@@ -1,10 +0,0 @@
package validate
import (
"github.com/globalsign/mgo/bson"
"github.com/go-playground/validator/v10"
)
func MongoID(sl validator.FieldLevel) bool {
return bson.IsObjectIdHex(sl.Field().String())
}

View File

@@ -2,24 +2,20 @@ package main
import (
"context"
"crawlab/config"
"crawlab/database"
_ "crawlab/docs"
validate2 "crawlab/lib/validate"
"crawlab/middlewares"
"crawlab/model"
"crawlab/routes"
"crawlab/services"
"crawlab/services/challenge"
"crawlab/services/rpc"
"github.com/apex/log"
"github.com/crawlab-team/crawlab-core/config"
validate2 "github.com/crawlab-team/crawlab-core/lib/validate"
"github.com/crawlab-team/crawlab-core/middlewares"
"github.com/crawlab-team/crawlab-core/model"
"github.com/crawlab-team/crawlab-core/routes"
"github.com/crawlab-team/crawlab-core/services"
"github.com/crawlab-team/crawlab-core/services/rpc"
"github.com/crawlab-team/crawlab-db"
"github.com/gin-gonic/gin"
"github.com/gin-gonic/gin/binding"
"github.com/go-playground/validator/v10"
"github.com/olivere/elastic/v7"
"github.com/spf13/viper"
"github.com/swaggo/gin-swagger"
"github.com/swaggo/gin-swagger/swaggerFiles"
"net"
"net/http"
"os"
@@ -29,11 +25,6 @@ import (
"time"
)
var swagHandler gin.HandlerFunc
func init() {
swagHandler = ginSwagger.WrapHandler(swaggerFiles.Handler)
}
func main() {
app := gin.New()
app.Use(gin.Logger(), gin.Recovery())
@@ -41,10 +32,6 @@ func main() {
_ = v.RegisterValidation("bid", validate2.MongoID)
}
if swagHandler != nil {
app.GET("/swagger/*any", swagHandler)
}
// Initialize configuration
if err := config.InitConfig(""); err != nil {
log.Error("init config error:" + err.Error())
@@ -52,7 +39,7 @@ func main() {
}
log.Info("initialized config successfully")
// Initialize the MongoDB database
if err := database.InitMongo(); err != nil {
if err := db.InitMongo(); err != nil {
log.Error("init mongodb error:" + err.Error())
debug.PrintStack()
panic(err)
@@ -60,7 +47,7 @@ func main() {
log.Info("initialized mongodb successfully")
// Initialize the Redis database
if err := database.InitRedis(); err != nil {
if err := db.InitRedis(); err != nil {
log.Error("init redis error:" + err.Error())
debug.PrintStack()
panic(err)
@@ -107,14 +94,6 @@ func main() {
}
log.Info("initialized dependency fetcher successfully")
// Initialize the challenge service
if err := challenge.InitChallengeService(); err != nil {
log.Error("init challenge service error:" + err.Error())
debug.PrintStack()
panic(err)
}
log.Info("initialized challenge service successfully")
// Initialize the cleanup service
if err := services.InitCleanService(); err != nil {
log.Error("init clean service error:" + err.Error())
@@ -238,13 +217,13 @@ func main() {
}
// Tasks
{
authGroup.GET("/tasks", routes.GetTaskList) // 任务列表
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情
authGroup.PUT("/tasks", routes.PutTask) // 派发任务
authGroup.PUT("/tasks/batch", routes.PutBatchTasks) // 批量派发任务
authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务
authGroup.DELETE("/tasks", routes.DeleteSelectedTask) // 删除多个任务
authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) // 删除指定状态的任务
authGroup.GET("/tasks", routes.GetTaskList) // 任务列表
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情
authGroup.PUT("/tasks", routes.PutTask) // 派发任务
authGroup.PUT("/tasks/batch", routes.PutBatchTasks) // 批量派发任务
authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务
authGroup.DELETE("/tasks", routes.DeleteSelectedTask) // 删除多个任务
//authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) // 删除指定状态的任务
authGroup.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务
authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志
authGroup.GET("/tasks/:id/error-log", routes.GetTaskErrorLog) // 任务错误日志
@@ -303,11 +282,6 @@ func main() {
authGroup.POST("/projects/:id", routes.PostProject) // 新增
authGroup.DELETE("/projects/:id", routes.DeleteProject) // 删除
}
// Challenges
{
authGroup.GET("/challenges", routes.GetChallengeList) // challenge list
authGroup.POST("/challenges-check", routes.CheckChallengeList) // check the challenge list
}
// Actions
{
//authGroup.GET("/actions", routes.GetActionList) // action list
@@ -330,12 +304,6 @@ func main() {
authGroup.GET("/git/public-key", routes.GetGitSshPublicKey) // 获取 SSH 公钥
authGroup.GET("/git/commits", routes.GetGitCommits) // 获取 Git Commits
authGroup.POST("/git/checkout", routes.PostGitCheckout) // 获取 Git Commits
// 爬虫市场 / 仓库
{
authGroup.GET("/repos", routes.GetRepoList) // 获取仓库列表
authGroup.GET("/repos/sub-dir", routes.GetRepoSubDirList) // 获取仓库子目录
authGroup.POST("/repos/download", routes.DownloadRepo) // 下载仓库
}
}
}

View File

@@ -1,48 +0,0 @@
package middlewares
import (
"crawlab/constants"
"crawlab/routes"
"crawlab/services"
"github.com/gin-gonic/gin"
"net/http"
"strings"
)
func AuthorizationMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
// Get the token string
tokenStr := c.GetHeader("Authorization")
// Validate the token
user, err := services.CheckToken(tokenStr)
// Validation failed: return an error response
if err != nil {
c.AbortWithStatusJSON(http.StatusUnauthorized, routes.Response{
Status: "ok",
Message: "unauthorized",
Error: "unauthorized",
})
return
}
// For normal-role users, check that the requested path is allowed
if user.Role == constants.RoleNormal {
if strings.HasPrefix(strings.ToLower(c.Request.URL.Path), "/users") {
c.AbortWithStatusJSON(http.StatusUnauthorized, routes.Response{
Status: "ok",
Message: "unauthorized",
Error: "unauthorized",
})
return
}
}
// Set the current user in the context
c.Set(constants.ContextUser, &user)
// Validation succeeded
c.Next()
}
}
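A minimal wiring sketch of how this middleware is attached (hypothetical, using the pre-cleanup crawlab import paths; not part of the original file):
package main

import (
    "crawlab/middlewares"
    "crawlab/routes"

    "github.com/gin-gonic/gin"
)

func main() {
    app := gin.New()
    // Every route registered under authGroup passes through the token check first.
    authGroup := app.Group("/", middlewares.AuthorizationMiddleware())
    authGroup.GET("/tasks", routes.GetTaskList)
    _ = app.Run(":8000")
}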

View File

@@ -1,19 +0,0 @@
package middlewares
import "github.com/gin-gonic/gin"
func CORSMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
c.Writer.Header().Set("Access-Control-Allow-Origin", "*")
c.Writer.Header().Set("Access-Control-Allow-Credentials", "true")
c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, accept, origin, Cache-Control, X-Requested-With")
c.Writer.Header().Set("Access-Control-Allow-Methods", "DELETE, POST, OPTIONS, GET, PUT")
if c.Request.Method == "OPTIONS" {
c.AbortWithStatus(204)
return
}
c.Next()
}
}

View File

@@ -1,54 +0,0 @@
package middlewares
import (
"bytes"
"context"
"fmt"
"github.com/gin-gonic/gin"
"github.com/olivere/elastic/v7"
"github.com/satori/go.uuid"
"github.com/spf13/viper"
"strconv"
"time"
)
func EsLog(ctx context.Context, esClient *elastic.Client) gin.HandlerFunc {
return func(c *gin.Context) {
// Start time
crawlabIndex := viper.GetString("setting.crawlabLogIndex")
start := time.Now()
// Process the request
c.Next()
// End time
end := time.Now()
// Elapsed time in milliseconds
latency := strconv.FormatInt(end.Sub(start).Milliseconds(), 10)
path := c.Request.URL.Path
clientIP := c.ClientIP()
method := c.Request.Method
statusCode := strconv.Itoa(c.Writer.Status())
buf := new(bytes.Buffer)
buf.ReadFrom(c.Request.Body)
b := buf.String()
accessLog := "costTime:" + latency + "ms--" + "StatusCode:" + statusCode + "--" + "Method:" + method + "--" + "ClientIp:" + clientIP + "--" +
"RequestURI:" + path + "--" + "Host:" + c.Request.Host + "--" + "UserAgent--" + c.Request.UserAgent() + "--RequestBody:" +
string(b)
WriteMsg(ctx, crawlabIndex, esClient, time.Now(), accessLog)
}
}
// WriteMsg writes the msg into Elasticsearch under the given index.
func WriteMsg(ctx context.Context, crawlabIndex string, es *elastic.Client, when time.Time, msg string) error {
vals := make(map[string]interface{})
vals["@timestamp"] = when.Format(time.RFC3339)
vals["@msg"] = msg
uid := uuid.NewV4().String()
_, err := es.Index().Index(crawlabIndex).Id(uid).BodyJson(vals).Refresh("wait_for").Do(ctx)
if err != nil {
fmt.Println(err)
}
return err
}
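A rough wiring sketch (assumptions: a local Elasticsearch endpoint and the pre-cleanup crawlab/middlewares import path) showing how EsLog would be registered on a gin engine:
package main

import (
    "context"

    "crawlab/middlewares"

    "github.com/gin-gonic/gin"
    "github.com/olivere/elastic/v7"
)

func main() {
    ctx := context.Background()
    // Hypothetical single-node ES endpoint; sniffing disabled for local use.
    esClient, err := elastic.NewClient(elastic.SetURL("http://localhost:9200"), elastic.SetSniff(false))
    if err != nil {
        panic(err)
    }
    app := gin.New()
    app.Use(middlewares.EsLog(ctx, esClient)) // every request is indexed as an access-log document
    _ = app.Run(":8000")
}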

View File

@@ -1,16 +0,0 @@
package mock
type Response struct {
Status string `json:"status"`
Message string `json:"message"`
Data interface{} `json:"data"`
Error string `json:"error"`
}
type ListResponse struct {
Status string `json:"status"`
Message string `json:"message"`
Total int `json:"total"`
Data interface{} `json:"data"`
Error string `json:"error"`
}

View File

@@ -1,8 +0,0 @@
package mock
type File struct {
Name string `json:"name"`
Path string `json:"path"`
IsDir bool `json:"is_dir"`
Size int64 `json:"size"`
}

View File

@@ -1,222 +0,0 @@
package mock
import (
"crawlab/entity"
"crawlab/model"
"crawlab/services"
"github.com/apex/log"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo/bson"
"net/http"
"time"
)
var NodeList = []model.Node{
{
Id: bson.ObjectId("5d429e6c19f7abede924fee2"),
Ip: "10.32.35.15",
Name: "test1",
Status: "online",
Port: "8081",
Mac: "ac:12:df:12:fd",
Description: "For test1",
IsMaster: true,
UpdateTs: time.Now(),
CreateTs: time.Now(),
UpdateTsUnix: time.Now().Unix(),
},
{
Id: bson.ObjectId("5d429e6c19f7abede924fe22"),
Ip: "10.32.35.12",
Name: "test2",
Status: "online",
Port: "8082",
Mac: "ac:12:df:12:vh",
Description: "For test2",
IsMaster: true,
UpdateTs: time.Now(),
CreateTs: time.Now(),
UpdateTsUnix: time.Now().Unix(),
},
}
var TaskList = []model.Task{
{
Id: "1234",
SpiderId: bson.ObjectId("5d429e6c19f7abede924fee2"),
StartTs: time.Now(),
FinishTs: time.Now(),
Status: "in progress",
NodeId: bson.ObjectId("5d429e6c19f7abede924fee2"),
LogPath: "./log",
Cmd: "scrapy crawl test",
Error: "",
ResultCount: 0,
WaitDuration: 10.0,
RuntimeDuration: 10,
TotalDuration: 20,
SpiderName: "test",
NodeName: "test",
CreateTs: time.Now(),
UpdateTs: time.Now(),
},
{
Id: "5678",
SpiderId: bson.ObjectId("5d429e6c19f7abede924fee2"),
StartTs: time.Now(),
FinishTs: time.Now(),
Status: "in progress",
NodeId: bson.ObjectId("5d429e6c19f7abede924fee2"),
LogPath: "./log",
Cmd: "scrapy crawl test2",
Error: "",
ResultCount: 0,
WaitDuration: 10.0,
RuntimeDuration: 10,
TotalDuration: 20,
SpiderName: "test",
NodeName: "test",
CreateTs: time.Now(),
UpdateTs: time.Now(),
},
}
var dataList = []services.Data{
{
Mac: "ac:12:fc:fd:ds:dd",
Ip: "192.10.2.1",
Master: true,
UpdateTs: time.Now(),
UpdateTsUnix: time.Now().Unix(),
},
{
Mac: "22:12:fc:fd:ds:dd",
Ip: "182.10.2.2",
Master: true,
UpdateTs: time.Now(),
UpdateTsUnix: time.Now().Unix(),
},
}
var executeble = []entity.Executable{
{
Path: "/test",
FileName: "test.py",
DisplayName: "test.py",
},
}
var systemInfo = entity.SystemInfo{ARCH: "x86",
OS: "linux",
Hostname: "test",
NumCpu: 4,
Executables: executeble,
}
func GetNodeList(c *gin.Context) {
nodes := NodeList
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: nodes,
})
}
func GetNode(c *gin.Context) {
var result model.Node
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
for _, node := range NodeList {
if node.Id == bson.ObjectId(id) {
result = node
}
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: result,
})
}
func Ping(c *gin.Context) {
data := dataList[0]
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: data,
})
}
func PostNode(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
var oldItem model.Node
for _, node := range NodeList {
if node.Id == bson.ObjectId(id) {
oldItem = node
}
}
log.Info(id)
var newItem model.Node
if err := c.ShouldBindJSON(&newItem); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
newItem.Id = oldItem.Id
log.Info("Post Node success")
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func GetNodeTaskList(c *gin.Context) {
tasks := TaskList
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: tasks,
})
}
func DeleteNode(c *gin.Context) {
id := bson.ObjectId("5d429e6c19f7abede924fee2")
for _, node := range NodeList {
if node.Id == id {
log.Infof("Delete a node")
}
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func GetSystemInfo(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
sysInfo := systemInfo
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: sysInfo,
})
}

View File

@@ -1,198 +0,0 @@
package mock
import (
"bytes"
"crawlab/model"
"encoding/json"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo/bson"
. "github.com/smartystreets/goconvey/convey"
"net/http"
"net/http/httptest"
"testing"
"time"
)
var app *gin.Engine
// These tests depend on MongoDB, so MongoDB and the related services must be started before running them
func init() {
app = gin.Default()
// mock Test
// Node-related APIs
app.GET("/ping", Ping)
app.GET("/nodes", GetNodeList) // node list
app.GET("/nodes/:id", GetNode) // node detail
app.POST("/nodes/:id", PostNode) // update a node
app.GET("/nodes/:id/tasks", GetNodeTaskList) // node task list
app.GET("/nodes/:id/system", GetSystemInfo) // node system info
app.DELETE("/nodes/:id", DeleteNode) // delete a node
// Spiders
app.GET("/stats/home", GetHomeStats) // home page statistics
// Schedules
app.GET("/schedules", GetScheduleList) // schedule list
app.GET("/schedules/:id", GetSchedule) // schedule detail
app.PUT("/schedules", PutSchedule) // create a schedule
app.POST("/schedules/:id", PostSchedule) // update a schedule
app.DELETE("/schedules/:id", DeleteSchedule) // delete a schedule
app.GET("/tasks", GetTaskList) // 任务列表
app.GET("/tasks/:id", GetTask) // 任务详情
app.PUT("/tasks", PutTask) // 派发任务
app.DELETE("/tasks/:id", DeleteTask) // 删除任务
app.GET("/tasks/:id/results", GetTaskResults) // 任务结果
app.GET("/tasks/:id/results/download", DownloadTaskResultsCsv) // 下载任务结果
app.GET("/spiders", GetSpiderList) // 爬虫列表
app.GET("/spiders/:id", GetSpider) // 爬虫详情
app.POST("/spiders/:id", PostSpider) // 修改爬虫
app.DELETE("/spiders/:id", DeleteSpider) // 删除爬虫
app.GET("/spiders/:id/tasks", GetSpiderTasks) // 爬虫任务列表
app.GET("/spiders/:id/dir", GetSpiderDir) // 爬虫目录
}
//mock test, test data in ./mock
func TestGetNodeList(t *testing.T) {
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/nodes", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API GetNodeList", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestGetNode(t *testing.T) {
var resp Response
var mongoId = "5d429e6c19f7abede924fee2"
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/nodes/"+mongoId, nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
t.Log(resp.Data)
Convey("Test API GetNode", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
So(resp.Data.(map[string]interface{})["_id"], ShouldEqual, bson.ObjectId(mongoId).Hex())
})
})
}
func TestPing(t *testing.T) {
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/ping", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API ping", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestGetNodeTaskList(t *testing.T) {
var resp Response
var mongoId = "5d429e6c19f7abede924fee2"
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "nodes/"+mongoId+"/tasks", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API GetNodeTaskList", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestDeleteNode(t *testing.T) {
var resp Response
var mongoId = "5d429e6c19f7abede924fee2"
w := httptest.NewRecorder()
req, _ := http.NewRequest("DELETE", "nodes/"+mongoId, nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API DeleteNode", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestPostNode(t *testing.T) {
var newItem = model.Node{
Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
Ip: "10.32.35.15",
Name: "test1",
Status: "online",
Port: "8081",
Mac: "ac:12:df:12:fd",
Description: "For test1",
IsMaster: true,
UpdateTs: time.Now(),
CreateTs: time.Now(),
UpdateTsUnix: time.Now().Unix(),
}
var resp Response
body, _ := json.Marshal(newItem)
var mongoId = "5d429e6c19f7abede924fee2"
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "nodes/"+mongoId, bytes.NewReader(body))
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
t.Log(resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API PostNode", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestGetSystemInfo(t *testing.T) {
var resp Response
var mongoId = "5d429e6c19f7abede924fee2"
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "nodes/"+mongoId+"/system", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API GetSystemInfo", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}

View File

@@ -1,136 +0,0 @@
package mock
import (
"crawlab/constants"
"crawlab/model"
"fmt"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo/bson"
"net/http"
"time"
)
var NodeIdss = []bson.ObjectId{bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
bson.ObjectIdHex("5d429e6c19f7abede924fee1")}
var scheduleList = []model.Schedule{
{
Id: bson.ObjectId("5d429e6c19f7abede924fee2"),
Name: "test schedule",
SpiderId: "123",
NodeIds: NodeIdss,
Cron: "***1*",
EntryId: 10,
// For front-end display
SpiderName: "test schedule",
CreateTs: time.Now(),
UpdateTs: time.Now(),
},
{
Id: bson.ObjectId("xx429e6c19f7abede924fee2"),
Name: "test schedule2",
SpiderId: "234",
NodeIds: NodeIdss,
Cron: "***1*",
EntryId: 10,
// For front-end display
SpiderName: "test schedule2",
CreateTs: time.Now(),
UpdateTs: time.Now(),
},
}
func GetScheduleList(c *gin.Context) {
results := scheduleList
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: results,
})
}
func GetSchedule(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
var result model.Schedule
for _, sch := range scheduleList {
if sch.Id == bson.ObjectId(id) {
result = sch
}
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: result,
})
}
func PostSchedule(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
var oldItem model.Schedule
for _, sch := range scheduleList {
if sch.Id == bson.ObjectId(id) {
oldItem = sch
}
}
var newItem model.Schedule
if err := c.ShouldBindJSON(&newItem); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
newItem.Id = oldItem.Id
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func PutSchedule(c *gin.Context) {
var item model.Schedule
// Bind the data model
if err := c.ShouldBindJSON(&item); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
// If a node_id is empty, set it to the null ObjectId
for i, nodeId := range item.NodeIds {
if nodeId == "" {
item.NodeIds[i] = bson.ObjectIdHex(constants.ObjectIdNull)
}
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func DeleteSchedule(c *gin.Context) {
id := bson.ObjectIdHex("5d429e6c19f7abede924fee2")
for _, sch := range scheduleList {
if sch.Id == id {
fmt.Println("delete a schedule")
}
}
fmt.Println(id)
fmt.Println("update schedule")
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}

View File

@@ -1,141 +0,0 @@
package mock
import (
"bytes"
"crawlab/model"
"crawlab/utils"
"encoding/json"
"github.com/globalsign/mgo/bson"
. "github.com/smartystreets/goconvey/convey"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
func TestGetScheduleList(t *testing.T) {
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/schedules", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
t.Log(resp.Data)
Convey("Test API GetScheduleList", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestGetSchedule(t *testing.T) {
var mongoId = "5d429e6c19f7abede924fee2"
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/schedules/"+mongoId, nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API GetSchedule", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
So(resp.Data.(map[string]interface{})["_id"], ShouldEqual, bson.ObjectId(mongoId).Hex())
})
})
}
func TestDeleteSchedule(t *testing.T) {
var mongoId = "5d429e6c19f7abede924fee2"
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("DELETE", "/schedules/"+mongoId, nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test DeleteSchedule", t, func() {
Convey("Test resp status", func() {
So(resp.Status, ShouldEqual, "ok")
})
})
}
func TestPostSchedule(t *testing.T) {
var newItem = model.Schedule{
Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
Name: "test schedule",
SpiderId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
NodeIds: NodeIdss,
Cron: "***1*",
EntryId: 10,
// For front-end display
SpiderName: "test schedule",
CreateTs: time.Now(),
UpdateTs: time.Now(),
}
var resp Response
var mongoId = "5d429e6c19f7abede924fee2"
body, _ := json.Marshal(newItem)
w := httptest.NewRecorder()
req, _ := http.NewRequest("POST", "/schedules/"+mongoId, strings.NewReader(utils.BytesToString(body)))
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
t.Log(resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API PostSchedule", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestPutSchedule(t *testing.T) {
var newItem = model.Schedule{
Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
Name: "test schedule",
SpiderId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
NodeIds: NodeIdss,
Cron: "***1*",
EntryId: 10,
// For front-end display
SpiderName: "test schedule",
CreateTs: time.Now(),
UpdateTs: time.Now(),
}
var resp Response
body, _ := json.Marshal(newItem)
w := httptest.NewRecorder()
req, _ := http.NewRequest("PUT", "/schedules", bytes.NewReader(body))
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
t.Log(resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API PutSchedule", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}

View File

@@ -1,187 +0,0 @@
package mock
import (
"crawlab/constants"
"crawlab/model"
"github.com/apex/log"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo/bson"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"time"
)
var SpiderList = []model.Spider{
{
Id: bson.ObjectId("5d429e6c19f7abede924fee2"),
Name: "For test",
DisplayName: "test",
Type: "test",
Col: "test",
Site: "www.baidu.com",
Envs: nil,
Src: "../app/spiders",
Cmd: "scrapy crawl test",
LastRunTs: time.Now(),
CreateTs: time.Now(),
UpdateTs: time.Now(),
UserId: constants.ObjectIdNull,
},
}
func GetSpiderList(c *gin.Context) {
// mock get spider list from database
results := SpiderList
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: results,
})
}
func GetSpider(c *gin.Context) {
id := c.Param("id")
var result model.Spider
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
for _, spider := range SpiderList {
if spider.Id == bson.ObjectId(id) {
result = spider
}
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: result,
})
}
func PostSpider(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
}
var item model.Spider
if err := c.ShouldBindJSON(&item); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
log.Info("modify the item")
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func GetSpiderDir(c *gin.Context) {
// Spider ID
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
// Relative directory path
path := c.Query("path")
var spi model.Spider
// Get the spider
for _, spider := range SpiderList {
if spider.Id == bson.ObjectId(id) {
spi = spider
}
}
// List the files under the directory
f, err := ioutil.ReadDir(filepath.Join(spi.Src, path))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// Iterate over the file list
var fileList []model.File
for _, file := range f {
fileList = append(fileList, model.File{
Name: file.Name(),
IsDir: file.IsDir(),
Size: file.Size(),
Path: filepath.Join(path, file.Name()),
})
}
// Return the result
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: fileList,
})
}
func GetSpiderTasks(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
var spider model.Spider
for _, spi := range SpiderList {
if spi.Id == bson.ObjectId(id) {
spider = spi
}
}
var tasks model.Task
for _, task := range TaskList {
if task.SpiderId == spider.Id {
tasks = task
}
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: tasks,
})
}
func DeleteSpider(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
// Get this spider
var spider model.Spider
for _, spi := range SpiderList {
if spi.Id == bson.ObjectId(id) {
spider = spi
}
}
// Delete the spider's file directory
if err := os.RemoveAll(spider.Src); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// Delete this spider from the database
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}

View File

@@ -1,139 +0,0 @@
package mock
import (
"bytes"
"crawlab/constants"
"crawlab/model"
"encoding/json"
"github.com/globalsign/mgo/bson"
. "github.com/smartystreets/goconvey/convey"
"net/http"
"net/http/httptest"
"testing"
"time"
)
func TestGetSpiderList(t *testing.T) {
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/spiders", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("unmarshal resp faild")
}
Convey("Test API GetSpiderList", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestGetSpider(t *testing.T) {
var resp Response
var spiderId = "5d429e6c19f7abede924fee2"
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/spiders/"+spiderId, nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API GetSpider", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestPostSpider(t *testing.T) {
var spider = model.Spider{
Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
Name: "For test",
DisplayName: "test",
Type: "test",
Col: "test",
Site: "www.baidu.com",
Envs: nil,
Src: "/app/spider",
Cmd: "scrapy crawl test",
LastRunTs: time.Now(),
CreateTs: time.Now(),
UpdateTs: time.Now(),
UserId: constants.ObjectIdNull,
}
var resp Response
var spiderId = "5d429e6c19f7abede924fee2"
w := httptest.NewRecorder()
body, _ := json.Marshal(spider)
req, _ := http.NewRequest("POST", "/spiders/"+spiderId, bytes.NewReader(body))
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API PostSpider", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestGetSpiderDir(t *testing.T) {
var spiderId = "5d429e6c19f7abede924fee2"
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/spiders/"+spiderId+"/dir", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API GetSpiderDir", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestGetSpiderTasks(t *testing.T) {
var spiderId = "5d429e6c19f7abede924fee2"
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/spiders/"+spiderId+"/tasks", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API GetSpiderTasks", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestDeleteSpider(t *testing.T) {
var spiderId = "5d429e6c19f7abede924fee2"
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("DELETE", "/spiders/"+spiderId, nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API DeleteSpider", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}

View File

@@ -1,62 +0,0 @@
package mock
import (
"crawlab/model"
"github.com/gin-gonic/gin"
"net/http"
)
var taskDailyItems = []model.TaskDailyItem{
{
Date: "2019/08/19",
TaskCount: 2,
AvgRuntimeDuration: 1000,
},
{
Date: "2019/08/20",
TaskCount: 3,
AvgRuntimeDuration: 10130,
},
}
func GetHomeStats(c *gin.Context) {
type DataOverview struct {
TaskCount int `json:"task_count"`
SpiderCount int `json:"spider_count"`
ActiveNodeCount int `json:"active_node_count"`
ScheduleCount int `json:"schedule_count"`
}
type Data struct {
Overview DataOverview `json:"overview"`
Daily []model.TaskDailyItem `json:"daily"`
}
// Total task count
taskCount := 10
// Online node count
activeNodeCount := 4
// Spider count
spiderCount := 5
// Schedule count
scheduleCount := 2
// Daily task counts
items := taskDailyItems
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: Data{
Overview: DataOverview{
ActiveNodeCount: activeNodeCount,
TaskCount: taskCount,
SpiderCount: spiderCount,
ScheduleCount: scheduleCount,
},
Daily: items,
},
})
}

View File

@@ -1,29 +0,0 @@
package mock
import (
"encoding/json"
"fmt"
. "github.com/smartystreets/goconvey/convey"
"net/http"
"net/http/httptest"
"testing"
)
func TestGetHomeStats(t *testing.T) {
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/stats/home", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
fmt.Println(resp.Data)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API GetHomeStats", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}

View File

@@ -1 +0,0 @@
package mock

View File

@@ -1,236 +0,0 @@
package mock
import (
"bytes"
"crawlab/constants"
"crawlab/model"
"crawlab/utils"
"encoding/csv"
"fmt"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo/bson"
"github.com/satori/go.uuid"
"net/http"
)
type TaskListRequestData struct {
PageNum int `form:"page_num"`
PageSize int `form:"page_size"`
NodeId string `form:"node_id"`
SpiderId string `form:"spider_id"`
}
type TaskResultsRequestData struct {
PageNum int `form:"page_num"`
PageSize int `form:"page_size"`
}
func GetTaskList(c *gin.Context) {
// Bind data
data := TaskListRequestData{}
if err := c.ShouldBindQuery(&data); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
if data.PageNum == 0 {
data.PageNum = 1
}
if data.PageSize == 0 {
data.PageSize = 10
}
// Filter conditions
query := bson.M{}
if data.NodeId != "" {
query["node_id"] = bson.ObjectIdHex(data.NodeId)
}
if data.SpiderId != "" {
query["spider_id"] = bson.ObjectIdHex(data.SpiderId)
}
// Get the task list
tasks := TaskList
// Get the total task count
total := len(TaskList)
c.JSON(http.StatusOK, ListResponse{
Status: "ok",
Message: "success",
Total: total,
Data: tasks,
})
}
func GetTask(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
var result model.Task
for _, task := range TaskList {
if task.Id == id {
result = task
}
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: result,
})
}
func PutTask(c *gin.Context) {
// Generate a task ID
id := uuid.NewV4()
// Bind data
var t model.Task
if err := c.ShouldBindJSON(&t); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
t.Id = id.String()
t.Status = constants.StatusPending
// If node_id is not provided, set it to the null ObjectId
if t.NodeId.Hex() == "" {
t.NodeId = bson.ObjectIdHex(constants.ObjectIdNull)
}
// Save the task into the database
fmt.Println("put the task into database")
// Put the task into the task queue
fmt.Println("put the task into task queue")
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func DeleteTask(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
for _, task := range TaskList {
if task.Id == id {
fmt.Println("delete the task")
}
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
func GetTaskResults(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
// Bind data
data := TaskResultsRequestData{}
if err := c.ShouldBindQuery(&data); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
// Get the task
var task model.Task
for _, ta := range TaskList {
if ta.Id == id {
task = ta
}
}
fmt.Println(task)
// Get the results
var results interface{}
total := len(TaskList)
c.JSON(http.StatusOK, ListResponse{
Status: "ok",
Message: "success",
Data: results,
Total: total,
})
}
func DownloadTaskResultsCsv(c *gin.Context) {
id := c.Param("id")
if !bson.IsObjectIdHex(id) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
return
}
// Get the task
var task model.Task
for _, ta := range TaskList {
if ta.Id == id {
task = ta
}
}
fmt.Println(task)
// Get the results
var results []interface{}
// Column list
var columns []string
if len(results) == 0 {
columns = []string{}
} else {
item := results[0].(bson.M)
for key := range item {
columns = append(columns, key)
}
}
// Buffer
bytesBuffer := &bytes.Buffer{}
// Write a UTF-8 BOM so the file is not shown as garbled text when opened in Microsoft Excel
bytesBuffer.WriteString("\xEF\xBB\xBF")
writer := csv.NewWriter(bytesBuffer)
// Write the header row
if err := writer.Write(columns); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// Write the rows
for _, result := range results {
// Convert result into []string
item := result.(bson.M)
var values []string
for _, col := range columns {
value := utils.InterfaceToString(item[col])
values = append(values, value)
}
// Write the row
if err := writer.Write(values); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
}
// The buffered data is only flushed to the writer here
writer.Flush()
// Set the downloaded file name
c.Writer.Header().Set("Content-Disposition", "attachment;filename=data.csv")
// Set the content type and write out the data
c.Data(http.StatusOK, "text/csv", bytesBuffer.Bytes())
}
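A minimal standalone sketch of the BOM-plus-csv.Writer pattern used above (stdlib only; the data values are made up):
package main

import (
    "bytes"
    "encoding/csv"
    "fmt"
)

func main() {
    buf := &bytes.Buffer{}
    buf.WriteString("\xEF\xBB\xBF") // UTF-8 BOM so Excel detects the encoding correctly
    w := csv.NewWriter(buf)
    _ = w.Write([]string{"name", "site"})               // header row
    _ = w.Write([]string{"test spider", "example.com"}) // one data row
    w.Flush() // rows only reach buf after Flush
    fmt.Printf("%d bytes of CSV written\n", buf.Len())
}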

View File

@@ -1,138 +0,0 @@
package mock
import (
"bytes"
"crawlab/model"
"encoding/json"
"github.com/globalsign/mgo/bson"
. "github.com/smartystreets/goconvey/convey"
"net/http"
"net/http/httptest"
"testing"
"time"
)
func TestGetTaskList(t *testing.T) {
//var teskListRequestFrom = TaskListRequestData{
// PageNum: 2,
// PageSize: 10,
// NodeId: "434221grfsf",
// SpiderId: "fdfewqrftea",
//}
var resp ListResponse
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/tasks?PageNum=2&PageSize=10&NodeId=342dfsff&SpiderId=f8dsf", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API GetNodeList", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
So(resp.Total, ShouldEqual, 2)
})
})
}
func TestGetTask(t *testing.T) {
var resp Response
var taskId = "1234"
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/tasks/"+taskId, nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API GetTask", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestPutTask(t *testing.T) {
var newItem = model.Task{
Id: "1234",
SpiderId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
StartTs: time.Now(),
FinishTs: time.Now(),
Status: "online",
NodeId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"),
LogPath: "./log",
Cmd: "scrapy crawl test",
Error: "",
ResultCount: 0,
WaitDuration: 10.0,
RuntimeDuration: 10,
TotalDuration: 20,
SpiderName: "test",
NodeName: "test",
CreateTs: time.Now(),
UpdateTs: time.Now(),
}
var resp Response
body, _ := json.Marshal(&newItem)
w := httptest.NewRecorder()
req, _ := http.NewRequest("PUT", "/tasks", bytes.NewReader(body))
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API PutTask", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestDeleteTask(t *testing.T) {
taskId := "1234"
var resp Response
w := httptest.NewRecorder()
req, _ := http.NewRequest("DELETE", "/tasks/"+taskId, nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("unmarshal resp failed")
}
Convey("Test API DeleteTask", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
})
})
}
func TestGetTaskResults(t *testing.T) {
//var teskListResultFrom = TaskResultsRequestData{
// PageNum: 2,
// PageSize: 1,
//}
taskId := "1234"
var resp ListResponse
w := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/tasks/"+taskId+"/results?PageNum=2&PageSize=1", nil)
app.ServeHTTP(w, req)
err := json.Unmarshal(w.Body.Bytes(), &resp)
if err != nil {
t.Fatal("Unmarshal resp failed")
}
Convey("Test API GetNodeList", t, func() {
Convey("Test response status", func() {
So(resp.Status, ShouldEqual, "ok")
So(resp.Message, ShouldEqual, "success")
So(resp.Total, ShouldEqual, 2)
})
})
}

View File

@@ -1 +0,0 @@
package mock

View File

@@ -1,24 +0,0 @@
package mock
import (
"github.com/gin-gonic/gin"
"runtime/debug"
)
func HandleError(statusCode int, c *gin.Context, err error) {
debug.PrintStack()
c.JSON(statusCode, Response{
Status: "ok",
Message: "error",
Error: err.Error(),
})
}
func HandleErrorF(statusCode int, c *gin.Context, err string) {
debug.PrintStack()
c.JSON(statusCode, Response{
Status: "ok",
Message: "error",
Error: err,
})
}

View File

@@ -1,162 +0,0 @@
package model
import (
"crawlab/constants"
"crawlab/database"
"github.com/apex/log"
"github.com/globalsign/mgo/bson"
"runtime/debug"
"time"
)
type Action struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
UserId bson.ObjectId `json:"user_id" bson:"user_id"`
Type string `json:"type" bson:"type"`
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
}
func (a *Action) Save() error {
s, c := database.GetCol("actions")
defer s.Close()
a.UpdateTs = time.Now()
if err := c.UpdateId(a.Id, a); err != nil {
debug.PrintStack()
return err
}
return nil
}
func (a *Action) Add() error {
s, c := database.GetCol("actions")
defer s.Close()
a.Id = bson.NewObjectId()
a.UpdateTs = time.Now()
a.CreateTs = time.Now()
if err := c.Insert(a); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return err
}
return nil
}
func GetAction(id bson.ObjectId) (Action, error) {
s, c := database.GetCol("actions")
defer s.Close()
var user Action
if err := c.Find(bson.M{"_id": id}).One(&user); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return user, err
}
return user, nil
}
func GetActionList(filter interface{}, skip int, limit int, sortKey string) ([]Action, error) {
s, c := database.GetCol("actions")
defer s.Close()
var actions []Action
if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&actions); err != nil {
debug.PrintStack()
return actions, err
}
return actions, nil
}
func GetActionListTotal(filter interface{}) (int, error) {
s, c := database.GetCol("actions")
defer s.Close()
var result int
result, err := c.Find(filter).Count()
if err != nil {
return result, err
}
return result, nil
}
func GetVisitDays(uid bson.ObjectId) (int, error) {
type ResData struct {
Days int `json:"days" bson:"days"`
}
s, c := database.GetCol("actions")
defer s.Close()
pipeline := []bson.M{
{
"$match": bson.M{
"user_id": uid,
"type": constants.ActionTypeVisit,
},
},
{
"$addFields": bson.M{
"date": bson.M{
"$dateToString": bson.M{
"format": "%Y%m%d",
"date": "$create_ts",
"timezone": "Asia/Shanghai",
},
},
},
},
{
"$group": bson.M{
"_id": "$date",
},
},
{
"$group": bson.M{
"_id": nil,
"days": bson.M{"$sum": 1},
},
},
}
var resData []ResData
if err := c.Pipe(pipeline).All(&resData); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return 0, err
}
if len(resData) == 0 {
return 0, nil
}
return resData[0].Days, nil
}
func UpdateAction(id bson.ObjectId, item Action) error {
s, c := database.GetCol("actions")
defer s.Close()
var result Action
if err := c.FindId(id).One(&result); err != nil {
debug.PrintStack()
return err
}
if err := item.Save(); err != nil {
return err
}
return nil
}
func RemoveAction(id bson.ObjectId) error {
s, c := database.GetCol("actions")
defer s.Close()
var result Action
if err := c.FindId(id).One(&result); err != nil {
return err
}
if err := c.RemoveId(id); err != nil {
return err
}
return nil
}

View File

@@ -1,12 +0,0 @@
package model
type Base struct {
}
func (b *Base) Save() error {
return nil
}
func (b *Base) Delete() error {
return nil
}

View File

@@ -1,187 +0,0 @@
package model
import (
"crawlab/database"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"runtime/debug"
"time"
)
type Challenge struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
Name string `json:"name" bson:"name"`
TitleCn string `json:"title_cn" bson:"title_cn"`
TitleEn string `json:"title_en" bson:"title_en"`
DescriptionCn string `json:"description_cn" bson:"description_cn"`
DescriptionEn string `json:"description_en" bson:"description_en"`
Difficulty int `json:"difficulty" bson:"difficulty"`
Path string `json:"path" bson:"path"`
// For frontend display
Achieved bool `json:"achieved" bson:"achieved"`
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
}
func (ch *Challenge) Save() error {
s, c := database.GetCol("challenges")
defer s.Close()
ch.UpdateTs = time.Now()
if err := c.UpdateId(ch.Id, ch); err != nil {
debug.PrintStack()
return err
}
return nil
}
func (ch *Challenge) Add() error {
s, c := database.GetCol("challenges")
defer s.Close()
ch.Id = bson.NewObjectId()
ch.UpdateTs = time.Now()
ch.CreateTs = time.Now()
if err := c.Insert(ch); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return err
}
return nil
}
func GetChallenge(id bson.ObjectId) (Challenge, error) {
s, c := database.GetCol("challenges")
defer s.Close()
var ch Challenge
if err := c.Find(bson.M{"_id": id}).One(&ch); err != nil {
if err != mgo.ErrNotFound {
log.Errorf(err.Error())
debug.PrintStack()
return ch, err
}
}
return ch, nil
}
func GetChallengeByName(name string) (Challenge, error) {
s, c := database.GetCol("challenges")
defer s.Close()
var ch Challenge
if err := c.Find(bson.M{"name": name}).One(&ch); err != nil {
if err != mgo.ErrNotFound {
log.Errorf(err.Error())
debug.PrintStack()
return ch, err
}
}
return ch, nil
}
func GetChallengeList(filter interface{}, skip int, limit int, sortKey string) ([]Challenge, error) {
s, c := database.GetCol("challenges")
defer s.Close()
var challenges []Challenge
if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&challenges); err != nil {
debug.PrintStack()
return challenges, err
}
return challenges, nil
}
func GetChallengeListWithAchieved(filter interface{}, skip int, limit int, sortKey string, uid bson.ObjectId) ([]Challenge, error) {
challenges, err := GetChallengeList(filter, skip, limit, sortKey)
if err != nil {
return challenges, err
}
for i, ch := range challenges {
query := bson.M{
"user_id": uid,
"challenge_id": ch.Id,
}
list, err := GetChallengeAchievementList(query, 0, 1, "-_id")
if err != nil {
continue
}
challenges[i].Achieved = len(list) > 0
}
return challenges, nil
}
func GetChallengeListTotal(filter interface{}) (int, error) {
s, c := database.GetCol("challenges")
defer s.Close()
var result int
result, err := c.Find(filter).Count()
if err != nil {
return result, err
}
return result, nil
}
type ChallengeAchievement struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
ChallengeId bson.ObjectId `json:"challenge_id" bson:"challenge_id"`
UserId bson.ObjectId `json:"user_id" bson:"user_id"`
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
}
func (ca *ChallengeAchievement) Save() error {
s, c := database.GetCol("challenges_achievements")
defer s.Close()
ca.UpdateTs = time.Now()
if err := c.UpdateId(ca.Id, ca); err != nil {
debug.PrintStack()
return err
}
return nil
}
func (ca *ChallengeAchievement) Add() error {
s, c := database.GetCol("challenges_achievements")
defer s.Close()
ca.Id = bson.NewObjectId()
ca.UpdateTs = time.Now()
ca.CreateTs = time.Now()
if err := c.Insert(ca); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return err
}
return nil
}
func GetChallengeAchievementList(filter interface{}, skip int, limit int, sortKey string) ([]ChallengeAchievement, error) {
s, c := database.GetCol("challenges_achievements")
defer s.Close()
var challengeAchievements []ChallengeAchievement
if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&challengeAchievements); err != nil {
debug.PrintStack()
return challengeAchievements, err
}
return challengeAchievements, nil
}

View File

@@ -1,26 +0,0 @@
package config_spider
import "crawlab/entity"
func GetAllFields(data entity.ConfigSpiderData) []entity.Field {
var fields []entity.Field
for _, stage := range data.Stages {
fields = append(fields, stage.Fields...)
}
return fields
}
func GetStartStageName(data entity.ConfigSpiderData) string {
// If start_stage is set, return it
if data.StartStage != "" {
return data.StartStage
}
// Otherwise return the first stage
for _, stage := range data.Stages {
return stage.Name
}
return ""
}

View File

@@ -1,263 +0,0 @@
package config_spider
import (
"crawlab/constants"
"crawlab/entity"
"crawlab/model"
"crawlab/utils"
"errors"
"fmt"
"path/filepath"
)
type ScrapyGenerator struct {
Spider model.Spider
ConfigData entity.ConfigSpiderData
}
// Generate spider files
func (g ScrapyGenerator) Generate() error {
// Generate items.py
if err := g.ProcessItems(); err != nil {
return err
}
// Generate spider.py
if err := g.ProcessSpider(); err != nil {
return err
}
return nil
}
// Generate items.py
func (g ScrapyGenerator) ProcessItems() error {
// Path of the file to process
src := g.Spider.Src
filePath := filepath.Join(src, "config_spider", "items.py")
// Get all fields
fields := g.GetAllFields()
// Field name list (including default field names)
fieldNames := []string{
"_id",
"task_id",
"ts",
}
// Append the configured fields
for _, field := range fields {
fieldNames = append(fieldNames, field.Name)
}
// Convert the field names into Python code
str := ""
for _, fieldName := range fieldNames {
line := g.PadCode(fmt.Sprintf("%s = scrapy.Field()", fieldName), 1)
str += line
}
// Replace the placeholder with the generated code
if err := utils.SetFileVariable(filePath, constants.AnchorItems, str); err != nil {
return err
}
return nil
}
// Generate spider.py
func (g ScrapyGenerator) ProcessSpider() error {
// Path of the file to process
src := g.Spider.Src
filePath := filepath.Join(src, "config_spider", "spiders", "spider.py")
// Replace start_stage
if err := utils.SetFileVariable(filePath, constants.AnchorStartStage, "parse_"+GetStartStageName(g.ConfigData)); err != nil {
return err
}
// Replace start_url
if err := utils.SetFileVariable(filePath, constants.AnchorStartUrl, g.ConfigData.StartUrl); err != nil {
return err
}
// Replace parsers
strParser := ""
for _, stage := range g.ConfigData.Stages {
stageName := stage.Name
stageStr := g.GetParserString(stageName, stage)
strParser += stageStr
}
if err := utils.SetFileVariable(filePath, constants.AnchorParsers, strParser); err != nil {
return err
}
return nil
}
func (g ScrapyGenerator) GetParserString(stageName string, stage entity.Stage) string {
// Build the parse function definition line
strDef := g.PadCode(fmt.Sprintf("def parse_%s(self, response):", stageName), 1)
strParse := ""
if stage.IsList {
// List logic
strParse = g.GetListParserString(stageName, stage)
} else {
// Non-list logic
strParse = g.GetNonListParserString(stageName, stage)
}
// Assemble
str := fmt.Sprintf(`%s%s`, strDef, strParse)
return str
}
func (g ScrapyGenerator) PadCode(str string, num int) string {
res := ""
for i := 0; i < num; i++ {
res += " "
}
res += str
res += "\n"
return res
}
func (g ScrapyGenerator) GetNonListParserString(stageName string, stage entity.Stage) string {
str := ""
// Get or construct the item
str += g.PadCode("item = Item() if response.meta.get('item') is None else response.meta.get('item')", 2)
// Iterate over the field list
for _, f := range stage.Fields {
line := fmt.Sprintf(`item['%s'] = response.%s.extract_first()`, f.Name, g.GetExtractStringFromField(f))
line = g.PadCode(line, 2)
str += line
}
// next stage field
if f, err := g.GetNextStageField(stage); err == nil {
// If a next stage field is found, issue the next callback
str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, item['%s']), callback=self.parse_%s, meta={'item': item})`, f.Name, f.NextStage), 2)
} else {
// If no next stage field is found, yield the item
str += g.PadCode(`yield item`, 2)
}
// Append a trailing newline
str += g.PadCode("", 0)
return str
}
func (g ScrapyGenerator) GetListParserString(stageName string, stage entity.Stage) string {
str := ""
// Get the item from the previous stage
str += g.PadCode(`prev_item = response.meta.get('item')`, 2)
// Loop over the list elements
str += g.PadCode(fmt.Sprintf(`for elem in response.%s:`, g.GetListString(stage)), 2)
// Construct the item
str += g.PadCode(`item = Item()`, 3)
// Iterate over the field list
for _, f := range stage.Fields {
line := fmt.Sprintf(`item['%s'] = elem.%s.extract_first()`, f.Name, g.GetExtractStringFromField(f))
line = g.PadCode(line, 3)
str += line
}
// Copy values from the previous stage's item into the current item
str += g.PadCode(`if prev_item is not None:`, 3)
str += g.PadCode(`for key, value in prev_item.items():`, 4)
str += g.PadCode(`item[key] = value`, 5)
// next stage field
if f, err := g.GetNextStageField(stage); err == nil {
// If the url is empty, do not enter the next stage
str += g.PadCode(fmt.Sprintf(`if not item['%s']:`, f.Name), 3)
str += g.PadCode(`continue`, 4)
// If a next stage field is found, issue the next callback
str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, item['%s']), callback=self.parse_%s, meta={'item': item})`, f.Name, f.NextStage), 3)
} else {
// If no next stage field is found, yield the item
str += g.PadCode(`yield item`, 3)
}
// Pagination
if stage.PageCss != "" || stage.PageXpath != "" {
str += g.PadCode(fmt.Sprintf(`next_url = response.%s.extract_first()`, g.GetExtractStringFromStage(stage)), 2)
str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_%s, meta={'item': prev_item})`, stageName), 2)
}
// Append a trailing newline
str += g.PadCode("", 0)
return str
}
// Get all fields
func (g ScrapyGenerator) GetAllFields() []entity.Field {
return GetAllFields(g.ConfigData)
}
// Get the field that points to a next stage
func (g ScrapyGenerator) GetNextStageField(stage entity.Stage) (entity.Field, error) {
for _, field := range stage.Fields {
if field.NextStage != "" {
return field, nil
}
}
return entity.Field{}, errors.New("cannot find next stage field")
}
func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string {
if f.Css != "" {
// CSS selector
if f.Attr == "" {
// Text
return fmt.Sprintf(`css('%s::text')`, f.Css)
} else {
// Attribute
return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr)
}
} else {
// XPath selector
if f.Attr == "" {
// Text
return fmt.Sprintf(`xpath('string(%s)')`, f.Xpath)
} else {
// Attribute
return fmt.Sprintf(`xpath('%s/@%s')`, f.Xpath, f.Attr)
}
}
}
func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string {
// Pagination element attribute, defaults to href
pageAttr := "href"
if stage.PageAttr != "" {
pageAttr = stage.PageAttr
}
if stage.PageCss != "" {
// CSS selector
return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr)
} else {
// XPath selector
return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr)
}
}
func (g ScrapyGenerator) GetListString(stage entity.Stage) string {
if stage.ListCss != "" {
return fmt.Sprintf(`css('%s')`, stage.ListCss)
} else {
return fmt.Sprintf(`xpath('%s')`, stage.ListXpath)
}
}
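For reference, a minimal, hypothetical sketch of how the generator above might be driven; the function name is illustrative, and it relies on GetConfigSpiderData from the model package shown later in this diff.

// Hypothetical driver: load the configurable spider's Spiderfile via the model
// package and feed it to ScrapyGenerator. Assumes spider.Src already contains
// the config_spider template files whose anchors are replaced above.
func generateFromSpider(spider model.Spider) error {
    configData, err := model.GetConfigSpiderData(spider) // parses <spider.Src>/Spiderfile
    if err != nil {
        return err
    }
    g := ScrapyGenerator{Spider: spider, ConfigData: configData}
    return g.Generate() // writes items.py and spiders/spider.py under spider.Src
}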

View File

@@ -1,78 +0,0 @@
package model
import (
"crawlab/database"
"crawlab/utils"
"github.com/apex/log"
"github.com/globalsign/mgo/bson"
"os"
"runtime/debug"
"time"
)
type GridFs struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
ChunkSize int32 `json:"chunk_size" bson:"chunkSize"`
UploadDate time.Time `json:"upload_date" bson:"uploadDate"`
Length int32 `json:"length" bson:"length"`
Md5 string `json:"md_5" bson:"md5"`
Filename string `json:"filename" bson:"filename"`
}
type File struct {
Name string `json:"name"`
Path string `json:"path"`
RelativePath string `json:"relative_path"`
IsDir bool `json:"is_dir"`
Size int64 `json:"size"`
Children []File `json:"children"`
Label string `json:"label"`
}
func (f *GridFs) Remove() {
s, gf := database.GetGridFs("files")
defer s.Close()
if err := gf.RemoveId(f.Id); err != nil {
log.Errorf("remove file id error: %s, id: %s", err.Error(), f.Id.Hex())
debug.PrintStack()
}
}
func GetAllGridFs() []*GridFs {
s, gf := database.GetGridFs("files")
defer s.Close()
var files []*GridFs
if err := gf.Find(nil).All(&files); err != nil {
log.Errorf("get all files error: {}", err.Error())
debug.PrintStack()
return nil
}
return files
}
func GetGridFs(id bson.ObjectId) *GridFs {
s, gf := database.GetGridFs("files")
defer s.Close()
var gfFile GridFs
err := gf.Find(bson.M{"_id": id}).One(&gfFile)
if err != nil {
log.Errorf("get gf file error: %s, file_id: %s", err.Error(), id.Hex())
debug.PrintStack()
return nil
}
return &gfFile
}
func RemoveFile(path string) error {
if !utils.Exists(path) {
log.Info("file not found: " + path)
debug.PrintStack()
return nil
}
if err := os.RemoveAll(path); err != nil {
return err
}
return nil
}

View File

@@ -1,167 +0,0 @@
package model
import (
"crawlab/database"
"crawlab/utils"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"os"
"runtime/debug"
"time"
)
type LogItem struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
Message string `json:"msg" bson:"msg"`
TaskId string `json:"task_id" bson:"task_id"`
Seq int64 `json:"seq" bson:"seq"`
Ts time.Time `json:"ts" bson:"ts"`
ExpireTs time.Time `json:"expire_ts" bson:"expire_ts"`
}
type ErrorLogItem struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
TaskId string `json:"task_id" bson:"task_id"`
Message string `json:"msg" bson:"msg"`
LogId bson.ObjectId `json:"log_id" bson:"log_id"`
Seq int64 `json:"seq" bson:"seq"`
Ts time.Time `json:"ts" bson:"ts"`
ExpireTs time.Time `json:"expire_ts" bson:"expire_ts"`
}
// Get the local log file
func GetLocalLog(logPath string) (fileBytes []byte, err error) {
f, err := os.Open(logPath)
if err != nil {
log.Error(err.Error())
debug.PrintStack()
return nil, err
}
fi, err := f.Stat()
if err != nil {
log.Error(err.Error())
debug.PrintStack()
return nil, err
}
defer utils.Close(f)
const bufLen = 2 * 1024 * 1024
logBuf := make([]byte, bufLen)
off := int64(0)
if fi.Size() > int64(len(logBuf)) {
off = fi.Size() - int64(len(logBuf))
}
n, err := f.ReadAt(logBuf, off)
// Reading to the end of the file returns EOF
if err != nil && err.Error() != "EOF" {
log.Error(err.Error())
debug.PrintStack()
return nil, err
}
logBuf = logBuf[:n]
return logBuf, nil
}
func AddLogItem(l LogItem) error {
s, c := database.GetCol("logs")
defer s.Close()
if err := c.Insert(l); err != nil {
log.Errorf("insert log error: " + err.Error())
debug.PrintStack()
return err
}
return nil
}
func AddLogItems(ls []LogItem) error {
if len(ls) == 0 {
return nil
}
s, c := database.GetCol("logs")
defer s.Close()
var docs []interface{}
for _, l := range ls {
docs = append(docs, l)
}
if err := c.Insert(docs...); err != nil {
log.Errorf("insert log error: " + err.Error())
debug.PrintStack()
return err
}
return nil
}
func AddErrorLogItem(e ErrorLogItem) error {
s, c := database.GetCol("error_logs")
defer s.Close()
var l LogItem
err := c.Find(bson.M{"log_id": e.LogId}).One(&l)
if err != nil && err == mgo.ErrNotFound {
if err := c.Insert(e); err != nil {
log.Errorf("insert log error: " + err.Error())
debug.PrintStack()
return err
}
}
return nil
}
func GetLogItemList(query bson.M, keyword string, skip int, limit int, sortStr string) ([]LogItem, error) {
s, c := database.GetCol("logs")
defer s.Close()
filter := query
var logItems []LogItem
if keyword == "" {
filter["seq"] = bson.M{
"$gte": skip,
"$lt": skip + limit,
}
if err := c.Find(filter).Sort(sortStr).All(&logItems); err != nil {
debug.PrintStack()
return logItems, err
}
} else {
filter["msg"] = bson.M{
"$regex": bson.RegEx{
Pattern: keyword,
Options: "i",
},
}
if err := c.Find(filter).Sort(sortStr).Skip(skip).Limit(limit).All(&logItems); err != nil {
debug.PrintStack()
return logItems, err
}
}
return logItems, nil
}
func GetLogItemTotal(query bson.M, keyword string) (int, error) {
s, c := database.GetCol("logs")
defer s.Close()
filter := query
if keyword != "" {
filter["msg"] = bson.M{
"$regex": bson.RegEx{
Pattern: keyword,
Options: "i",
},
}
}
total, err := c.Find(filter).Count()
if err != nil {
debug.PrintStack()
return total, err
}
return total, nil
}
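For context, a minimal, hypothetical sketch of how the log helpers above might be queried for a task; the task id and paging values are illustrative.

// Hypothetical caller: fetch the first page of log lines for a task plus the
// total line count. An empty keyword means no regex filter, so skip/limit are
// applied to the seq range as in GetLogItemList above.
func getTaskLogsPage(taskId string) ([]LogItem, int, error) {
    query := bson.M{"task_id": taskId}
    items, err := GetLogItemList(query, "", 0, 1000, "seq")
    if err != nil {
        return nil, 0, err
    }
    total, err := GetLogItemTotal(bson.M{"task_id": taskId}, "")
    if err != nil {
        return nil, 0, err
    }
    return items, total, nil
}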

View File

@@ -1,4 +0,0 @@
package market
type Repo struct {
}

View File

@@ -1,232 +0,0 @@
package model
import (
"crawlab/constants"
"crawlab/database"
"errors"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"github.com/spf13/viper"
"runtime/debug"
"time"
)
type Node struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
Name string `json:"name" bson:"name"`
Status string `json:"status" bson:"status"`
Ip string `json:"ip" bson:"ip"`
Port string `json:"port" bson:"port"`
Mac string `json:"mac" bson:"mac"`
Hostname string `json:"hostname" bson:"hostname"`
Description string `json:"description" bson:"description"`
// Used to uniquely identify a node; may be the MAC address or the IP address
Key string `json:"key" bson:"key"`
// For frontend display
IsMaster bool `json:"is_master" bson:"is_master"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTsUnix int64 `json:"update_ts_unix" bson:"update_ts_unix"`
}
const (
Yes = "Y"
)
// Whether the current node is the master node
func IsMaster() bool {
return viper.GetString("server.master") == Yes
}
func (n *Node) Save() error {
s, c := database.GetCol("nodes")
defer s.Close()
n.UpdateTs = time.Now()
if err := c.UpdateId(n.Id, n); err != nil {
return err
}
return nil
}
func (n *Node) Add() error {
s, c := database.GetCol("nodes")
defer s.Close()
n.Id = bson.NewObjectId()
n.UpdateTs = time.Now()
n.UpdateTsUnix = time.Now().Unix()
n.CreateTs = time.Now()
if err := c.Insert(&n); err != nil {
debug.PrintStack()
return err
}
return nil
}
func (n *Node) Delete() error {
s, c := database.GetCol("nodes")
defer s.Close()
if err := c.RemoveId(n.Id); err != nil {
debug.PrintStack()
return err
}
return nil
}
func (n *Node) GetTasks() ([]Task, error) {
tasks, err := GetTaskList(bson.M{"node_id": n.Id}, 0, 10, "-create_ts")
//tasks, err := GetTaskList(nil, 0, 10, "-create_ts")
if err != nil {
debug.PrintStack()
return []Task{}, err
}
return tasks, nil
}
// Get node list
func GetNodeList(filter interface{}) ([]Node, error) {
s, c := database.GetCol("nodes")
defer s.Close()
var results []Node
if err := c.Find(filter).All(&results); err != nil {
log.Error("get node list error: " + err.Error())
debug.PrintStack()
return results, err
}
return results, nil
}
// Get node info
func GetNode(id bson.ObjectId) (Node, error) {
var node Node
if id.Hex() == "" {
log.Infof("id is empty")
debug.PrintStack()
return node, errors.New("id is empty")
}
s, c := database.GetCol("nodes")
defer s.Close()
if err := c.FindId(id).One(&node); err != nil {
//log.Errorf("get node error: %s, id: %s", err.Error(), id.Hex())
//debug.PrintStack()
return node, err
}
return node, nil
}
// Get node info by key
func GetNodeByKey(key string) (Node, error) {
s, c := database.GetCol("nodes")
defer s.Close()
var node Node
if err := c.Find(bson.M{"key": key}).One(&node); err != nil {
if err != mgo.ErrNotFound {
log.Errorf(err.Error())
debug.PrintStack()
}
return node, err
}
return node, nil
}
// Update node
func UpdateNode(id bson.ObjectId, item Node) error {
s, c := database.GetCol("nodes")
defer s.Close()
var node Node
if err := c.FindId(id).One(&node); err != nil {
return err
}
if err := item.Save(); err != nil {
return err
}
return nil
}
// Get the node's task list
func GetNodeTaskList(id bson.ObjectId) ([]Task, error) {
node, err := GetNode(id)
if err != nil {
return []Task{}, err
}
tasks, err := node.GetTasks()
if err != nil {
return []Task{}, err
}
return tasks, nil
}
// Node count
func GetNodeCount(query interface{}) (int, error) {
s, c := database.GetCol("nodes")
defer s.Close()
count, err := c.Find(query).Count()
if err != nil {
return 0, err
}
return count, nil
}
// Reset nodes to offline based on the keys present in Redis
func ResetNodeStatusToOffline(list []string) {
nodes, _ := GetNodeList(nil)
for _, node := range nodes {
hasNode := false
for _, key := range list {
if key == node.Key {
hasNode = true
break
}
}
if !hasNode || node.Status == "" {
node.Status = constants.StatusOffline
if err := node.Save(); err != nil {
log.Errorf(err.Error())
return
}
continue
}
}
}
func UpdateMasterNodeInfo(key string, ip string, mac string, hostname string) error {
s, c := database.GetCol("nodes")
defer s.Close()
if _, err := c.UpdateAll(bson.M{
"is_master": true,
}, bson.M{
"$set": bson.M{"is_master": false},
}); err != nil {
log.Errorf(err.Error())
}
_, err := c.Upsert(bson.M{
"key": key,
}, bson.M{
"$set": bson.M{
"ip": ip,
"port": "8000",
"mac": mac,
"hostname": hostname,
"is_master": true,
"update_ts": time.Now(),
"update_ts_unix": time.Now().Unix(),
},
"$setOnInsert": bson.M{
"key": key,
"name": key,
"create_ts": time.Now(),
"_id": bson.NewObjectId(),
},
})
return err
}
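For context, a minimal, hypothetical sketch of how a master node might register itself using the helpers above; the key, ip, mac and hostname values come from the caller.

// Hypothetical startup hook for the master node: upserts its own record and
// marks it as master via UpdateMasterNodeInfo defined above.
func registerMasterNode(key, ip, mac, hostname string) error {
    if !IsMaster() { // server.master must be "Y" in the config
        return nil
    }
    return UpdateMasterNodeInfo(key, ip, mac, hostname)
}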

View File

@@ -1,50 +0,0 @@
package model
import (
"crawlab/config"
"crawlab/constants"
"crawlab/database"
"github.com/apex/log"
. "github.com/smartystreets/goconvey/convey"
"runtime/debug"
"testing"
)
func TestAddNode(t *testing.T) {
Convey("Test AddNode", t, func() {
if err := config.InitConfig("../conf/config.yml"); err != nil {
log.Error("init config error:" + err.Error())
panic(err)
}
log.Info("初始化配置成功")
// 初始化Mongodb数据库
if err := database.InitMongo(); err != nil {
log.Error("init mongodb error:" + err.Error())
debug.PrintStack()
panic(err)
}
log.Info("初始化Mongodb数据库成功")
// 初始化Redis数据库
if err := database.InitRedis(); err != nil {
log.Error("init redis error:" + err.Error())
debug.PrintStack()
panic(err)
}
var node = Node{
Key: "c4:b3:01:bd:b5:e7",
Name: "10.27.238.101",
Ip: "10.27.238.101",
Port: "8000",
Mac: "c4:b3:01:bd:b5:e7",
Status: constants.StatusOnline,
IsMaster: true,
}
if err := node.Add(); err != nil {
log.Error("add node error:" + err.Error())
panic(err)
}
})
}

View File

@@ -1,167 +0,0 @@
package model
import (
"crawlab/constants"
"crawlab/database"
"github.com/apex/log"
"github.com/globalsign/mgo/bson"
"runtime/debug"
"time"
)
type Project struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
Name string `json:"name" bson:"name"`
Description string `json:"description" bson:"description"`
Tags []string `json:"tags" bson:"tags"`
// For frontend display
Spiders []Spider `json:"spiders" bson:"spiders"`
Username string `json:"username" bson:"username"`
UserId bson.ObjectId `json:"user_id" bson:"user_id"`
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
}
func (p *Project) Save() error {
s, c := database.GetCol("projects")
defer s.Close()
p.UpdateTs = time.Now()
if err := c.UpdateId(p.Id, p); err != nil {
debug.PrintStack()
return err
}
return nil
}
func (p *Project) Add() error {
s, c := database.GetCol("projects")
defer s.Close()
p.Id = bson.NewObjectId()
p.UpdateTs = time.Now()
p.CreateTs = time.Now()
if err := c.Insert(p); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return err
}
return nil
}
func (p *Project) GetSpiders() ([]Spider, error) {
s, c := database.GetCol("spiders")
defer s.Close()
var query interface{}
if p.Id.Hex() == constants.ObjectIdNull {
query = bson.M{
"$or": []bson.M{
{"project_id": p.Id},
{"project_id": bson.M{"$exists": false}},
},
}
} else {
query = bson.M{"project_id": p.Id}
}
var spiders []Spider
if err := c.Find(query).All(&spiders); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return spiders, err
}
return spiders, nil
}
func GetProject(id bson.ObjectId) (Project, error) {
s, c := database.GetCol("projects")
defer s.Close()
var p Project
if err := c.Find(bson.M{"_id": id}).One(&p); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return p, err
}
return p, nil
}
func GetProjectList(filter interface{}, sortKey string) ([]Project, error) {
s, c := database.GetCol("projects")
defer s.Close()
var projects []Project
if err := c.Find(filter).Sort(sortKey).All(&projects); err != nil {
debug.PrintStack()
return projects, err
}
for i, p := range projects {
// Get the username
user, _ := GetUser(p.UserId)
projects[i].Username = user.Username
}
return projects, nil
}
func GetProjectListTotal(filter interface{}) (int, error) {
s, c := database.GetCol("projects")
defer s.Close()
var result int
result, err := c.Find(filter).Count()
if err != nil {
return result, err
}
return result, nil
}
func UpdateProject(id bson.ObjectId, item Project) error {
s, c := database.GetCol("projects")
defer s.Close()
var result Project
if err := c.FindId(id).One(&result); err != nil {
debug.PrintStack()
return err
}
if err := item.Save(); err != nil {
return err
}
return nil
}
func RemoveProject(id bson.ObjectId) error {
s, c := database.GetCol("projects")
defer s.Close()
var result Project
if err := c.FindId(id).One(&result); err != nil {
return err
}
if err := c.RemoveId(id); err != nil {
return err
}
return nil
}
func GetProjectCount(filter interface{}) (int, error) {
s, c := database.GetCol("projects")
defer s.Close()
count, err := c.Find(filter).Count()
if err != nil {
return 0, err
}
return count, nil
}

View File

@@ -1,177 +0,0 @@
package model
import (
"crawlab/constants"
"crawlab/database"
"crawlab/lib/cron"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"runtime/debug"
"time"
)
type Schedule struct {
Id bson.ObjectId `json:"_id" bson:"_id"`
Name string `json:"name" bson:"name"`
Description string `json:"description" bson:"description"`
SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"`
Cron string `json:"cron" bson:"cron"`
EntryId cron.EntryID `json:"entry_id" bson:"entry_id"`
Param string `json:"param" bson:"param"`
RunType string `json:"run_type" bson:"run_type"`
NodeIds []bson.ObjectId `json:"node_ids" bson:"node_ids"`
Status string `json:"status" bson:"status"`
Enabled bool `json:"enabled" bson:"enabled"`
UserId bson.ObjectId `json:"user_id" bson:"user_id"`
ScrapySpider string `json:"scrapy_spider" bson:"scrapy_spider"`
ScrapyLogLevel string `json:"scrapy_log_level" bson:"scrapy_log_level"`
// For frontend display
SpiderName string `json:"spider_name" bson:"spider_name"`
Username string `json:"user_name" bson:"user_name"`
Nodes []Node `json:"nodes" bson:"nodes"`
Message string `json:"message" bson:"message"`
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
}
func (sch *Schedule) Save() error {
s, c := database.GetCol("schedules")
defer s.Close()
sch.UpdateTs = time.Now()
if err := c.UpdateId(sch.Id, sch); err != nil {
return err
}
return nil
}
func (sch *Schedule) Delete() error {
s, c := database.GetCol("schedules")
defer s.Close()
return c.RemoveId(sch.Id)
}
func GetScheduleList(filter interface{}) ([]Schedule, error) {
s, c := database.GetCol("schedules")
defer s.Close()
var schedules []Schedule
if err := c.Find(filter).All(&schedules); err != nil {
return schedules, err
}
var schs []Schedule
for _, schedule := range schedules {
// Get nodes
schedule.Nodes = []Node{}
if schedule.RunType == constants.RunTypeSelectedNodes {
for _, nodeId := range schedule.NodeIds {
// Selected node
node, err := GetNode(nodeId)
if err != nil {
continue
}
schedule.Nodes = append(schedule.Nodes, node)
}
}
// Get the spider name
spider, err := GetSpider(schedule.SpiderId)
if err != nil {
log.Errorf("get spider by id: %s, error: %s", schedule.SpiderId.Hex(), err.Error())
schedule.Status = constants.ScheduleStatusError
if err == mgo.ErrNotFound {
schedule.Message = constants.ScheduleStatusErrorNotFoundSpider
} else {
schedule.Message = err.Error()
}
} else {
schedule.SpiderName = spider.Name
}
// Get the username
user, _ := GetUser(schedule.UserId)
schedule.Username = user.Username
schs = append(schs, schedule)
}
return schs, nil
}
func GetSchedule(id bson.ObjectId) (Schedule, error) {
s, c := database.GetCol("schedules")
defer s.Close()
var schedule Schedule
if err := c.FindId(id).One(&schedule); err != nil {
return schedule, err
}
// Get the username
user, _ := GetUser(schedule.UserId)
schedule.Username = user.Username
return schedule, nil
}
func UpdateSchedule(id bson.ObjectId, item Schedule) error {
s, c := database.GetCol("schedules")
defer s.Close()
var result Schedule
if err := c.FindId(id).One(&result); err != nil {
return err
}
item.UpdateTs = time.Now()
if err := item.Save(); err != nil {
return err
}
return nil
}
func AddSchedule(item Schedule) error {
s, c := database.GetCol("schedules")
defer s.Close()
item.Id = bson.NewObjectId()
item.CreateTs = time.Now()
item.UpdateTs = time.Now()
if err := c.Insert(&item); err != nil {
debug.PrintStack()
log.Errorf(err.Error())
return err
}
return nil
}
func RemoveSchedule(id bson.ObjectId) error {
s, c := database.GetCol("schedules")
defer s.Close()
var result Schedule
if err := c.FindId(id).One(&result); err != nil {
return err
}
if err := c.RemoveId(id); err != nil {
return err
}
return nil
}
func GetScheduleCount(filter interface{}) (int, error) {
s, c := database.GetCol("schedules")
defer s.Close()
count, err := c.Find(filter).Count()
if err != nil {
return 0, err
}
return count, nil
}

View File

@@ -1,45 +0,0 @@
package model
import (
"crawlab/database"
"github.com/globalsign/mgo/bson"
"time"
)
type Setting struct {
Keyword string
Document bson.Raw
}
func GetRawSetting(keyword string, pointer interface{}) error {
s, col := database.GetCol("settings")
defer s.Close()
var setting Setting
err := col.Find(bson.M{"keyword": keyword}).One(&setting)
if err != nil {
return err
}
return setting.Document.Unmarshal(pointer)
}
type DocumentMeta struct {
DocumentVersion int
DocStructVersion int
UpdateTime time.Time
CreateTime time.Time
DeleteTime time.Time
}
// Demo
type SecuritySetting struct {
EnableRegister bool
EnableInvitation bool
DocumentMeta `bson:"inline" json:"inline"`
}
func GetSecuritySetting() (SecuritySetting, error) {
var app SecuritySetting
err := GetRawSetting("security", &app)
return app, err
}

View File

@@ -1,414 +0,0 @@
package model
import (
"crawlab/constants"
"crawlab/database"
"crawlab/entity"
"crawlab/utils"
"errors"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"gopkg.in/yaml.v2"
"io/ioutil"
"path/filepath"
"runtime/debug"
"time"
)
type Env struct {
Name string `json:"name" bson:"name"`
Value string `json:"value" bson:"value"`
}
type Spider struct {
Id bson.ObjectId `json:"_id" bson:"_id"` // Spider ID
Name string `json:"name" bson:"name"` // Spider name (unique)
DisplayName string `json:"display_name" bson:"display_name"` // Spider display name
Type string `json:"type" bson:"type"` // Spider type
FileId bson.ObjectId `json:"file_id" bson:"file_id"` // GridFS file ID
Col string `json:"col" bson:"col"` // Result storage collection
Site string `json:"site" bson:"site"` // Target website
Envs []Env `json:"envs" bson:"envs"` // Environment variables
Remark string `json:"remark" bson:"remark"` // Remark
Src string `json:"src" bson:"src"` // Source code location
ProjectId bson.ObjectId `json:"project_id" bson:"project_id"` // Project ID
IsPublic bool `json:"is_public" bson:"is_public"` // Whether public
// Customized spider
Cmd string `json:"cmd" bson:"cmd"` // Execution command
// Scrapy spider (a kind of customized spider)
IsScrapy bool `json:"is_scrapy" bson:"is_scrapy"` // Whether it is a Scrapy spider
SpiderNames []string `json:"spider_names" bson:"spider_names"` // List of spider names
// Configurable spider
Template string `json:"template" bson:"template"` // Spiderfile template
// Git settings
IsGit bool `json:"is_git" bson:"is_git"` // Whether it is a Git spider
GitUrl string `json:"git_url" bson:"git_url"` // Git URL
GitBranch string `json:"git_branch" bson:"git_branch"` // Git branch
GitHasCredential bool `json:"git_has_credential" bson:"git_has_credential"` // Whether Git credentials are set
GitUsername string `json:"git_username" bson:"git_username"` // Git username
GitPassword string `json:"git_password" bson:"git_password"` // Git password
GitAutoSync bool `json:"git_auto_sync" bson:"git_auto_sync"` // Whether Git auto-sync is enabled
GitSyncFrequency string `json:"git_sync_frequency" bson:"git_sync_frequency"` // Git sync frequency
GitSyncError string `json:"git_sync_error" bson:"git_sync_error"` // Git sync error
// Long task
IsLongTask bool `json:"is_long_task" bson:"is_long_task"` // Whether it is a long task
// Deduplication
IsDedup bool `json:"is_dedup" bson:"is_dedup"` // Whether deduplication is enabled
DedupField string `json:"dedup_field" bson:"dedup_field"` // Dedup field
DedupMethod string `json:"dedup_method" bson:"dedup_method"` // Dedup method
// Web Hook
IsWebHook bool `json:"is_web_hook" bson:"is_web_hook"` // Whether Web Hook is enabled
WebHookUrl string `json:"web_hook_url" bson:"web_hook_url"` // Web Hook URL
// For frontend display
LastRunTs time.Time `json:"last_run_ts"` // Last run time
LastStatus string `json:"last_status"` // Last run status
Config entity.ConfigSpiderData `json:"config"` // Configurable spider config
LatestTasks []Task `json:"latest_tasks"` // Latest task list
Username string `json:"username"` // Username
ProjectName string `json:"project_name"` // Project name
// Timestamps
UserId bson.ObjectId `json:"user_id" bson:"user_id"`
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
}
// Update spider
func (spider *Spider) Save() error {
s, c := database.GetCol("spiders")
defer s.Close()
spider.UpdateTs = time.Now()
// Compatibility for spiders without a project ID
if spider.ProjectId.Hex() == "" {
spider.ProjectId = bson.ObjectIdHex(constants.ObjectIdNull)
}
if err := c.UpdateId(spider.Id, spider); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return err
}
return nil
}
// Add spider
func (spider *Spider) Add() error {
s, c := database.GetCol("spiders")
defer s.Close()
spider.Id = bson.NewObjectId()
spider.CreateTs = time.Now()
spider.UpdateTs = time.Now()
if !spider.ProjectId.Valid() {
spider.ProjectId = bson.ObjectIdHex(constants.ObjectIdNull)
}
if err := c.Insert(&spider); err != nil {
return err
}
return nil
}
// Get the spider's tasks
func (spider *Spider) GetTasks() ([]Task, error) {
tasks, err := GetTaskList(bson.M{"spider_id": spider.Id}, 0, 10, "-create_ts")
if err != nil {
return tasks, err
}
return tasks, nil
}
// Get the spider's most recent task
func (spider *Spider) GetLastTask() (Task, error) {
tasks, err := GetTaskList(bson.M{"spider_id": spider.Id}, 0, 1, "-create_ts")
if err != nil {
return Task{}, err
}
if tasks == nil {
return Task{}, nil
}
return tasks[0], nil
}
// Get the spider's latest tasks
func (spider *Spider) GetLatestTasks(latestN int) (tasks []Task, err error) {
tasks, err = GetTaskList(bson.M{"spider_id": spider.Id}, 0, latestN, "-create_ts")
if err != nil {
return tasks, err
}
if tasks == nil {
return tasks, err
}
return tasks, nil
}
// Delete spider
func (spider *Spider) Delete() error {
s, c := database.GetCol("spiders")
defer s.Close()
return c.RemoveId(spider.Id)
}
// Get spider list
func GetSpiderList(filter interface{}, skip int, limit int, sortStr string) ([]Spider, int, error) {
s, c := database.GetCol("spiders")
defer s.Close()
// Get spider list
var spiders []Spider
if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortStr).All(&spiders); err != nil {
debug.PrintStack()
return spiders, 0, err
}
if spiders == nil {
spiders = []Spider{}
}
// Iterate over the spider list
for i, spider := range spiders {
// Get the last task
task, err := spider.GetLastTask()
if err != nil {
log.Errorf(err.Error())
debug.PrintStack()
}
// Get the latest tasks
latestTasks, err := spider.GetLatestTasks(50) // TODO: latestN is hard-coded for now; move it to the database later
if err != nil {
log.Errorf(err.Error())
debug.PrintStack()
}
// Get user
var user User
if spider.UserId.Valid() && spider.UserId.Hex() != constants.ObjectIdNull {
user, err = GetUser(spider.UserId)
if err != nil {
log.Errorf(err.Error())
debug.PrintStack()
}
}
// Get project
var project Project
if spider.ProjectId.Valid() && spider.ProjectId.Hex() != constants.ObjectIdNull {
project, err = GetProject(spider.ProjectId)
if err != nil {
if err != mgo.ErrNotFound {
log.Errorf(err.Error())
debug.PrintStack()
}
}
}
// Assign values
spiders[i].LastRunTs = task.CreateTs
spiders[i].LastStatus = task.Status
spiders[i].LatestTasks = latestTasks
spiders[i].Username = user.Username
spiders[i].ProjectName = project.Name
}
count, _ := c.Find(filter).Count()
return spiders, count, nil
}
// Get list of all spiders
func GetSpiderAllList(filter interface{}) (spiders []Spider, err error) {
spiders, _, err = GetSpiderList(filter, 0, constants.Infinite, "_id")
if err != nil {
return spiders, err
}
return spiders, nil
}
// Get spider by FileId
func GetSpiderByFileId(fileId bson.ObjectId) *Spider {
s, c := database.GetCol("spiders")
defer s.Close()
var result *Spider
if err := c.Find(bson.M{"file_id": fileId}).One(&result); err != nil {
log.Errorf("get spider error: %s, file_id: %s", err.Error(), fileId.Hex())
debug.PrintStack()
return nil
}
return result
}
// Get spider by name
func GetSpiderByName(name string) Spider {
s, c := database.GetCol("spiders")
defer s.Close()
var spider Spider
if err := c.Find(bson.M{"name": name}).One(&spider); err != nil && err != mgo.ErrNotFound {
log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name)
//debug.PrintStack()
return spider
}
// Get user
var user User
if spider.UserId.Valid() {
user, _ = GetUser(spider.UserId)
}
spider.Username = user.Username
return spider
}
// Get spider by ID
func GetSpider(id bson.ObjectId) (Spider, error) {
s, c := database.GetCol("spiders")
defer s.Close()
// Get spider
var spider Spider
if err := c.FindId(id).One(&spider); err != nil {
if err != mgo.ErrNotFound {
log.Errorf("get spider error: %s, id: %id", err.Error(), id.Hex())
debug.PrintStack()
}
return spider, err
}
// If it is a configurable spider, load the spider config
if spider.Type == constants.Configurable && utils.Exists(filepath.Join(spider.Src, "Spiderfile")) {
config, err := GetConfigSpiderData(spider)
if err != nil {
return spider, err
}
spider.Config = config
}
// Get the username
var user User
if spider.UserId.Valid() {
user, _ = GetUser(spider.UserId)
}
spider.Username = user.Username
return spider, nil
}
// Update spider
func UpdateSpider(id bson.ObjectId, item Spider) error {
s, c := database.GetCol("spiders")
defer s.Close()
var result Spider
if err := c.FindId(id).One(&result); err != nil {
debug.PrintStack()
return err
}
if err := item.Save(); err != nil {
return err
}
return nil
}
// Delete spider
func RemoveSpider(id bson.ObjectId) error {
s, c := database.GetCol("spiders")
defer s.Close()
var result Spider
if err := c.FindId(id).One(&result); err != nil {
log.Errorf("find spider error: %s, id:%s", err.Error(), id.Hex())
debug.PrintStack()
return err
}
if err := c.RemoveId(id); err != nil {
log.Errorf("remove spider error: %s, id:%s", err.Error(), id.Hex())
debug.PrintStack()
return err
}
// Remove the file stored in GridFS
s, gf := database.GetGridFs("files")
defer s.Close()
if result.FileId.Hex() != constants.ObjectIdNull {
if err := gf.RemoveId(result.FileId); err != nil {
log.Error("remove file error, id:" + result.FileId.Hex())
debug.PrintStack()
}
}
return nil
}
// Delete all spiders
func RemoveAllSpider() error {
s, c := database.GetCol("spiders")
defer s.Close()
var spiders []Spider
err := c.Find(nil).All(&spiders)
if err != nil {
log.Error("get all spiders error:" + err.Error())
return err
}
for _, spider := range spiders {
if err := RemoveSpider(spider.Id); err != nil {
log.Error("remove spider error:" + err.Error())
}
}
return nil
}
// Get total spider count
func GetSpiderCount(filter interface{}) (int, error) {
s, c := database.GetCol("spiders")
defer s.Close()
count, err := c.Find(filter).Count()
if err != nil {
return 0, err
}
return count, nil
}
// Get configurable spider data
func GetConfigSpiderData(spider Spider) (entity.ConfigSpiderData, error) {
// Construct config data
configData := entity.ConfigSpiderData{}
// Validate spider type
if spider.Type != constants.Configurable {
return configData, errors.New("not a configurable spider")
}
// Spiderfile path
sfPath := filepath.Join(spider.Src, "Spiderfile")
// Read the YAML file
yamlFile, err := ioutil.ReadFile(sfPath)
if err != nil {
return configData, err
}
// Deserialize
if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
return configData, err
}
return configData, nil
}

View File

@@ -1,98 +0,0 @@
package model
import (
"crawlab/entity"
"github.com/apex/log"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"runtime/debug"
"strings"
)
var executableNameMap = map[string]string{
// python
"python": "Python",
"python2": "Python 2",
"python2.7": "Python 2.7",
"python3": "Python 3",
"python3.5": "Python 3.5",
"python3.6": "Python 3.6",
"python3.7": "Python 3.7",
"python3.8": "Python 3.8",
// java
"java": "Java",
// go
"go": "Go",
// node
"node": "NodeJS",
// php
"php": "PHP",
// windows command
"cmd": "Windows Command Prompt",
// linux shell
"sh": "Shell",
"bash": "bash",
}
func GetLocalSystemInfo() (sysInfo entity.SystemInfo, err error) {
executables, err := GetExecutables()
if err != nil {
return sysInfo, err
}
hostname, err := os.Hostname()
if err != nil {
debug.PrintStack()
return sysInfo, err
}
return entity.SystemInfo{
ARCH: runtime.GOARCH,
OS: runtime.GOOS,
NumCpu: runtime.GOMAXPROCS(0),
Hostname: hostname,
Executables: executables,
}, nil
}
func GetSystemEnv(key string) string {
return os.Getenv(key)
}
func GetPathValues() (paths []string) {
pathEnv := GetSystemEnv("PATH")
return strings.Split(pathEnv, ":")
}
func GetExecutables() (executables []entity.Executable, err error) {
pathValues := GetPathValues()
cache := map[string]string{}
for _, path := range pathValues {
fileList, err := ioutil.ReadDir(path)
if err != nil {
log.Errorf(err.Error())
debug.PrintStack()
continue
}
for _, file := range fileList {
displayName := executableNameMap[file.Name()]
filePath := filepath.Join(path, file.Name())
if cache[filePath] == "" {
if displayName != "" {
executables = append(executables, entity.Executable{
Path: filePath,
FileName: file.Name(),
DisplayName: displayName,
})
}
cache[filePath] = filePath
}
}
}
return executables, nil
}
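For context, a minimal, hypothetical sketch that logs the executables discovered by GetLocalSystemInfo above; the function name is illustrative.

// Hypothetical caller: log which known interpreters are available on this node.
// Relies only on GetLocalSystemInfo defined above.
func logAvailableExecutables() error {
    sysInfo, err := GetLocalSystemInfo()
    if err != nil {
        return err
    }
    for _, e := range sysInfo.Executables {
        log.Infof("%s (%s): %s", e.DisplayName, e.FileName, e.Path)
    }
    return nil
}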

Some files were not shown because too many files have changed in this diff.