diff --git a/backend/conf/config.yml b/backend/conf/config.yml index bc06935b..d3dc39a3 100644 --- a/backend/conf/config.yml +++ b/backend/conf/config.yml @@ -60,8 +60,4 @@ notification: senderIdentity: '' smtp: user: '' - password: '' -repo: - apiUrl: "https://center.crawlab.cn/api" -# apiUrl: "http://localhost:8002" - ossUrl: "https://repo.crawlab.cn" \ No newline at end of file + password: '' \ No newline at end of file diff --git a/backend/config/config.go b/backend/config/config.go deleted file mode 100644 index 79be808e..00000000 --- a/backend/config/config.go +++ /dev/null @@ -1,57 +0,0 @@ -package config - -import ( - "github.com/fsnotify/fsnotify" - "github.com/spf13/viper" - "log" - "strings" -) - -type Config struct { - Name string -} - -// 监控配置文件变化并热加载程序 -func (c *Config) WatchConfig() { - viper.WatchConfig() - viper.OnConfigChange(func(e fsnotify.Event) { - log.Printf("Config file changed: %s", e.Name) - }) -} - -func (c *Config) Init() error { - if c.Name != "" { - viper.SetConfigFile(c.Name) // 如果指定了配置文件,则解析指定的配置文件 - } else { - viper.AddConfigPath("./conf") // 如果没有指定配置文件,则解析默认的配置文件 - viper.SetConfigName("config") - } - viper.SetConfigType("yaml") // 设置配置文件格式为YAML - viper.AutomaticEnv() // 读取匹配的环境变量 - viper.SetEnvPrefix("CRAWLAB") // 读取环境变量的前缀为CRAWLAB - replacer := strings.NewReplacer(".", "_") - viper.SetEnvKeyReplacer(replacer) - if err := viper.ReadInConfig(); err != nil { // viper解析配置文件 - return err - } - - return nil -} - -func InitConfig(cfg string) error { - c := Config{ - Name: cfg, - } - - // 初始化配置文件 - if err := c.Init(); err != nil { - return err - } - - // 监控配置文件变化并热加载程序 - c.WatchConfig() - - return nil -} - - diff --git a/backend/config/config_test.go b/backend/config/config_test.go deleted file mode 100644 index 0068e6ad..00000000 --- a/backend/config/config_test.go +++ /dev/null @@ -1,16 +0,0 @@ -package config - -import ( - . "github.com/smartystreets/goconvey/convey" - "testing" -) - -func TestInitConfig(t *testing.T) { - Convey("Test InitConfig func", t, func() { - x := InitConfig("../conf/config.yml") - - Convey("The value should be nil", func() { - So(x, ShouldEqual, nil) - }) - }) -} diff --git a/backend/constants/action.go b/backend/constants/action.go deleted file mode 100644 index 389a11bc..00000000 --- a/backend/constants/action.go +++ /dev/null @@ -1,8 +0,0 @@ -package constants - -const ( - ActionTypeVisit = "visit" - ActionTypeInstallDep = "install_dep" - ActionTypeInstallLang = "install_lang" - ActionTypeViewDisclaimer = "view_disclaimer" -) diff --git a/backend/constants/anchor.go b/backend/constants/anchor.go deleted file mode 100644 index f462135f..00000000 --- a/backend/constants/anchor.go +++ /dev/null @@ -1,8 +0,0 @@ -package constants - -const ( - AnchorStartStage = "START_STAGE" - AnchorStartUrl = "START_URL" - AnchorItems = "ITEMS" - AnchorParsers = "PARSERS" -) diff --git a/backend/constants/auth.go b/backend/constants/auth.go deleted file mode 100644 index 136391a0..00000000 --- a/backend/constants/auth.go +++ /dev/null @@ -1,7 +0,0 @@ -package constants - -const ( - OwnerTypeAll = "all" - OwnerTypeMe = "me" - OwnerTypePublic = "public" -) diff --git a/backend/constants/challenge.go b/backend/constants/challenge.go deleted file mode 100644 index 5c056e91..00000000 --- a/backend/constants/challenge.go +++ /dev/null @@ -1,20 +0,0 @@ -package constants - -const ( - ChallengeLogin7d = "login_7d" - ChallengeLogin30d = "login_30d" - ChallengeLogin90d = "login_90d" - ChallengeLogin180d = "login_180d" - ChallengeCreateCustomizedSpider = "create_customized_spider" - ChallengeCreateConfigurableSpider = "create_configurable_spider" - ChallengeCreateSchedule = "create_schedule" - ChallengeCreateNodes = "create_nodes" - ChallengeCreateUser = "create_user" - ChallengeRunRandom = "run_random" - ChallengeScrape1k = "scrape_1k" - ChallengeScrape10k = "scrape_10k" - ChallengeScrape100k = "scrape_100k" - ChallengeInstallDep = "install_dep" - ChallengeInstallLang = "install_lang" - ChallengeViewDisclaimer = "view_disclaimer" -) diff --git a/backend/constants/channels.go b/backend/constants/channels.go deleted file mode 100644 index c38a5ac9..00000000 --- a/backend/constants/channels.go +++ /dev/null @@ -1,9 +0,0 @@ -package constants - -const ( - ChannelAllNode = "nodes:public" - - ChannelWorkerNode = "nodes:" - - ChannelMasterNode = "nodes:master" -) diff --git a/backend/constants/common.go b/backend/constants/common.go deleted file mode 100644 index 9ac6cdbc..00000000 --- a/backend/constants/common.go +++ /dev/null @@ -1,6 +0,0 @@ -package constants - -const ( - ASCENDING = "ascending" - DESCENDING = "descending" -) diff --git a/backend/constants/config_spider.go b/backend/constants/config_spider.go deleted file mode 100644 index c29624dc..00000000 --- a/backend/constants/config_spider.go +++ /dev/null @@ -1,6 +0,0 @@ -package constants - -const ( - EngineScrapy = "scrapy" - EngineColly = "colly" -) diff --git a/backend/constants/context.go b/backend/constants/context.go deleted file mode 100644 index 0759b54b..00000000 --- a/backend/constants/context.go +++ /dev/null @@ -1,5 +0,0 @@ -package constants - -const ( - ContextUser = "currentUser" -) diff --git a/backend/constants/errors.go b/backend/constants/errors.go deleted file mode 100644 index a273cb75..00000000 --- a/backend/constants/errors.go +++ /dev/null @@ -1,13 +0,0 @@ -package constants - -import ( - "crawlab/errors" - "net/http" -) - -var ( - ErrorMongoError = errors.NewSystemOPError(1001, "system error:[mongo]%s", http.StatusInternalServerError) - //users - ErrorUserNotFound = errors.NewBusinessError(10001, "user not found.", http.StatusUnauthorized) - ErrorUsernameOrPasswordInvalid = errors.NewBusinessError(11001, "username or password invalid", http.StatusUnauthorized) -) diff --git a/backend/constants/log.go b/backend/constants/log.go deleted file mode 100644 index 5f0b4a66..00000000 --- a/backend/constants/log.go +++ /dev/null @@ -1,5 +0,0 @@ -package constants - -const ( - ErrorRegexPattern = "(?:[ :,.]|^)((?:error|exception|traceback)s?)(?:[ :,.]|$)" -) diff --git a/backend/constants/message.go b/backend/constants/message.go deleted file mode 100644 index 72e5fab2..00000000 --- a/backend/constants/message.go +++ /dev/null @@ -1,9 +0,0 @@ -package constants - -const ( - MsgTypeGetLog = "get-log" - MsgTypeGetSystemInfo = "get-sys-info" - MsgTypeCancelTask = "cancel-task" - MsgTypeRemoveLog = "remove-log" - MsgTypeRemoveSpider = "remove-spider" -) diff --git a/backend/constants/model.go b/backend/constants/model.go deleted file mode 100644 index da66b15f..00000000 --- a/backend/constants/model.go +++ /dev/null @@ -1,6 +0,0 @@ -package constants - -const ( - ObjectIdNull = "000000000000000000000000" - Infinite = 999999999 -) diff --git a/backend/constants/node.go b/backend/constants/node.go deleted file mode 100644 index 29b0b7c1..00000000 --- a/backend/constants/node.go +++ /dev/null @@ -1,6 +0,0 @@ -package constants - -const ( - StatusOnline = "online" - StatusOffline = "offline" -) diff --git a/backend/constants/notification.go b/backend/constants/notification.go deleted file mode 100644 index cf3da062..00000000 --- a/backend/constants/notification.go +++ /dev/null @@ -1,13 +0,0 @@ -package constants - -const ( - NotificationTriggerOnTaskEnd = "notification_trigger_on_task_end" - NotificationTriggerOnTaskError = "notification_trigger_on_task_error" - NotificationTriggerNever = "notification_trigger_never" -) - -const ( - NotificationTypeMail = "notification_type_mail" - NotificationTypeDingTalk = "notification_type_ding_talk" - NotificationTypeWechat = "notification_type_wechat" -) diff --git a/backend/constants/register.go b/backend/constants/register.go deleted file mode 100644 index 4ed1e396..00000000 --- a/backend/constants/register.go +++ /dev/null @@ -1,8 +0,0 @@ -package constants - -const ( - RegisterTypeMac = "mac" - RegisterTypeIp = "ip" - RegisterTypeHostname = "hostname" - RegisterTypeCustomName = "customName" -) diff --git a/backend/constants/rpc.go b/backend/constants/rpc.go deleted file mode 100644 index 0fd7ad9f..00000000 --- a/backend/constants/rpc.go +++ /dev/null @@ -1,12 +0,0 @@ -package constants - -const ( - RpcInstallLang = "install_lang" - RpcInstallDep = "install_dep" - RpcUninstallDep = "uninstall_dep" - RpcGetInstalledDepList = "get_installed_dep_list" - RpcGetLang = "get_lang" - RpcCancelTask = "cancel_task" - RpcGetSystemInfoService = "get_system_info" - RpcRemoveSpider = "remove_spider" -) diff --git a/backend/constants/schedule.go b/backend/constants/schedule.go deleted file mode 100644 index 520626a9..00000000 --- a/backend/constants/schedule.go +++ /dev/null @@ -1,10 +0,0 @@ -package constants - -const ( - ScheduleStatusStop = "stopped" - ScheduleStatusRunning = "running" - ScheduleStatusError = "error" - - ScheduleStatusErrorNotFoundNode = "Not Found Node" - ScheduleStatusErrorNotFoundSpider = "Not Found Spider" -) diff --git a/backend/constants/scrapy.go b/backend/constants/scrapy.go deleted file mode 100644 index bc82508f..00000000 --- a/backend/constants/scrapy.go +++ /dev/null @@ -1,5 +0,0 @@ -package constants - -const ScrapyProtectedStageNames = "" - -const ScrapyProtectedFieldNames = "_id,task_id,ts" diff --git a/backend/constants/spider.go b/backend/constants/spider.go deleted file mode 100644 index 5119aa67..00000000 --- a/backend/constants/spider.go +++ /dev/null @@ -1,7 +0,0 @@ -package constants - -const ( - Customized = "customized" - Configurable = "configurable" - Plugin = "plugin" -) diff --git a/backend/constants/system.go b/backend/constants/system.go deleted file mode 100644 index 14c45698..00000000 --- a/backend/constants/system.go +++ /dev/null @@ -1,25 +0,0 @@ -package constants - -const ( - Windows = "windows" - Linux = "linux" - Darwin = "darwin" -) - -const ( - Python = "python" - Nodejs = "node" - Java = "java" -) - -const ( - InstallStatusNotInstalled = "not-installed" - InstallStatusInstalling = "installing" - InstallStatusInstallingOther = "installing-other" - InstallStatusInstalled = "installed" -) - -const ( - LangTypeLang = "lang" - LangTypeWebDriver = "webdriver" -) diff --git a/backend/constants/task.go b/backend/constants/task.go deleted file mode 100644 index 08539432..00000000 --- a/backend/constants/task.go +++ /dev/null @@ -1,32 +0,0 @@ -package constants - -const ( - // 调度中 - StatusPending string = "pending" - // 运行中 - StatusRunning string = "running" - // 已完成 - StatusFinished string = "finished" - // 错误 - StatusError string = "error" - // 取消 - StatusCancelled string = "cancelled" - // 节点重启导致的异常终止 - StatusAbnormal string = "abnormal" -) - -const ( - TaskFinish string = "finish" - TaskCancel string = "cancel" -) - -const ( - RunTypeAllNodes string = "all-nodes" - RunTypeRandom string = "random" - RunTypeSelectedNodes string = "selected-nodes" -) - -const ( - TaskTypeSpider string = "spider" - TaskTypeSystem string = "system" -) diff --git a/backend/constants/user.go b/backend/constants/user.go deleted file mode 100644 index bc225c94..00000000 --- a/backend/constants/user.go +++ /dev/null @@ -1,6 +0,0 @@ -package constants - -const ( - RoleAdmin = "admin" - RoleNormal = "normal" -) diff --git a/backend/constants/variable.go b/backend/constants/variable.go deleted file mode 100644 index 713fbe2d..00000000 --- a/backend/constants/variable.go +++ /dev/null @@ -1,9 +0,0 @@ -package constants - -const ( - String = "string" - Number = "number" - Boolean = "boolean" - Array = "array" - Object = "object" -) diff --git a/backend/data/challenge_data.json b/backend/data/challenge_data.json deleted file mode 100644 index 5a51dc33..00000000 --- a/backend/data/challenge_data.json +++ /dev/null @@ -1,142 +0,0 @@ -[ - { - "name": "login_7d", - "title_cn": "连续登录 7 天", - "title_en": "Logged-in for 7 days", - "description_cn": "连续 7 天登录 Crawlab,即可完成挑战!", - "description_en": "Logged-in for consecutive 7 days to complete the challenge", - "difficulty": 1 - }, - { - "name": "login_30d", - "title_cn": "连续登录 30 天", - "title_en": "Logged-in for 30 days", - "description_cn": "连续 30 天登录 Crawlab,即可完成挑战!", - "description_en": "Logged-in for consecutive 30 days to complete the challenge", - "difficulty": 2 - }, - { - "name": "login_90d", - "title_cn": "连续登录 90 天", - "title_en": "Logged-in for 90 days", - "description_cn": "连续 90 天登录 Crawlab,即可完成挑战!", - "description_en": "Logged-in for consecutive 90 days to complete the challenge", - "difficulty": 3 - }, - { - "name": "login_180d", - "title_cn": "连续登录 180 天", - "title_en": "Logged-in for 180 days", - "description_cn": "连续 180 天登录 Crawlab,即可完成挑战!", - "description_en": "Logged-in for consecutive 180 days to complete the challenge", - "difficulty": 4 - }, - { - "name": "create_customized_spider", - "title_cn": "创建 1 个自定义爬虫", - "title_en": "Create a customized spider", - "description_cn": "在爬虫列表中,点击 '添加爬虫',选择 '自定义爬虫',输入相应的参数,点击添加,即可完成挑战!", - "description_en": "In Spider List page, click 'Add Spider', select 'Customized Spider', enter params, click 'Add' to finish the challenge.", - "difficulty": 1, - "path": "/spiders" - }, - { - "name": "create_configurable_spider", - "title_cn": "创建 1 个可配置爬虫", - "title_en": "Create a configurable spider", - "description_cn": "在爬虫列表中,点击 '添加爬虫',选择 '可配置爬虫',输入相应的参数,点击添加,即可完成挑战!", - "description_en": "In Spider List page, click 'Add Spider', select 'Configurable Spider', enter params, click 'Add' to finish the challenge.", - "difficulty": 1, - "path": "/spiders" - }, - { - "name": "run_random", - "title_cn": "用随机模式成功运行爬虫", - "title_en": "Run a spider in random mode successfully", - "description_cn": "在您创建好的爬虫中,导航到其对应的详情页(爬虫列表中点击爬虫),选择随机模式运行一个爬虫,并能运行成功。", - "description_en": "In your created spiders, navigate to corresponding detail page (click spider in Spider List page), run a spider in random mode successfully.", - "difficulty": 1, - "path": "/spiders" - }, - { - "name": "scrape_1k", - "title_cn": "抓取 1 千条数据", - "title_en": "Scrape 1k records", - "description_cn": "运行您创建好的爬虫,抓取 1 千条及以上的结果数据,即可完成挑战!", - "description_en": "Run your created spiders, scrape 1k and more results to finish the challenge.", - "difficulty": 2, - "path": "/spiders" - }, - { - "name": "scrape_10k", - "title_cn": "抓取 1 万条数据", - "title_en": "Scrape 10k records", - "description_cn": "运行您创建好的爬虫,抓取 1 万条及以上的结果数据,即可完成挑战!", - "description_en": "Run your created spiders, scrape 10k and more results to finish the challenge.", - "difficulty": 3, - "path": "/spiders" - }, - { - "name": "scrape_100k", - "title_cn": "抓取 10 万条数据", - "title_en": "Scrape 100k records", - "description_cn": "运行您创建好的爬虫,抓取 10 万条及以上的结果数据,即可完成挑战!", - "description_en": "Run your created spiders, scrape 100k and more results to finish the challenge.", - "difficulty": 4, - "path": "/spiders" - }, - { - "name": "create_schedule", - "title_cn": "创建 1 个定时任务", - "title_en": "Create a schedule", - "description_cn": "在定时任务列表中,创建一个定时任务,正确设置好 Cron 表达式,即可完成挑战!", - "description_en": "In Schedule List page, create a schedule and configure cron expression to finish the task.", - "difficulty": 1, - "path": "/schedules" - }, - { - "name": "create_nodes", - "title_cn": "创建 1 个节点集群", - "title_en": "Create a node cluster", - "description_cn": "按照文档的部署指南,部署含有 3 个节点的集群,即可完成挑战!", - "description_en": "Deploy a 3-node cluster according to the deployment guidance in documentation to finish the task.", - "difficulty": 3, - "path": "/nodes" - }, - { - "name": "install_dep", - "title_cn": "安装 1 个依赖", - "title_en": "Install a dependency successfully", - "description_cn": "在 '节点列表->安装' 或 '节点详情->安装' 中,搜索并安装所需的 1 个依赖,即可完成挑战!", - "description_en": "In 'Node List -> Installation' or 'Node Detail -> Installation', search and install a dependency.", - "difficulty": 3, - "path": "/nodes" - }, - { - "name": "install_lang", - "title_cn": "安装 1 个语言环境", - "title_en": "Install a language successfully", - "description_cn": "在 '节点列表->安装' 或 '节点详情->安装' 中,点击安装所需的 1 个语言环境,即可完成挑战!", - "description_en": "In 'Node List -> Installation' or 'Node Detail -> Installation', install a language.", - "difficulty": 3, - "path": "/nodes" - }, - { - "name": "view_disclaimer", - "title_cn": "阅读免责声明", - "title_en": "View disclaimer", - "description_cn": "在左侧菜单栏,点击 '免责声明' 查看其内容,即可完成挑战!", - "description_en": "In the left side menu, click 'Disclaimer' and view its content to finish the challenge.", - "difficulty": 1, - "path": "/disclaimer" - }, - { - "name": "create_user", - "title_cn": "创建 1 个用户", - "title_en": "Create a user", - "description_cn": "在用户管理页面中创建一个新用户,即可完成挑战!", - "description_en": "In User Admin page, create a new user to finish the challenge.", - "difficulty": 1, - "path": "/users" - } -] \ No newline at end of file diff --git a/backend/database/es_base.go b/backend/database/es_base.go deleted file mode 100644 index b255958a..00000000 --- a/backend/database/es_base.go +++ /dev/null @@ -1,44 +0,0 @@ -package database - -import ( - "context" - "github.com/apex/log" - "github.com/olivere/elastic/v7" - "github.com/satori/go.uuid" - "github.com/spf13/viper" - "sync" - "time" -) - -var doOnce sync.Once -var ctx context.Context -var ESClient *elastic.Client - -func InitEsClient() { - esClientStr := viper.GetString("setting.esClient") - ctx = context.Background() - ESClient, _ = elastic.NewClient(elastic.SetURL(esClientStr), elastic.SetSniff(false)) -} - -// WriteMsg will write the msg and level into es -func WriteMsgToES(when time.Time, msg chan string, index string) { - doOnce.Do(InitEsClient) - vals := make(map[string]interface{}) - vals["@timestamp"] = when.Format(time.RFC3339) - for { - select { - case vals["@msg"] = <-msg: - uid := uuid.NewV4().String() - _, err := ESClient.Index().Index(index).Id(uid).BodyJson(vals).Refresh("wait_for").Do(ctx) - if err != nil { - log.Error(err.Error()) - log.Error("send msg log to es error") - return - } - case <-time.After(6 * time.Second): - return - } - } - - return -} diff --git a/backend/database/mongo.go b/backend/database/mongo.go deleted file mode 100644 index 01204a70..00000000 --- a/backend/database/mongo.go +++ /dev/null @@ -1,112 +0,0 @@ -package database - -import ( - "crawlab/constants" - "github.com/apex/log" - "github.com/cenkalti/backoff/v4" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "github.com/spf13/viper" - "net" - "reflect" - "time" -) - -var Session *mgo.Session - -func GetSession() *mgo.Session { - return Session.Copy() -} - -func GetDb() (*mgo.Session, *mgo.Database) { - s := GetSession() - return s, s.DB(viper.GetString("mongo.db")) -} - -func GetCol(collectionName string) (*mgo.Session, *mgo.Collection) { - s := GetSession() - db := s.DB(viper.GetString("mongo.db")) - col := db.C(collectionName) - return s, col -} - -func GetGridFs(prefix string) (*mgo.Session, *mgo.GridFS) { - s, db := GetDb() - gf := db.GridFS(prefix) - return s, gf -} - -func FillNullObjectId(doc interface{}) { - t := reflect.TypeOf(doc) - if t.Kind() == reflect.Ptr { - t = t.Elem() - } - if t.Kind() != reflect.Struct { - return - } - v := reflect.ValueOf(doc) - for i := 0; i < t.NumField(); i++ { - ft := t.Field(i) - fv := v.Elem().Field(i) - val := fv.Interface() - switch val.(type) { - case bson.ObjectId: - if !val.(bson.ObjectId).Valid() { - v.FieldByName(ft.Name).Set(reflect.ValueOf(bson.ObjectIdHex(constants.ObjectIdNull))) - } - } - } -} - -func InitMongo() error { - var mongoHost = viper.GetString("mongo.host") - var mongoPort = viper.GetString("mongo.port") - var mongoDb = viper.GetString("mongo.db") - var mongoUsername = viper.GetString("mongo.username") - var mongoPassword = viper.GetString("mongo.password") - var mongoAuth = viper.GetString("mongo.authSource") - - if Session == nil { - var dialInfo mgo.DialInfo - addr := net.JoinHostPort(mongoHost, mongoPort) - timeout := time.Second * 10 - dialInfo = mgo.DialInfo{ - Addrs: []string{addr}, - Timeout: timeout, - Database: mongoDb, - PoolLimit: 100, - PoolTimeout: timeout, - ReadTimeout: timeout, - WriteTimeout: timeout, - AppName: "crawlab", - FailFast: true, - MinPoolSize: 10, - MaxIdleTimeMS: 1000 * 30, - } - if mongoUsername != "" { - dialInfo.Username = mongoUsername - dialInfo.Password = mongoPassword - dialInfo.Source = mongoAuth - } - bp := backoff.NewExponentialBackOff() - var err error - - err = backoff.Retry(func() error { - Session, err = mgo.DialWithInfo(&dialInfo) - if err != nil { - log.WithError(err).Warnf("waiting for connect mongo database, after %f seconds try again.", bp.NextBackOff().Seconds()) - } - return err - }, bp) - } - //Add Unique index for 'key' - keyIndex := mgo.Index{ - Key: []string{"key"}, - Unique: true, - } - s, c := GetCol("nodes") - defer s.Close() - c.EnsureIndex(keyIndex) - - return nil -} diff --git a/backend/database/mongo_test.go b/backend/database/mongo_test.go deleted file mode 100644 index ed6044ee..00000000 --- a/backend/database/mongo_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package database - -import ( - "crawlab/config" - "github.com/apex/log" - "github.com/globalsign/mgo" - . "github.com/smartystreets/goconvey/convey" - "github.com/spf13/viper" - "reflect" - "testing" -) - -func init() { - if err := config.InitConfig("../conf/config.yml"); err != nil { - log.Fatal("Init config failed") - } - log.Infof("初始化配置成功") - err := InitMongo() - if err != nil { - log.Fatal("Init mongodb failed") - } - -} - -func TestGetDb(t *testing.T) { - Convey("Test GetDb", t, func() { - if err := config.InitConfig("../conf/config.yml"); err != nil { - t.Fatal("Init config failed") - } - t.Log("初始化配置成功") - err := InitMongo() - if err != nil { - t.Fatal("Init mongodb failed") - } - s, db := GetDb() - Convey("The value should be Session.Copy", func() { - So(s, ShouldResemble, Session.Copy()) - }) - Convey("The value should be reference of database", func() { - So(db, ShouldResemble, s.DB(viper.GetString("mongo.db"))) - }) - }) -} - -func TestGetCol(t *testing.T) { - var c = "nodes" - var colActual *mgo.Collection - Convey("Test GetCol", t, func() { - s, col := GetCol(c) - Convey("s should resemble Session.Copy", func() { - So(s, ShouldResemble, Session.Copy()) - So(reflect.TypeOf(col), ShouldResemble, reflect.TypeOf(colActual)) - }) - }) -} - -func TestGetGridFs(t *testing.T) { - var prefix = "files" - var gfActual *mgo.GridFS - - Convey("Test GetGridFs", t, func() { - s, gf := GetGridFs(prefix) - Convey("s should be session.copy", func() { - So(s, ShouldResemble, Session.Copy()) - }) - Convey("gf should be *mgo.GridFS", func() { - So(reflect.TypeOf(gf), ShouldResemble, reflect.TypeOf(gfActual)) - }) - }) -} diff --git a/backend/database/pubsub.go b/backend/database/pubsub.go deleted file mode 100644 index f9eae535..00000000 --- a/backend/database/pubsub.go +++ /dev/null @@ -1,96 +0,0 @@ -package database - -import ( - "context" - "crawlab/utils" - "fmt" - "github.com/apex/log" - "github.com/gomodule/redigo/redis" - errors2 "github.com/pkg/errors" - "time" -) - -type ConsumeFunc func(message redis.Message) error - -func (r *Redis) Close() { - err := r.pool.Close() - if err != nil { - log.Errorf("redis close error.") - } -} -func (r *Redis) subscribe(ctx context.Context, consume ConsumeFunc, channel ...string) error { - psc := redis.PubSubConn{Conn: r.pool.Get()} - if err := psc.Subscribe(redis.Args{}.AddFlat(channel)...); err != nil { - return err - } - done := make(chan error, 1) - tick := time.NewTicker(time.Second * 3) - defer tick.Stop() - go func() { - defer utils.Close(psc) - for { - switch msg := psc.Receive().(type) { - case error: - done <- fmt.Errorf("redis pubsub receive err: %v", msg) - return - case redis.Message: - if err := consume(msg); err != nil { - fmt.Printf("redis pubsub consume message err: %v", err) - continue - } - case redis.Subscription: - if msg.Count == 0 { - // all channels are unsubscribed - return - } - } - - } - }() - // start a new goroutine to receive message - for { - select { - case <-ctx.Done(): - if err := psc.Unsubscribe(); err != nil { - fmt.Printf("redis pubsub unsubscribe err: %v \n", err) - } - done <- nil - case <-tick.C: - if err := psc.Ping(""); err != nil { - fmt.Printf("ping message error: %s \n", err) - //done <- err - } - case err := <-done: - close(done) - return err - } - } - -} -func (r *Redis) Subscribe(ctx context.Context, consume ConsumeFunc, channel ...string) error { - index := 0 - go func() { - for { - err := r.subscribe(ctx, consume, channel...) - fmt.Println(err) - - if err == nil { - index = 0 - break - } - time.Sleep(5 * time.Second) - index += 1 - fmt.Printf("try reconnect %d times \n", index) - } - }() - return nil -} -func (r *Redis) Publish(channel, message string) (n int, err error) { - conn := r.pool.Get() - defer utils.Close(conn) - n, err = redis.Int(conn.Do("PUBLISH", channel, message)) - if err != nil { - return 0, errors2.Wrapf(err, "redis publish %s %s", channel, message) - } - return -} diff --git a/backend/database/redis.go b/backend/database/redis.go deleted file mode 100644 index e87b688f..00000000 --- a/backend/database/redis.go +++ /dev/null @@ -1,289 +0,0 @@ -package database - -import ( - "context" - "crawlab/entity" - "crawlab/utils" - "errors" - "github.com/apex/log" - "github.com/cenkalti/backoff/v4" - "github.com/gomodule/redigo/redis" - "github.com/spf13/viper" - "runtime/debug" - "strings" - "time" -) - -var RedisClient *Redis - -type Redis struct { - pool *redis.Pool -} - -type Mutex struct { - Name string - expiry time.Duration - tries int - delay time.Duration - value string -} - -func NewRedisClient() *Redis { - return &Redis{pool: NewRedisPool()} -} - -func (r *Redis) RPush(collection string, value interface{}) error { - c := r.pool.Get() - defer utils.Close(c) - - if _, err := c.Do("RPUSH", collection, value); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func (r *Redis) LPush(collection string, value interface{}) error { - c := r.pool.Get() - defer utils.Close(c) - - if _, err := c.Do("RPUSH", collection, value); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func (r *Redis) LPop(collection string) (string, error) { - c := r.pool.Get() - defer utils.Close(c) - - value, err2 := redis.String(c.Do("LPOP", collection)) - if err2 != nil { - return value, err2 - } - return value, nil -} - -func (r *Redis) HSet(collection string, key string, value string) error { - c := r.pool.Get() - defer utils.Close(c) - - if _, err := c.Do("HSET", collection, key, value); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - return nil -} -func (r *Redis) Ping() error { - c := r.pool.Get() - defer utils.Close(c) - _, err2 := redis.String(c.Do("PING")) - return err2 -} -func (r *Redis) HGet(collection string, key string) (string, error) { - c := r.pool.Get() - defer utils.Close(c) - value, err2 := redis.String(c.Do("HGET", collection, key)) - if err2 != nil && err2 != redis.ErrNil { - log.Error(err2.Error()) - debug.PrintStack() - return value, err2 - } - return value, nil -} - -func (r *Redis) HDel(collection string, key string) error { - c := r.pool.Get() - defer utils.Close(c) - - if _, err := c.Do("HDEL", collection, key); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - return nil -} -func (r *Redis) HScan(collection string) (results []string, err error) { - c := r.pool.Get() - defer utils.Close(c) - var ( - cursor int64 - items []string - ) - - for { - values, err := redis.Values(c.Do("HSCAN", collection, cursor)) - if err != nil { - return results, err - } - - values, err = redis.Scan(values, &cursor, &items) - if err != nil { - return results, err - } - for i := 0; i < len(items); i += 2 { - cur := items[i+1] - results = append(results, cur) - } - if cursor == 0 { - break - } - } - return results, err - -} -func (r *Redis) HKeys(collection string) ([]string, error) { - c := r.pool.Get() - defer utils.Close(c) - - value, err2 := redis.Strings(c.Do("HKEYS", collection)) - if err2 != nil { - log.Error(err2.Error()) - debug.PrintStack() - return []string{}, err2 - } - return value, nil -} - -func (r *Redis) BRPop(collection string, timeout int) (string, error) { - if timeout <= 0 { - timeout = 60 - } - c := r.pool.Get() - defer utils.Close(c) - - values, err := redis.Strings(c.Do("BRPOP", collection, timeout)) - if err != nil { - return "", err - } - return values[1], nil -} - -func NewRedisPool() *redis.Pool { - var address = viper.GetString("redis.address") - var port = viper.GetString("redis.port") - var database = viper.GetString("redis.database") - var password = viper.GetString("redis.password") - - var url string - if password == "" { - url = "redis://" + address + ":" + port + "/" + database - } else { - url = "redis://x:" + password + "@" + address + ":" + port + "/" + database - } - return &redis.Pool{ - Dial: func() (conn redis.Conn, e error) { - return redis.DialURL(url, - redis.DialConnectTimeout(time.Second*10), - redis.DialReadTimeout(time.Second*600), - redis.DialWriteTimeout(time.Second*10), - ) - }, - TestOnBorrow: func(c redis.Conn, t time.Time) error { - if time.Since(t) < time.Minute { - return nil - } - _, err := c.Do("PING") - return err - }, - MaxIdle: 10, - MaxActive: 0, - IdleTimeout: 300 * time.Second, - Wait: false, - MaxConnLifetime: 0, - } -} - -func InitRedis() error { - RedisClient = NewRedisClient() - b := backoff.NewExponentialBackOff() - b.MaxInterval = 20 * time.Second - err := backoff.Retry(func() error { - err := RedisClient.Ping() - - if err != nil { - log.WithError(err).Warnf("waiting for redis pool active connection. will after %f seconds try again.", b.NextBackOff().Seconds()) - } - return err - }, b) - return err -} - -func Pub(channel string, msg entity.NodeMessage) error { - if _, err := RedisClient.Publish(channel, utils.GetJson(msg)); err != nil { - log.Errorf("publish redis error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func Sub(channel string, consume ConsumeFunc) error { - ctx := context.Background() - if err := RedisClient.Subscribe(ctx, consume, channel); err != nil { - log.Errorf("subscribe redis error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} - -// 构建同步锁key -func (r *Redis) getLockKey(lockKey string) string { - lockKey = strings.ReplaceAll(lockKey, ":", "-") - return "nodes:lock:" + lockKey -} - -// 获得锁 -func (r *Redis) Lock(lockKey string) (int64, error) { - c := r.pool.Get() - defer utils.Close(c) - lockKey = r.getLockKey(lockKey) - - ts := time.Now().Unix() - ok, err := c.Do("SET", lockKey, ts, "NX", "PX", 30000) - if err != nil { - log.Errorf("get lock fail with error: %s", err.Error()) - debug.PrintStack() - return 0, err - } - if ok == nil { - log.Errorf("the lockKey is locked: key=%s", lockKey) - return 0, errors.New("the lockKey is locked") - } - return ts, nil -} - -func (r *Redis) UnLock(lockKey string, value int64) { - c := r.pool.Get() - defer utils.Close(c) - lockKey = r.getLockKey(lockKey) - - getValue, err := redis.Int64(c.Do("GET", lockKey)) - if err != nil { - log.Errorf("get lockKey error: %s", err.Error()) - debug.PrintStack() - return - } - - if getValue != value { - log.Errorf("the lockKey value diff: %d, %d", value, getValue) - return - } - - v, err := redis.Int64(c.Do("DEL", lockKey)) - if err != nil { - log.Errorf("unlock failed, error: %s", err.Error()) - debug.PrintStack() - return - } - - if v == 0 { - log.Errorf("unlock failed: key=%s", lockKey) - return - } -} diff --git a/backend/docs/docs.go b/backend/docs/docs.go deleted file mode 100644 index feaa3214..00000000 --- a/backend/docs/docs.go +++ /dev/null @@ -1,4808 +0,0 @@ -// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT -// This file was generated by swaggo/swag at -// 2020-05-05 11:09:10.499886 +0800 CST m=+0.084916029 - -package docs - -import ( - "bytes" - "encoding/json" - "strings" - - "github.com/alecthomas/template" - "github.com/swaggo/swag" -) - -var doc = `{ - "schemes": {{ marshal .Schemes }}, - "swagger": "2.0", - "info": { - "description": "{{.Description}}", - "title": "{{.Title}}", - "contact": {}, - "license": {}, - "version": "{{.Version}}" - }, - "host": "{{.Host}}", - "basePath": "{{.BasePath}}", - "paths": { - "/config_spiders": { - "put": { - "description": "Put config spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Put config spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "spider item", - "name": "spider", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/config_spiders/{id}/config": { - "get": { - "description": "Get config spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Get config spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post config spider config", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Post config spider config", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "spider item", - "name": "spider", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/config_spiders/{id}/spiderfile": { - "post": { - "description": "Post config spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Post config spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/config_spiders/{id}/upload": { - "post": { - "description": "Upload config spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Upload config spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "spider item", - "name": "spider", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/config_spiders_templates": { - "get": { - "description": "Get config spider template list", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Get config spider template list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/docs": { - "get": { - "description": "Get docs", - "produces": [ - "application/json" - ], - "tags": [ - "docs" - ], - "summary": "Get docs", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/file": { - "get": { - "description": "Get file", - "produces": [ - "application/json" - ], - "tags": [ - "file" - ], - "summary": "Get file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes": { - "get": { - "description": "Get nodes", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Get nodes", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}": { - "get": { - "description": "Get node", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Get node", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post node", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Post node", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "post node", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete node", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Delete node", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/deps": { - "get": { - "description": "Get dep list", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get dep list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "language", - "name": "lang", - "in": "query", - "required": true - }, - { - "type": "string", - "description": "dep name", - "name": "dep_name", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/deps/install": { - "post": { - "description": "Install dep", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Install dep", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/deps/installed": { - "get": { - "description": "Get installed dep list", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get installed dep list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "language", - "name": "lang", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/deps/uninstall": { - "post": { - "description": "Uninstall dep", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Uninstall dep", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/langs": { - "get": { - "description": "Get language list", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get language list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/langs/install": { - "post": { - "description": "Install language", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Install language", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/system": { - "get": { - "description": "Get system info", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Get system info", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/tasks": { - "get": { - "description": "Get tasks on node", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Get tasks on node", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/projects": { - "get": { - "description": "Get projects", - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Get projects", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "projects", - "name": "tag", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put project", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Put project", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "post project", - "name": "p", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Project" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/projects/tags": { - "get": { - "description": "Get projects tags", - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Get project tags", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/projects/{id}": { - "post": { - "description": "Post project", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Post project", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "project id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "project item", - "name": "item", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Project" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete project", - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Delete project", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "project id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/releases/latest": { - "get": { - "description": "Get latest release", - "produces": [ - "application/json" - ], - "tags": [ - "version" - ], - "summary": "Get latest release", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/schedules": { - "get": { - "description": "Get spider list", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "page num", - "name": "page_num", - "in": "query" - }, - { - "type": "string", - "description": "page size", - "name": "page_size", - "in": "query" - }, - { - "type": "string", - "description": "keyword", - "name": "keyword", - "in": "query" - }, - { - "type": "string", - "description": "project_id", - "name": "project_id", - "in": "query" - }, - { - "type": "string", - "description": "type", - "name": "type", - "in": "query" - }, - { - "type": "string", - "description": "sort_key", - "name": "sort_key", - "in": "query" - }, - { - "type": "string", - "description": "sort_direction", - "name": "sort_direction", - "in": "query" - }, - { - "type": "string", - "description": "owner_type", - "name": "owner_type", - "in": "query" - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put schedule", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "Put schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "schedule item", - "name": "item", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Schedule" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/schedules/{id}": { - "get": { - "description": "Get schedule by id", - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "Get schedule by id", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post schedule", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "Post schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "schedule item", - "name": "newItem", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Schedule" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete schedule", - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "Delete schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/schedules/{id}/disable": { - "post": { - "description": "disable schedule", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "disable schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/schedules/{id}/enable": { - "post": { - "description": "enable schedule", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "enable schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/setting": { - "get": { - "description": "Get setting", - "produces": [ - "application/json" - ], - "tags": [ - "setting" - ], - "summary": "Get setting", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders": { - "put": { - "description": "Put spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Put spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "spider item", - "name": "spider", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "delete spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "delete spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders-cancel": { - "post": { - "description": "cancel spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "cancel spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders-run": { - "post": { - "description": "run spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "run spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}": { - "get": { - "description": "Get spider by id", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider by id", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "spider item", - "name": "item", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete spider by id", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Delete spider by id", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/copy": { - "post": { - "description": "Copy spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Copy spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/dir": { - "get": { - "description": "Get spider dir", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider dir", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "path", - "name": "path", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/file": { - "get": { - "description": "Get spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "path", - "name": "path", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Post spider dir", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post spider dir", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "path", - "name": "reqBody", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.SpiderFileReqBody" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Put spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Put spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "path", - "name": "reqBody", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.SpiderFileReqBody" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Delete spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "path", - "name": "reqBody", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.SpiderFileReqBody" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/file/rename": { - "post": { - "description": "Rename spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Rename spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "path", - "name": "reqBody", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.SpiderFileReqBody" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/file/tree": { - "get": { - "description": "Get spider dir", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider dir", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/git/reset": { - "post": { - "description": "Post spider reset git", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post spider reset git", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/git/sync": { - "post": { - "description": "Post spider sync git", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post spider sync git", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/publish": { - "post": { - "description": "Publish spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Publish spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/schedules": { - "get": { - "description": "Get schedules", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get schedules", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/items": { - "get": { - "description": "Get scrapy spider items", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider items", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post scrapy spider items", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post scrapy spider items", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "req data", - "name": "reqData", - "in": "body", - "required": true, - "schema": { - "type": "entity.ScrapyItem", - "items": { - "$ref": "#/definitions/entity.ScrapyItem" - } - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/pipelines": { - "get": { - "description": "Get scrapy spider pipelines", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider pipelines", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/settings": { - "get": { - "description": "Get scrapy spider settings", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider settings", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Get scrapy spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "req data", - "name": "reqData", - "in": "body", - "required": true, - "schema": { - "type": "entity.ScrapySettingParam", - "items": { - "$ref": "#/definitions/entity.ScrapySettingParam" - } - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/spider/filepath": { - "get": { - "description": "Get scrapy spider file path", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider file path", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/spiders": { - "get": { - "description": "Get scrapy spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put scrapy spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Put scrapy spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/stats": { - "get": { - "description": "Get spider stats", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider stats", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/tasks": { - "get": { - "description": "Get task list", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get task list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/upload": { - "post": { - "description": "Upload spider by id", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Upload spider by id", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "file", - "description": "spider file to upload", - "name": "file", - "in": "formData", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/stats/home": { - "get": { - "description": "Get home stats", - "produces": [ - "application/json" - ], - "tags": [ - "version" - ], - "summary": "Get home stats", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/system/deps/": { - "get": { - "description": "Get all dep list", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get all dep list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "language", - "name": "lang", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "dep name", - "name": "dep_nane", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/system/deps/{lang}/{dep_name}/json": { - "get": { - "description": "Get dep json", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get dep json", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "language", - "name": "lang", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "dep name", - "name": "dep_name", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/task/{id}": { - "delete": { - "description": "Delete task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Delete task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks": { - "get": { - "description": "Get task list", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "req data", - "name": "data", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.TaskListRequestData" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Put task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete tasks", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Delete tasks", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}": { - "get": { - "description": "Get task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/cancel": { - "post": { - "description": "Cancel task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Cancel task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/error-log": { - "delete": { - "description": "Get task error log", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task error log", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/log": { - "delete": { - "description": "Get task log", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task log", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/restart": { - "post": { - "description": "Restart task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Restart task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/results": { - "get": { - "description": "Get task list", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "req data", - "name": "data", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.TaskResultsRequestData" - } - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/results/download": { - "get": { - "description": "Get task results", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task results", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks_by_status": { - "delete": { - "description": "Delete task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Delete task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task status", - "name": "status", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tokens": { - "get": { - "description": "token", - "produces": [ - "application/json" - ], - "tags": [ - "token" - ], - "summary": "Get token", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "token", - "produces": [ - "application/json" - ], - "tags": [ - "token" - ], - "summary": "Put token", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tokens/{id}": { - "delete": { - "description": "Delete token", - "produces": [ - "application/json" - ], - "tags": [ - "token" - ], - "summary": "Delete token", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "token id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/users": { - "get": { - "description": "Get user list", - "produces": [ - "application/json" - ], - "tags": [ - "token" - ], - "summary": "Get user list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "data body", - "name": "data", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.UserListRequestData" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put user", - "produces": [ - "application/json" - ], - "tags": [ - "user" - ], - "summary": "Put user", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "reqData body", - "name": "reqData", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.UserRequestData" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/users/{id}": { - "get": { - "description": "user", - "produces": [ - "application/json" - ], - "tags": [ - "user" - ], - "summary": "Get user", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "user id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post user", - "produces": [ - "application/json" - ], - "tags": [ - "user" - ], - "summary": "Post user", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "user body", - "name": "item", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.User" - } - }, - { - "type": "string", - "description": "user id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete user", - "produces": [ - "application/json" - ], - "tags": [ - "user" - ], - "summary": "Delete user", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "user id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/variable": { - "put": { - "description": "Put variable", - "produces": [ - "application/json" - ], - "tags": [ - "variable" - ], - "summary": "Put variable", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "reqData body", - "name": "variable", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Variable" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/variable/{id}": { - "post": { - "description": "Post variable", - "produces": [ - "application/json" - ], - "tags": [ - "variable" - ], - "summary": "Post variable", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "reqData body", - "name": "variable", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Variable" - } - }, - { - "type": "string", - "description": "variable id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete variable", - "produces": [ - "application/json" - ], - "tags": [ - "variable" - ], - "summary": "Delete variable", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "variable id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/variables": { - "get": { - "description": "Get variable list", - "produces": [ - "application/json" - ], - "tags": [ - "variable" - ], - "summary": "Get variable list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/version": { - "get": { - "description": "Get version", - "produces": [ - "application/json" - ], - "tags": [ - "setting" - ], - "summary": "Get version", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - } - }, - "definitions": { - "entity.ConfigSpiderData": { - "type": "object", - "properties": { - "cmd": { - "description": "自定义爬虫", - "type": "string" - }, - "col": { - "type": "string" - }, - "display_name": { - "type": "string" - }, - "engine": { - "description": "可配置爬虫", - "type": "string" - }, - "name": { - "description": "通用", - "type": "string" - }, - "remark": { - "type": "string" - }, - "settings": { - "type": "object" - }, - "stages": { - "type": "array", - "items": { - "$ref": "#/definitions/entity.Stage" - } - }, - "start_stage": { - "type": "string" - }, - "start_url": { - "type": "string" - }, - "type": { - "type": "string" - } - } - }, - "entity.Field": { - "type": "object", - "properties": { - "attr": { - "type": "string" - }, - "css": { - "type": "string" - }, - "name": { - "type": "string" - }, - "next_stage": { - "type": "string" - }, - "remark": { - "type": "string" - }, - "xpath": { - "type": "string" - } - } - }, - "entity.ScrapyItem": { - "type": "object", - "properties": { - "fields": { - "type": "array", - "items": { - "type": "string" - } - }, - "name": { - "type": "string" - } - } - }, - "entity.ScrapySettingParam": { - "type": "object", - "properties": { - "key": { - "type": "string" - }, - "type": { - "type": "string" - }, - "value": { - "type": "object" - } - } - }, - "entity.Stage": { - "type": "object", - "properties": { - "fields": { - "type": "array", - "items": { - "$ref": "#/definitions/entity.Field" - } - }, - "is_list": { - "type": "boolean" - }, - "list_css": { - "type": "string" - }, - "list_xpath": { - "type": "string" - }, - "name": { - "type": "string" - }, - "page_attr": { - "type": "string" - }, - "page_css": { - "type": "string" - }, - "page_xpath": { - "type": "string" - } - } - }, - "model.Env": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "value": { - "type": "string" - } - } - }, - "model.Node": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "description": { - "type": "string" - }, - "hostname": { - "type": "string" - }, - "ip": { - "type": "string" - }, - "is_master": { - "description": "前端展示", - "type": "boolean" - }, - "key": { - "description": "用于唯一标识节点,可能是mac地址,可能是ip地址", - "type": "string" - }, - "mac": { - "type": "string" - }, - "name": { - "type": "string" - }, - "port": { - "type": "string" - }, - "status": { - "type": "string" - }, - "update_ts": { - "type": "string" - }, - "update_ts_unix": { - "type": "integer" - } - } - }, - "model.Project": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "description": { - "type": "string" - }, - "name": { - "type": "string" - }, - "spiders": { - "description": "前端展示", - "type": "array", - "items": { - "$ref": "#/definitions/model.Spider" - } - }, - "tags": { - "type": "array", - "items": { - "type": "string" - } - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "type": "string" - }, - "username": { - "type": "string" - } - } - }, - "model.Schedule": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "cron": { - "type": "string" - }, - "description": { - "type": "string" - }, - "enabled": { - "type": "boolean" - }, - "entry_id": { - "type": "integer" - }, - "message": { - "type": "string" - }, - "name": { - "type": "string" - }, - "node_ids": { - "type": "array", - "items": { - "type": "string" - } - }, - "nodes": { - "type": "array", - "items": { - "$ref": "#/definitions/model.Node" - } - }, - "param": { - "type": "string" - }, - "run_type": { - "type": "string" - }, - "scrapy_log_level": { - "type": "string" - }, - "scrapy_spider": { - "type": "string" - }, - "spider_id": { - "type": "string" - }, - "spider_name": { - "description": "前端展示", - "type": "string" - }, - "status": { - "type": "string" - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "type": "string" - }, - "user_name": { - "type": "string" - } - } - }, - "model.Spider": { - "type": "object", - "properties": { - "_id": { - "description": "爬虫ID", - "type": "string" - }, - "cmd": { - "description": "自定义爬虫", - "type": "string" - }, - "col": { - "description": "结果储存位置", - "type": "string" - }, - "config": { - "description": "可配置爬虫配置", - "type": "object", - "$ref": "#/definitions/entity.ConfigSpiderData" - }, - "create_ts": { - "type": "string" - }, - "dedup_field": { - "description": "去重字段", - "type": "string" - }, - "dedup_method": { - "description": "去重方式", - "type": "string" - }, - "display_name": { - "description": "爬虫显示名称", - "type": "string" - }, - "envs": { - "description": "环境变量", - "type": "array", - "items": { - "$ref": "#/definitions/model.Env" - } - }, - "file_id": { - "description": "GridFS文件ID", - "type": "string" - }, - "git_auto_sync": { - "description": "Git 是否自动同步", - "type": "boolean" - }, - "git_branch": { - "description": "Git 分支", - "type": "string" - }, - "git_has_credential": { - "description": "Git 是否加密", - "type": "boolean" - }, - "git_password": { - "description": "Git 密码", - "type": "string" - }, - "git_sync_error": { - "description": "Git 同步错误", - "type": "string" - }, - "git_sync_frequency": { - "description": "Git 同步频率", - "type": "string" - }, - "git_url": { - "description": "Git URL", - "type": "string" - }, - "git_username": { - "description": "Git 用户名", - "type": "string" - }, - "is_dedup": { - "description": "去重", - "type": "boolean" - }, - "is_git": { - "description": "Git 设置", - "type": "boolean" - }, - "is_long_task": { - "description": "长任务", - "type": "boolean" - }, - "is_public": { - "description": "是否公开", - "type": "boolean" - }, - "is_scrapy": { - "description": "Scrapy 爬虫(属于自定义爬虫)", - "type": "boolean" - }, - "is_web_hook": { - "description": "Web Hook", - "type": "boolean" - }, - "last_run_ts": { - "description": "前端展示", - "type": "string" - }, - "last_status": { - "description": "最后执行状态", - "type": "string" - }, - "latest_tasks": { - "description": "最近任务列表", - "type": "array", - "items": { - "$ref": "#/definitions/model.Task" - } - }, - "name": { - "description": "爬虫名称(唯一)", - "type": "string" - }, - "project_id": { - "description": "项目ID", - "type": "string" - }, - "remark": { - "description": "备注", - "type": "string" - }, - "site": { - "description": "爬虫网站", - "type": "string" - }, - "spider_names": { - "description": "爬虫名称列表", - "type": "array", - "items": { - "type": "string" - } - }, - "src": { - "description": "源码位置", - "type": "string" - }, - "template": { - "description": "可配置爬虫", - "type": "string" - }, - "type": { - "description": "爬虫类别", - "type": "string" - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "description": "时间", - "type": "string" - }, - "username": { - "description": "用户名称", - "type": "string" - }, - "web_hook_url": { - "description": "Web Hook URL", - "type": "string" - } - } - }, - "model.Task": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "cmd": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "error": { - "type": "string" - }, - "error_log_count": { - "type": "integer" - }, - "finish_ts": { - "type": "string" - }, - "log_path": { - "type": "string" - }, - "node_id": { - "type": "string" - }, - "node_name": { - "type": "string" - }, - "param": { - "type": "string" - }, - "pid": { - "type": "integer" - }, - "result_count": { - "type": "integer" - }, - "run_type": { - "type": "string" - }, - "runtime_duration": { - "type": "number" - }, - "schedule_id": { - "type": "string" - }, - "spider_id": { - "type": "string" - }, - "spider_name": { - "description": "前端数据", - "type": "string" - }, - "start_ts": { - "type": "string" - }, - "status": { - "type": "string" - }, - "total_duration": { - "type": "number" - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "type": "string" - }, - "username": { - "type": "string" - }, - "wait_duration": { - "type": "number" - } - } - }, - "model.User": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "email": { - "type": "string" - }, - "password": { - "type": "string" - }, - "role": { - "type": "string" - }, - "setting": { - "type": "object", - "$ref": "#/definitions/model.UserSetting" - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "type": "string" - }, - "username": { - "type": "string" - } - } - }, - "model.UserSetting": { - "type": "object", - "properties": { - "ding_talk_robot_webhook": { - "type": "string" - }, - "enabled_notifications": { - "type": "array", - "items": { - "type": "string" - } - }, - "error_regex_pattern": { - "type": "string" - }, - "log_expire_duration": { - "type": "integer" - }, - "max_error_log": { - "type": "integer" - }, - "notification_trigger": { - "type": "string" - }, - "wechat_robot_webhook": { - "type": "string" - } - } - }, - "model.Variable": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "key": { - "type": "string" - }, - "remark": { - "type": "string" - }, - "value": { - "type": "string" - } - } - }, - "routes.SpiderFileReqBody": { - "type": "object", - "properties": { - "content": { - "type": "string" - }, - "new_path": { - "type": "string" - }, - "path": { - "type": "string" - } - } - }, - "routes.TaskListRequestData": { - "type": "object", - "properties": { - "nodeId": { - "type": "string" - }, - "pageNum": { - "type": "integer" - }, - "pageSize": { - "type": "integer" - }, - "scheduleId": { - "type": "string" - }, - "spiderId": { - "type": "string" - }, - "status": { - "type": "string" - } - } - }, - "routes.TaskResultsRequestData": { - "type": "object", - "properties": { - "pageNum": { - "type": "integer" - }, - "pageSize": { - "type": "integer" - } - } - }, - "routes.UserListRequestData": { - "type": "object", - "properties": { - "pageNum": { - "type": "integer" - }, - "pageSize": { - "type": "integer" - } - } - }, - "routes.UserRequestData": { - "type": "object", - "properties": { - "email": { - "type": "string" - }, - "password": { - "type": "string" - }, - "role": { - "type": "string" - }, - "username": { - "type": "string" - } - } - } - } -}` - -type swaggerInfo struct { - Version string - Host string - BasePath string - Schemes []string - Title string - Description string -} - -// SwaggerInfo holds exported Swagger Info so clients can modify it -var SwaggerInfo = swaggerInfo{ - Version: "", - Host: "", - BasePath: "", - Schemes: []string{}, - Title: "", - Description: "", -} - -type s struct{} - -func (s *s) ReadDoc() string { - sInfo := SwaggerInfo - sInfo.Description = strings.Replace(sInfo.Description, "\n", "\\n", -1) - - t, err := template.New("swagger_info").Funcs(template.FuncMap{ - "marshal": func(v interface{}) string { - a, _ := json.Marshal(v) - return string(a) - }, - }).Parse(doc) - if err != nil { - return doc - } - - var tpl bytes.Buffer - if err := t.Execute(&tpl, sInfo); err != nil { - return doc - } - - return tpl.String() -} - -func init() { - swag.Register(swag.Name, &s{}) -} diff --git a/backend/docs/swagger.json b/backend/docs/swagger.json deleted file mode 100644 index 47986662..00000000 --- a/backend/docs/swagger.json +++ /dev/null @@ -1,4740 +0,0 @@ -{ - "swagger": "2.0", - "info": { - "contact": {}, - "license": {} - }, - "paths": { - "/config_spiders": { - "put": { - "description": "Put config spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Put config spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "spider item", - "name": "spider", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/config_spiders/{id}/config": { - "get": { - "description": "Get config spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Get config spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post config spider config", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Post config spider config", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "spider item", - "name": "spider", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/config_spiders/{id}/spiderfile": { - "post": { - "description": "Post config spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Post config spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/config_spiders/{id}/upload": { - "post": { - "description": "Upload config spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Upload config spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "spider item", - "name": "spider", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/config_spiders_templates": { - "get": { - "description": "Get config spider template list", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "config spider" - ], - "summary": "Get config spider template list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/docs": { - "get": { - "description": "Get docs", - "produces": [ - "application/json" - ], - "tags": [ - "docs" - ], - "summary": "Get docs", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/file": { - "get": { - "description": "Get file", - "produces": [ - "application/json" - ], - "tags": [ - "file" - ], - "summary": "Get file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes": { - "get": { - "description": "Get nodes", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Get nodes", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}": { - "get": { - "description": "Get node", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Get node", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post node", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Post node", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "post node", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete node", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Delete node", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/deps": { - "get": { - "description": "Get dep list", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get dep list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "language", - "name": "lang", - "in": "query", - "required": true - }, - { - "type": "string", - "description": "dep name", - "name": "dep_name", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/deps/install": { - "post": { - "description": "Install dep", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Install dep", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/deps/installed": { - "get": { - "description": "Get installed dep list", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get installed dep list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "language", - "name": "lang", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/deps/uninstall": { - "post": { - "description": "Uninstall dep", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Uninstall dep", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/langs": { - "get": { - "description": "Get language list", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get language list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/langs/install": { - "post": { - "description": "Install language", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Install language", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/system": { - "get": { - "description": "Get system info", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Get system info", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/nodes/{id}/tasks": { - "get": { - "description": "Get tasks on node", - "produces": [ - "application/json" - ], - "tags": [ - "node" - ], - "summary": "Get tasks on node", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "node id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/projects": { - "get": { - "description": "Get projects", - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Get projects", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "projects", - "name": "tag", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put project", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Put project", - "parameters": [ - { - "type": "string", - "description": "With the bearer started", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "post project", - "name": "p", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Project" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/projects/tags": { - "get": { - "description": "Get projects tags", - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Get project tags", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/projects/{id}": { - "post": { - "description": "Post project", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Post project", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "project id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "project item", - "name": "item", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Project" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete project", - "produces": [ - "application/json" - ], - "tags": [ - "project" - ], - "summary": "Delete project", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "project id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/releases/latest": { - "get": { - "description": "Get latest release", - "produces": [ - "application/json" - ], - "tags": [ - "version" - ], - "summary": "Get latest release", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/schedules": { - "get": { - "description": "Get spider list", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "page num", - "name": "page_num", - "in": "query" - }, - { - "type": "string", - "description": "page size", - "name": "page_size", - "in": "query" - }, - { - "type": "string", - "description": "keyword", - "name": "keyword", - "in": "query" - }, - { - "type": "string", - "description": "project_id", - "name": "project_id", - "in": "query" - }, - { - "type": "string", - "description": "type", - "name": "type", - "in": "query" - }, - { - "type": "string", - "description": "sort_key", - "name": "sort_key", - "in": "query" - }, - { - "type": "string", - "description": "sort_direction", - "name": "sort_direction", - "in": "query" - }, - { - "type": "string", - "description": "owner_type", - "name": "owner_type", - "in": "query" - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put schedule", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "Put schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "schedule item", - "name": "item", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Schedule" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/schedules/{id}": { - "get": { - "description": "Get schedule by id", - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "Get schedule by id", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post schedule", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "Post schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "schedule item", - "name": "newItem", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Schedule" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete schedule", - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "Delete schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/schedules/{id}/disable": { - "post": { - "description": "disable schedule", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "disable schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/schedules/{id}/enable": { - "post": { - "description": "enable schedule", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "schedule" - ], - "summary": "enable schedule", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/setting": { - "get": { - "description": "Get setting", - "produces": [ - "application/json" - ], - "tags": [ - "setting" - ], - "summary": "Get setting", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders": { - "put": { - "description": "Put spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Put spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "spider item", - "name": "spider", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "delete spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "delete spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders-cancel": { - "post": { - "description": "cancel spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "cancel spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders-run": { - "post": { - "description": "run spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "run spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}": { - "get": { - "description": "Get spider by id", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider by id", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "spider item", - "name": "item", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Spider" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete spider by id", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Delete spider by id", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/copy": { - "post": { - "description": "Copy spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Copy spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/dir": { - "get": { - "description": "Get spider dir", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider dir", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "path", - "name": "path", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/file": { - "get": { - "description": "Get spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "path", - "name": "path", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Post spider dir", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post spider dir", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "path", - "name": "reqBody", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.SpiderFileReqBody" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Put spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Put spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "path", - "name": "reqBody", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.SpiderFileReqBody" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Delete spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "path", - "name": "reqBody", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.SpiderFileReqBody" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/file/rename": { - "post": { - "description": "Rename spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Rename spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "path", - "name": "reqBody", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.SpiderFileReqBody" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/file/tree": { - "get": { - "description": "Get spider dir", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider dir", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/git/reset": { - "post": { - "description": "Post spider reset git", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post spider reset git", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/git/sync": { - "post": { - "description": "Post spider sync git", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post spider sync git", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/publish": { - "post": { - "description": "Publish spider", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Publish spider", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "schedule id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/schedules": { - "get": { - "description": "Get schedules", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get schedules", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/items": { - "get": { - "description": "Get scrapy spider items", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider items", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post scrapy spider items", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Post scrapy spider items", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "req data", - "name": "reqData", - "in": "body", - "required": true, - "schema": { - "type": "entity.ScrapyItem", - "items": { - "$ref": "#/definitions/entity.ScrapyItem" - } - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/pipelines": { - "get": { - "description": "Get scrapy spider pipelines", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider pipelines", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/settings": { - "get": { - "description": "Get scrapy spider settings", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider settings", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Get scrapy spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - }, - { - "description": "req data", - "name": "reqData", - "in": "body", - "required": true, - "schema": { - "type": "entity.ScrapySettingParam", - "items": { - "$ref": "#/definitions/entity.ScrapySettingParam" - } - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/spider/filepath": { - "get": { - "description": "Get scrapy spider file path", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider file path", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/scrapy/spiders": { - "get": { - "description": "Get scrapy spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get scrapy spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put scrapy spider file", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Put scrapy spider file", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/stats": { - "get": { - "description": "Get spider stats", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get spider stats", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/tasks": { - "get": { - "description": "Get task list", - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Get task list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/spiders/{id}/upload": { - "post": { - "description": "Upload spider by id", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "spider" - ], - "summary": "Upload spider by id", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "file", - "description": "spider file to upload", - "name": "file", - "in": "formData", - "required": true - }, - { - "type": "string", - "description": "spider id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "json" - } - } - } - } - }, - "/stats/home": { - "get": { - "description": "Get home stats", - "produces": [ - "application/json" - ], - "tags": [ - "version" - ], - "summary": "Get home stats", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/system/deps/": { - "get": { - "description": "Get all dep list", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get all dep list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "language", - "name": "lang", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "dep name", - "name": "dep_nane", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/system/deps/{lang}/{dep_name}/json": { - "get": { - "description": "Get dep json", - "produces": [ - "application/json" - ], - "tags": [ - "system" - ], - "summary": "Get dep json", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "language", - "name": "lang", - "in": "path", - "required": true - }, - { - "type": "string", - "description": "dep name", - "name": "dep_name", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/task/{id}": { - "delete": { - "description": "Delete task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Delete task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks": { - "get": { - "description": "Get task list", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "req data", - "name": "data", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.TaskListRequestData" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Put task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete tasks", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Delete tasks", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}": { - "get": { - "description": "Get task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/cancel": { - "post": { - "description": "Cancel task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Cancel task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/error-log": { - "delete": { - "description": "Get task error log", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task error log", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/log": { - "delete": { - "description": "Get task log", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task log", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/restart": { - "post": { - "description": "Restart task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Restart task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/results": { - "get": { - "description": "Get task list", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "req data", - "name": "data", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.TaskResultsRequestData" - } - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks/{id}/results/download": { - "get": { - "description": "Get task results", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Get task results", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tasks_by_status": { - "delete": { - "description": "Delete task", - "produces": [ - "application/json" - ], - "tags": [ - "task" - ], - "summary": "Delete task", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "task status", - "name": "status", - "in": "query", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tokens": { - "get": { - "description": "token", - "produces": [ - "application/json" - ], - "tags": [ - "token" - ], - "summary": "Get token", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "token", - "produces": [ - "application/json" - ], - "tags": [ - "token" - ], - "summary": "Put token", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/tokens/{id}": { - "delete": { - "description": "Delete token", - "produces": [ - "application/json" - ], - "tags": [ - "token" - ], - "summary": "Delete token", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "token id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/users": { - "get": { - "description": "Get user list", - "produces": [ - "application/json" - ], - "tags": [ - "token" - ], - "summary": "Get user list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "data body", - "name": "data", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.UserListRequestData" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "put": { - "description": "Put user", - "produces": [ - "application/json" - ], - "tags": [ - "user" - ], - "summary": "Put user", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "reqData body", - "name": "reqData", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/routes.UserRequestData" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/users/{id}": { - "get": { - "description": "user", - "produces": [ - "application/json" - ], - "tags": [ - "user" - ], - "summary": "Get user", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "user id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "post": { - "description": "Post user", - "produces": [ - "application/json" - ], - "tags": [ - "user" - ], - "summary": "Post user", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "user body", - "name": "item", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.User" - } - }, - { - "type": "string", - "description": "user id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete user", - "produces": [ - "application/json" - ], - "tags": [ - "user" - ], - "summary": "Delete user", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "user id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/variable": { - "put": { - "description": "Put variable", - "produces": [ - "application/json" - ], - "tags": [ - "variable" - ], - "summary": "Put variable", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "reqData body", - "name": "variable", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Variable" - } - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/variable/{id}": { - "post": { - "description": "Post variable", - "produces": [ - "application/json" - ], - "tags": [ - "variable" - ], - "summary": "Post variable", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "description": "reqData body", - "name": "variable", - "in": "body", - "required": true, - "schema": { - "type": "object", - "$ref": "#/definitions/model.Variable" - } - }, - { - "type": "string", - "description": "variable id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - }, - "delete": { - "description": "Delete variable", - "produces": [ - "application/json" - ], - "tags": [ - "variable" - ], - "summary": "Delete variable", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - }, - { - "type": "string", - "description": "variable id", - "name": "id", - "in": "path", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/variables": { - "get": { - "description": "Get variable list", - "produces": [ - "application/json" - ], - "tags": [ - "variable" - ], - "summary": "Get variable list", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - }, - "/version": { - "get": { - "description": "Get version", - "produces": [ - "application/json" - ], - "tags": [ - "setting" - ], - "summary": "Get version", - "parameters": [ - { - "type": "string", - "description": "Authorization token", - "name": "Authorization", - "in": "header", - "required": true - } - ], - "responses": { - "200": { - "description": "OK", - "schema": { - "type": "json" - } - }, - "400": { - "description": "Bad Request", - "schema": { - "type": "json" - } - } - } - } - } - }, - "definitions": { - "entity.ConfigSpiderData": { - "type": "object", - "properties": { - "cmd": { - "description": "自定义爬虫", - "type": "string" - }, - "col": { - "type": "string" - }, - "display_name": { - "type": "string" - }, - "engine": { - "description": "可配置爬虫", - "type": "string" - }, - "name": { - "description": "通用", - "type": "string" - }, - "remark": { - "type": "string" - }, - "settings": { - "type": "object" - }, - "stages": { - "type": "array", - "items": { - "$ref": "#/definitions/entity.Stage" - } - }, - "start_stage": { - "type": "string" - }, - "start_url": { - "type": "string" - }, - "type": { - "type": "string" - } - } - }, - "entity.Field": { - "type": "object", - "properties": { - "attr": { - "type": "string" - }, - "css": { - "type": "string" - }, - "name": { - "type": "string" - }, - "next_stage": { - "type": "string" - }, - "remark": { - "type": "string" - }, - "xpath": { - "type": "string" - } - } - }, - "entity.ScrapyItem": { - "type": "object", - "properties": { - "fields": { - "type": "array", - "items": { - "type": "string" - } - }, - "name": { - "type": "string" - } - } - }, - "entity.ScrapySettingParam": { - "type": "object", - "properties": { - "key": { - "type": "string" - }, - "type": { - "type": "string" - }, - "value": { - "type": "object" - } - } - }, - "entity.Stage": { - "type": "object", - "properties": { - "fields": { - "type": "array", - "items": { - "$ref": "#/definitions/entity.Field" - } - }, - "is_list": { - "type": "boolean" - }, - "list_css": { - "type": "string" - }, - "list_xpath": { - "type": "string" - }, - "name": { - "type": "string" - }, - "page_attr": { - "type": "string" - }, - "page_css": { - "type": "string" - }, - "page_xpath": { - "type": "string" - } - } - }, - "model.Env": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "value": { - "type": "string" - } - } - }, - "model.Node": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "description": { - "type": "string" - }, - "hostname": { - "type": "string" - }, - "ip": { - "type": "string" - }, - "is_master": { - "description": "前端展示", - "type": "boolean" - }, - "key": { - "description": "用于唯一标识节点,可能是mac地址,可能是ip地址", - "type": "string" - }, - "mac": { - "type": "string" - }, - "name": { - "type": "string" - }, - "port": { - "type": "string" - }, - "status": { - "type": "string" - }, - "update_ts": { - "type": "string" - }, - "update_ts_unix": { - "type": "integer" - } - } - }, - "model.Project": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "description": { - "type": "string" - }, - "name": { - "type": "string" - }, - "spiders": { - "description": "前端展示", - "type": "array", - "items": { - "$ref": "#/definitions/model.Spider" - } - }, - "tags": { - "type": "array", - "items": { - "type": "string" - } - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "type": "string" - }, - "username": { - "type": "string" - } - } - }, - "model.Schedule": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "cron": { - "type": "string" - }, - "description": { - "type": "string" - }, - "enabled": { - "type": "boolean" - }, - "entry_id": { - "type": "integer" - }, - "message": { - "type": "string" - }, - "name": { - "type": "string" - }, - "node_ids": { - "type": "array", - "items": { - "type": "string" - } - }, - "nodes": { - "type": "array", - "items": { - "$ref": "#/definitions/model.Node" - } - }, - "param": { - "type": "string" - }, - "run_type": { - "type": "string" - }, - "scrapy_log_level": { - "type": "string" - }, - "scrapy_spider": { - "type": "string" - }, - "spider_id": { - "type": "string" - }, - "spider_name": { - "description": "前端展示", - "type": "string" - }, - "status": { - "type": "string" - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "type": "string" - }, - "user_name": { - "type": "string" - } - } - }, - "model.Spider": { - "type": "object", - "properties": { - "_id": { - "description": "爬虫ID", - "type": "string" - }, - "cmd": { - "description": "自定义爬虫", - "type": "string" - }, - "col": { - "description": "结果储存位置", - "type": "string" - }, - "config": { - "description": "可配置爬虫配置", - "type": "object", - "$ref": "#/definitions/entity.ConfigSpiderData" - }, - "create_ts": { - "type": "string" - }, - "dedup_field": { - "description": "去重字段", - "type": "string" - }, - "dedup_method": { - "description": "去重方式", - "type": "string" - }, - "display_name": { - "description": "爬虫显示名称", - "type": "string" - }, - "envs": { - "description": "环境变量", - "type": "array", - "items": { - "$ref": "#/definitions/model.Env" - } - }, - "file_id": { - "description": "GridFS文件ID", - "type": "string" - }, - "git_auto_sync": { - "description": "Git 是否自动同步", - "type": "boolean" - }, - "git_branch": { - "description": "Git 分支", - "type": "string" - }, - "git_has_credential": { - "description": "Git 是否加密", - "type": "boolean" - }, - "git_password": { - "description": "Git 密码", - "type": "string" - }, - "git_sync_error": { - "description": "Git 同步错误", - "type": "string" - }, - "git_sync_frequency": { - "description": "Git 同步频率", - "type": "string" - }, - "git_url": { - "description": "Git URL", - "type": "string" - }, - "git_username": { - "description": "Git 用户名", - "type": "string" - }, - "is_dedup": { - "description": "去重", - "type": "boolean" - }, - "is_git": { - "description": "Git 设置", - "type": "boolean" - }, - "is_long_task": { - "description": "长任务", - "type": "boolean" - }, - "is_public": { - "description": "是否公开", - "type": "boolean" - }, - "is_scrapy": { - "description": "Scrapy 爬虫(属于自定义爬虫)", - "type": "boolean" - }, - "is_web_hook": { - "description": "Web Hook", - "type": "boolean" - }, - "last_run_ts": { - "description": "前端展示", - "type": "string" - }, - "last_status": { - "description": "最后执行状态", - "type": "string" - }, - "latest_tasks": { - "description": "最近任务列表", - "type": "array", - "items": { - "$ref": "#/definitions/model.Task" - } - }, - "name": { - "description": "爬虫名称(唯一)", - "type": "string" - }, - "project_id": { - "description": "项目ID", - "type": "string" - }, - "remark": { - "description": "备注", - "type": "string" - }, - "site": { - "description": "爬虫网站", - "type": "string" - }, - "spider_names": { - "description": "爬虫名称列表", - "type": "array", - "items": { - "type": "string" - } - }, - "src": { - "description": "源码位置", - "type": "string" - }, - "template": { - "description": "可配置爬虫", - "type": "string" - }, - "type": { - "description": "爬虫类别", - "type": "string" - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "description": "时间", - "type": "string" - }, - "username": { - "description": "用户名称", - "type": "string" - }, - "web_hook_url": { - "description": "Web Hook URL", - "type": "string" - } - } - }, - "model.Task": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "cmd": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "error": { - "type": "string" - }, - "error_log_count": { - "type": "integer" - }, - "finish_ts": { - "type": "string" - }, - "log_path": { - "type": "string" - }, - "node_id": { - "type": "string" - }, - "node_name": { - "type": "string" - }, - "param": { - "type": "string" - }, - "pid": { - "type": "integer" - }, - "result_count": { - "type": "integer" - }, - "run_type": { - "type": "string" - }, - "runtime_duration": { - "type": "number" - }, - "schedule_id": { - "type": "string" - }, - "spider_id": { - "type": "string" - }, - "spider_name": { - "description": "前端数据", - "type": "string" - }, - "start_ts": { - "type": "string" - }, - "status": { - "type": "string" - }, - "total_duration": { - "type": "number" - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "type": "string" - }, - "username": { - "type": "string" - }, - "wait_duration": { - "type": "number" - } - } - }, - "model.User": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "create_ts": { - "type": "string" - }, - "email": { - "type": "string" - }, - "password": { - "type": "string" - }, - "role": { - "type": "string" - }, - "setting": { - "type": "object", - "$ref": "#/definitions/model.UserSetting" - }, - "update_ts": { - "type": "string" - }, - "user_id": { - "type": "string" - }, - "username": { - "type": "string" - } - } - }, - "model.UserSetting": { - "type": "object", - "properties": { - "ding_talk_robot_webhook": { - "type": "string" - }, - "enabled_notifications": { - "type": "array", - "items": { - "type": "string" - } - }, - "error_regex_pattern": { - "type": "string" - }, - "log_expire_duration": { - "type": "integer" - }, - "max_error_log": { - "type": "integer" - }, - "notification_trigger": { - "type": "string" - }, - "wechat_robot_webhook": { - "type": "string" - } - } - }, - "model.Variable": { - "type": "object", - "properties": { - "_id": { - "type": "string" - }, - "key": { - "type": "string" - }, - "remark": { - "type": "string" - }, - "value": { - "type": "string" - } - } - }, - "routes.SpiderFileReqBody": { - "type": "object", - "properties": { - "content": { - "type": "string" - }, - "new_path": { - "type": "string" - }, - "path": { - "type": "string" - } - } - }, - "routes.TaskListRequestData": { - "type": "object", - "properties": { - "nodeId": { - "type": "string" - }, - "pageNum": { - "type": "integer" - }, - "pageSize": { - "type": "integer" - }, - "scheduleId": { - "type": "string" - }, - "spiderId": { - "type": "string" - }, - "status": { - "type": "string" - } - } - }, - "routes.TaskResultsRequestData": { - "type": "object", - "properties": { - "pageNum": { - "type": "integer" - }, - "pageSize": { - "type": "integer" - } - } - }, - "routes.UserListRequestData": { - "type": "object", - "properties": { - "pageNum": { - "type": "integer" - }, - "pageSize": { - "type": "integer" - } - } - }, - "routes.UserRequestData": { - "type": "object", - "properties": { - "email": { - "type": "string" - }, - "password": { - "type": "string" - }, - "role": { - "type": "string" - }, - "username": { - "type": "string" - } - } - } - } -} \ No newline at end of file diff --git a/backend/docs/swagger.yaml b/backend/docs/swagger.yaml deleted file mode 100644 index 26598b52..00000000 --- a/backend/docs/swagger.yaml +++ /dev/null @@ -1,3177 +0,0 @@ -definitions: - entity.ConfigSpiderData: - properties: - cmd: - description: 自定义爬虫 - type: string - col: - type: string - display_name: - type: string - engine: - description: 可配置爬虫 - type: string - name: - description: 通用 - type: string - remark: - type: string - settings: - type: object - stages: - items: - $ref: '#/definitions/entity.Stage' - type: array - start_stage: - type: string - start_url: - type: string - type: - type: string - type: object - entity.Field: - properties: - attr: - type: string - css: - type: string - name: - type: string - next_stage: - type: string - remark: - type: string - xpath: - type: string - type: object - entity.ScrapyItem: - properties: - fields: - items: - type: string - type: array - name: - type: string - type: object - entity.ScrapySettingParam: - properties: - key: - type: string - type: - type: string - value: - type: object - type: object - entity.Stage: - properties: - fields: - items: - $ref: '#/definitions/entity.Field' - type: array - is_list: - type: boolean - list_css: - type: string - list_xpath: - type: string - name: - type: string - page_attr: - type: string - page_css: - type: string - page_xpath: - type: string - type: object - model.Env: - properties: - name: - type: string - value: - type: string - type: object - model.Node: - properties: - _id: - type: string - create_ts: - type: string - description: - type: string - hostname: - type: string - ip: - type: string - is_master: - description: 前端展示 - type: boolean - key: - description: 用于唯一标识节点,可能是mac地址,可能是ip地址 - type: string - mac: - type: string - name: - type: string - port: - type: string - status: - type: string - update_ts: - type: string - update_ts_unix: - type: integer - type: object - model.Project: - properties: - _id: - type: string - create_ts: - type: string - description: - type: string - name: - type: string - spiders: - description: 前端展示 - items: - $ref: '#/definitions/model.Spider' - type: array - tags: - items: - type: string - type: array - update_ts: - type: string - user_id: - type: string - username: - type: string - type: object - model.Schedule: - properties: - _id: - type: string - create_ts: - type: string - cron: - type: string - description: - type: string - enabled: - type: boolean - entry_id: - type: integer - message: - type: string - name: - type: string - node_ids: - items: - type: string - type: array - nodes: - items: - $ref: '#/definitions/model.Node' - type: array - param: - type: string - run_type: - type: string - scrapy_log_level: - type: string - scrapy_spider: - type: string - spider_id: - type: string - spider_name: - description: 前端展示 - type: string - status: - type: string - update_ts: - type: string - user_id: - type: string - user_name: - type: string - type: object - model.Spider: - properties: - _id: - description: 爬虫ID - type: string - cmd: - description: 自定义爬虫 - type: string - col: - description: 结果储存位置 - type: string - config: - $ref: '#/definitions/entity.ConfigSpiderData' - description: 可配置爬虫配置 - type: object - create_ts: - type: string - dedup_field: - description: 去重字段 - type: string - dedup_method: - description: 去重方式 - type: string - display_name: - description: 爬虫显示名称 - type: string - envs: - description: 环境变量 - items: - $ref: '#/definitions/model.Env' - type: array - file_id: - description: GridFS文件ID - type: string - git_auto_sync: - description: Git 是否自动同步 - type: boolean - git_branch: - description: Git 分支 - type: string - git_has_credential: - description: Git 是否加密 - type: boolean - git_password: - description: Git 密码 - type: string - git_sync_error: - description: Git 同步错误 - type: string - git_sync_frequency: - description: Git 同步频率 - type: string - git_url: - description: Git URL - type: string - git_username: - description: Git 用户名 - type: string - is_dedup: - description: 去重 - type: boolean - is_git: - description: Git 设置 - type: boolean - is_long_task: - description: 长任务 - type: boolean - is_public: - description: 是否公开 - type: boolean - is_scrapy: - description: Scrapy 爬虫(属于自定义爬虫) - type: boolean - is_web_hook: - description: Web Hook - type: boolean - last_run_ts: - description: 前端展示 - type: string - last_status: - description: 最后执行状态 - type: string - latest_tasks: - description: 最近任务列表 - items: - $ref: '#/definitions/model.Task' - type: array - name: - description: 爬虫名称(唯一) - type: string - project_id: - description: 项目ID - type: string - remark: - description: 备注 - type: string - site: - description: 爬虫网站 - type: string - spider_names: - description: 爬虫名称列表 - items: - type: string - type: array - src: - description: 源码位置 - type: string - template: - description: 可配置爬虫 - type: string - type: - description: 爬虫类别 - type: string - update_ts: - type: string - user_id: - description: 时间 - type: string - username: - description: 用户名称 - type: string - web_hook_url: - description: Web Hook URL - type: string - type: object - model.Task: - properties: - _id: - type: string - cmd: - type: string - create_ts: - type: string - error: - type: string - error_log_count: - type: integer - finish_ts: - type: string - log_path: - type: string - node_id: - type: string - node_name: - type: string - param: - type: string - pid: - type: integer - result_count: - type: integer - run_type: - type: string - runtime_duration: - type: number - schedule_id: - type: string - spider_id: - type: string - spider_name: - description: 前端数据 - type: string - start_ts: - type: string - status: - type: string - total_duration: - type: number - update_ts: - type: string - user_id: - type: string - username: - type: string - wait_duration: - type: number - type: object - model.User: - properties: - _id: - type: string - create_ts: - type: string - email: - type: string - password: - type: string - role: - type: string - setting: - $ref: '#/definitions/model.UserSetting' - type: object - update_ts: - type: string - user_id: - type: string - username: - type: string - type: object - model.UserSetting: - properties: - ding_talk_robot_webhook: - type: string - enabled_notifications: - items: - type: string - type: array - error_regex_pattern: - type: string - log_expire_duration: - type: integer - max_error_log: - type: integer - notification_trigger: - type: string - wechat_robot_webhook: - type: string - type: object - model.Variable: - properties: - _id: - type: string - key: - type: string - remark: - type: string - value: - type: string - type: object - routes.SpiderFileReqBody: - properties: - content: - type: string - new_path: - type: string - path: - type: string - type: object - routes.TaskListRequestData: - properties: - nodeId: - type: string - pageNum: - type: integer - pageSize: - type: integer - scheduleId: - type: string - spiderId: - type: string - status: - type: string - type: object - routes.TaskResultsRequestData: - properties: - pageNum: - type: integer - pageSize: - type: integer - type: object - routes.UserListRequestData: - properties: - pageNum: - type: integer - pageSize: - type: integer - type: object - routes.UserRequestData: - properties: - email: - type: string - password: - type: string - role: - type: string - username: - type: string - type: object -info: - contact: {} - license: {} -paths: - /config_spiders: - put: - consumes: - - application/json - description: Put config spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider item - in: body - name: spider - required: true - schema: - $ref: '#/definitions/model.Spider' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Put config spider - tags: - - config spider - /config_spiders/{id}/config: - get: - consumes: - - application/json - description: Get config spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Get config spider - tags: - - config spider - post: - consumes: - - application/json - description: Post config spider config - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider item - in: body - name: spider - required: true - schema: - $ref: '#/definitions/model.Spider' - type: object - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Post config spider config - tags: - - config spider - /config_spiders/{id}/spiderfile: - post: - consumes: - - application/json - description: Post config spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Post config spider - tags: - - config spider - /config_spiders/{id}/upload: - post: - consumes: - - application/json - description: Upload config spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider item - in: body - name: spider - required: true - schema: - $ref: '#/definitions/model.Spider' - type: object - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Upload config spider - tags: - - config spider - /config_spiders_templates: - get: - consumes: - - application/json - description: Get config spider template list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Get config spider template list - tags: - - config spider - /docs: - get: - description: Get docs - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get docs - tags: - - docs - /file: - get: - description: Get file - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get file - tags: - - file - /nodes: - get: - description: Get nodes - parameters: - - description: With the bearer started - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get nodes - tags: - - node - /nodes/{id}: - delete: - description: Delete node - parameters: - - description: With the bearer started - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete node - tags: - - node - get: - description: Get node - parameters: - - description: With the bearer started - in: header - name: Authorization - required: true - type: string - - description: id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get node - tags: - - node - post: - consumes: - - application/json - description: Post node - parameters: - - description: With the bearer started - in: header - name: Authorization - required: true - type: string - - description: post node - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Post node - tags: - - node - /nodes/{id}/deps: - get: - description: Get dep list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - - description: language - in: query - name: lang - required: true - type: string - - description: dep name - in: query - name: dep_name - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get dep list - tags: - - system - /nodes/{id}/deps/install: - post: - description: Install dep - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Install dep - tags: - - system - /nodes/{id}/deps/installed: - get: - description: Get installed dep list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - - description: language - in: query - name: lang - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get installed dep list - tags: - - system - /nodes/{id}/deps/uninstall: - post: - description: Uninstall dep - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Uninstall dep - tags: - - system - /nodes/{id}/langs: - get: - description: Get language list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get language list - tags: - - system - /nodes/{id}/langs/install: - post: - description: Install language - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Install language - tags: - - system - /nodes/{id}/system: - get: - description: Get system info - parameters: - - description: With the bearer started - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get system info - tags: - - node - /nodes/{id}/tasks: - get: - description: Get tasks on node - parameters: - - description: With the bearer started - in: header - name: Authorization - required: true - type: string - - description: node id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get tasks on node - tags: - - node - /projects: - get: - description: Get projects - parameters: - - description: With the bearer started - in: header - name: Authorization - required: true - type: string - - description: projects - in: query - name: tag - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get projects - tags: - - project - put: - consumes: - - application/json - description: Put project - parameters: - - description: With the bearer started - in: header - name: Authorization - required: true - type: string - - description: post project - in: body - name: p - required: true - schema: - $ref: '#/definitions/model.Project' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Put project - tags: - - project - /projects/{id}: - delete: - description: Delete project - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: project id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete project - tags: - - project - post: - consumes: - - application/json - description: Post project - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: project id - in: path - name: id - required: true - type: string - - description: project item - in: body - name: item - required: true - schema: - $ref: '#/definitions/model.Project' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Post project - tags: - - project - /projects/tags: - get: - description: Get projects tags - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get project tags - tags: - - project - /releases/latest: - get: - description: Get latest release - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get latest release - tags: - - version - /schedules: - get: - description: Get spider list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: page num - in: query - name: page_num - type: string - - description: page size - in: query - name: page_size - type: string - - description: keyword - in: query - name: keyword - type: string - - description: project_id - in: query - name: project_id - type: string - - description: type - in: query - name: type - type: string - - description: sort_key - in: query - name: sort_key - type: string - - description: sort_direction - in: query - name: sort_direction - type: string - - description: owner_type - in: query - name: owner_type - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get spider list - tags: - - spider - put: - consumes: - - application/json - description: Put schedule - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule item - in: body - name: item - required: true - schema: - $ref: '#/definitions/model.Schedule' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Put schedule - tags: - - schedule - /schedules/{id}: - delete: - description: Delete schedule - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete schedule - tags: - - schedule - get: - description: Get schedule by id - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get schedule by id - tags: - - schedule - post: - consumes: - - application/json - description: Post schedule - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - - description: schedule item - in: body - name: newItem - required: true - schema: - $ref: '#/definitions/model.Schedule' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Post schedule - tags: - - schedule - /schedules/{id}/disable: - post: - consumes: - - application/json - description: disable schedule - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: disable schedule - tags: - - schedule - /schedules/{id}/enable: - post: - consumes: - - application/json - description: enable schedule - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: enable schedule - tags: - - schedule - /setting: - get: - description: Get setting - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get setting - tags: - - setting - /spiders: - post: - consumes: - - application/json - description: delete spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: delete spider - tags: - - spider - put: - consumes: - - application/json - description: Put spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider item - in: body - name: spider - required: true - schema: - $ref: '#/definitions/model.Spider' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Put spider - tags: - - spider - /spiders-cancel: - post: - consumes: - - application/json - description: cancel spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: cancel spider - tags: - - spider - /spiders-run: - post: - consumes: - - application/json - description: run spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: run spider - tags: - - spider - /spiders/{id}: - delete: - description: Delete spider by id - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete spider by id - tags: - - spider - get: - description: Get spider by id - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get spider by id - tags: - - spider - post: - consumes: - - application/json - description: Post spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - - description: spider item - in: body - name: item - required: true - schema: - $ref: '#/definitions/model.Spider' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Post spider - tags: - - spider - /spiders/{id}/copy: - post: - consumes: - - application/json - description: Copy spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Copy spider - tags: - - spider - /spiders/{id}/dir: - get: - description: Get spider dir - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - - description: path - in: query - name: path - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get spider dir - tags: - - spider - /spiders/{id}/file: - delete: - description: Delete spider file - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - - description: path - in: body - name: reqBody - required: true - schema: - $ref: '#/definitions/routes.SpiderFileReqBody' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete spider file - tags: - - spider - get: - description: Get spider file - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - - description: path - in: query - name: path - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get spider file - tags: - - spider - post: - description: Put spider file - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - - description: path - in: body - name: reqBody - required: true - schema: - $ref: '#/definitions/routes.SpiderFileReqBody' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Put spider file - tags: - - spider - put: - description: Post spider dir - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - - description: path - in: body - name: reqBody - required: true - schema: - $ref: '#/definitions/routes.SpiderFileReqBody' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Post spider dir - tags: - - spider - /spiders/{id}/file/rename: - post: - description: Rename spider file - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - - description: path - in: body - name: reqBody - required: true - schema: - $ref: '#/definitions/routes.SpiderFileReqBody' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Rename spider file - tags: - - spider - /spiders/{id}/file/tree: - get: - description: Get spider dir - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get spider dir - tags: - - spider - /spiders/{id}/git/reset: - post: - description: Post spider reset git - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Post spider reset git - tags: - - spider - /spiders/{id}/git/sync: - post: - description: Post spider sync git - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Post spider sync git - tags: - - spider - /spiders/{id}/publish: - post: - consumes: - - application/json - description: Publish spider - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: schedule id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Publish spider - tags: - - spider - /spiders/{id}/schedules: - get: - description: Get schedules - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get schedules - tags: - - spider - /spiders/{id}/scrapy/items: - get: - description: Get scrapy spider items - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get scrapy spider items - tags: - - spider - post: - description: Post scrapy spider items - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - - description: req data - in: body - name: reqData - required: true - schema: - items: - $ref: '#/definitions/entity.ScrapyItem' - type: entity.ScrapyItem - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Post scrapy spider items - tags: - - spider - /spiders/{id}/scrapy/pipelines: - get: - description: Get scrapy spider pipelines - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get scrapy spider pipelines - tags: - - spider - /spiders/{id}/scrapy/settings: - get: - description: Get scrapy spider settings - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get scrapy spider settings - tags: - - spider - post: - description: Get scrapy spider file - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - - description: req data - in: body - name: reqData - required: true - schema: - items: - $ref: '#/definitions/entity.ScrapySettingParam' - type: entity.ScrapySettingParam - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get scrapy spider file - tags: - - spider - /spiders/{id}/scrapy/spider/filepath: - get: - description: Get scrapy spider file path - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get scrapy spider file path - tags: - - spider - /spiders/{id}/scrapy/spiders: - get: - description: Get scrapy spider file - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get scrapy spider file - tags: - - spider - put: - description: Put scrapy spider file - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Put scrapy spider file - tags: - - spider - /spiders/{id}/stats: - get: - description: Get spider stats - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get spider stats - tags: - - spider - /spiders/{id}/tasks: - get: - description: Get task list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get task list - tags: - - spider - /spiders/{id}/upload: - post: - consumes: - - application/json - description: Upload spider by id - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: spider file to upload - in: formData - name: file - required: true - type: file - - description: spider id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "500": - description: Internal Server Error - schema: - type: json - summary: Upload spider by id - tags: - - spider - /stats/home: - get: - description: Get home stats - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get home stats - tags: - - version - /system/deps/: - get: - description: Get all dep list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: language - in: path - name: lang - required: true - type: string - - description: dep name - in: query - name: dep_nane - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get all dep list - tags: - - system - /system/deps/{lang}/{dep_name}/json: - get: - description: Get dep json - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: language - in: path - name: lang - required: true - type: string - - description: dep name - in: path - name: dep_name - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get dep json - tags: - - system - /task/{id}: - delete: - description: Delete task - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: task id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete task - tags: - - task - /tasks: - delete: - description: Delete tasks - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete tasks - tags: - - task - get: - description: Get task list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: req data - in: body - name: data - required: true - schema: - $ref: '#/definitions/routes.TaskListRequestData' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get task list - tags: - - task - put: - description: Put task - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Put task - tags: - - task - /tasks/{id}: - get: - description: Get task - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: task id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get task - tags: - - task - /tasks/{id}/cancel: - post: - description: Cancel task - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: task id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Cancel task - tags: - - task - /tasks/{id}/error-log: - delete: - description: Get task error log - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: task id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get task error log - tags: - - task - /tasks/{id}/log: - delete: - description: Get task log - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: task id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get task log - tags: - - task - /tasks/{id}/restart: - post: - description: Restart task - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: task id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Restart task - tags: - - task - /tasks/{id}/results: - get: - description: Get task list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: req data - in: body - name: data - required: true - schema: - $ref: '#/definitions/routes.TaskResultsRequestData' - type: object - - description: task id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get task list - tags: - - task - /tasks/{id}/results/download: - get: - description: Get task results - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: task id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get task results - tags: - - task - /tasks_by_status: - delete: - description: Delete task - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: task status - in: query - name: status - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete task - tags: - - task - /tokens: - get: - description: token - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get token - tags: - - token - put: - description: token - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Put token - tags: - - token - /tokens/{id}: - delete: - description: Delete token - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: token id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete token - tags: - - token - /users: - get: - description: Get user list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: data body - in: body - name: data - required: true - schema: - $ref: '#/definitions/routes.UserListRequestData' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get user list - tags: - - token - put: - description: Put user - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: reqData body - in: body - name: reqData - required: true - schema: - $ref: '#/definitions/routes.UserRequestData' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Put user - tags: - - user - /users/{id}: - delete: - description: Delete user - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: user id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete user - tags: - - user - get: - description: user - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: user id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get user - tags: - - user - post: - description: Post user - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: user body - in: body - name: item - required: true - schema: - $ref: '#/definitions/model.User' - type: object - - description: user id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Post user - tags: - - user - /variable: - put: - description: Put variable - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: reqData body - in: body - name: variable - required: true - schema: - $ref: '#/definitions/model.Variable' - type: object - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Put variable - tags: - - variable - /variable/{id}: - delete: - description: Delete variable - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: variable id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Delete variable - tags: - - variable - post: - description: Post variable - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - - description: reqData body - in: body - name: variable - required: true - schema: - $ref: '#/definitions/model.Variable' - type: object - - description: variable id - in: path - name: id - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Post variable - tags: - - variable - /variables: - get: - description: Get variable list - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get variable list - tags: - - variable - /version: - get: - description: Get version - parameters: - - description: Authorization token - in: header - name: Authorization - required: true - type: string - produces: - - application/json - responses: - "200": - description: OK - schema: - type: json - "400": - description: Bad Request - schema: - type: json - summary: Get version - tags: - - setting -swagger: "2.0" diff --git a/backend/dump.rdb b/backend/dump.rdb deleted file mode 100644 index 6f8d62d2..00000000 Binary files a/backend/dump.rdb and /dev/null differ diff --git a/backend/entity/common.go b/backend/entity/common.go deleted file mode 100644 index c46ae4f9..00000000 --- a/backend/entity/common.go +++ /dev/null @@ -1,17 +0,0 @@ -package entity - -import "strconv" - -type Page struct { - Skip int - Limit int - PageNum int - PageSize int -} - -func (p *Page) GetPage(pageNum string, pageSize string) { - p.PageNum, _ = strconv.Atoi(pageNum) - p.PageSize, _ = strconv.Atoi(pageSize) - p.Skip = p.PageSize * (p.PageNum - 1) - p.Limit = p.PageSize -} diff --git a/backend/entity/config_spider.go b/backend/entity/config_spider.go deleted file mode 100644 index 054ee2fe..00000000 --- a/backend/entity/config_spider.go +++ /dev/null @@ -1,40 +0,0 @@ -package entity - -type ConfigSpiderData struct { - // 通用 - Name string `yaml:"name" json:"name"` - DisplayName string `yaml:"display_name" json:"display_name"` - Col string `yaml:"col" json:"col"` - Remark string `yaml:"remark" json:"remark"` - Type string `yaml:"type" bson:"type"` - - // 可配置爬虫 - Engine string `yaml:"engine" json:"engine"` - StartUrl string `yaml:"start_url" json:"start_url"` - StartStage string `yaml:"start_stage" json:"start_stage"` - Stages []Stage `yaml:"stages" json:"stages"` - Settings map[string]string `yaml:"settings" json:"settings"` - - // 自定义爬虫 - Cmd string `yaml:"cmd" json:"cmd"` -} - -type Stage struct { - Name string `yaml:"name" json:"name"` - IsList bool `yaml:"is_list" json:"is_list"` - ListCss string `yaml:"list_css" json:"list_css"` - ListXpath string `yaml:"list_xpath" json:"list_xpath"` - PageCss string `yaml:"page_css" json:"page_css"` - PageXpath string `yaml:"page_xpath" json:"page_xpath"` - PageAttr string `yaml:"page_attr" json:"page_attr"` - Fields []Field `yaml:"fields" json:"fields"` -} - -type Field struct { - Name string `yaml:"name" json:"name"` - Css string `yaml:"css" json:"css"` - Xpath string `yaml:"xpath" json:"xpath"` - Attr string `yaml:"attr" json:"attr"` - NextStage string `yaml:"next_stage" json:"next_stage"` - Remark string `yaml:"remark" json:"remark"` -} diff --git a/backend/entity/doc.go b/backend/entity/doc.go deleted file mode 100644 index b356d38a..00000000 --- a/backend/entity/doc.go +++ /dev/null @@ -1,8 +0,0 @@ -package entity - -type DocItem struct { - Title string `json:"title"` - Url string `json:"url"` - Path string `json:"path"` - Children []DocItem `json:"children"` -} diff --git a/backend/entity/node.go b/backend/entity/node.go deleted file mode 100644 index ebe2047f..00000000 --- a/backend/entity/node.go +++ /dev/null @@ -1,28 +0,0 @@ -package entity - -type NodeMessage struct { - // 通信类别 - Type string `json:"type"` - - // 任务相关 - TaskId string `json:"task_id"` // 任务ID - - // 节点相关 - NodeId string `json:"node_id"` // 节点ID - - // 日志相关 - LogPath string `json:"log_path"` // 日志路径 - Log string `json:"log"` // 日志 - - // 系统信息 - SysInfo SystemInfo `json:"sys_info"` - - // 爬虫相关 - SpiderId string `json:"spider_id"` //爬虫ID - - // 语言相关 - Lang Lang `json:"lang"` - - // 错误相关 - Error string `json:"error"` -} diff --git a/backend/entity/rpc.go b/backend/entity/rpc.go deleted file mode 100644 index 48f14b26..00000000 --- a/backend/entity/rpc.go +++ /dev/null @@ -1,11 +0,0 @@ -package entity - -type RpcMessage struct { - Id string `json:"id"` // 消息ID - Method string `json:"method"` // 消息方法 - NodeId string `json:"node_id"` // 节点ID - Params map[string]string `json:"params"` // 参数 - Timeout int `json:"timeout"` // 超时 - Result string `json:"result"` // 结果 - Error string `json:"error"` // 错误 -} diff --git a/backend/entity/spider.go b/backend/entity/spider.go deleted file mode 100644 index 616d3bbf..00000000 --- a/backend/entity/spider.go +++ /dev/null @@ -1,17 +0,0 @@ -package entity - -type SpiderType struct { - Type string `json:"type" bson:"_id"` - Count int `json:"count" bson:"count"` -} - -type ScrapySettingParam struct { - Key string `json:"key"` - Value interface{} `json:"value"` - Type string `json:"type"` -} - -type ScrapyItem struct { - Name string `json:"name"` - Fields []string `json:"fields"` -} diff --git a/backend/entity/system.go b/backend/entity/system.go deleted file mode 100644 index f1a24f4b..00000000 --- a/backend/entity/system.go +++ /dev/null @@ -1,39 +0,0 @@ -package entity - -type SystemInfo struct { - ARCH string `json:"arch"` - OS string `json:"os"` - Hostname string `json:"host_name"` - NumCpu int `json:"num_cpu"` - Executables []Executable `json:"executables"` -} - -type Executable struct { - Path string `json:"path"` - FileName string `json:"file_name"` - DisplayName string `json:"display_name"` -} - -type Lang struct { - Name string `json:"name"` - ExecutableName string `json:"executable_name"` - ExecutablePaths []string `json:"executable_paths"` - DepExecutablePath string `json:"dep_executable_path"` - LockPath string `json:"lock_path"` - InstallScript string `json:"install_script"` - InstallStatus string `json:"install_status"` - DepFileName string `json:"dep_file_name"` - InstallDepArgs string `json:"install_dep_cmd"` - Type string `json:"type"` -} - -type Dependency struct { - Name string `json:"name"` - Version string `json:"version"` - Description string `json:"description"` - Installed bool `json:"installed"` -} - -type PackageJson struct { - Dependencies map[string]string `json:"dependencies"` -} diff --git a/backend/entity/version.go b/backend/entity/version.go deleted file mode 100644 index 97a0278d..00000000 --- a/backend/entity/version.go +++ /dev/null @@ -1,23 +0,0 @@ -package entity - -type Release struct { - Name string `json:"name"` - Draft bool `json:"draft"` - PreRelease bool `json:"pre_release"` - PublishedAt string `json:"published_at"` - Body string `json:"body"` -} - -type ReleaseSlices []Release - -func (r ReleaseSlices) Len() int { - return len(r) -} - -func (r ReleaseSlices) Less(i, j int) bool { - return r[i].PublishedAt < r[j].PublishedAt -} - -func (r ReleaseSlices) Swap(i, j int) { - r[i], r[j] = r[j], r[i] -} diff --git a/backend/errors/errors.go b/backend/errors/errors.go deleted file mode 100644 index d896e4d4..00000000 --- a/backend/errors/errors.go +++ /dev/null @@ -1,54 +0,0 @@ -package errors - -import ( - "fmt" - "net/http" -) - -type Scope int - -const ( - ScopeSystem Scope = 1 - ScopeBusiness Scope = 2 -) - -type OPError struct { - HttpCode int - Message string - Code int - Scope Scope -} - -func (O OPError) Error() string { - var scope string - switch O.Scope { - case ScopeSystem: - scope = "system" - case ScopeBusiness: - scope = "business" - } - return fmt.Sprintf("%s error: [%d]%s.", scope, O.Code, O.Message) -} - -func NewSystemOPError(code int, message string, httpCodes ...int) *OPError { - httpCode := http.StatusOK - if len(httpCodes) > 0 { - httpCode = httpCodes[0] - } - return NewOpError(code, message, ScopeSystem, httpCode) -} -func NewOpError(code int, message string, scope Scope, httpCode int) *OPError { - return &OPError{ - Message: message, - Code: code, - Scope: scope, - HttpCode: httpCode, - } -} -func NewBusinessError(code int, message string, httpCodes ...int) *OPError { - httpCode := http.StatusOK - if len(httpCodes) > 0 { - httpCode = httpCodes[0] - } - return NewOpError(code, message, ScopeBusiness, httpCode) -} diff --git a/backend/go.mod b/backend/go.mod index 3f1fda3a..6d2ab8d5 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -1,43 +1,47 @@ module crawlab -go 1.12 +go 1.15 + +replace ( + github.com/crawlab-team/crawlab-core => /Users/marvzhang/projects/crawlab-team/crawlab-core + github.com/crawlab-team/crawlab-db => /Users/marvzhang/projects/crawlab-team/crawlab-db +) require ( github.com/Masterminds/semver v1.4.2 // indirect github.com/Masterminds/sprig v2.16.0+incompatible // indirect - github.com/Unknwon/goconfig v0.0.0-20191126170842-860a72fb44fd - github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 + github.com/Unknwon/goconfig v0.0.0-20191126170842-860a72fb44fd // indirect github.com/aokoli/goutils v1.0.1 // indirect - github.com/apex/log v1.1.4 - github.com/cenkalti/backoff/v4 v4.0.2 - github.com/dgrijalva/jwt-go v3.2.0+incompatible - github.com/fsnotify/fsnotify v1.4.9 + github.com/apex/log v1.9.0 + github.com/cenkalti/backoff/v4 v4.1.0 // indirect + github.com/crawlab-team/crawlab-core v0.0.0-00010101000000-000000000000 + github.com/crawlab-team/crawlab-db v0.0.2 + github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect + github.com/fsnotify/fsnotify v1.4.9 // indirect github.com/gin-gonic/gin v1.6.3 - github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8 + github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8 // indirect github.com/go-playground/validator/v10 v10.3.0 - github.com/gomodule/redigo v2.0.0+incompatible - github.com/hashicorp/go-sockaddr v1.0.0 + github.com/gomodule/redigo v2.0.0+incompatible // indirect + github.com/hashicorp/go-sockaddr v1.0.0 // indirect github.com/huandu/xstrings v1.2.0 // indirect github.com/imdario/mergo v0.3.6 // indirect - github.com/imroc/req v0.3.0 + github.com/imroc/req v0.3.0 // indirect github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0 // indirect - github.com/matcornic/hermes v1.2.0 + github.com/matcornic/hermes v1.2.0 // indirect github.com/mattn/go-runewidth v0.0.3 // indirect github.com/olekukonko/tablewriter v0.0.1 // indirect github.com/olivere/elastic/v7 v7.0.15 - github.com/pkg/errors v0.9.1 - github.com/satori/go.uuid v1.2.0 - github.com/smartystreets/goconvey v1.6.4 - github.com/spf13/viper v1.7.0 + github.com/pkg/errors v0.9.1 // indirect + github.com/satori/go.uuid v1.2.0 // indirect + github.com/smartystreets/goconvey v1.6.4 // indirect + github.com/spf13/viper v1.7.1 github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect - github.com/swaggo/gin-swagger v1.2.0 - github.com/swaggo/swag v1.6.6 - go.uber.org/atomic v1.6.0 + go.uber.org/atomic v1.6.0 // indirect golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f // indirect gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect - gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df + gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df // indirect gopkg.in/russross/blackfriday.v2 v2.0.0 // indirect - gopkg.in/src-d/go-git.v4 v4.13.1 - gopkg.in/yaml.v2 v2.3.0 + gopkg.in/src-d/go-git.v4 v4.13.1 // indirect + gopkg.in/yaml.v2 v2.3.0 // indirect ) diff --git a/backend/go.sum b/backend/go.sum index c53a2aaf..5779ab97 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -42,7 +42,10 @@ github.com/aokoli/goutils v1.0.1 h1:7fpzNGoJ3VA8qcrm++XEE1QUe0mIwNeLa02Nwq7RDkg= github.com/aokoli/goutils v1.0.1/go.mod h1:SijmP0QR8LtwsmDs8Yii5Z/S4trXFGFC2oO5g9DP+DQ= github.com/apex/log v1.1.4 h1:3Zk+boorIQAAGBrHn0JUtAau4ihMamT4WdnfdnXM1zQ= github.com/apex/log v1.1.4/go.mod h1:AlpoD9aScyQfJDVHmLMEcx4oU6LqzkWp4Mg9GdAcEvQ= +github.com/apex/log v1.9.0 h1:FHtw/xuaM8AgmvDDTI9fiwoAL25Sq2cxojnZICUU8l0= +github.com/apex/log v1.9.0/go.mod h1:m82fZlWIuiWzWP04XCTXmnX0xRkYYbCdYn8jbJeLBEA= github.com/apex/logs v0.0.4/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo= +github.com/apex/logs v1.0.0/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo= github.com/aphistic/golf v0.0.0-20180712155816-02c07f170c5a/go.mod h1:3NqKYiepwy8kCu4PNA+aP7WUV72eXWJeP9/r3/K9aLE= github.com/aphistic/sweet v0.2.0/go.mod h1:fWDlIh/isSE9n6EPsRmC0det+whmX6dJid3stzu0Xys= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= @@ -59,6 +62,8 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4eamEDs= github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= +github.com/cenkalti/backoff/v4 v4.1.0 h1:c8LkOFQTzuO0WBM/ae5HdGQuZPfPxp7lqBRwQRm4fSc= +github.com/cenkalti/backoff/v4 v4.1.0/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= @@ -137,6 +142,7 @@ github.com/go-playground/validator/v10 v10.2.0 h1:KgJ0snyC2R9VXYN2rneOtQcw5aHQB1 github.com/go-playground/validator/v10 v10.2.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= github.com/go-playground/validator/v10 v10.3.0 h1:nZU+7q+yJoFmwvNgv/LnPUkwPal62+b2xXj0AU1Es7o= github.com/go-playground/validator/v10 v10.3.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI= +github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -213,6 +219,8 @@ github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= +github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA= +github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= @@ -232,6 +240,7 @@ github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxv github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= @@ -239,6 +248,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= +github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= +github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= @@ -261,6 +272,7 @@ github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHX github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-runewidth v0.0.3 h1:a+kO+98RDGEfo6asOGMmpodZq4FNtnGP54yps8BzLR4= github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= +github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= @@ -343,6 +355,8 @@ github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/viper v1.7.0 h1:xVKxvI7ouOI5I+U9s2eeiUfMaWBVoXA3AWskkrqK0VM= github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= +github.com/spf13/viper v1.7.1 h1:pM5oEahlgWv/WnHXpgbKz7iLIxRf65tye2Ci+XFK5sk= +github.com/spf13/viper v1.7.1/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4= github.com/src-d/gcfg v1.4.0/go.mod h1:p/UMsR43ujA89BJY9duynAwIpvqEujIH/jFlfL7jWoI= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo= @@ -356,6 +370,7 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14/go.mod h1:gxQT6pBGRuIGunNf/+tSOB5OHvguWi8Tbt82WOkf35E= @@ -366,6 +381,8 @@ github.com/swaggo/swag v1.5.1/go.mod h1:1Bl9F/ZBpVWh22nY0zmYyASPO1lI/zIwRDrpZU+t github.com/swaggo/swag v1.6.6 h1:3YX5hmuUyCMT/OqqnjW92gULAfHg3hVjpcPm53N64RY= github.com/swaggo/swag v1.6.6/go.mod h1:xDhTyuFIujYiN3DKWC/H/83xcfHp+UE/IzWWampG7Zc= github.com/tj/assert v0.0.0-20171129193455-018094318fb0/go.mod h1:mZ9/Rh9oLWpLLDRpvE+3b7gP/C2YyLFYxNmcLnPTMe0= +github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk= +github.com/tj/go-buffer v1.1.0/go.mod h1:iyiJpfFcR2B9sXu7KvjbT9fpM4mOelRSDTbntVj52Uc= github.com/tj/go-elastic v0.0.0-20171221160941-36157cbbebc2/go.mod h1:WjeM0Oo1eNAjXGDx2yma7uG2XoyRZTq1uv3M/o7imD0= github.com/tj/go-kinesis v0.0.0-20171128231115-08b17f58cb1b/go.mod h1:/yhzCV0xPfx6jb1bBgRFjl5lytqVqZXEaeqWP8lTEao= github.com/tj/go-spin v1.1.0/go.mod h1:Mg1mzmePZm4dva8Qz60H2lHwmJ2loum4VIrLgVnKwh4= @@ -580,6 +597,8 @@ gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/backend/lib/cron/.gitignore b/backend/lib/cron/.gitignore deleted file mode 100644 index 00268614..00000000 --- a/backend/lib/cron/.gitignore +++ /dev/null @@ -1,22 +0,0 @@ -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe diff --git a/backend/lib/cron/.travis.yml b/backend/lib/cron/.travis.yml deleted file mode 100644 index 4f2ee4d9..00000000 --- a/backend/lib/cron/.travis.yml +++ /dev/null @@ -1 +0,0 @@ -language: go diff --git a/backend/lib/cron/LICENSE b/backend/lib/cron/LICENSE deleted file mode 100644 index 3a0f627f..00000000 --- a/backend/lib/cron/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Copyright (C) 2012 Rob Figueiredo -All Rights Reserved. - -MIT LICENSE - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/backend/lib/cron/README.md b/backend/lib/cron/README.md deleted file mode 100644 index 979f71e6..00000000 --- a/backend/lib/cron/README.md +++ /dev/null @@ -1,125 +0,0 @@ -[![GoDoc](http://godoc.org/github.com/robfig/cron?status.png)](http://godoc.org/github.com/robfig/cron) -[![Build Status](https://travis-ci.org/robfig/cron.svg?branch=master)](https://travis-ci.org/robfig/cron) - -# cron - -Cron V3 has been released! - -To download the specific tagged release, run: - - go get github.com/robfig/cron/v3@v3.0.0 - -Import it in your program as: - - import "github.com/robfig/cron/v3" - -It requires Go 1.11 or later due to usage of Go Modules. - -Refer to the documentation here: -http://godoc.org/github.com/robfig/cron - -The rest of this document describes the the advances in v3 and a list of -breaking changes for users that wish to upgrade from an earlier version. - -## Upgrading to v3 (June 2019) - -cron v3 is a major upgrade to the library that addresses all outstanding bugs, -feature requests, and rough edges. It is based on a merge of master which -contains various fixes to issues found over the years and the v2 branch which -contains some backwards-incompatible features like the ability to remove cron -jobs. In addition, v3 adds support for Go Modules, cleans up rough edges like -the timezone support, and fixes a number of bugs. - -New features: - -- Support for Go modules. Callers must now import this library as - `github.com/robfig/cron/v3`, instead of `gopkg.in/...` - -- Fixed bugs: - - 0f01e6b parser: fix combining of Dow and Dom (#70) - - dbf3220 adjust times when rolling the clock forward to handle non-existent midnight (#157) - - eeecf15 spec_test.go: ensure an error is returned on 0 increment (#144) - - 70971dc cron.Entries(): update request for snapshot to include a reply channel (#97) - - 1cba5e6 cron: fix: removing a job causes the next scheduled job to run too late (#206) - -- Standard cron spec parsing by default (first field is "minute"), with an easy - way to opt into the seconds field (quartz-compatible). Although, note that the - year field (optional in Quartz) is not supported. - -- Extensible, key/value logging via an interface that complies with - the https://github.com/go-logr/logr project. - -- The new Chain & JobWrapper types allow you to install "interceptors" to add - cross-cutting behavior like the following: - - Recover any panics from jobs - - Delay a job's execution if the previous run hasn't completed yet - - Skip a job's execution if the previous run hasn't completed yet - - Log each job's invocations - - Notification when jobs are completed - -It is backwards incompatible with both v1 and v2. These updates are required: - -- The v1 branch accepted an optional seconds field at the beginning of the cron - spec. This is non-standard and has led to a lot of confusion. The new default - parser conforms to the standard as described by [the Cron wikipedia page]. - - UPDATING: To retain the old behavior, construct your Cron with a custom - parser: - - // Seconds field, required - cron.New(cron.WithSeconds()) - - // Seconds field, optional - cron.New( - cron.WithParser( - cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor)) - -- The Cron type now accepts functional options on construction rather than the - previous ad-hoc behavior modification mechanisms (setting a field, calling a setter). - - UPDATING: Code that sets Cron.ErrorLogger or calls Cron.SetLocation must be - updated to provide those values on construction. - -- CRON_TZ is now the recommended way to specify the timezone of a single - schedule, which is sanctioned by the specification. The legacy "TZ=" prefix - will continue to be supported since it is unambiguous and easy to do so. - - UPDATING: No update is required. - -- By default, cron will no longer recover panics in jobs that it runs. - Recovering can be surprising (see issue #192) and seems to be at odds with - typical behavior of libraries. Relatedly, the `cron.WithPanicLogger` option - has been removed to accommodate the more general JobWrapper type. - - UPDATING: To opt into panic recovery and configure the panic logger: - - cron.New(cron.WithChain( - cron.Recover(logger), // or use cron.DefaultLogger - )) - -- In adding support for https://github.com/go-logr/logr, `cron.WithVerboseLogger` was - removed, since it is duplicative with the leveled logging. - - UPDATING: Callers should use `WithLogger` and specify a logger that does not - discard `Info` logs. For convenience, one is provided that wraps `*log.Logger`: - - cron.New( - cron.WithLogger(cron.VerbosePrintfLogger(logger))) - - -### Background - Cron spec format - -There are two cron spec formats in common usage: - -- The "standard" cron format, described on [the Cron wikipedia page] and used by - the cron Linux system utility. - -- The cron format used by [the Quartz Scheduler], commonly used for scheduled - jobs in Java software - -[the Cron wikipedia page]: https://en.wikipedia.org/wiki/Cron -[the Quartz Scheduler]: http://www.quartz-scheduler.org/documentation/quartz-2.x/tutorials/crontrigger.html - -The original version of this package included an optional "seconds" field, which -made it incompatible with both of these formats. Now, the "standard" format is -the default format accepted, and the Quartz format is opt-in. diff --git a/backend/lib/cron/chain.go b/backend/lib/cron/chain.go deleted file mode 100644 index 118e5bbe..00000000 --- a/backend/lib/cron/chain.go +++ /dev/null @@ -1,92 +0,0 @@ -package cron - -import ( - "fmt" - "runtime" - "sync" - "time" -) - -// JobWrapper decorates the given Job with some behavior. -type JobWrapper func(Job) Job - -// Chain is a sequence of JobWrappers that decorates submitted jobs with -// cross-cutting behaviors like logging or synchronization. -type Chain struct { - wrappers []JobWrapper -} - -// NewChain returns a Chain consisting of the given JobWrappers. -func NewChain(c ...JobWrapper) Chain { - return Chain{c} -} - -// Then decorates the given job with all JobWrappers in the chain. -// -// This: -// NewChain(m1, m2, m3).Then(job) -// is equivalent to: -// m1(m2(m3(job))) -func (c Chain) Then(j Job) Job { - for i := range c.wrappers { - j = c.wrappers[len(c.wrappers)-i-1](j) - } - return j -} - -// Recover panics in wrapped jobs and log them with the provided logger. -func Recover(logger Logger) JobWrapper { - return func(j Job) Job { - return FuncJob(func() { - defer func() { - if r := recover(); r != nil { - const size = 64 << 10 - buf := make([]byte, size) - buf = buf[:runtime.Stack(buf, false)] - err, ok := r.(error) - if !ok { - err = fmt.Errorf("%v", r) - } - logger.Error(err, "panic", "stack", "...\n"+string(buf)) - } - }() - j.Run() - }) - } -} - -// DelayIfStillRunning serializes jobs, delaying subsequent runs until the -// previous one is complete. Jobs running after a delay of more than a minute -// have the delay logged at Info. -func DelayIfStillRunning(logger Logger) JobWrapper { - return func(j Job) Job { - var mu sync.Mutex - return FuncJob(func() { - start := time.Now() - mu.Lock() - defer mu.Unlock() - if dur := time.Since(start); dur > time.Minute { - logger.Info("delay", "duration", dur) - } - j.Run() - }) - } -} - -// SkipIfStillRunning skips an invocation of the Job if a previous invocation is -// still running. It logs skips to the given logger at Info level. -func SkipIfStillRunning(logger Logger) JobWrapper { - var ch = make(chan struct{}, 1) - ch <- struct{}{} - return func(j Job) Job { - return FuncJob(func() { - select { - case v := <-ch: - j.Run() - ch <- v - default: - logger.Info("skip") - } - }) - } -} diff --git a/backend/lib/cron/chain_test.go b/backend/lib/cron/chain_test.go deleted file mode 100644 index 2561bd7f..00000000 --- a/backend/lib/cron/chain_test.go +++ /dev/null @@ -1,221 +0,0 @@ -package cron - -import ( - "io/ioutil" - "log" - "reflect" - "sync" - "testing" - "time" -) - -func appendingJob(slice *[]int, value int) Job { - var m sync.Mutex - return FuncJob(func() { - m.Lock() - *slice = append(*slice, value) - m.Unlock() - }) -} - -func appendingWrapper(slice *[]int, value int) JobWrapper { - return func(j Job) Job { - return FuncJob(func() { - appendingJob(slice, value).Run() - j.Run() - }) - } -} - -func TestChain(t *testing.T) { - var nums []int - var ( - append1 = appendingWrapper(&nums, 1) - append2 = appendingWrapper(&nums, 2) - append3 = appendingWrapper(&nums, 3) - append4 = appendingJob(&nums, 4) - ) - NewChain(append1, append2, append3).Then(append4).Run() - if !reflect.DeepEqual(nums, []int{1, 2, 3, 4}) { - t.Error("unexpected order of calls:", nums) - } -} - -func TestChainRecover(t *testing.T) { - panickingJob := FuncJob(func() { - panic("panickingJob panics") - }) - - t.Run("panic exits job by default", func(t *testing.T) { - defer func() { - if err := recover(); err == nil { - t.Errorf("panic expected, but none received") - } - }() - NewChain().Then(panickingJob). - Run() - }) - - t.Run("Recovering JobWrapper recovers", func(t *testing.T) { - NewChain(Recover(PrintfLogger(log.New(ioutil.Discard, "", 0)))). - Then(panickingJob). - Run() - }) - - t.Run("composed with the *IfStillRunning wrappers", func(t *testing.T) { - NewChain(Recover(PrintfLogger(log.New(ioutil.Discard, "", 0)))). - Then(panickingJob). - Run() - }) -} - -type countJob struct { - m sync.Mutex - started int - done int - delay time.Duration -} - -func (j *countJob) Run() { - j.m.Lock() - j.started++ - j.m.Unlock() - time.Sleep(j.delay) - j.m.Lock() - j.done++ - j.m.Unlock() -} - -func (j *countJob) Started() int { - defer j.m.Unlock() - j.m.Lock() - return j.started -} - -func (j *countJob) Done() int { - defer j.m.Unlock() - j.m.Lock() - return j.done -} - -func TestChainDelayIfStillRunning(t *testing.T) { - - t.Run("runs immediately", func(t *testing.T) { - var j countJob - wrappedJob := NewChain(DelayIfStillRunning(DiscardLogger)).Then(&j) - go wrappedJob.Run() - time.Sleep(2 * time.Millisecond) // Give the job 2ms to complete. - if c := j.Done(); c != 1 { - t.Errorf("expected job run once, immediately, got %d", c) - } - }) - - t.Run("second run immediate if first done", func(t *testing.T) { - var j countJob - wrappedJob := NewChain(DelayIfStillRunning(DiscardLogger)).Then(&j) - go func() { - go wrappedJob.Run() - time.Sleep(time.Millisecond) - go wrappedJob.Run() - }() - time.Sleep(3 * time.Millisecond) // Give both jobs 3ms to complete. - if c := j.Done(); c != 2 { - t.Errorf("expected job run twice, immediately, got %d", c) - } - }) - - t.Run("second run delayed if first not done", func(t *testing.T) { - var j countJob - j.delay = 10 * time.Millisecond - wrappedJob := NewChain(DelayIfStillRunning(DiscardLogger)).Then(&j) - go func() { - go wrappedJob.Run() - time.Sleep(time.Millisecond) - go wrappedJob.Run() - }() - - // After 5ms, the first job is still in progress, and the second job was - // run but should be waiting for it to finish. - time.Sleep(5 * time.Millisecond) - started, done := j.Started(), j.Done() - if started != 1 || done != 0 { - t.Error("expected first job started, but not finished, got", started, done) - } - - // Verify that the second job completes. - time.Sleep(25 * time.Millisecond) - started, done = j.Started(), j.Done() - if started != 2 || done != 2 { - t.Error("expected both jobs done, got", started, done) - } - }) - -} - -func TestChainSkipIfStillRunning(t *testing.T) { - - t.Run("runs immediately", func(t *testing.T) { - var j countJob - wrappedJob := NewChain(SkipIfStillRunning(DiscardLogger)).Then(&j) - go wrappedJob.Run() - time.Sleep(2 * time.Millisecond) // Give the job 2ms to complete. - if c := j.Done(); c != 1 { - t.Errorf("expected job run once, immediately, got %d", c) - } - }) - - t.Run("second run immediate if first done", func(t *testing.T) { - var j countJob - wrappedJob := NewChain(SkipIfStillRunning(DiscardLogger)).Then(&j) - go func() { - go wrappedJob.Run() - time.Sleep(time.Millisecond) - go wrappedJob.Run() - }() - time.Sleep(3 * time.Millisecond) // Give both jobs 3ms to complete. - if c := j.Done(); c != 2 { - t.Errorf("expected job run twice, immediately, got %d", c) - } - }) - - t.Run("second run skipped if first not done", func(t *testing.T) { - var j countJob - j.delay = 10 * time.Millisecond - wrappedJob := NewChain(SkipIfStillRunning(DiscardLogger)).Then(&j) - go func() { - go wrappedJob.Run() - time.Sleep(time.Millisecond) - go wrappedJob.Run() - }() - - // After 5ms, the first job is still in progress, and the second job was - // aleady skipped. - time.Sleep(5 * time.Millisecond) - started, done := j.Started(), j.Done() - if started != 1 || done != 0 { - t.Error("expected first job started, but not finished, got", started, done) - } - - // Verify that the first job completes and second does not run. - time.Sleep(25 * time.Millisecond) - started, done = j.Started(), j.Done() - if started != 1 || done != 1 { - t.Error("expected second job skipped, got", started, done) - } - }) - - t.Run("skip 10 jobs on rapid fire", func(t *testing.T) { - var j countJob - j.delay = 10 * time.Millisecond - wrappedJob := NewChain(SkipIfStillRunning(DiscardLogger)).Then(&j) - for i := 0; i < 11; i++ { - go wrappedJob.Run() - } - time.Sleep(200 * time.Millisecond) - done := j.Done() - if done != 1 { - t.Error("expected 1 jobs executed, 10 jobs dropped, got", done) - } - }) - -} diff --git a/backend/lib/cron/constantdelay.go b/backend/lib/cron/constantdelay.go deleted file mode 100644 index cd6e7b1b..00000000 --- a/backend/lib/cron/constantdelay.go +++ /dev/null @@ -1,27 +0,0 @@ -package cron - -import "time" - -// ConstantDelaySchedule represents a simple recurring duty cycle, e.g. "Every 5 minutes". -// It does not support jobs more frequent than once a second. -type ConstantDelaySchedule struct { - Delay time.Duration -} - -// Every returns a crontab Schedule that activates once every duration. -// Delays of less than a second are not supported (will round up to 1 second). -// Any fields less than a Second are truncated. -func Every(duration time.Duration) ConstantDelaySchedule { - if duration < time.Second { - duration = time.Second - } - return ConstantDelaySchedule{ - Delay: duration - time.Duration(duration.Nanoseconds())%time.Second, - } -} - -// Next returns the next time this should be run. -// This rounds so that the next activation time will be on the second. -func (schedule ConstantDelaySchedule) Next(t time.Time) time.Time { - return t.Add(schedule.Delay - time.Duration(t.Nanosecond())*time.Nanosecond) -} diff --git a/backend/lib/cron/constantdelay_test.go b/backend/lib/cron/constantdelay_test.go deleted file mode 100644 index f43a58ad..00000000 --- a/backend/lib/cron/constantdelay_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package cron - -import ( - "testing" - "time" -) - -func TestConstantDelayNext(t *testing.T) { - tests := []struct { - time string - delay time.Duration - expected string - }{ - // Simple cases - {"Mon Jul 9 14:45 2012", 15*time.Minute + 50*time.Nanosecond, "Mon Jul 9 15:00 2012"}, - {"Mon Jul 9 14:59 2012", 15 * time.Minute, "Mon Jul 9 15:14 2012"}, - {"Mon Jul 9 14:59:59 2012", 15 * time.Minute, "Mon Jul 9 15:14:59 2012"}, - - // Wrap around hours - {"Mon Jul 9 15:45 2012", 35 * time.Minute, "Mon Jul 9 16:20 2012"}, - - // Wrap around days - {"Mon Jul 9 23:46 2012", 14 * time.Minute, "Tue Jul 10 00:00 2012"}, - {"Mon Jul 9 23:45 2012", 35 * time.Minute, "Tue Jul 10 00:20 2012"}, - {"Mon Jul 9 23:35:51 2012", 44*time.Minute + 24*time.Second, "Tue Jul 10 00:20:15 2012"}, - {"Mon Jul 9 23:35:51 2012", 25*time.Hour + 44*time.Minute + 24*time.Second, "Thu Jul 11 01:20:15 2012"}, - - // Wrap around months - {"Mon Jul 9 23:35 2012", 91*24*time.Hour + 25*time.Minute, "Thu Oct 9 00:00 2012"}, - - // Wrap around minute, hour, day, month, and year - {"Mon Dec 31 23:59:45 2012", 15 * time.Second, "Tue Jan 1 00:00:00 2013"}, - - // Round to nearest second on the delay - {"Mon Jul 9 14:45 2012", 15*time.Minute + 50*time.Nanosecond, "Mon Jul 9 15:00 2012"}, - - // Round up to 1 second if the duration is less. - {"Mon Jul 9 14:45:00 2012", 15 * time.Millisecond, "Mon Jul 9 14:45:01 2012"}, - - // Round to nearest second when calculating the next time. - {"Mon Jul 9 14:45:00.005 2012", 15 * time.Minute, "Mon Jul 9 15:00 2012"}, - - // Round to nearest second for both. - {"Mon Jul 9 14:45:00.005 2012", 15*time.Minute + 50*time.Nanosecond, "Mon Jul 9 15:00 2012"}, - } - - for _, c := range tests { - actual := Every(c.delay).Next(getTime(c.time)) - expected := getTime(c.expected) - if actual != expected { - t.Errorf("%s, \"%s\": (expected) %v != %v (actual)", c.time, c.delay, expected, actual) - } - } -} diff --git a/backend/lib/cron/cron.go b/backend/lib/cron/cron.go deleted file mode 100644 index f6e451db..00000000 --- a/backend/lib/cron/cron.go +++ /dev/null @@ -1,350 +0,0 @@ -package cron - -import ( - "context" - "sort" - "sync" - "time" -) - -// Cron keeps track of any number of entries, invoking the associated func as -// specified by the schedule. It may be started, stopped, and the entries may -// be inspected while running. -type Cron struct { - entries []*Entry - chain Chain - stop chan struct{} - add chan *Entry - remove chan EntryID - snapshot chan chan []Entry - running bool - logger Logger - runningMu sync.Mutex - location *time.Location - parser Parser - nextID EntryID - jobWaiter sync.WaitGroup -} - -// Job is an interface for submitted cron jobs. -type Job interface { - Run() -} - -// Schedule describes a job's duty cycle. -type Schedule interface { - // Next returns the next activation time, later than the given time. - // Next is invoked initially, and then each time the job is run. - Next(time.Time) time.Time -} - -// EntryID identifies an entry within a Cron instance -type EntryID int - -// Entry consists of a schedule and the func to execute on that schedule. -type Entry struct { - // ID is the cron-assigned ID of this entry, which may be used to look up a - // snapshot or remove it. - ID EntryID - - // Schedule on which this job should be run. - Schedule Schedule - - // Next time the job will run, or the zero time if Cron has not been - // started or this entry's schedule is unsatisfiable - Next time.Time - - // Prev is the last time this job was run, or the zero time if never. - Prev time.Time - - // WrappedJob is the thing to run when the Schedule is activated. - WrappedJob Job - - // Job is the thing that was submitted to cron. - // It is kept around so that user code that needs to get at the job later, - // e.g. via Entries() can do so. - Job Job -} - -// Valid returns true if this is not the zero entry. -func (e Entry) Valid() bool { return e.ID != 0 } - -// byTime is a wrapper for sorting the entry array by time -// (with zero time at the end). -type byTime []*Entry - -func (s byTime) Len() int { return len(s) } -func (s byTime) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s byTime) Less(i, j int) bool { - // Two zero times should return false. - // Otherwise, zero is "greater" than any other time. - // (To sort it at the end of the list.) - if s[i].Next.IsZero() { - return false - } - if s[j].Next.IsZero() { - return true - } - return s[i].Next.Before(s[j].Next) -} - -// New returns a new Cron job runner, modified by the given options. -// -// Available Settings -// -// Time Zone -// Description: The time zone in which schedules are interpreted -// Default: time.Local -// -// Parser -// Description: Parser converts cron spec strings into cron.Schedules. -// Default: Accepts this spec: https://en.wikipedia.org/wiki/Cron -// -// Chain -// Description: Wrap submitted jobs to customize behavior. -// Default: A chain that recovers panics and logs them to stderr. -// -// See "cron.With*" to modify the default behavior. -func New(opts ...Option) *Cron { - c := &Cron{ - entries: nil, - chain: NewChain(), - add: make(chan *Entry), - stop: make(chan struct{}), - snapshot: make(chan chan []Entry), - remove: make(chan EntryID), - running: false, - runningMu: sync.Mutex{}, - logger: DefaultLogger, - location: time.Local, - parser: standardParser, - } - for _, opt := range opts { - opt(c) - } - return c -} - -// FuncJob is a wrapper that turns a func() into a cron.Job -type FuncJob func() - -func (f FuncJob) Run() { f() } - -// AddFunc adds a func to the Cron to be run on the given schedule. -// The spec is parsed using the time zone of this Cron instance as the default. -// An opaque ID is returned that can be used to later remove it. -func (c *Cron) AddFunc(spec string, cmd func()) (EntryID, error) { - return c.AddJob(spec, FuncJob(cmd)) -} - -// AddJob adds a Job to the Cron to be run on the given schedule. -// The spec is parsed using the time zone of this Cron instance as the default. -// An opaque ID is returned that can be used to later remove it. -func (c *Cron) AddJob(spec string, cmd Job) (EntryID, error) { - schedule, err := c.parser.Parse(spec) - if err != nil { - return 0, err - } - return c.Schedule(schedule, cmd), nil -} - -// Schedule adds a Job to the Cron to be run on the given schedule. -// The job is wrapped with the configured Chain. -func (c *Cron) Schedule(schedule Schedule, cmd Job) EntryID { - c.runningMu.Lock() - defer c.runningMu.Unlock() - c.nextID++ - entry := &Entry{ - ID: c.nextID, - Schedule: schedule, - WrappedJob: c.chain.Then(cmd), - Job: cmd, - } - if !c.running { - c.entries = append(c.entries, entry) - } else { - c.add <- entry - } - return entry.ID -} - -// Entries returns a snapshot of the cron entries. -func (c *Cron) Entries() []Entry { - c.runningMu.Lock() - defer c.runningMu.Unlock() - if c.running { - replyChan := make(chan []Entry, 1) - c.snapshot <- replyChan - return <-replyChan - } - return c.entrySnapshot() -} - -// Location gets the time zone location -func (c *Cron) Location() *time.Location { - return c.location -} - -// Entry returns a snapshot of the given entry, or nil if it couldn't be found. -func (c *Cron) Entry(id EntryID) Entry { - for _, entry := range c.Entries() { - if id == entry.ID { - return entry - } - } - return Entry{} -} - -// Remove an entry from being run in the future. -func (c *Cron) Remove(id EntryID) { - c.runningMu.Lock() - defer c.runningMu.Unlock() - if c.running { - c.remove <- id - } else { - c.removeEntry(id) - } -} - -// Start the cron scheduler in its own goroutine, or no-op if already started. -func (c *Cron) Start() { - c.runningMu.Lock() - defer c.runningMu.Unlock() - if c.running { - return - } - c.running = true - go c.run() -} - -// Run the cron scheduler, or no-op if already running. -func (c *Cron) Run() { - c.runningMu.Lock() - if c.running { - c.runningMu.Unlock() - return - } - c.running = true - c.runningMu.Unlock() - c.run() -} - -// run the scheduler.. this is private just due to the need to synchronize -// access to the 'running' state variable. -func (c *Cron) run() { - c.logger.Info("start") - - // Figure out the next activation times for each entry. - now := c.now() - for _, entry := range c.entries { - entry.Next = entry.Schedule.Next(now) - c.logger.Info("schedule", "now", now, "entry", entry.ID, "next", entry.Next) - } - - for { - // Determine the next entry to run. - sort.Sort(byTime(c.entries)) - - var timer *time.Timer - if len(c.entries) == 0 || c.entries[0].Next.IsZero() { - // If there are no entries yet, just sleep - it still handles new entries - // and stop requests. - timer = time.NewTimer(100000 * time.Hour) - } else { - timer = time.NewTimer(c.entries[0].Next.Sub(now)) - } - - for { - select { - case now = <-timer.C: - now = now.In(c.location) - c.logger.Info("wake", "now", now) - - // Run every entry whose next time was less than now - for _, e := range c.entries { - if e.Next.After(now) || e.Next.IsZero() { - break - } - c.startJob(e.WrappedJob) - e.Prev = e.Next - e.Next = e.Schedule.Next(now) - c.logger.Info("run", "now", now, "entry", e.ID, "next", e.Next) - } - - case newEntry := <-c.add: - timer.Stop() - now = c.now() - newEntry.Next = newEntry.Schedule.Next(now) - c.entries = append(c.entries, newEntry) - c.logger.Info("added", "now", now, "entry", newEntry.ID, "next", newEntry.Next) - - case replyChan := <-c.snapshot: - replyChan <- c.entrySnapshot() - continue - - case <-c.stop: - timer.Stop() - c.logger.Info("stop") - return - - case id := <-c.remove: - timer.Stop() - now = c.now() - c.removeEntry(id) - c.logger.Info("removed", "entry", id) - } - - break - } - } -} - -// startJob runs the given job in a new goroutine. -func (c *Cron) startJob(j Job) { - c.jobWaiter.Add(1) - go func() { - defer c.jobWaiter.Done() - j.Run() - }() -} - -// now returns current time in c location -func (c *Cron) now() time.Time { - return time.Now().In(c.location) -} - -// Stop stops the cron scheduler if it is running; otherwise it does nothing. -// A context is returned so the caller can wait for running jobs to complete. -func (c *Cron) Stop() context.Context { - c.runningMu.Lock() - defer c.runningMu.Unlock() - if c.running { - c.stop <- struct{}{} - c.running = false - } - ctx, cancel := context.WithCancel(context.Background()) - go func() { - c.jobWaiter.Wait() - cancel() - }() - return ctx -} - -// entrySnapshot returns a copy of the current cron entry list. -func (c *Cron) entrySnapshot() []Entry { - var entries = make([]Entry, len(c.entries)) - for i, e := range c.entries { - entries[i] = *e - } - return entries -} - -func (c *Cron) removeEntry(id EntryID) { - var entries []*Entry - for _, e := range c.entries { - if e.ID != id { - entries = append(entries, e) - } - } - c.entries = entries -} diff --git a/backend/lib/cron/cron_test.go b/backend/lib/cron/cron_test.go deleted file mode 100644 index 35266df1..00000000 --- a/backend/lib/cron/cron_test.go +++ /dev/null @@ -1,699 +0,0 @@ -package cron - -import ( - "bytes" - "fmt" - "log" - "strings" - "sync" - "sync/atomic" - "testing" - "time" -) - -// Many tests schedule a job for every second, and then wait at most a second -// for it to run. This amount is just slightly larger than 1 second to -// compensate for a few milliseconds of runtime. -const OneSecond = 1*time.Second + 50*time.Millisecond - -type syncWriter struct { - wr bytes.Buffer - m sync.Mutex -} - -func (sw *syncWriter) Write(data []byte) (n int, err error) { - sw.m.Lock() - n, err = sw.wr.Write(data) - sw.m.Unlock() - return -} - -func (sw *syncWriter) String() string { - sw.m.Lock() - defer sw.m.Unlock() - return sw.wr.String() -} - -func newBufLogger(sw *syncWriter) Logger { - return PrintfLogger(log.New(sw, "", log.LstdFlags)) -} - -func TestFuncPanicRecovery(t *testing.T) { - var buf syncWriter - cron := New(WithParser(secondParser), - WithChain(Recover(newBufLogger(&buf)))) - cron.Start() - defer cron.Stop() - _, _ = cron.AddFunc("* * * * * ?", func() { - panic("YOLO") - }) - <-time.After(OneSecond) - if !strings.Contains(buf.String(), "YOLO") { - t.Error("expected a panic to be logged, got none") - } - -} - -type DummyJob struct{} - -func (d DummyJob) Run() { - panic("YOLO") -} - -func TestJobPanicRecovery(t *testing.T) { - var job DummyJob - - var buf syncWriter - cron := New(WithParser(secondParser), - WithChain(Recover(newBufLogger(&buf)))) - cron.Start() - defer cron.Stop() - _, _ = cron.AddJob("* * * * * ?", job) - - select { - case <-time.After(OneSecond): - if !strings.Contains(buf.String(), "YOLO") { - t.Error("expected a panic to be logged, got none") - } - return - } -} - -// Start and stop cron with no entries. -func TestNoEntries(t *testing.T) { - cron := newWithSeconds() - cron.Start() - - select { - case <-time.After(OneSecond): - t.Fatal("expected cron will be stopped immediately") - case <-stop(cron): - } -} - -// Start, stop, then add an entry. Verify entry doesn't run. -func TestStopCausesJobsToNotRun(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(1) - - cron := newWithSeconds() - cron.Start() - cron.Stop() - _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) - - select { - case <-time.After(OneSecond): - // No job ran! - case <-wait(wg): - t.Fatal("expected stopped cron does not run any job") - } -} - -// Add a job, start cron, expect it runs. -func TestAddBeforeRunning(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(1) - - cron := newWithSeconds() - _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) - cron.Start() - defer cron.Stop() - - // Give cron 2 seconds to run our job (which is always activated). - select { - case <-time.After(OneSecond): - t.Fatal("expected job runs") - case <-wait(wg): - } -} - -// Start cron, add a job, expect it runs. -func TestAddWhileRunning(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(1) - - cron := newWithSeconds() - cron.Start() - defer cron.Stop() - _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) - - select { - case <-time.After(OneSecond): - t.Fatal("expected job runs") - case <-wait(wg): - } -} - -// Test for #34. Adding a job after calling start results in multiple job invocations -func TestAddWhileRunningWithDelay(t *testing.T) { - cron := newWithSeconds() - cron.Start() - defer cron.Stop() - time.Sleep(5 * time.Second) - var calls int64 - _, _ = cron.AddFunc("* * * * * *", func() { atomic.AddInt64(&calls, 1) }) - - <-time.After(OneSecond) - if atomic.LoadInt64(&calls) != 1 { - t.Errorf("called %d times, expected 1\n", calls) - } -} - -// Add a job, remove a job, start cron, expect nothing runs. -func TestRemoveBeforeRunning(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(1) - - cron := newWithSeconds() - id, _ := cron.AddFunc("* * * * * ?", func() { wg.Done() }) - cron.Remove(id) - cron.Start() - defer cron.Stop() - - select { - case <-time.After(OneSecond): - // Success, shouldn't run - case <-wait(wg): - t.FailNow() - } -} - -// Start cron, add a job, remove it, expect it doesn't run. -func TestRemoveWhileRunning(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(1) - - cron := newWithSeconds() - cron.Start() - defer cron.Stop() - id, _ := cron.AddFunc("* * * * * ?", func() { wg.Done() }) - cron.Remove(id) - - select { - case <-time.After(OneSecond): - case <-wait(wg): - t.FailNow() - } -} - -// Test timing with Entries. -func TestSnapshotEntries(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(1) - - cron := New() - _, _ = cron.AddFunc("@every 2s", func() { wg.Done() }) - cron.Start() - defer cron.Stop() - - // Cron should fire in 2 seconds. After 1 second, call Entries. - select { - case <-time.After(OneSecond): - cron.Entries() - } - - // Even though Entries was called, the cron should fire at the 2 second mark. - select { - case <-time.After(OneSecond): - t.Error("expected job runs at 2 second mark") - case <-wait(wg): - } -} - -// Test that the entries are correctly sorted. -// Add a bunch of long-in-the-future entries, and an immediate entry, and ensure -// that the immediate entry runs immediately. -// Also: Test that multiple jobs run in the same instant. -func TestMultipleEntries(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(2) - - cron := newWithSeconds() - _, _ = cron.AddFunc("0 0 0 1 1 ?", func() {}) - _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) - id1, _ := cron.AddFunc("* * * * * ?", func() { t.Fatal() }) - id2, _ := cron.AddFunc("* * * * * ?", func() { t.Fatal() }) - _, _ = cron.AddFunc("0 0 0 31 12 ?", func() {}) - _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) - - cron.Remove(id1) - cron.Start() - cron.Remove(id2) - defer cron.Stop() - - select { - case <-time.After(OneSecond): - t.Error("expected job run in proper order") - case <-wait(wg): - } -} - -// Test running the same job twice. -func TestRunningJobTwice(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(2) - - cron := newWithSeconds() - _, _ = cron.AddFunc("0 0 0 1 1 ?", func() {}) - _, _ = cron.AddFunc("0 0 0 31 12 ?", func() {}) - _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) - - cron.Start() - defer cron.Stop() - - select { - case <-time.After(2 * OneSecond): - t.Error("expected job fires 2 times") - case <-wait(wg): - } -} - -func TestRunningMultipleSchedules(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(2) - - cron := newWithSeconds() - _, _ = cron.AddFunc("0 0 0 1 1 ?", func() {}) - _, _ = cron.AddFunc("0 0 0 31 12 ?", func() {}) - _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) - cron.Schedule(Every(time.Minute), FuncJob(func() {})) - cron.Schedule(Every(time.Second), FuncJob(func() { wg.Done() })) - cron.Schedule(Every(time.Hour), FuncJob(func() {})) - - cron.Start() - defer cron.Stop() - - select { - case <-time.After(2 * OneSecond): - t.Error("expected job fires 2 times") - case <-wait(wg): - } -} - -// Test that the cron is run in the local time zone (as opposed to UTC). -func TestLocalTimezone(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(2) - - now := time.Now() - // FIX: Issue #205 - // This calculation doesn't work in seconds 58 or 59. - // Take the easy way out and sleep. - if now.Second() >= 58 { - time.Sleep(2 * time.Second) - now = time.Now() - } - spec := fmt.Sprintf("%d,%d %d %d %d %d ?", - now.Second()+1, now.Second()+2, now.Minute(), now.Hour(), now.Day(), now.Month()) - - cron := newWithSeconds() - _, _ = cron.AddFunc(spec, func() { wg.Done() }) - cron.Start() - defer cron.Stop() - - select { - case <-time.After(OneSecond * 2): - t.Error("expected job fires 2 times") - case <-wait(wg): - } -} - -// Test that the cron is run in the given time zone (as opposed to local). -func TestNonLocalTimezone(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(2) - - loc, err := time.LoadLocation("Atlantic/Cape_Verde") - if err != nil { - fmt.Printf("Failed to load time zone Atlantic/Cape_Verde: %+v", err) - t.Fail() - } - - now := time.Now().In(loc) - // FIX: Issue #205 - // This calculation doesn't work in seconds 58 or 59. - // Take the easy way out and sleep. - if now.Second() >= 58 { - time.Sleep(2 * time.Second) - now = time.Now().In(loc) - } - spec := fmt.Sprintf("%d,%d %d %d %d %d ?", - now.Second()+1, now.Second()+2, now.Minute(), now.Hour(), now.Day(), now.Month()) - - cron := New(WithLocation(loc), WithParser(secondParser)) - _, _ = cron.AddFunc(spec, func() { wg.Done() }) - cron.Start() - defer cron.Stop() - - select { - case <-time.After(OneSecond * 2): - t.Error("expected job fires 2 times") - case <-wait(wg): - } -} - -// Test that calling stop before start silently returns without -// blocking the stop channel. -func TestStopWithoutStart(t *testing.T) { - cron := New() - cron.Stop() -} - -type testJob struct { - wg *sync.WaitGroup - name string -} - -func (t testJob) Run() { - t.wg.Done() -} - -// Test that adding an invalid job spec returns an error -func TestInvalidJobSpec(t *testing.T) { - cron := New() - _, err := cron.AddJob("this will not parse", nil) - if err == nil { - t.Errorf("expected an error with invalid spec, got nil") - } -} - -// Test blocking run method behaves as Start() -func TestBlockingRun(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(1) - - cron := newWithSeconds() - _, _ = cron.AddFunc("* * * * * ?", func() { wg.Done() }) - - var unblockChan = make(chan struct{}) - - go func() { - cron.Run() - close(unblockChan) - }() - defer cron.Stop() - - select { - case <-time.After(OneSecond): - t.Error("expected job fires") - case <-unblockChan: - t.Error("expected that Run() blocks") - case <-wait(wg): - } -} - -// Test that double-running is a no-op -func TestStartNoop(t *testing.T) { - var tickChan = make(chan struct{}, 2) - - cron := newWithSeconds() - _, _ = cron.AddFunc("* * * * * ?", func() { - tickChan <- struct{}{} - }) - - cron.Start() - defer cron.Stop() - - // Wait for the first firing to ensure the runner is going - <-tickChan - - cron.Start() - - <-tickChan - - // Fail if this job fires again in a short period, indicating a double-run - select { - case <-time.After(time.Millisecond): - case <-tickChan: - t.Error("expected job fires exactly twice") - } -} - -// Simple test using Runnables. -func TestJob(t *testing.T) { - wg := &sync.WaitGroup{} - wg.Add(1) - - cron := newWithSeconds() - _, _ = cron.AddJob("0 0 0 30 Feb ?", testJob{wg, "job0"}) - _, _ = cron.AddJob("0 0 0 1 1 ?", testJob{wg, "job1"}) - job2, _ := cron.AddJob("* * * * * ?", testJob{wg, "job2"}) - _, _ = cron.AddJob("1 0 0 1 1 ?", testJob{wg, "job3"}) - cron.Schedule(Every(5*time.Second+5*time.Nanosecond), testJob{wg, "job4"}) - job5 := cron.Schedule(Every(5*time.Minute), testJob{wg, "job5"}) - - // Test getting an Entry pre-Start. - if actualName := cron.Entry(job2).Job.(testJob).name; actualName != "job2" { - t.Error("wrong job retrieved:", actualName) - } - if actualName := cron.Entry(job5).Job.(testJob).name; actualName != "job5" { - t.Error("wrong job retrieved:", actualName) - } - - cron.Start() - defer cron.Stop() - - select { - case <-time.After(OneSecond): - t.FailNow() - case <-wait(wg): - } - - // Ensure the entries are in the right order. - expecteds := []string{"job2", "job4", "job5", "job1", "job3", "job0"} - - var actuals = make([]string, 0, len(cron.Entries())) - for _, entry := range cron.Entries() { - actuals = append(actuals, entry.Job.(testJob).name) - } - - for i, expected := range expecteds { - if actuals[i] != expected { - t.Fatalf("Jobs not in the right order. (expected) %s != %s (actual)", expecteds, actuals) - } - } - - // Test getting Entries. - if actualName := cron.Entry(job2).Job.(testJob).name; actualName != "job2" { - t.Error("wrong job retrieved:", actualName) - } - if actualName := cron.Entry(job5).Job.(testJob).name; actualName != "job5" { - t.Error("wrong job retrieved:", actualName) - } -} - -// Issue #206 -// Ensure that the next run of a job after removing an entry is accurate. -func TestScheduleAfterRemoval(t *testing.T) { - var wg1 sync.WaitGroup - var wg2 sync.WaitGroup - wg1.Add(1) - wg2.Add(1) - - // The first time this job is run, set a timer and remove the other job - // 750ms later. Correct behavior would be to still run the job again in - // 250ms, but the bug would cause it to run instead 1s later. - - var calls int - var mu sync.Mutex - - cron := newWithSeconds() - hourJob := cron.Schedule(Every(time.Hour), FuncJob(func() {})) - cron.Schedule(Every(time.Second), FuncJob(func() { - mu.Lock() - defer mu.Unlock() - switch calls { - case 0: - wg1.Done() - calls++ - case 1: - time.Sleep(750 * time.Millisecond) - cron.Remove(hourJob) - calls++ - case 2: - calls++ - wg2.Done() - case 3: - panic("unexpected 3rd call") - } - })) - - cron.Start() - defer cron.Stop() - - // the first run might be any length of time 0 - 1s, since the schedule - // rounds to the second. wait for the first run to true up. - wg1.Wait() - - select { - case <-time.After(2 * OneSecond): - t.Error("expected job fires 2 times") - case <-wait(&wg2): - } -} - -type ZeroSchedule struct{} - -func (*ZeroSchedule) Next(time.Time) time.Time { - return time.Time{} -} - -// Tests that job without time does not run -func TestJobWithZeroTimeDoesNotRun(t *testing.T) { - cron := newWithSeconds() - var calls int64 - _, _ = cron.AddFunc("* * * * * *", func() { atomic.AddInt64(&calls, 1) }) - cron.Schedule(new(ZeroSchedule), FuncJob(func() { t.Error("expected zero task will not run") })) - cron.Start() - defer cron.Stop() - <-time.After(OneSecond) - if atomic.LoadInt64(&calls) != 1 { - t.Errorf("called %d times, expected 1\n", calls) - } -} - -func TestStopAndWait(t *testing.T) { - t.Run("nothing running, returns immediately", func(t *testing.T) { - cron := newWithSeconds() - cron.Start() - ctx := cron.Stop() - select { - case <-ctx.Done(): - case <-time.After(time.Millisecond): - t.Error("context was not done immediately") - } - }) - - t.Run("repeated calls to Stop", func(t *testing.T) { - cron := newWithSeconds() - cron.Start() - _ = cron.Stop() - time.Sleep(time.Millisecond) - ctx := cron.Stop() - select { - case <-ctx.Done(): - case <-time.After(time.Millisecond): - t.Error("context was not done immediately") - } - }) - - t.Run("a couple fast jobs added, still returns immediately", func(t *testing.T) { - cron := newWithSeconds() - _, _ = cron.AddFunc("* * * * * *", func() {}) - cron.Start() - _, _ = cron.AddFunc("* * * * * *", func() {}) - _, _ = cron.AddFunc("* * * * * *", func() {}) - _, _ = cron.AddFunc("* * * * * *", func() {}) - time.Sleep(time.Second) - ctx := cron.Stop() - select { - case <-ctx.Done(): - case <-time.After(time.Millisecond): - t.Error("context was not done immediately") - } - }) - - t.Run("a couple fast jobs and a slow job added, waits for slow job", func(t *testing.T) { - cron := newWithSeconds() - _, _ = cron.AddFunc("* * * * * *", func() {}) - cron.Start() - _, _ = cron.AddFunc("* * * * * *", func() { time.Sleep(2 * time.Second) }) - _, _ = cron.AddFunc("* * * * * *", func() {}) - time.Sleep(time.Second) - - ctx := cron.Stop() - - // Verify that it is not done for at least 750ms - select { - case <-ctx.Done(): - t.Error("context was done too quickly immediately") - case <-time.After(750 * time.Millisecond): - // expected, because the job sleeping for 1 second is still running - } - - // Verify that it IS done in the next 500ms (giving 250ms buffer) - select { - case <-ctx.Done(): - // expected - case <-time.After(1500 * time.Millisecond): - t.Error("context not done after job should have completed") - } - }) - - t.Run("repeated calls to stop, waiting for completion and after", func(t *testing.T) { - cron := newWithSeconds() - _, _ = cron.AddFunc("* * * * * *", func() {}) - _, _ = cron.AddFunc("* * * * * *", func() { time.Sleep(2 * time.Second) }) - cron.Start() - _, _ = cron.AddFunc("* * * * * *", func() {}) - time.Sleep(time.Second) - ctx := cron.Stop() - ctx2 := cron.Stop() - - // Verify that it is not done for at least 1500ms - select { - case <-ctx.Done(): - t.Error("context was done too quickly immediately") - case <-ctx2.Done(): - t.Error("context2 was done too quickly immediately") - case <-time.After(1500 * time.Millisecond): - // expected, because the job sleeping for 2 seconds is still running - } - - // Verify that it IS done in the next 1s (giving 500ms buffer) - select { - case <-ctx.Done(): - // expected - case <-time.After(time.Second): - t.Error("context not done after job should have completed") - } - - // Verify that ctx2 is also done. - select { - case <-ctx2.Done(): - // expected - case <-time.After(time.Millisecond): - t.Error("context2 not done even though context1 is") - } - - // Verify that a new context retrieved from stop is immediately done. - ctx3 := cron.Stop() - select { - case <-ctx3.Done(): - // expected - case <-time.After(time.Millisecond): - t.Error("context not done even when cron Stop is completed") - } - - }) -} - -func TestMultiThreadedStartAndStop(t *testing.T) { - cron := New() - go cron.Run() - time.Sleep(2 * time.Millisecond) - cron.Stop() -} - -func wait(wg *sync.WaitGroup) chan bool { - ch := make(chan bool) - go func() { - wg.Wait() - ch <- true - }() - return ch -} - -func stop(cron *Cron) chan bool { - ch := make(chan bool) - go func() { - cron.Stop() - ch <- true - }() - return ch -} - -// newWithSeconds returns a Cron with the seconds field enabled. -func newWithSeconds() *Cron { - return New(WithParser(secondParser), WithChain()) -} diff --git a/backend/lib/cron/doc.go b/backend/lib/cron/doc.go deleted file mode 100644 index fbee72c1..00000000 --- a/backend/lib/cron/doc.go +++ /dev/null @@ -1,212 +0,0 @@ -/* -Package cron implements a cron spec parser and job runner. - -Usage - -Callers may register Funcs to be invoked on a given schedule. Cron will run -them in their own goroutines. - - c := cron.New() - c.AddFunc("30 * * * *", func() { fmt.Println("Every hour on the half hour") }) - c.AddFunc("30 3-6,20-23 * * *", func() { fmt.Println(".. in the range 3-6am, 8-11pm") }) - c.AddFunc("CRON_TZ=Asia/Tokyo 30 04 * * * *", func() { fmt.Println("Runs at 04:30 Tokyo time every day") }) - c.AddFunc("@hourly", func() { fmt.Println("Every hour, starting an hour from now") }) - c.AddFunc("@every 1h30m", func() { fmt.Println("Every hour thirty, starting an hour thirty from now") }) - c.Start() - .. - // Funcs are invoked in their own goroutine, asynchronously. - ... - // Funcs may also be added to a running Cron - c.AddFunc("@daily", func() { fmt.Println("Every day") }) - .. - // Inspect the cron job entries' next and previous run times. - inspect(c.Entries()) - .. - c.Stop() // Stop the scheduler (does not stop any jobs already running). - -CRON Expression Format - -A cron expression represents a set of times, using 5 space-separated fields. - - Field name | Mandatory? | Allowed values | Allowed special characters - ---------- | ---------- | -------------- | -------------------------- - Minutes | Yes | 0-59 | * / , - - Hours | Yes | 0-23 | * / , - - Day of month | Yes | 1-31 | * / , - ? - Month | Yes | 1-12 or JAN-DEC | * / , - - Day of week | Yes | 0-6 or SUN-SAT | * / , - ? - -Month and Day-of-week field values are case insensitive. "SUN", "Sun", and -"sun" are equally accepted. - -The specific interpretation of the format is based on the Cron Wikipedia page: -https://en.wikipedia.org/wiki/Cron - -Alternative Formats - -Alternative Cron expression formats support other fields like seconds. You can -implement that by creating a custom Parser as follows. - - cron.New( - cron.WithParser( - cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor)) - -The most popular alternative Cron expression format is Quartz: -http://www.quartz-scheduler.org/documentation/quartz-2.x/tutorials/crontrigger.html - -Special Characters - -Asterisk ( * ) - -The asterisk indicates that the cron expression will match for all values of the -field; e.g., using an asterisk in the 5th field (month) would indicate every -month. - -Slash ( / ) - -Slashes are used to describe increments of ranges. For example 3-59/15 in the -1st field (minutes) would indicate the 3rd minute of the hour and every 15 -minutes thereafter. The form "*\/..." is equivalent to the form "first-last/...", -that is, an increment over the largest possible range of the field. The form -"N/..." is accepted as meaning "N-MAX/...", that is, starting at N, use the -increment until the end of that specific range. It does not wrap around. - -Comma ( , ) - -Commas are used to separate items of a list. For example, using "MON,WED,FRI" in -the 5th field (day of week) would mean Mondays, Wednesdays and Fridays. - -Hyphen ( - ) - -Hyphens are used to define ranges. For example, 9-17 would indicate every -hour between 9am and 5pm inclusive. - -Question mark ( ? ) - -Question mark may be used instead of '*' for leaving either day-of-month or -day-of-week blank. - -Predefined schedules - -You may use one of several pre-defined schedules in place of a cron expression. - - Entry | Description | Equivalent To - ----- | ----------- | ------------- - @yearly (or @annually) | Run once a year, midnight, Jan. 1st | 0 0 1 1 * - @monthly | Run once a month, midnight, first of month | 0 0 1 * * - @weekly | Run once a week, midnight between Sat/Sun | 0 0 * * 0 - @daily (or @midnight) | Run once a day, midnight | 0 0 * * * - @hourly | Run once an hour, beginning of hour | 0 * * * * - -Intervals - -You may also schedule a job to execute at fixed intervals, starting at the time it's added -or cron is run. This is supported by formatting the cron spec like this: - - @every - -where "duration" is a string accepted by time.ParseDuration -(http://golang.org/pkg/time/#ParseDuration). - -For example, "@every 1h30m10s" would indicate a schedule that activates after -1 hour, 30 minutes, 10 seconds, and then every interval after that. - -Note: The interval does not take the job runtime into account. For example, -if a job takes 3 minutes to run, and it is scheduled to run every 5 minutes, -it will have only 2 minutes of idle time between each run. - -Time zones - -By default, all interpretation and scheduling is done in the machine's local -time zone (time.Local). You can specify a different time zone on construction: - - cron.New( - cron.WithLocation(time.UTC)) - -Individual cron schedules may also override the time zone they are to be -interpreted in by providing an additional space-separated field at the beginning -of the cron spec, of the form "CRON_TZ=Asia/Tokyo". - -For example: - - # Runs at 6am in time.Local - cron.New().AddFunc("0 6 * * ?", ...) - - # Runs at 6am in America/New_York - nyc, _ := time.LoadLocation("America/New_York") - c := cron.New(cron.WithLocation(nyc)) - c.AddFunc("0 6 * * ?", ...) - - # Runs at 6am in Asia/Tokyo - cron.New().AddFunc("CRON_TZ=Asia/Tokyo 0 6 * * ?", ...) - - # Runs at 6am in Asia/Tokyo - c := cron.New(cron.WithLocation(nyc)) - c.SetLocation("America/New_York") - c.AddFunc("CRON_TZ=Asia/Tokyo 0 6 * * ?", ...) - -The prefix "TZ=(TIME ZONE)" is also supported for legacy compatibility. - -Be aware that jobs scheduled during daylight-savings leap-ahead transitions will -not be run! - -Job Wrappers / Chain - -A Cron runner may be configured with a chain of job wrappers to add -cross-cutting functionality to all submitted jobs. For example, they may be used -to achieve the following effects: - - - Recover any panics from jobs (activated by default) - - Delay a job's execution if the previous run hasn't completed yet - - Skip a job's execution if the previous run hasn't completed yet - - Log each job's invocations - -Install wrappers for all jobs added to a cron using the `cron.WithChain` option: - - cron.New(cron.WithChain( - cron.SkipIfStillRunning(logger), - )) - -Install wrappers for individual jobs by explicitly wrapping them: - - job = cron.NewChain( - cron.SkipIfStillRunning(logger), - ).Then(job) - -Thread safety - -Since the Cron service runs concurrently with the calling code, some amount of -care must be taken to ensure proper synchronization. - -All cron methods are designed to be correctly synchronized as long as the caller -ensures that invocations have a clear happens-before ordering between them. - -Logging - -Cron defines a Logger interface that is a subset of the one defined in -github.com/go-logr/logr. It has two logging levels (Info and Error), and -parameters are key/value pairs. This makes it possible for cron logging to plug -into structured logging systems. An adapter, [Verbose]PrintfLogger, is provided -to wrap the standard library *log.Logger. - -For additional insight into Cron operations, verbose logging may be activated -which will record job runs, scheduling decisions, and added or removed jobs. -Activate it with a one-off logger as follows: - - cron.New( - cron.WithLogger( - cron.VerbosePrintfLogger(log.New(os.Stdout, "cron: ", log.LstdFlags)))) - - -Implementation - -Cron entries are stored in an array, sorted by their next activation time. Cron -sleeps until the next job is due to be run. - -Upon waking: - - it runs each entry that is active on that second - - it calculates the next run times for the jobs that were run - - it re-sorts the array of entries by next activation time. - - it goes to sleep until the soonest job. -*/ -package cron diff --git a/backend/lib/cron/logger.go b/backend/lib/cron/logger.go deleted file mode 100644 index 46314da8..00000000 --- a/backend/lib/cron/logger.go +++ /dev/null @@ -1,86 +0,0 @@ -package cron - -import ( - "io/ioutil" - "log" - "os" - "strings" - "time" -) - -// DefaultLogger is used by Cron if none is specified. -var DefaultLogger = PrintfLogger(log.New(os.Stdout, "cron: ", log.LstdFlags)) - -// DiscardLogger can be used by callers to discard all log messages. -var DiscardLogger = PrintfLogger(log.New(ioutil.Discard, "", 0)) - -// Logger is the interface used in this package for logging, so that any backend -// can be plugged in. It is a subset of the github.com/go-logr/logr interface. -type Logger interface { - // Info logs routine messages about cron's operation. - Info(msg string, keysAndValues ...interface{}) - // Error logs an error condition. - Error(err error, msg string, keysAndValues ...interface{}) -} - -// PrintfLogger wraps a Printf-based logger (such as the standard library "log") -// into an implementation of the Logger interface which logs errors only. -func PrintfLogger(l interface{ Printf(string, ...interface{}) }) Logger { - return printfLogger{l, false} -} - -// VerbosePrintfLogger wraps a Printf-based logger (such as the standard library -// "log") into an implementation of the Logger interface which logs everything. -func VerbosePrintfLogger(l interface{ Printf(string, ...interface{}) }) Logger { - return printfLogger{l, true} -} - -type printfLogger struct { - logger interface{ Printf(string, ...interface{}) } - logInfo bool -} - -func (pl printfLogger) Info(msg string, keysAndValues ...interface{}) { - if pl.logInfo { - keysAndValues = formatTimes(keysAndValues) - pl.logger.Printf( - formatString(len(keysAndValues)), - append([]interface{}{msg}, keysAndValues...)...) - } -} - -func (pl printfLogger) Error(err error, msg string, keysAndValues ...interface{}) { - keysAndValues = formatTimes(keysAndValues) - pl.logger.Printf( - formatString(len(keysAndValues)+2), - append([]interface{}{msg, "error", err}, keysAndValues...)...) -} - -// formatString returns a logfmt-like format string for the number of -// key/values. -func formatString(numKeysAndValues int) string { - var sb strings.Builder - sb.WriteString("%s") - if numKeysAndValues > 0 { - sb.WriteString(", ") - } - for i := 0; i < numKeysAndValues/2; i++ { - if i > 0 { - sb.WriteString(", ") - } - sb.WriteString("%v=%v") - } - return sb.String() -} - -// formatTimes formats any time.Time values as RFC3339. -func formatTimes(keysAndValues []interface{}) []interface{} { - var formattedArgs []interface{} - for _, arg := range keysAndValues { - if t, ok := arg.(time.Time); ok { - arg = t.Format(time.RFC3339) - } - formattedArgs = append(formattedArgs, arg) - } - return formattedArgs -} diff --git a/backend/lib/cron/option.go b/backend/lib/cron/option.go deleted file mode 100644 index 07638201..00000000 --- a/backend/lib/cron/option.go +++ /dev/null @@ -1,45 +0,0 @@ -package cron - -import ( - "time" -) - -// Option represents a modification to the default behavior of a Cron. -type Option func(*Cron) - -// WithLocation overrides the timezone of the cron instance. -func WithLocation(loc *time.Location) Option { - return func(c *Cron) { - c.location = loc - } -} - -// WithSeconds overrides the parser used for interpreting job schedules to -// include a seconds field as the first one. -func WithSeconds() Option { - return WithParser(NewParser( - Second | Minute | Hour | Dom | Month | Dow | Descriptor, - )) -} - -// WithParser overrides the parser used for interpreting job schedules. -func WithParser(p Parser) Option { - return func(c *Cron) { - c.parser = p - } -} - -// WithChain specifies Job wrappers to apply to all jobs added to this cron. -// Refer to the Chain* functions in this package for provided wrappers. -func WithChain(wrappers ...JobWrapper) Option { - return func(c *Cron) { - c.chain = NewChain(wrappers...) - } -} - -// WithLogger uses the provided logger. -func WithLogger(logger Logger) Option { - return func(c *Cron) { - c.logger = logger - } -} diff --git a/backend/lib/cron/option_test.go b/backend/lib/cron/option_test.go deleted file mode 100644 index 57dbaa4b..00000000 --- a/backend/lib/cron/option_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package cron - -import ( - "log" - "strings" - "testing" - "time" -) - -func TestWithLocation(t *testing.T) { - c := New(WithLocation(time.UTC)) - if c.location != time.UTC { - t.Errorf("expected UTC, got %v", c.location) - } -} - -func TestWithParser(t *testing.T) { - var parser = NewParser(Dow) - c := New(WithParser(parser)) - if c.parser != parser { - t.Error("expected provided parser") - } -} - -func TestWithVerboseLogger(t *testing.T) { - var buf syncWriter - var logger = log.New(&buf, "", log.LstdFlags) - c := New(WithLogger(VerbosePrintfLogger(logger))) - if c.logger.(printfLogger).logger != logger { - t.Error("expected provided logger") - } - - _, _ = c.AddFunc("@every 1s", func() {}) - c.Start() - time.Sleep(OneSecond) - c.Stop() - out := buf.String() - if !strings.Contains(out, "schedule,") || - !strings.Contains(out, "run,") { - t.Error("expected to see some actions, got:", out) - } -} diff --git a/backend/lib/cron/parser.go b/backend/lib/cron/parser.go deleted file mode 100644 index 3cf8879f..00000000 --- a/backend/lib/cron/parser.go +++ /dev/null @@ -1,434 +0,0 @@ -package cron - -import ( - "fmt" - "math" - "strconv" - "strings" - "time" -) - -// Configuration options for creating a parser. Most options specify which -// fields should be included, while others enable features. If a field is not -// included the parser will assume a default value. These options do not change -// the order fields are parse in. -type ParseOption int - -const ( - Second ParseOption = 1 << iota // Seconds field, default 0 - SecondOptional // Optional seconds field, default 0 - Minute // Minutes field, default 0 - Hour // Hours field, default 0 - Dom // Day of month field, default * - Month // Month field, default * - Dow // Day of week field, default * - DowOptional // Optional day of week field, default * - Descriptor // Allow descriptors such as @monthly, @weekly, etc. -) - -var places = []ParseOption{ - Second, - Minute, - Hour, - Dom, - Month, - Dow, -} - -var defaults = []string{ - "0", - "0", - "0", - "*", - "*", - "*", -} - -// A custom Parser that can be configured. -type Parser struct { - options ParseOption -} - -// NewParser creates a Parser with custom options. -// -// It panics if more than one Optional is given, since it would be impossible to -// correctly infer which optional is provided or missing in general. -// -// Examples -// -// // Standard parser without descriptors -// specParser := NewParser(Minute | Hour | Dom | Month | Dow) -// sched, err := specParser.Parse("0 0 15 */3 *") -// -// // Same as above, just excludes time fields -// subsParser := NewParser(Dom | Month | Dow) -// sched, err := specParser.Parse("15 */3 *") -// -// // Same as above, just makes Dow optional -// subsParser := NewParser(Dom | Month | DowOptional) -// sched, err := specParser.Parse("15 */3") -// -func NewParser(options ParseOption) Parser { - optionals := 0 - if options&DowOptional > 0 { - optionals++ - } - if options&SecondOptional > 0 { - optionals++ - } - if optionals > 1 { - panic("multiple optionals may not be configured") - } - return Parser{options} -} - -// Parse returns a new crontab schedule representing the given spec. -// It returns a descriptive error if the spec is not valid. -// It accepts crontab specs and features configured by NewParser. -func (p Parser) Parse(spec string) (Schedule, error) { - if len(spec) == 0 { - return nil, fmt.Errorf("empty spec string") - } - - // Extract timezone if present - var loc = time.Local - if strings.HasPrefix(spec, "TZ=") || strings.HasPrefix(spec, "CRON_TZ=") { - var err error - i := strings.Index(spec, " ") - eq := strings.Index(spec, "=") - if loc, err = time.LoadLocation(spec[eq+1 : i]); err != nil { - return nil, fmt.Errorf("provided bad location %s: %v", spec[eq+1:i], err) - } - spec = strings.TrimSpace(spec[i:]) - } - - // Handle named schedules (descriptors), if configured - if strings.HasPrefix(spec, "@") { - if p.options&Descriptor == 0 { - return nil, fmt.Errorf("parser does not accept descriptors: %v", spec) - } - return parseDescriptor(spec, loc) - } - - // Split on whitespace. - fields := strings.Fields(spec) - - // Validate & fill in any omitted or optional fields - var err error - fields, err = normalizeFields(fields, p.options) - if err != nil { - return nil, err - } - - field := func(field string, r bounds) uint64 { - if err != nil { - return 0 - } - var bits uint64 - bits, err = getField(field, r) - return bits - } - - var ( - second = field(fields[0], seconds) - minute = field(fields[1], minutes) - hour = field(fields[2], hours) - dayofmonth = field(fields[3], dom) - month = field(fields[4], months) - dayofweek = field(fields[5], dow) - ) - if err != nil { - return nil, err - } - - return &SpecSchedule{ - Second: second, - Minute: minute, - Hour: hour, - Dom: dayofmonth, - Month: month, - Dow: dayofweek, - Location: loc, - }, nil -} - -// normalizeFields takes a subset set of the time fields and returns the full set -// with defaults (zeroes) populated for unset fields. -// -// As part of performing this function, it also validates that the provided -// fields are compatible with the configured options. -func normalizeFields(fields []string, options ParseOption) ([]string, error) { - // Validate optionals & add their field to options - optionals := 0 - if options&SecondOptional > 0 { - options |= Second - optionals++ - } - if options&DowOptional > 0 { - options |= Dow - optionals++ - } - if optionals > 1 { - return nil, fmt.Errorf("multiple optionals may not be configured") - } - - // Figure out how many fields we need - max := 0 - for _, place := range places { - if options&place > 0 { - max++ - } - } - min := max - optionals - - // Validate number of fields - if count := len(fields); count < min || count > max { - if min == max { - return nil, fmt.Errorf("expected exactly %d fields, found %d: %s", min, count, fields) - } - return nil, fmt.Errorf("expected %d to %d fields, found %d: %s", min, max, count, fields) - } - - // Populate the optional field if not provided - if min < max && len(fields) == min { - switch { - case options&DowOptional > 0: - fields = append(fields, defaults[5]) // TODO: improve access to default - case options&SecondOptional > 0: - fields = append([]string{defaults[0]}, fields...) - default: - return nil, fmt.Errorf("unknown optional field") - } - } - - // Populate all fields not part of options with their defaults - n := 0 - expandedFields := make([]string, len(places)) - copy(expandedFields, defaults) - for i, place := range places { - if options&place > 0 { - expandedFields[i] = fields[n] - n++ - } - } - return expandedFields, nil -} - -var standardParser = NewParser( - Minute | Hour | Dom | Month | Dow | Descriptor, -) - -// ParseStandard returns a new crontab schedule representing the given -// standardSpec (https://en.wikipedia.org/wiki/Cron). It requires 5 entries -// representing: minute, hour, day of month, month and day of week, in that -// order. It returns a descriptive error if the spec is not valid. -// -// It accepts -// - Standard crontab specs, e.g. "* * * * ?" -// - Descriptors, e.g. "@midnight", "@every 1h30m" -func ParseStandard(standardSpec string) (Schedule, error) { - return standardParser.Parse(standardSpec) -} - -// getField returns an Int with the bits set representing all of the times that -// the field represents or error parsing field value. A "field" is a comma-separated -// list of "ranges". -func getField(field string, r bounds) (uint64, error) { - var bits uint64 - ranges := strings.FieldsFunc(field, func(r rune) bool { return r == ',' }) - for _, expr := range ranges { - bit, err := getRange(expr, r) - if err != nil { - return bits, err - } - bits |= bit - } - return bits, nil -} - -// getRange returns the bits indicated by the given expression: -// number | number "-" number [ "/" number ] -// or error parsing range. -func getRange(expr string, r bounds) (uint64, error) { - var ( - start, end, step uint - rangeAndStep = strings.Split(expr, "/") - lowAndHigh = strings.Split(rangeAndStep[0], "-") - singleDigit = len(lowAndHigh) == 1 - err error - ) - - var extra uint64 - if lowAndHigh[0] == "*" || lowAndHigh[0] == "?" { - start = r.min - end = r.max - extra = starBit - } else { - start, err = parseIntOrName(lowAndHigh[0], r.names) - if err != nil { - return 0, err - } - switch len(lowAndHigh) { - case 1: - end = start - case 2: - end, err = parseIntOrName(lowAndHigh[1], r.names) - if err != nil { - return 0, err - } - default: - return 0, fmt.Errorf("too many hyphens: %s", expr) - } - } - - switch len(rangeAndStep) { - case 1: - step = 1 - case 2: - step, err = mustParseInt(rangeAndStep[1]) - if err != nil { - return 0, err - } - - // Special handling: "N/step" means "N-max/step". - if singleDigit { - end = r.max - } - if step > 1 { - extra = 0 - } - default: - return 0, fmt.Errorf("too many slashes: %s", expr) - } - - if start < r.min { - return 0, fmt.Errorf("beginning of range (%d) below minimum (%d): %s", start, r.min, expr) - } - if end > r.max { - return 0, fmt.Errorf("end of range (%d) above maximum (%d): %s", end, r.max, expr) - } - if start > end { - return 0, fmt.Errorf("beginning of range (%d) beyond end of range (%d): %s", start, end, expr) - } - if step == 0 { - return 0, fmt.Errorf("step of range should be a positive number: %s", expr) - } - - return getBits(start, end, step) | extra, nil -} - -// parseIntOrName returns the (possibly-named) integer contained in expr. -func parseIntOrName(expr string, names map[string]uint) (uint, error) { - if names != nil { - if namedInt, ok := names[strings.ToLower(expr)]; ok { - return namedInt, nil - } - } - return mustParseInt(expr) -} - -// mustParseInt parses the given expression as an int or returns an error. -func mustParseInt(expr string) (uint, error) { - num, err := strconv.Atoi(expr) - if err != nil { - return 0, fmt.Errorf("failed to parse int from %s: %s", expr, err) - } - if num < 0 { - return 0, fmt.Errorf("negative number (%d) not allowed: %s", num, expr) - } - - return uint(num), nil -} - -// getBits sets all bits in the range [min, max], modulo the given step size. -func getBits(min, max, step uint) uint64 { - var bits uint64 - - // If step is 1, use shifts. - if step == 1 { - return ^(math.MaxUint64 << (max + 1)) & (math.MaxUint64 << min) - } - - // Else, use a simple loop. - for i := min; i <= max; i += step { - bits |= 1 << i - } - return bits -} - -// all returns all bits within the given bounds. (plus the star bit) -func all(r bounds) uint64 { - return getBits(r.min, r.max, 1) | starBit -} - -// parseDescriptor returns a predefined schedule for the expression, or error if none matches. -func parseDescriptor(descriptor string, loc *time.Location) (Schedule, error) { - switch descriptor { - case "@yearly", "@annually": - return &SpecSchedule{ - Second: 1 << seconds.min, - Minute: 1 << minutes.min, - Hour: 1 << hours.min, - Dom: 1 << dom.min, - Month: 1 << months.min, - Dow: all(dow), - Location: loc, - }, nil - - case "@monthly": - return &SpecSchedule{ - Second: 1 << seconds.min, - Minute: 1 << minutes.min, - Hour: 1 << hours.min, - Dom: 1 << dom.min, - Month: all(months), - Dow: all(dow), - Location: loc, - }, nil - - case "@weekly": - return &SpecSchedule{ - Second: 1 << seconds.min, - Minute: 1 << minutes.min, - Hour: 1 << hours.min, - Dom: all(dom), - Month: all(months), - Dow: 1 << dow.min, - Location: loc, - }, nil - - case "@daily", "@midnight": - return &SpecSchedule{ - Second: 1 << seconds.min, - Minute: 1 << minutes.min, - Hour: 1 << hours.min, - Dom: all(dom), - Month: all(months), - Dow: all(dow), - Location: loc, - }, nil - - case "@hourly": - return &SpecSchedule{ - Second: 1 << seconds.min, - Minute: 1 << minutes.min, - Hour: all(hours), - Dom: all(dom), - Month: all(months), - Dow: all(dow), - Location: loc, - }, nil - - } - - const every = "@every " - if strings.HasPrefix(descriptor, every) { - duration, err := time.ParseDuration(descriptor[len(every):]) - if err != nil { - return nil, fmt.Errorf("failed to parse duration %s: %s", descriptor, err) - } - return Every(duration), nil - } - - return nil, fmt.Errorf("unrecognized descriptor: %s", descriptor) -} diff --git a/backend/lib/cron/parser_test.go b/backend/lib/cron/parser_test.go deleted file mode 100644 index f95a54bb..00000000 --- a/backend/lib/cron/parser_test.go +++ /dev/null @@ -1,384 +0,0 @@ -package cron - -import ( - "reflect" - "strings" - "testing" - "time" -) - -var secondParser = NewParser(Second | Minute | Hour | Dom | Month | DowOptional | Descriptor) - -func TestRange(t *testing.T) { - zero := uint64(0) - ranges := []struct { - expr string - min, max uint - expected uint64 - err string - }{ - {"5", 0, 7, 1 << 5, ""}, - {"0", 0, 7, 1 << 0, ""}, - {"7", 0, 7, 1 << 7, ""}, - - {"5-5", 0, 7, 1 << 5, ""}, - {"5-6", 0, 7, 1<<5 | 1<<6, ""}, - {"5-7", 0, 7, 1<<5 | 1<<6 | 1<<7, ""}, - - {"5-6/2", 0, 7, 1 << 5, ""}, - {"5-7/2", 0, 7, 1<<5 | 1<<7, ""}, - {"5-7/1", 0, 7, 1<<5 | 1<<6 | 1<<7, ""}, - - {"*", 1, 3, 1<<1 | 1<<2 | 1<<3 | starBit, ""}, - {"*/2", 1, 3, 1<<1 | 1<<3, ""}, - - {"5--5", 0, 0, zero, "too many hyphens"}, - {"jan-x", 0, 0, zero, "failed to parse int from"}, - {"2-x", 1, 5, zero, "failed to parse int from"}, - {"*/-12", 0, 0, zero, "negative number"}, - {"*//2", 0, 0, zero, "too many slashes"}, - {"1", 3, 5, zero, "below minimum"}, - {"6", 3, 5, zero, "above maximum"}, - {"5-3", 3, 5, zero, "beyond end of range"}, - {"*/0", 0, 0, zero, "should be a positive number"}, - } - - for _, c := range ranges { - actual, err := getRange(c.expr, bounds{c.min, c.max, nil}) - if len(c.err) != 0 && (err == nil || !strings.Contains(err.Error(), c.err)) { - t.Errorf("%s => expected %v, got %v", c.expr, c.err, err) - } - if len(c.err) == 0 && err != nil { - t.Errorf("%s => unexpected error %v", c.expr, err) - } - if actual != c.expected { - t.Errorf("%s => expected %d, got %d", c.expr, c.expected, actual) - } - } -} - -func TestField(t *testing.T) { - fields := []struct { - expr string - min, max uint - expected uint64 - }{ - {"5", 1, 7, 1 << 5}, - {"5,6", 1, 7, 1<<5 | 1<<6}, - {"5,6,7", 1, 7, 1<<5 | 1<<6 | 1<<7}, - {"1,5-7/2,3", 1, 7, 1<<1 | 1<<5 | 1<<7 | 1<<3}, - } - - for _, c := range fields { - actual, _ := getField(c.expr, bounds{c.min, c.max, nil}) - if actual != c.expected { - t.Errorf("%s => expected %d, got %d", c.expr, c.expected, actual) - } - } -} - -func TestAll(t *testing.T) { - allBits := []struct { - r bounds - expected uint64 - }{ - {minutes, 0xfffffffffffffff}, // 0-59: 60 ones - {hours, 0xffffff}, // 0-23: 24 ones - {dom, 0xfffffffe}, // 1-31: 31 ones, 1 zero - {months, 0x1ffe}, // 1-12: 12 ones, 1 zero - {dow, 0x7f}, // 0-6: 7 ones - } - - for _, c := range allBits { - actual := all(c.r) // all() adds the starBit, so compensate for that.. - if c.expected|starBit != actual { - t.Errorf("%d-%d/%d => expected %b, got %b", - c.r.min, c.r.max, 1, c.expected|starBit, actual) - } - } -} - -func TestBits(t *testing.T) { - bits := []struct { - min, max, step uint - expected uint64 - }{ - {0, 0, 1, 0x1}, - {1, 1, 1, 0x2}, - {1, 5, 2, 0x2a}, // 101010 - {1, 4, 2, 0xa}, // 1010 - } - - for _, c := range bits { - actual := getBits(c.min, c.max, c.step) - if c.expected != actual { - t.Errorf("%d-%d/%d => expected %b, got %b", - c.min, c.max, c.step, c.expected, actual) - } - } -} - -func TestParseScheduleErrors(t *testing.T) { - var tests = []struct{ expr, err string }{ - {"* 5 j * * *", "failed to parse int from"}, - {"@every Xm", "failed to parse duration"}, - {"@unrecognized", "unrecognized descriptor"}, - {"* * * *", "expected 5 to 6 fields"}, - {"", "empty spec string"}, - } - for _, c := range tests { - actual, err := secondParser.Parse(c.expr) - if err == nil || !strings.Contains(err.Error(), c.err) { - t.Errorf("%s => expected %v, got %v", c.expr, c.err, err) - } - if actual != nil { - t.Errorf("expected nil schedule on error, got %v", actual) - } - } -} - -func TestParseSchedule(t *testing.T) { - tokyo, _ := time.LoadLocation("Asia/Tokyo") - entries := []struct { - parser Parser - expr string - expected Schedule - }{ - {secondParser, "0 5 * * * *", every5min(time.Local)}, - {standardParser, "5 * * * *", every5min(time.Local)}, - {secondParser, "CRON_TZ=UTC 0 5 * * * *", every5min(time.UTC)}, - {standardParser, "CRON_TZ=UTC 5 * * * *", every5min(time.UTC)}, - {secondParser, "CRON_TZ=Asia/Tokyo 0 5 * * * *", every5min(tokyo)}, - {secondParser, "@every 5m", ConstantDelaySchedule{5 * time.Minute}}, - {secondParser, "@midnight", midnight(time.Local)}, - {secondParser, "TZ=UTC @midnight", midnight(time.UTC)}, - {secondParser, "TZ=Asia/Tokyo @midnight", midnight(tokyo)}, - {secondParser, "@yearly", annual(time.Local)}, - {secondParser, "@annually", annual(time.Local)}, - { - parser: secondParser, - expr: "* 5 * * * *", - expected: &SpecSchedule{ - Second: all(seconds), - Minute: 1 << 5, - Hour: all(hours), - Dom: all(dom), - Month: all(months), - Dow: all(dow), - Location: time.Local, - }, - }, - } - - for _, c := range entries { - actual, err := c.parser.Parse(c.expr) - if err != nil { - t.Errorf("%s => unexpected error %v", c.expr, err) - } - if !reflect.DeepEqual(actual, c.expected) { - t.Errorf("%s => expected %b, got %b", c.expr, c.expected, actual) - } - } -} - -func TestOptionalSecondSchedule(t *testing.T) { - parser := NewParser(SecondOptional | Minute | Hour | Dom | Month | Dow | Descriptor) - entries := []struct { - expr string - expected Schedule - }{ - {"0 5 * * * *", every5min(time.Local)}, - {"5 5 * * * *", every5min5s(time.Local)}, - {"5 * * * *", every5min(time.Local)}, - } - - for _, c := range entries { - actual, err := parser.Parse(c.expr) - if err != nil { - t.Errorf("%s => unexpected error %v", c.expr, err) - } - if !reflect.DeepEqual(actual, c.expected) { - t.Errorf("%s => expected %b, got %b", c.expr, c.expected, actual) - } - } -} - -func TestNormalizeFields(t *testing.T) { - tests := []struct { - name string - input []string - options ParseOption - expected []string - }{ - { - "AllFields_NoOptional", - []string{"0", "5", "*", "*", "*", "*"}, - Second | Minute | Hour | Dom | Month | Dow | Descriptor, - []string{"0", "5", "*", "*", "*", "*"}, - }, - { - "AllFields_SecondOptional_Provided", - []string{"0", "5", "*", "*", "*", "*"}, - SecondOptional | Minute | Hour | Dom | Month | Dow | Descriptor, - []string{"0", "5", "*", "*", "*", "*"}, - }, - { - "AllFields_SecondOptional_NotProvided", - []string{"5", "*", "*", "*", "*"}, - SecondOptional | Minute | Hour | Dom | Month | Dow | Descriptor, - []string{"0", "5", "*", "*", "*", "*"}, - }, - { - "SubsetFields_NoOptional", - []string{"5", "15", "*"}, - Hour | Dom | Month, - []string{"0", "0", "5", "15", "*", "*"}, - }, - { - "SubsetFields_DowOptional_Provided", - []string{"5", "15", "*", "4"}, - Hour | Dom | Month | DowOptional, - []string{"0", "0", "5", "15", "*", "4"}, - }, - { - "SubsetFields_DowOptional_NotProvided", - []string{"5", "15", "*"}, - Hour | Dom | Month | DowOptional, - []string{"0", "0", "5", "15", "*", "*"}, - }, - { - "SubsetFields_SecondOptional_NotProvided", - []string{"5", "15", "*"}, - SecondOptional | Hour | Dom | Month, - []string{"0", "0", "5", "15", "*", "*"}, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - actual, err := normalizeFields(test.input, test.options) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if !reflect.DeepEqual(actual, test.expected) { - t.Errorf("expected %v, got %v", test.expected, actual) - } - }) - } -} - -func TestNormalizeFields_Errors(t *testing.T) { - tests := []struct { - name string - input []string - options ParseOption - err string - }{ - { - "TwoOptionals", - []string{"0", "5", "*", "*", "*", "*"}, - SecondOptional | Minute | Hour | Dom | Month | DowOptional, - "", - }, - { - "TooManyFields", - []string{"0", "5", "*", "*"}, - SecondOptional | Minute | Hour, - "", - }, - { - "NoFields", - []string{}, - SecondOptional | Minute | Hour, - "", - }, - { - "TooFewFields", - []string{"*"}, - SecondOptional | Minute | Hour, - "", - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - actual, err := normalizeFields(test.input, test.options) - if err == nil { - t.Errorf("expected an error, got none. results: %v", actual) - return - } - if !strings.Contains(err.Error(), test.err) { - t.Errorf("expected error %q, got %q", test.err, err.Error()) - } - }) - } -} - -func TestStandardSpecSchedule(t *testing.T) { - entries := []struct { - expr string - expected Schedule - err string - }{ - { - expr: "5 * * * *", - expected: &SpecSchedule{1 << seconds.min, 1 << 5, all(hours), all(dom), all(months), all(dow), time.Local}, - }, - { - expr: "@every 5m", - expected: ConstantDelaySchedule{time.Duration(5) * time.Minute}, - }, - { - expr: "5 j * * *", - err: "failed to parse int from", - }, - { - expr: "* * * *", - err: "expected exactly 5 fields", - }, - } - - for _, c := range entries { - actual, err := ParseStandard(c.expr) - if len(c.err) != 0 && (err == nil || !strings.Contains(err.Error(), c.err)) { - t.Errorf("%s => expected %v, got %v", c.expr, c.err, err) - } - if len(c.err) == 0 && err != nil { - t.Errorf("%s => unexpected error %v", c.expr, err) - } - if !reflect.DeepEqual(actual, c.expected) { - t.Errorf("%s => expected %b, got %b", c.expr, c.expected, actual) - } - } -} - -func TestNoDescriptorParser(t *testing.T) { - parser := NewParser(Minute | Hour) - _, err := parser.Parse("@every 1m") - if err == nil { - t.Error("expected an error, got none") - } -} - -func every5min(loc *time.Location) *SpecSchedule { - return &SpecSchedule{1 << 0, 1 << 5, all(hours), all(dom), all(months), all(dow), loc} -} - -func every5min5s(loc *time.Location) *SpecSchedule { - return &SpecSchedule{1 << 5, 1 << 5, all(hours), all(dom), all(months), all(dow), loc} -} - -func midnight(loc *time.Location) *SpecSchedule { - return &SpecSchedule{1, 1, 1, all(dom), all(months), all(dow), loc} -} - -func annual(loc *time.Location) *SpecSchedule { - return &SpecSchedule{ - Second: 1 << seconds.min, - Minute: 1 << minutes.min, - Hour: 1 << hours.min, - Dom: 1 << dom.min, - Month: 1 << months.min, - Dow: all(dow), - Location: loc, - } -} diff --git a/backend/lib/cron/spec.go b/backend/lib/cron/spec.go deleted file mode 100644 index 9821a6a2..00000000 --- a/backend/lib/cron/spec.go +++ /dev/null @@ -1,188 +0,0 @@ -package cron - -import "time" - -// SpecSchedule specifies a duty cycle (to the second granularity), based on a -// traditional crontab specification. It is computed initially and stored as bit sets. -type SpecSchedule struct { - Second, Minute, Hour, Dom, Month, Dow uint64 - - // Override location for this schedule. - Location *time.Location -} - -// bounds provides a range of acceptable values (plus a map of name to value). -type bounds struct { - min, max uint - names map[string]uint -} - -// The bounds for each field. -var ( - seconds = bounds{0, 59, nil} - minutes = bounds{0, 59, nil} - hours = bounds{0, 23, nil} - dom = bounds{1, 31, nil} - months = bounds{1, 12, map[string]uint{ - "jan": 1, - "feb": 2, - "mar": 3, - "apr": 4, - "may": 5, - "jun": 6, - "jul": 7, - "aug": 8, - "sep": 9, - "oct": 10, - "nov": 11, - "dec": 12, - }} - dow = bounds{0, 6, map[string]uint{ - "sun": 0, - "mon": 1, - "tue": 2, - "wed": 3, - "thu": 4, - "fri": 5, - "sat": 6, - }} -) - -const ( - // Set the top bit if a star was included in the expression. - starBit = 1 << 63 -) - -// Next returns the next time this schedule is activated, greater than the given -// time. If no time can be found to satisfy the schedule, return the zero time. -func (s *SpecSchedule) Next(t time.Time) time.Time { - // General approach - // - // For Month, Day, Hour, Minute, Second: - // Check if the time value matches. If yes, continue to the next field. - // If the field doesn't match the schedule, then increment the field until it matches. - // While incrementing the field, a wrap-around brings it back to the beginning - // of the field list (since it is necessary to re-verify previous field - // values) - - // Convert the given time into the schedule's timezone, if one is specified. - // Save the original timezone so we can convert back after we find a time. - // Note that schedules without a time zone specified (time.Local) are treated - // as local to the time provided. - origLocation := t.Location() - loc := s.Location - if loc == time.Local { - loc = t.Location() - } - if s.Location != time.Local { - t = t.In(s.Location) - } - - // Start at the earliest possible time (the upcoming second). - t = t.Add(1*time.Second - time.Duration(t.Nanosecond())*time.Nanosecond) - - // This flag indicates whether a field has been incremented. - added := false - - // If no time is found within five years, return zero. - yearLimit := t.Year() + 5 - -WRAP: - if t.Year() > yearLimit { - return time.Time{} - } - - // Find the first applicable month. - // If it's this month, then do nothing. - for 1< 12 { - t = t.Add(time.Duration(24-t.Hour()) * time.Hour) - } else { - t = t.Add(time.Duration(-t.Hour()) * time.Hour) - } - } - - if t.Day() == 1 { - goto WRAP - } - } - - for 1< 0 - dowMatch = 1< 0 - ) - if s.Dom&starBit > 0 || s.Dow&starBit > 0 { - return domMatch && dowMatch - } - return domMatch || dowMatch -} diff --git a/backend/lib/cron/spec_test.go b/backend/lib/cron/spec_test.go deleted file mode 100644 index 1b8a503e..00000000 --- a/backend/lib/cron/spec_test.go +++ /dev/null @@ -1,300 +0,0 @@ -package cron - -import ( - "strings" - "testing" - "time" -) - -func TestActivation(t *testing.T) { - tests := []struct { - time, spec string - expected bool - }{ - // Every fifteen minutes. - {"Mon Jul 9 15:00 2012", "0/15 * * * *", true}, - {"Mon Jul 9 15:45 2012", "0/15 * * * *", true}, - {"Mon Jul 9 15:40 2012", "0/15 * * * *", false}, - - // Every fifteen minutes, starting at 5 minutes. - {"Mon Jul 9 15:05 2012", "5/15 * * * *", true}, - {"Mon Jul 9 15:20 2012", "5/15 * * * *", true}, - {"Mon Jul 9 15:50 2012", "5/15 * * * *", true}, - - // Named months - {"Sun Jul 15 15:00 2012", "0/15 * * Jul *", true}, - {"Sun Jul 15 15:00 2012", "0/15 * * Jun *", false}, - - // Everything set. - {"Sun Jul 15 08:30 2012", "30 08 ? Jul Sun", true}, - {"Sun Jul 15 08:30 2012", "30 08 15 Jul ?", true}, - {"Mon Jul 16 08:30 2012", "30 08 ? Jul Sun", false}, - {"Mon Jul 16 08:30 2012", "30 08 15 Jul ?", false}, - - // Predefined schedules - {"Mon Jul 9 15:00 2012", "@hourly", true}, - {"Mon Jul 9 15:04 2012", "@hourly", false}, - {"Mon Jul 9 15:00 2012", "@daily", false}, - {"Mon Jul 9 00:00 2012", "@daily", true}, - {"Mon Jul 9 00:00 2012", "@weekly", false}, - {"Sun Jul 8 00:00 2012", "@weekly", true}, - {"Sun Jul 8 01:00 2012", "@weekly", false}, - {"Sun Jul 8 00:00 2012", "@monthly", false}, - {"Sun Jul 1 00:00 2012", "@monthly", true}, - - // Test interaction of DOW and DOM. - // If both are restricted, then only one needs to match. - {"Sun Jul 15 00:00 2012", "* * 1,15 * Sun", true}, - {"Fri Jun 15 00:00 2012", "* * 1,15 * Sun", true}, - {"Wed Aug 1 00:00 2012", "* * 1,15 * Sun", true}, - {"Sun Jul 15 00:00 2012", "* * */10 * Sun", true}, // verifies #70 - - // However, if one has a star, then both need to match. - {"Sun Jul 15 00:00 2012", "* * * * Mon", false}, - {"Mon Jul 9 00:00 2012", "* * 1,15 * *", false}, - {"Sun Jul 15 00:00 2012", "* * 1,15 * *", true}, - {"Sun Jul 15 00:00 2012", "* * */2 * Sun", true}, - } - - for _, test := range tests { - sched, err := ParseStandard(test.spec) - if err != nil { - t.Error(err) - continue - } - actual := sched.Next(getTime(test.time).Add(-1 * time.Second)) - expected := getTime(test.time) - if test.expected && expected != actual || !test.expected && expected == actual { - t.Errorf("Fail evaluating %s on %s: (expected) %s != %s (actual)", - test.spec, test.time, expected, actual) - } - } -} - -func TestNext(t *testing.T) { - runs := []struct { - time, spec string - expected string - }{ - // Simple cases - {"Mon Jul 9 14:45 2012", "0 0/15 * * * *", "Mon Jul 9 15:00 2012"}, - {"Mon Jul 9 14:59 2012", "0 0/15 * * * *", "Mon Jul 9 15:00 2012"}, - {"Mon Jul 9 14:59:59 2012", "0 0/15 * * * *", "Mon Jul 9 15:00 2012"}, - - // Wrap around hours - {"Mon Jul 9 15:45 2012", "0 20-35/15 * * * *", "Mon Jul 9 16:20 2012"}, - - // Wrap around days - {"Mon Jul 9 23:46 2012", "0 */15 * * * *", "Tue Jul 10 00:00 2012"}, - {"Mon Jul 9 23:45 2012", "0 20-35/15 * * * *", "Tue Jul 10 00:20 2012"}, - {"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 * * * *", "Tue Jul 10 00:20:15 2012"}, - {"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 1/2 * * *", "Tue Jul 10 01:20:15 2012"}, - {"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 10-12 * * *", "Tue Jul 10 10:20:15 2012"}, - - {"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 1/2 */2 * *", "Thu Jul 11 01:20:15 2012"}, - {"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 * 9-20 * *", "Wed Jul 10 00:20:15 2012"}, - {"Mon Jul 9 23:35:51 2012", "15/35 20-35/15 * 9-20 Jul *", "Wed Jul 10 00:20:15 2012"}, - - // Wrap around months - {"Mon Jul 9 23:35 2012", "0 0 0 9 Apr-Oct ?", "Thu Aug 9 00:00 2012"}, - {"Mon Jul 9 23:35 2012", "0 0 0 */5 Apr,Aug,Oct Mon", "Tue Aug 1 00:00 2012"}, - {"Mon Jul 9 23:35 2012", "0 0 0 */5 Oct Mon", "Mon Oct 1 00:00 2012"}, - - // Wrap around years - {"Mon Jul 9 23:35 2012", "0 0 0 * Feb Mon", "Mon Feb 4 00:00 2013"}, - {"Mon Jul 9 23:35 2012", "0 0 0 * Feb Mon/2", "Fri Feb 1 00:00 2013"}, - - // Wrap around minute, hour, day, month, and year - {"Mon Dec 31 23:59:45 2012", "0 * * * * *", "Tue Jan 1 00:00:00 2013"}, - - // Leap year - {"Mon Jul 9 23:35 2012", "0 0 0 29 Feb ?", "Mon Feb 29 00:00 2016"}, - - // Daylight savings time 2am EST (-5) -> 3am EDT (-4) - {"2012-03-11T00:00:00-0500", "TZ=America/New_York 0 30 2 11 Mar ?", "2013-03-11T02:30:00-0400"}, - - // hourly job - {"2012-03-11T00:00:00-0500", "TZ=America/New_York 0 0 * * * ?", "2012-03-11T01:00:00-0500"}, - {"2012-03-11T01:00:00-0500", "TZ=America/New_York 0 0 * * * ?", "2012-03-11T03:00:00-0400"}, - {"2012-03-11T03:00:00-0400", "TZ=America/New_York 0 0 * * * ?", "2012-03-11T04:00:00-0400"}, - {"2012-03-11T04:00:00-0400", "TZ=America/New_York 0 0 * * * ?", "2012-03-11T05:00:00-0400"}, - - // hourly job using CRON_TZ - {"2012-03-11T00:00:00-0500", "CRON_TZ=America/New_York 0 0 * * * ?", "2012-03-11T01:00:00-0500"}, - {"2012-03-11T01:00:00-0500", "CRON_TZ=America/New_York 0 0 * * * ?", "2012-03-11T03:00:00-0400"}, - {"2012-03-11T03:00:00-0400", "CRON_TZ=America/New_York 0 0 * * * ?", "2012-03-11T04:00:00-0400"}, - {"2012-03-11T04:00:00-0400", "CRON_TZ=America/New_York 0 0 * * * ?", "2012-03-11T05:00:00-0400"}, - - // 1am nightly job - {"2012-03-11T00:00:00-0500", "TZ=America/New_York 0 0 1 * * ?", "2012-03-11T01:00:00-0500"}, - {"2012-03-11T01:00:00-0500", "TZ=America/New_York 0 0 1 * * ?", "2012-03-12T01:00:00-0400"}, - - // 2am nightly job (skipped) - {"2012-03-11T00:00:00-0500", "TZ=America/New_York 0 0 2 * * ?", "2012-03-12T02:00:00-0400"}, - - // Daylight savings time 2am EDT (-4) => 1am EST (-5) - {"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 30 2 04 Nov ?", "2012-11-04T02:30:00-0500"}, - {"2012-11-04T01:45:00-0400", "TZ=America/New_York 0 30 1 04 Nov ?", "2012-11-04T01:30:00-0500"}, - - // hourly job - {"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 0 * * * ?", "2012-11-04T01:00:00-0400"}, - {"2012-11-04T01:00:00-0400", "TZ=America/New_York 0 0 * * * ?", "2012-11-04T01:00:00-0500"}, - {"2012-11-04T01:00:00-0500", "TZ=America/New_York 0 0 * * * ?", "2012-11-04T02:00:00-0500"}, - - // 1am nightly job (runs twice) - {"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 0 1 * * ?", "2012-11-04T01:00:00-0400"}, - {"2012-11-04T01:00:00-0400", "TZ=America/New_York 0 0 1 * * ?", "2012-11-04T01:00:00-0500"}, - {"2012-11-04T01:00:00-0500", "TZ=America/New_York 0 0 1 * * ?", "2012-11-05T01:00:00-0500"}, - - // 2am nightly job - {"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 0 2 * * ?", "2012-11-04T02:00:00-0500"}, - {"2012-11-04T02:00:00-0500", "TZ=America/New_York 0 0 2 * * ?", "2012-11-05T02:00:00-0500"}, - - // 3am nightly job - {"2012-11-04T00:00:00-0400", "TZ=America/New_York 0 0 3 * * ?", "2012-11-04T03:00:00-0500"}, - {"2012-11-04T03:00:00-0500", "TZ=America/New_York 0 0 3 * * ?", "2012-11-05T03:00:00-0500"}, - - // hourly job - {"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 * * * ?", "2012-11-04T01:00:00-0400"}, - {"TZ=America/New_York 2012-11-04T01:00:00-0400", "0 0 * * * ?", "2012-11-04T01:00:00-0500"}, - {"TZ=America/New_York 2012-11-04T01:00:00-0500", "0 0 * * * ?", "2012-11-04T02:00:00-0500"}, - - // 1am nightly job (runs twice) - {"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 1 * * ?", "2012-11-04T01:00:00-0400"}, - {"TZ=America/New_York 2012-11-04T01:00:00-0400", "0 0 1 * * ?", "2012-11-04T01:00:00-0500"}, - {"TZ=America/New_York 2012-11-04T01:00:00-0500", "0 0 1 * * ?", "2012-11-05T01:00:00-0500"}, - - // 2am nightly job - {"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 2 * * ?", "2012-11-04T02:00:00-0500"}, - {"TZ=America/New_York 2012-11-04T02:00:00-0500", "0 0 2 * * ?", "2012-11-05T02:00:00-0500"}, - - // 3am nightly job - {"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 3 * * ?", "2012-11-04T03:00:00-0500"}, - {"TZ=America/New_York 2012-11-04T03:00:00-0500", "0 0 3 * * ?", "2012-11-05T03:00:00-0500"}, - - // Unsatisfiable - {"Mon Jul 9 23:35 2012", "0 0 0 30 Feb ?", ""}, - {"Mon Jul 9 23:35 2012", "0 0 0 31 Apr ?", ""}, - - // Monthly job - {"TZ=America/New_York 2012-11-04T00:00:00-0400", "0 0 3 3 * ?", "2012-12-03T03:00:00-0500"}, - - // Test the scenario of DST resulting in midnight not being a valid time. - // https://github.com/robfig/cron/issues/157 - {"2018-10-17T05:00:00-0400", "TZ=America/Sao_Paulo 0 0 9 10 * ?", "2018-11-10T06:00:00-0500"}, - {"2018-02-14T05:00:00-0500", "TZ=America/Sao_Paulo 0 0 9 22 * ?", "2018-02-22T07:00:00-0500"}, - } - - for _, c := range runs { - sched, err := secondParser.Parse(c.spec) - if err != nil { - t.Error(err) - continue - } - actual := sched.Next(getTime(c.time)) - expected := getTime(c.expected) - if !actual.Equal(expected) { - t.Errorf("%s, \"%s\": (expected) %v != %v (actual)", c.time, c.spec, expected, actual) - } - } -} - -func TestErrors(t *testing.T) { - invalidSpecs := []string{ - "xyz", - "60 0 * * *", - "0 60 * * *", - "0 0 * * XYZ", - } - for _, spec := range invalidSpecs { - _, err := ParseStandard(spec) - if err == nil { - t.Error("expected an error parsing: ", spec) - } - } -} - -func getTime(value string) time.Time { - if value == "" { - return time.Time{} - } - - var location = time.Local - if strings.HasPrefix(value, "TZ=") { - parts := strings.Fields(value) - loc, err := time.LoadLocation(parts[0][len("TZ="):]) - if err != nil { - panic("could not parse location:" + err.Error()) - } - location = loc - value = parts[1] - } - - var layouts = []string{ - "Mon Jan 2 15:04 2006", - "Mon Jan 2 15:04:05 2006", - } - for _, layout := range layouts { - if t, err := time.ParseInLocation(layout, value, location); err == nil { - return t - } - } - if t, err := time.ParseInLocation("2006-01-02T15:04:05-0700", value, location); err == nil { - return t - } - panic("could not parse time value " + value) -} - -func TestNextWithTz(t *testing.T) { - runs := []struct { - time, spec string - expected string - }{ - // Failing tests - {"2016-01-03T13:09:03+0530", "14 14 * * *", "2016-01-03T14:14:00+0530"}, - {"2016-01-03T04:09:03+0530", "14 14 * * ?", "2016-01-03T14:14:00+0530"}, - - // Passing tests - {"2016-01-03T14:09:03+0530", "14 14 * * *", "2016-01-03T14:14:00+0530"}, - {"2016-01-03T14:00:00+0530", "14 14 * * ?", "2016-01-03T14:14:00+0530"}, - } - for _, c := range runs { - sched, err := ParseStandard(c.spec) - if err != nil { - t.Error(err) - continue - } - actual := sched.Next(getTimeTZ(c.time)) - expected := getTimeTZ(c.expected) - if !actual.Equal(expected) { - t.Errorf("%s, \"%s\": (expected) %v != %v (actual)", c.time, c.spec, expected, actual) - } - } -} - -func getTimeTZ(value string) time.Time { - if value == "" { - return time.Time{} - } - t, err := time.Parse("Mon Jan 2 15:04 2006", value) - if err != nil { - t, err = time.Parse("Mon Jan 2 15:04:05 2006", value) - if err != nil { - t, err = time.Parse("2006-01-02T15:04:05-0700", value) - if err != nil { - panic(err) - } - } - } - - return t -} - -// https://github.com/robfig/cron/issues/144 -func TestSlash0NoHang(t *testing.T) { - schedule := "TZ=America/New_York 15/0 * * * *" - _, err := ParseStandard(schedule) - if err == nil { - t.Error("expected an error on 0 increment") - } -} diff --git a/backend/lib/validate/mongo.go b/backend/lib/validate/mongo.go deleted file mode 100644 index 7ef94177..00000000 --- a/backend/lib/validate/mongo.go +++ /dev/null @@ -1,10 +0,0 @@ -package validate - -import ( - "github.com/globalsign/mgo/bson" - "github.com/go-playground/validator/v10" -) - -func MongoID(sl validator.FieldLevel) bool { - return bson.IsObjectIdHex(sl.Field().String()) -} diff --git a/backend/main.go b/backend/main.go index 86dccf0a..8832bad7 100644 --- a/backend/main.go +++ b/backend/main.go @@ -2,24 +2,20 @@ package main import ( "context" - "crawlab/config" - "crawlab/database" - _ "crawlab/docs" - validate2 "crawlab/lib/validate" - "crawlab/middlewares" - "crawlab/model" - "crawlab/routes" - "crawlab/services" - "crawlab/services/challenge" - "crawlab/services/rpc" "github.com/apex/log" + "github.com/crawlab-team/crawlab-core/config" + validate2 "github.com/crawlab-team/crawlab-core/lib/validate" + "github.com/crawlab-team/crawlab-core/middlewares" + "github.com/crawlab-team/crawlab-core/model" + "github.com/crawlab-team/crawlab-core/routes" + "github.com/crawlab-team/crawlab-core/services" + "github.com/crawlab-team/crawlab-core/services/rpc" + "github.com/crawlab-team/crawlab-db" "github.com/gin-gonic/gin" "github.com/gin-gonic/gin/binding" "github.com/go-playground/validator/v10" "github.com/olivere/elastic/v7" "github.com/spf13/viper" - "github.com/swaggo/gin-swagger" - "github.com/swaggo/gin-swagger/swaggerFiles" "net" "net/http" "os" @@ -29,11 +25,6 @@ import ( "time" ) -var swagHandler gin.HandlerFunc - -func init() { - swagHandler = ginSwagger.WrapHandler(swaggerFiles.Handler) -} func main() { app := gin.New() app.Use(gin.Logger(), gin.Recovery()) @@ -41,10 +32,6 @@ func main() { _ = v.RegisterValidation("bid", validate2.MongoID) } - if swagHandler != nil { - app.GET("/swagger/*any", swagHandler) - } - // 初始化配置 if err := config.InitConfig(""); err != nil { log.Error("init config error:" + err.Error()) @@ -52,7 +39,7 @@ func main() { } log.Info("initialized config successfully") // 初始化Mongodb数据库 - if err := database.InitMongo(); err != nil { + if err := db.InitMongo(); err != nil { log.Error("init mongodb error:" + err.Error()) debug.PrintStack() panic(err) @@ -60,7 +47,7 @@ func main() { log.Info("initialized mongodb successfully") // 初始化Redis数据库 - if err := database.InitRedis(); err != nil { + if err := db.InitRedis(); err != nil { log.Error("init redis error:" + err.Error()) debug.PrintStack() panic(err) @@ -107,14 +94,6 @@ func main() { } log.Info("initialized dependency fetcher successfully") - // 初始化挑战服务 - if err := challenge.InitChallengeService(); err != nil { - log.Error("init challenge service error:" + err.Error()) - debug.PrintStack() - panic(err) - } - log.Info("initialized challenge service successfully") - // 初始化清理服务 if err := services.InitCleanService(); err != nil { log.Error("init clean service error:" + err.Error()) @@ -238,13 +217,13 @@ func main() { } // 任务 { - authGroup.GET("/tasks", routes.GetTaskList) // 任务列表 - authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情 - authGroup.PUT("/tasks", routes.PutTask) // 派发任务 - authGroup.PUT("/tasks/batch", routes.PutBatchTasks) // 批量派发任务 - authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务 - authGroup.DELETE("/tasks", routes.DeleteSelectedTask) // 删除多个任务 - authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) // 删除指定状态的任务 + authGroup.GET("/tasks", routes.GetTaskList) // 任务列表 + authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情 + authGroup.PUT("/tasks", routes.PutTask) // 派发任务 + authGroup.PUT("/tasks/batch", routes.PutBatchTasks) // 批量派发任务 + authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务 + authGroup.DELETE("/tasks", routes.DeleteSelectedTask) // 删除多个任务 + //authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) // 删除指定状态的任务 authGroup.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务 authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志 authGroup.GET("/tasks/:id/error-log", routes.GetTaskErrorLog) // 任务错误日志 @@ -303,11 +282,6 @@ func main() { authGroup.POST("/projects/:id", routes.PostProject) // 新增 authGroup.DELETE("/projects/:id", routes.DeleteProject) // 删除 } - // 挑战 - { - authGroup.GET("/challenges", routes.GetChallengeList) // 挑战列表 - authGroup.POST("/challenges-check", routes.CheckChallengeList) // 检查挑战列表 - } // 操作 { //authGroup.GET("/actions", routes.GetActionList) // 操作列表 @@ -330,12 +304,6 @@ func main() { authGroup.GET("/git/public-key", routes.GetGitSshPublicKey) // 获取 SSH 公钥 authGroup.GET("/git/commits", routes.GetGitCommits) // 获取 Git Commits authGroup.POST("/git/checkout", routes.PostGitCheckout) // 获取 Git Commits - // 爬虫市场 / 仓库 - { - authGroup.GET("/repos", routes.GetRepoList) // 获取仓库列表 - authGroup.GET("/repos/sub-dir", routes.GetRepoSubDirList) // 获取仓库子目录 - authGroup.POST("/repos/download", routes.DownloadRepo) // 下载仓库 - } } } diff --git a/backend/middlewares/auth.go b/backend/middlewares/auth.go deleted file mode 100644 index 8ab27728..00000000 --- a/backend/middlewares/auth.go +++ /dev/null @@ -1,48 +0,0 @@ -package middlewares - -import ( - "crawlab/constants" - "crawlab/routes" - "crawlab/services" - "github.com/gin-gonic/gin" - "net/http" - "strings" -) - -func AuthorizationMiddleware() gin.HandlerFunc { - return func(c *gin.Context) { - // 获取token string - tokenStr := c.GetHeader("Authorization") - - // 校验token - user, err := services.CheckToken(tokenStr) - - // 校验失败,返回错误响应 - if err != nil { - c.AbortWithStatusJSON(http.StatusUnauthorized, routes.Response{ - Status: "ok", - Message: "unauthorized", - Error: "unauthorized", - }) - return - } - - // 如果为普通权限,校验请求地址是否符合要求 - if user.Role == constants.RoleNormal { - if strings.HasPrefix(strings.ToLower(c.Request.URL.Path), "/users") { - c.AbortWithStatusJSON(http.StatusUnauthorized, routes.Response{ - Status: "ok", - Message: "unauthorized", - Error: "unauthorized", - }) - return - } - } - - // 设置用户 - c.Set(constants.ContextUser, &user) - - // 校验成功 - c.Next() - } -} diff --git a/backend/middlewares/cors.go b/backend/middlewares/cors.go deleted file mode 100644 index 5397251d..00000000 --- a/backend/middlewares/cors.go +++ /dev/null @@ -1,19 +0,0 @@ -package middlewares - -import "github.com/gin-gonic/gin" - -func CORSMiddleware() gin.HandlerFunc { - return func(c *gin.Context) { - c.Writer.Header().Set("Access-Control-Allow-Origin", "*") - c.Writer.Header().Set("Access-Control-Allow-Credentials", "true") - c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, accept, origin, Cache-Control, X-Requested-With") - c.Writer.Header().Set("Access-Control-Allow-Methods", "DELETE, POST, OPTIONS, GET, PUT") - - if c.Request.Method == "OPTIONS" { - c.AbortWithStatus(204) - return - } - - c.Next() - } -} diff --git a/backend/middlewares/es_log.go b/backend/middlewares/es_log.go deleted file mode 100644 index 464a2a0a..00000000 --- a/backend/middlewares/es_log.go +++ /dev/null @@ -1,54 +0,0 @@ -package middlewares - -import ( - "bytes" - "context" - "fmt" - "github.com/gin-gonic/gin" - "github.com/olivere/elastic/v7" - "github.com/satori/go.uuid" - "github.com/spf13/viper" - "strconv" - "time" -) - -func EsLog(ctx context.Context, esClient *elastic.Client) gin.HandlerFunc { - - return func(c *gin.Context) { - // 开始时间 - crawlabIndex := viper.GetString("setting.crawlabLogIndex") - start := time.Now() - // 处理请求 - c.Next() - // 结束时间 - end := time.Now() - //执行时间 - latency := strconv.FormatInt(end.Sub(start).Nanoseconds()/1000, 10) - path := c.Request.URL.Path - - clientIP := c.ClientIP() - method := c.Request.Method - statusCode := strconv.Itoa(c.Writer.Status()) - buf := new(bytes.Buffer) - buf.ReadFrom(c.Request.Body) - b := buf.String() - accessLog := "costTime:" + latency + "ms--" + "StatusCode:" + statusCode + "--" + "Method:" + method + "--" + "ClientIp:" + clientIP + "--" + - "RequestURI:" + path + "--" + "Host:" + c.Request.Host + "--" + "UserAgent--" + c.Request.UserAgent() + "--RequestBody:" + - string(b) - WriteMsg(ctx, crawlabIndex, esClient, time.Now(), accessLog) - } - -} - -// WriteMsg will write the msg and level into es -func WriteMsg(ctx context.Context, crawlabIndex string, es *elastic.Client, when time.Time, msg string) error { - vals := make(map[string]interface{}) - vals["@timestamp"] = when.Format(time.RFC3339) - vals["@msg"] = msg - uid := uuid.NewV4().String() - _, err := es.Index().Index(crawlabIndex).Id(uid).BodyJson(vals).Refresh("wait_for").Do(ctx) - if err != nil { - fmt.Println(err) - } - return err -} diff --git a/backend/mock/base.go b/backend/mock/base.go deleted file mode 100644 index d8b11eb9..00000000 --- a/backend/mock/base.go +++ /dev/null @@ -1,16 +0,0 @@ -package mock - -type Response struct { - Status string `json:"status"` - Message string `json:"message"` - Data interface{} `json:"data"` - Error string `json:"error"` -} - -type ListResponse struct { - Status string `json:"status"` - Message string `json:"message"` - Total int `json:"total"` - Data interface{} `json:"data"` - Error string `json:"error"` -} diff --git a/backend/mock/file.go b/backend/mock/file.go deleted file mode 100644 index addd771a..00000000 --- a/backend/mock/file.go +++ /dev/null @@ -1,8 +0,0 @@ -package mock - -type File struct { - Name string `json:"name"` - Path string `json:"path"` - IsDir bool `json:"is_dir"` - Size int64 `json:"size"` -} diff --git a/backend/mock/node.go b/backend/mock/node.go deleted file mode 100644 index 2d3a978d..00000000 --- a/backend/mock/node.go +++ /dev/null @@ -1,222 +0,0 @@ -package mock - -import ( - "crawlab/entity" - "crawlab/model" - "crawlab/services" - "github.com/apex/log" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" - "time" -) - -var NodeList = []model.Node{ - { - Id: bson.ObjectId("5d429e6c19f7abede924fee2"), - Ip: "10.32.35.15", - Name: "test1", - Status: "online", - Port: "8081", - Mac: "ac:12:df:12:fd", - Description: "For test1", - IsMaster: true, - UpdateTs: time.Now(), - CreateTs: time.Now(), - UpdateTsUnix: time.Now().Unix(), - }, - { - Id: bson.ObjectId("5d429e6c19f7abede924fe22"), - Ip: "10.32.35.12", - Name: "test2", - Status: "online", - Port: "8082", - Mac: "ac:12:df:12:vh", - Description: "For test2", - IsMaster: true, - UpdateTs: time.Now(), - CreateTs: time.Now(), - UpdateTsUnix: time.Now().Unix(), - }, -} - -var TaskList = []model.Task{ - { - Id: "1234", - SpiderId: bson.ObjectId("5d429e6c19f7abede924fee2"), - StartTs: time.Now(), - FinishTs: time.Now(), - Status: "进行中", - NodeId: bson.ObjectId("5d429e6c19f7abede924fee2"), - LogPath: "./log", - Cmd: "scrapy crawl test", - Error: "", - ResultCount: 0, - WaitDuration: 10.0, - RuntimeDuration: 10, - TotalDuration: 20, - SpiderName: "test", - NodeName: "test", - CreateTs: time.Now(), - UpdateTs: time.Now(), - }, - { - Id: "5678", - SpiderId: bson.ObjectId("5d429e6c19f7abede924fee2"), - StartTs: time.Now(), - FinishTs: time.Now(), - Status: "进行中", - NodeId: bson.ObjectId("5d429e6c19f7abede924fee2"), - LogPath: "./log", - Cmd: "scrapy crawl test2", - Error: "", - ResultCount: 0, - WaitDuration: 10.0, - RuntimeDuration: 10, - TotalDuration: 20, - SpiderName: "test", - NodeName: "test", - CreateTs: time.Now(), - UpdateTs: time.Now(), - }, -} - -var dataList = []services.Data{ - { - Mac: "ac:12:fc:fd:ds:dd", - Ip: "192.10.2.1", - Master: true, - UpdateTs: time.Now(), - UpdateTsUnix: time.Now().Unix(), - }, - { - Mac: "22:12:fc:fd:ds:dd", - Ip: "182.10.2.2", - Master: true, - UpdateTs: time.Now(), - UpdateTsUnix: time.Now().Unix(), - }, -} - -var executeble = []entity.Executable{ - { - Path: "/test", - FileName: "test.py", - DisplayName: "test.py", - }, -} -var systemInfo = entity.SystemInfo{ARCH: "x86", - OS: "linux", - Hostname: "test", - NumCpu: 4, - Executables: executeble, -} - -func GetNodeList(c *gin.Context) { - nodes := NodeList - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: nodes, - }) -} - -func GetNode(c *gin.Context) { - var result model.Node - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - for _, node := range NodeList { - if node.Id == bson.ObjectId(id) { - result = node - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: result, - }) -} - -func Ping(c *gin.Context) { - data := dataList[0] - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: data, - }) -} - -func PostNode(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var oldItem model.Node - for _, node := range NodeList { - if node.Id == bson.ObjectId(id) { - oldItem = node - } - - } - log.Info(id) - var newItem model.Node - if err := c.ShouldBindJSON(&newItem); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - newItem.Id = oldItem.Id - - log.Info("Post Node success") - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func GetNodeTaskList(c *gin.Context) { - - tasks := TaskList - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: tasks, - }) -} - -func DeleteNode(c *gin.Context) { - id := bson.ObjectId("5d429e6c19f7abede924fee2") - - for _, node := range NodeList { - if node.Id == id { - log.Infof("Delete a node") - } - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func GetSystemInfo(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - sysInfo := systemInfo - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: sysInfo, - }) -} diff --git a/backend/mock/node_test.go b/backend/mock/node_test.go deleted file mode 100644 index abd568c2..00000000 --- a/backend/mock/node_test.go +++ /dev/null @@ -1,198 +0,0 @@ -package mock - -import ( - "bytes" - "crawlab/model" - "encoding/json" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - . "github.com/smartystreets/goconvey/convey" - "net/http" - "net/http/httptest" - "testing" - "time" -) - -var app *gin.Engine - -// 本测试依赖MongoDB的服务,所以在测试之前需要启动MongoDB及相关服务 -func init() { - app = gin.Default() - - // mock Test - // 节点相关的API - app.GET("/ping", Ping) - app.GET("/nodes", GetNodeList) // 节点列表 - app.GET("/nodes/:id", GetNode) // 节点详情 - app.POST("/nodes/:id", PostNode) // 修改节点 - app.GET("/nodes/:id/tasks", GetNodeTaskList) // 节点任务列表 - app.GET("/nodes/:id/system", GetSystemInfo) // 节点任务列表 - app.DELETE("/nodes/:id", DeleteNode) // 删除节点 - //// 爬虫 - app.GET("/stats/home", GetHomeStats) // 首页统计数据 - // 定时任务 - app.GET("/schedules", GetScheduleList) // 定时任务列表 - app.GET("/schedules/:id", GetSchedule) // 定时任务详情 - app.PUT("/schedules", PutSchedule) // 创建定时任务 - app.POST("/schedules/:id", PostSchedule) // 修改定时任务 - app.DELETE("/schedules/:id", DeleteSchedule) // 删除定时任务 - app.GET("/tasks", GetTaskList) // 任务列表 - app.GET("/tasks/:id", GetTask) // 任务详情 - app.PUT("/tasks", PutTask) // 派发任务 - app.DELETE("/tasks/:id", DeleteTask) // 删除任务 - app.GET("/tasks/:id/results", GetTaskResults) // 任务结果 - app.GET("/tasks/:id/results/download", DownloadTaskResultsCsv) // 下载任务结果 - app.GET("/spiders", GetSpiderList) // 爬虫列表 - app.GET("/spiders/:id", GetSpider) // 爬虫详情 - app.POST("/spiders/:id", PostSpider) // 修改爬虫 - app.DELETE("/spiders/:id", DeleteSpider) // 删除爬虫 - app.GET("/spiders/:id/tasks", GetSpiderTasks) // 爬虫任务列表 - app.GET("/spiders/:id/dir", GetSpiderDir) // 爬虫目录 -} - -//mock test, test data in ./mock -func TestGetNodeList(t *testing.T) { - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/nodes", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - - Convey("Test API GetNodeList", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestGetNode(t *testing.T) { - var resp Response - var mongoId = "5d429e6c19f7abede924fee2" - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/nodes/"+mongoId, nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - t.Log(resp.Data) - Convey("Test API GetNode", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - So(resp.Data.(map[string]interface{})["_id"], ShouldEqual, bson.ObjectId(mongoId).Hex()) - }) - }) -} - -func TestPing(t *testing.T) { - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/ping", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - Convey("Test API ping", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestGetNodeTaskList(t *testing.T) { - var resp Response - var mongoId = "5d429e6c19f7abede924fee2" - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "nodes/"+mongoId+"/tasks", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - Convey("Test API GetNodeTaskList", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestDeleteNode(t *testing.T) { - var resp Response - - var mongoId = "5d429e6c19f7abede924fee2" - w := httptest.NewRecorder() - req, _ := http.NewRequest("DELETE", "nodes/"+mongoId, nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - Convey("Test API DeleteNode", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestPostNode(t *testing.T) { - var newItem = model.Node{ - Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - Ip: "10.32.35.15", - Name: "test1", - Status: "online", - Port: "8081", - Mac: "ac:12:df:12:fd", - Description: "For test1", - IsMaster: true, - UpdateTs: time.Now(), - CreateTs: time.Now(), - UpdateTsUnix: time.Now().Unix(), - } - - var resp Response - body, _ := json.Marshal(newItem) - - var mongoId = "5d429e6c19f7abede924fee2" - w := httptest.NewRecorder() - req, _ := http.NewRequest("POST", "nodes/"+mongoId, bytes.NewReader(body)) - - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - t.Log(resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - Convey("Test API PostNode", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestGetSystemInfo(t *testing.T) { - var resp Response - var mongoId = "5d429e6c19f7abede924fee2" - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "nodes/"+mongoId+"/system", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - Convey("Test API GetSystemInfo", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} diff --git a/backend/mock/schedule.go b/backend/mock/schedule.go deleted file mode 100644 index e5c45546..00000000 --- a/backend/mock/schedule.go +++ /dev/null @@ -1,136 +0,0 @@ -package mock - -import ( - "crawlab/constants" - "crawlab/model" - "fmt" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" - "time" -) - -var NodeIdss = []bson.ObjectId{bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - bson.ObjectIdHex("5d429e6c19f7abede924fee1")} - -var scheduleList = []model.Schedule{ - { - Id: bson.ObjectId("5d429e6c19f7abede924fee2"), - Name: "test schedule", - SpiderId: "123", - NodeIds: NodeIdss, - Cron: "***1*", - EntryId: 10, - // 前端展示 - SpiderName: "test scedule", - - CreateTs: time.Now(), - UpdateTs: time.Now(), - }, - { - Id: bson.ObjectId("xx429e6c19f7abede924fee2"), - Name: "test schedule2", - SpiderId: "234", - NodeIds: NodeIdss, - Cron: "***1*", - EntryId: 10, - // 前端展示 - SpiderName: "test scedule2", - - CreateTs: time.Now(), - UpdateTs: time.Now(), - }, -} - -func GetScheduleList(c *gin.Context) { - results := scheduleList - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: results, - }) -} - -func GetSchedule(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var result model.Schedule - for _, sch := range scheduleList { - if sch.Id == bson.ObjectId(id) { - result = sch - } - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: result, - }) -} - -func PostSchedule(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var oldItem model.Schedule - for _, sch := range scheduleList { - if sch.Id == bson.ObjectId(id) { - oldItem = sch - } - - } - - var newItem model.Schedule - if err := c.ShouldBindJSON(&newItem); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - newItem.Id = oldItem.Id - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func PutSchedule(c *gin.Context) { - var item model.Schedule - - // 绑定数据模型 - if err := c.ShouldBindJSON(&item); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 如果node_id为空,则置为空ObjectId - for _, NodeId := range item.NodeIds { - if NodeId == "" { - NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func DeleteSchedule(c *gin.Context) { - id := bson.ObjectIdHex("5d429e6c19f7abede924fee2") - for _, sch := range scheduleList { - if sch.Id == id { - fmt.Println("delete a schedule") - } - } - fmt.Println(id) - fmt.Println("update schedule") - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/mock/schedule_test.go b/backend/mock/schedule_test.go deleted file mode 100644 index 87f1131a..00000000 --- a/backend/mock/schedule_test.go +++ /dev/null @@ -1,141 +0,0 @@ -package mock - -import ( - "bytes" - "crawlab/model" - "crawlab/utils" - "encoding/json" - "github.com/globalsign/mgo/bson" - . "github.com/smartystreets/goconvey/convey" - "net/http" - "net/http/httptest" - "strings" - "testing" - "time" -) - -func TestGetScheduleList(t *testing.T) { - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/schedules", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - t.Log(resp.Data) - Convey("Test API GetScheduleList", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestGetSchedule(t *testing.T) { - var mongoId = "5d429e6c19f7abede924fee2" - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/schedules/"+mongoId, nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - Convey("Test API GetSchedule", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - So(resp.Data.(map[string]interface{})["_id"], ShouldEqual, bson.ObjectId(mongoId).Hex()) - }) - }) -} - -func TestDeleteSchedule(t *testing.T) { - var mongoId = "5d429e6c19f7abede924fee2" - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("DELETE", "/schedules/"+mongoId, nil) - app.ServeHTTP(w, req) - - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - - Convey("Test DeleteSchedule", t, func() { - Convey("Test resp status", func() { - So(resp.Status, ShouldEqual, "ok") - }) - }) -} - -func TestPostSchedule(t *testing.T) { - var newItem = model.Schedule{ - Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - Name: "test schedule", - SpiderId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - NodeIds: NodeIdss, - Cron: "***1*", - EntryId: 10, - // 前端展示 - SpiderName: "test scedule", - - CreateTs: time.Now(), - UpdateTs: time.Now(), - } - - var resp Response - var mongoId = "5d429e6c19f7abede924fee2" - body, _ := json.Marshal(newItem) - w := httptest.NewRecorder() - req, _ := http.NewRequest("POST", "/schedules/"+mongoId, strings.NewReader(utils.BytesToString(body))) - app.ServeHTTP(w, req) - - err := json.Unmarshal(w.Body.Bytes(), &resp) - t.Log(resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API PostSchedule", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) - -} - -func TestPutSchedule(t *testing.T) { - var newItem = model.Schedule{ - Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - Name: "test schedule", - SpiderId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - NodeIds: NodeIdss, - Cron: "***1*", - EntryId: 10, - // 前端展示 - SpiderName: "test scedule", - - CreateTs: time.Now(), - UpdateTs: time.Now(), - } - - var resp Response - body, _ := json.Marshal(newItem) - w := httptest.NewRecorder() - req, _ := http.NewRequest("PUT", "/schedules", bytes.NewReader(body)) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - t.Log(resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API PutSchedule", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) - -} diff --git a/backend/mock/spider.go b/backend/mock/spider.go deleted file mode 100644 index 1994196e..00000000 --- a/backend/mock/spider.go +++ /dev/null @@ -1,187 +0,0 @@ -package mock - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/apex/log" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "io/ioutil" - "net/http" - "os" - "path/filepath" - "time" -) - -var SpiderList = []model.Spider{ - { - Id: bson.ObjectId("5d429e6c19f7abede924fee2"), - Name: "For test", - DisplayName: "test", - Type: "test", - Col: "test", - Site: "www.baidu.com", - Envs: nil, - Src: "../app/spiders", - Cmd: "scrapy crawl test", - LastRunTs: time.Now(), - CreateTs: time.Now(), - UpdateTs: time.Now(), - UserId: constants.ObjectIdNull, - }, -} - -func GetSpiderList(c *gin.Context) { - - // mock get spider list from database - results := SpiderList - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: results, - }) -} - -func GetSpider(c *gin.Context) { - id := c.Param("id") - var result model.Spider - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - for _, spider := range SpiderList { - if spider.Id == bson.ObjectId(id) { - result = spider - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: result, - }) -} - -func PostSpider(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - } - - var item model.Spider - if err := c.ShouldBindJSON(&item); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - log.Info("modify the item") - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} -func GetSpiderDir(c *gin.Context) { - // 爬虫ID - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 目录相对路径 - path := c.Query("path") - var spi model.Spider - - // 获取爬虫 - for _, spider := range SpiderList { - if spider.Id == bson.ObjectId(id) { - spi = spider - } - } - - // 获取目录下文件列表 - f, err := ioutil.ReadDir(filepath.Join(spi.Src, path)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 遍历文件列表 - var fileList []model.File - for _, file := range f { - fileList = append(fileList, model.File{ - Name: file.Name(), - IsDir: file.IsDir(), - Size: file.Size(), - Path: filepath.Join(path, file.Name()), - }) - } - - // 返回结果 - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: fileList, - }) -} - -func GetSpiderTasks(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var spider model.Spider - for _, spi := range SpiderList { - if spi.Id == bson.ObjectId(id) { - spider = spi - } - } - - var tasks model.Task - for _, task := range TaskList { - if task.SpiderId == spider.Id { - tasks = task - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: tasks, - }) -} - -func DeleteSpider(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - // 获取该爬虫,get this spider - var spider model.Spider - for _, spi := range SpiderList { - if spi.Id == bson.ObjectId(id) { - spider = spi - } - } - - // 删除爬虫文件目录,delete the spider dir - if err := os.RemoveAll(spider.Src); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 从数据库中删除该爬虫,delete this spider from database - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/mock/spider_test.go b/backend/mock/spider_test.go deleted file mode 100644 index 11e2c9cd..00000000 --- a/backend/mock/spider_test.go +++ /dev/null @@ -1,139 +0,0 @@ -package mock - -import ( - "bytes" - "crawlab/constants" - "crawlab/model" - "encoding/json" - "github.com/globalsign/mgo/bson" - . "github.com/smartystreets/goconvey/convey" - "net/http" - "net/http/httptest" - "testing" - "time" -) - -func TestGetSpiderList(t *testing.T) { - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/spiders", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("unmarshal resp faild") - } - Convey("Test API GetSpiderList", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestGetSpider(t *testing.T) { - var resp Response - var spiderId = "5d429e6c19f7abede924fee2" - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/spiders/"+spiderId, nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API GetSpider", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestPostSpider(t *testing.T) { - var spider = model.Spider{ - Id: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - Name: "For test", - DisplayName: "test", - Type: "test", - Col: "test", - Site: "www.baidu.com", - Envs: nil, - Src: "/app/spider", - Cmd: "scrapy crawl test", - LastRunTs: time.Now(), - CreateTs: time.Now(), - UpdateTs: time.Now(), - UserId: constants.ObjectIdNull, - } - var resp Response - var spiderId = "5d429e6c19f7abede924fee2" - w := httptest.NewRecorder() - body, _ := json.Marshal(spider) - req, _ := http.NewRequest("POST", "/spiders/"+spiderId, bytes.NewReader(body)) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API PostSpider", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) - -} - -func TestGetSpiderDir(t *testing.T) { - var spiderId = "5d429e6c19f7abede924fee2" - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/spiders/"+spiderId+"/dir", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API GetSpiderDir", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) - -} - -func TestGetSpiderTasks(t *testing.T) { - var spiderId = "5d429e6c19f7abede924fee2" - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/spiders/"+spiderId+"/tasks", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API GetSpiderTasks", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestDeleteSpider(t *testing.T) { - var spiderId = "5d429e6c19f7abede924fee2" - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("DELETE", "/spiders/"+spiderId, nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API DeleteSpider", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} diff --git a/backend/mock/stats.go b/backend/mock/stats.go deleted file mode 100644 index f0227da9..00000000 --- a/backend/mock/stats.go +++ /dev/null @@ -1,62 +0,0 @@ -package mock - -import ( - "crawlab/model" - "github.com/gin-gonic/gin" - "net/http" -) - -var taskDailyItems = []model.TaskDailyItem{ - { - Date: "2019/08/19", - TaskCount: 2, - AvgRuntimeDuration: 1000, - }, - { - Date: "2019/08/20", - TaskCount: 3, - AvgRuntimeDuration: 10130, - }, -} - -func GetHomeStats(c *gin.Context) { - type DataOverview struct { - TaskCount int `json:"task_count"` - SpiderCount int `json:"spider_count"` - ActiveNodeCount int `json:"active_node_count"` - ScheduleCount int `json:"schedule_count"` - } - - type Data struct { - Overview DataOverview `json:"overview"` - Daily []model.TaskDailyItem `json:"daily"` - } - - // 任务总数 - taskCount := 10 - - // 在线节点总数 - activeNodeCount := 4 - - // 爬虫总数 - spiderCount := 5 - // 定时任务数 - scheduleCount := 2 - - // 每日任务数 - items := taskDailyItems - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: Data{ - Overview: DataOverview{ - ActiveNodeCount: activeNodeCount, - TaskCount: taskCount, - SpiderCount: spiderCount, - ScheduleCount: scheduleCount, - }, - Daily: items, - }, - }) -} diff --git a/backend/mock/stats_test.go b/backend/mock/stats_test.go deleted file mode 100644 index a94e52d4..00000000 --- a/backend/mock/stats_test.go +++ /dev/null @@ -1,29 +0,0 @@ -package mock - -import ( - "encoding/json" - "fmt" - . "github.com/smartystreets/goconvey/convey" - "net/http" - "net/http/httptest" - "testing" -) - -func TestGetHomeStats(t *testing.T) { - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/stats/home", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - fmt.Println(resp.Data) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - - Convey("Test API GetHomeStats", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} diff --git a/backend/mock/system.go b/backend/mock/system.go deleted file mode 100644 index f33e02ba..00000000 --- a/backend/mock/system.go +++ /dev/null @@ -1 +0,0 @@ -package mock diff --git a/backend/mock/task.go b/backend/mock/task.go deleted file mode 100644 index 3a2c8fcd..00000000 --- a/backend/mock/task.go +++ /dev/null @@ -1,236 +0,0 @@ -package mock - -import ( - "bytes" - "crawlab/constants" - "crawlab/model" - "crawlab/utils" - "encoding/csv" - "fmt" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "github.com/satori/go.uuid" - "net/http" -) - -type TaskListRequestData struct { - PageNum int `form:"page_num"` - PageSize int `form:"page_size"` - NodeId string `form:"node_id"` - SpiderId string `form:"spider_id"` -} - -type TaskResultsRequestData struct { - PageNum int `form:"page_num"` - PageSize int `form:"page_size"` -} - -func GetTaskList(c *gin.Context) { - // 绑定数据 - data := TaskListRequestData{} - - if err := c.ShouldBindQuery(&data); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - if data.PageNum == 0 { - data.PageNum = 1 - } - if data.PageSize == 0 { - data.PageNum = 10 - } - - // 过滤条件 - query := bson.M{} - if data.NodeId != "" { - query["node_id"] = bson.ObjectIdHex(data.NodeId) - } - if data.SpiderId != "" { - query["spider_id"] = bson.ObjectIdHex(data.SpiderId) - } - - // 获取任务列表 - tasks := TaskList - - // 获取总任务数 - total := len(TaskList) - - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Total: total, - Data: tasks, - }) -} - -func GetTask(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var result model.Task - for _, task := range TaskList { - if task.Id == id { - result = task - } - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: result, - }) -} - -func PutTask(c *gin.Context) { - // 生成任务ID,generate task ID - id := uuid.NewV4() - - // 绑定数据 - var t model.Task - if err := c.ShouldBindJSON(&t); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - t.Id = id.String() - t.Status = constants.StatusPending - - // 如果没有传入node_id,则置为null - if t.NodeId.Hex() == "" { - t.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - // 将任务存入数据库,put the task into database - fmt.Println("put the task into database") - - // 加入任务队列, put the task into task queue - fmt.Println("put the task into task queue") - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func DeleteTask(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - for _, task := range TaskList { - if task.Id == id { - fmt.Println("delete the task") - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func GetTaskResults(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 绑定数据 - data := TaskResultsRequestData{} - if err := c.ShouldBindQuery(&data); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 获取任务 - var task model.Task - for _, ta := range TaskList { - if ta.Id == id { - task = ta - } - } - - fmt.Println(task) - // 获取结果 - var results interface{} - total := len(TaskList) - - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Data: results, - Total: total, - }) -} - -func DownloadTaskResultsCsv(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 获取任务 - var task model.Task - for _, ta := range TaskList { - if ta.Id == id { - task = ta - } - } - fmt.Println(task) - - // 获取结果 - var results []interface { - } - - // 字段列表 - var columns []string - if len(results) == 0 { - columns = []string{} - } else { - item := results[0].(bson.M) - for key := range item { - columns = append(columns, key) - } - } - - // 缓冲 - bytesBuffer := &bytes.Buffer{} - - // 写入UTF-8 BOM,避免使用Microsoft Excel打开乱码 - bytesBuffer.WriteString("\xEF\xBB\xBF") - - writer := csv.NewWriter(bytesBuffer) - - // 写入表头 - if err := writer.Write(columns); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 写入内容 - for _, result := range results { - // 将result转换为[]string - item := result.(bson.M) - var values []string - for _, col := range columns { - value := utils.InterfaceToString(item[col]) - values = append(values, value) - } - - // 写入数据 - if err := writer.Write(values); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 此时才会将缓冲区数据写入 - writer.Flush() - - // 设置下载的文件名 - c.Writer.Header().Set("Content-Disposition", "attachment;filename=data.csv") - - // 设置文件类型以及输出数据 - c.Data(http.StatusOK, "text/csv", bytesBuffer.Bytes()) -} diff --git a/backend/mock/task_test.go b/backend/mock/task_test.go deleted file mode 100644 index 1cd4ccfa..00000000 --- a/backend/mock/task_test.go +++ /dev/null @@ -1,138 +0,0 @@ -package mock - -import ( - "bytes" - "crawlab/model" - "encoding/json" - "github.com/globalsign/mgo/bson" - . "github.com/smartystreets/goconvey/convey" - "net/http" - "net/http/httptest" - "testing" - "time" -) - -func TestGetTaskList(t *testing.T) { - //var teskListRequestFrom = TaskListRequestData{ - // PageNum: 2, - // PageSize: 10, - // NodeId: "434221grfsf", - // SpiderId: "fdfewqrftea", - //} - - var resp ListResponse - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/tasks?PageNum=2&PageSize=10&NodeId=342dfsff&SpiderId=f8dsf", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - - Convey("Test API GetNodeList", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - So(resp.Total, ShouldEqual, 2) - }) - }) -} - -func TestGetTask(t *testing.T) { - var resp Response - var taskId = "1234" - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/tasks/"+taskId, nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - Convey("Test API GetTask", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestPutTask(t *testing.T) { - var newItem = model.Task{ - Id: "1234", - SpiderId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - StartTs: time.Now(), - FinishTs: time.Now(), - Status: "online", - NodeId: bson.ObjectIdHex("5d429e6c19f7abede924fee2"), - LogPath: "./log", - Cmd: "scrapy crawl test", - Error: "", - ResultCount: 0, - WaitDuration: 10.0, - RuntimeDuration: 10, - TotalDuration: 20, - SpiderName: "test", - NodeName: "test", - CreateTs: time.Now(), - UpdateTs: time.Now(), - } - - var resp Response - body, _ := json.Marshal(&newItem) - w := httptest.NewRecorder() - req, _ := http.NewRequest("PUT", "/tasks", bytes.NewReader(body)) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API PutTask", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestDeleteTask(t *testing.T) { - taskId := "1234" - var resp Response - w := httptest.NewRecorder() - req, _ := http.NewRequest("DELETE", "/tasks/"+taskId, nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("unmarshal resp failed") - } - Convey("Test API DeleteTask", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - }) - }) -} - -func TestGetTaskResults(t *testing.T) { - //var teskListResultFrom = TaskResultsRequestData{ - // PageNum: 2, - // PageSize: 1, - //} - taskId := "1234" - - var resp ListResponse - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/tasks/"+taskId+"/results?PageNum=2&PageSize=1", nil) - app.ServeHTTP(w, req) - err := json.Unmarshal(w.Body.Bytes(), &resp) - if err != nil { - t.Fatal("Unmarshal resp failed") - } - - Convey("Test API GetNodeList", t, func() { - Convey("Test response status", func() { - So(resp.Status, ShouldEqual, "ok") - So(resp.Message, ShouldEqual, "success") - So(resp.Total, ShouldEqual, 2) - }) - }) -} diff --git a/backend/mock/user.go b/backend/mock/user.go deleted file mode 100644 index f33e02ba..00000000 --- a/backend/mock/user.go +++ /dev/null @@ -1 +0,0 @@ -package mock diff --git a/backend/mock/utils.go b/backend/mock/utils.go deleted file mode 100644 index fd7d4efd..00000000 --- a/backend/mock/utils.go +++ /dev/null @@ -1,24 +0,0 @@ -package mock - -import ( - "github.com/gin-gonic/gin" - "runtime/debug" -) - -func HandleError(statusCode int, c *gin.Context, err error) { - debug.PrintStack() - c.JSON(statusCode, Response{ - Status: "ok", - Message: "error", - Error: err.Error(), - }) -} - -func HandleErrorF(statusCode int, c *gin.Context, err string) { - debug.PrintStack() - c.JSON(statusCode, Response{ - Status: "ok", - Message: "error", - Error: err, - }) -} diff --git a/backend/model/action.go b/backend/model/action.go deleted file mode 100644 index 15406181..00000000 --- a/backend/model/action.go +++ /dev/null @@ -1,162 +0,0 @@ -package model - -import ( - "crawlab/constants" - "crawlab/database" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "runtime/debug" - "time" -) - -type Action struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - UserId bson.ObjectId `json:"user_id" bson:"user_id"` - Type string `json:"type" bson:"type"` - - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -func (a *Action) Save() error { - s, c := database.GetCol("actions") - defer s.Close() - - a.UpdateTs = time.Now() - - if err := c.UpdateId(a.Id, a); err != nil { - debug.PrintStack() - return err - } - return nil -} - -func (a *Action) Add() error { - s, c := database.GetCol("actions") - defer s.Close() - - a.Id = bson.NewObjectId() - a.UpdateTs = time.Now() - a.CreateTs = time.Now() - if err := c.Insert(a); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func GetAction(id bson.ObjectId) (Action, error) { - s, c := database.GetCol("actions") - defer s.Close() - var user Action - if err := c.Find(bson.M{"_id": id}).One(&user); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return user, err - } - return user, nil -} - -func GetActionList(filter interface{}, skip int, limit int, sortKey string) ([]Action, error) { - s, c := database.GetCol("actions") - defer s.Close() - - var actions []Action - if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&actions); err != nil { - debug.PrintStack() - return actions, err - } - return actions, nil -} - -func GetActionListTotal(filter interface{}) (int, error) { - s, c := database.GetCol("actions") - defer s.Close() - - var result int - result, err := c.Find(filter).Count() - if err != nil { - return result, err - } - return result, nil -} - -func GetVisitDays(uid bson.ObjectId) (int, error) { - type ResData struct { - Days int `json:"days" bson:"days"` - } - s, c := database.GetCol("actions") - defer s.Close() - - pipeline := []bson.M{ - { - "$match": bson.M{ - "user_id": uid, - "type": constants.ActionTypeVisit, - }, - }, - { - "$addFields": bson.M{ - "date": bson.M{ - "$dateToString": bson.M{ - "format": "%Y%m%d", - "date": "$create_ts", - "timezone": "Asia/Shanghai", - }, - }, - }, - }, - { - "$group": bson.M{ - "_id": "$date", - }, - }, - { - "_id": nil, - "days": bson.M{"$sum": 1}, - }, - } - - var resData []ResData - if err := c.Pipe(pipeline).All(&resData); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return 0, err - } - - return resData[0].Days, nil -} - -func UpdateAction(id bson.ObjectId, item Action) error { - s, c := database.GetCol("actions") - defer s.Close() - - var result Action - if err := c.FindId(id).One(&result); err != nil { - debug.PrintStack() - return err - } - - if err := item.Save(); err != nil { - return err - } - return nil -} - -func RemoveAction(id bson.ObjectId) error { - s, c := database.GetCol("actions") - defer s.Close() - - var result Action - if err := c.FindId(id).One(&result); err != nil { - return err - } - - if err := c.RemoveId(id); err != nil { - return err - } - - return nil -} diff --git a/backend/model/base.go b/backend/model/base.go deleted file mode 100644 index 70d031f3..00000000 --- a/backend/model/base.go +++ /dev/null @@ -1,12 +0,0 @@ -package model - -type Base struct { -} - -func (b *Base) Save() error { - return nil -} - -func (b *Base) Delete() error { - return nil -} diff --git a/backend/model/challenge.go b/backend/model/challenge.go deleted file mode 100644 index 09f4db89..00000000 --- a/backend/model/challenge.go +++ /dev/null @@ -1,187 +0,0 @@ -package model - -import ( - "crawlab/database" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "runtime/debug" - "time" -) - -type Challenge struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - Name string `json:"name" bson:"name"` - TitleCn string `json:"title_cn" bson:"title_cn"` - TitleEn string `json:"title_en" bson:"title_en"` - DescriptionCn string `json:"description_cn" bson:"description_cn"` - DescriptionEn string `json:"description_en" bson:"description_en"` - Difficulty int `json:"difficulty" bson:"difficulty"` - Path string `json:"path" bson:"path"` - - // 前端展示 - Achieved bool `json:"achieved" bson:"achieved"` - - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -func (ch *Challenge) Save() error { - s, c := database.GetCol("challenges") - defer s.Close() - - ch.UpdateTs = time.Now() - - if err := c.UpdateId(ch.Id, ch); err != nil { - debug.PrintStack() - return err - } - return nil -} - -func (ch *Challenge) Add() error { - s, c := database.GetCol("challenges") - defer s.Close() - - ch.Id = bson.NewObjectId() - ch.UpdateTs = time.Now() - ch.CreateTs = time.Now() - if err := c.Insert(ch); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func GetChallenge(id bson.ObjectId) (Challenge, error) { - s, c := database.GetCol("challenges") - defer s.Close() - - var ch Challenge - if err := c.Find(bson.M{"_id": id}).One(&ch); err != nil { - if err != mgo.ErrNotFound { - log.Errorf(err.Error()) - debug.PrintStack() - return ch, err - } - } - - return ch, nil -} - -func GetChallengeByName(name string) (Challenge, error) { - s, c := database.GetCol("challenges") - defer s.Close() - - var ch Challenge - if err := c.Find(bson.M{"name": name}).One(&ch); err != nil { - if err != mgo.ErrNotFound { - log.Errorf(err.Error()) - debug.PrintStack() - return ch, err - } - } - - return ch, nil -} - -func GetChallengeList(filter interface{}, skip int, limit int, sortKey string) ([]Challenge, error) { - s, c := database.GetCol("challenges") - defer s.Close() - - var challenges []Challenge - if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&challenges); err != nil { - debug.PrintStack() - return challenges, err - } - - return challenges, nil -} - -func GetChallengeListWithAchieved(filter interface{}, skip int, limit int, sortKey string, uid bson.ObjectId) ([]Challenge, error) { - challenges, err := GetChallengeList(filter, skip, limit, sortKey) - if err != nil { - return challenges, err - } - - for i, ch := range challenges { - query := bson.M{ - "user_id": uid, - "challenge_id": ch.Id, - } - - list, err := GetChallengeAchievementList(query, 0, 1, "-_id") - if err != nil { - continue - } - - challenges[i].Achieved = len(list) > 0 - } - - return challenges, nil -} - -func GetChallengeListTotal(filter interface{}) (int, error) { - s, c := database.GetCol("challenges") - defer s.Close() - - var result int - result, err := c.Find(filter).Count() - if err != nil { - return result, err - } - return result, nil -} - -type ChallengeAchievement struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - ChallengeId bson.ObjectId `json:"challenge_id" bson:"challenge_id"` - UserId bson.ObjectId `json:"user_id" bson:"user_id"` - - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -func (ca *ChallengeAchievement) Save() error { - s, c := database.GetCol("challenges_achievements") - defer s.Close() - - ca.UpdateTs = time.Now() - - if err := c.UpdateId(ca.Id, c); err != nil { - debug.PrintStack() - return err - } - return nil -} - -func (ca *ChallengeAchievement) Add() error { - s, c := database.GetCol("challenges_achievements") - defer s.Close() - - ca.Id = bson.NewObjectId() - ca.UpdateTs = time.Now() - ca.CreateTs = time.Now() - if err := c.Insert(ca); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func GetChallengeAchievementList(filter interface{}, skip int, limit int, sortKey string) ([]ChallengeAchievement, error) { - s, c := database.GetCol("challenges_achievements") - defer s.Close() - - var challengeAchievements []ChallengeAchievement - if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&challengeAchievements); err != nil { - debug.PrintStack() - return challengeAchievements, err - } - - return challengeAchievements, nil -} diff --git a/backend/model/config_spider/common.go b/backend/model/config_spider/common.go deleted file mode 100644 index 4d244fe1..00000000 --- a/backend/model/config_spider/common.go +++ /dev/null @@ -1,26 +0,0 @@ -package config_spider - -import "crawlab/entity" - -func GetAllFields(data entity.ConfigSpiderData) []entity.Field { - var fields []entity.Field - for _, stage := range data.Stages { - for _, field := range stage.Fields { - fields = append(fields, field) - } - } - return fields -} - -func GetStartStageName(data entity.ConfigSpiderData) string { - // 如果 start_stage 设置了且在 stages 里,则返回 - if data.StartStage != "" { - return data.StartStage - } - - // 否则返回第一个 stage - for _, stage := range data.Stages { - return stage.Name - } - return "" -} diff --git a/backend/model/config_spider/scrapy.go b/backend/model/config_spider/scrapy.go deleted file mode 100644 index cbdf4a09..00000000 --- a/backend/model/config_spider/scrapy.go +++ /dev/null @@ -1,263 +0,0 @@ -package config_spider - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/model" - "crawlab/utils" - "errors" - "fmt" - "path/filepath" -) - -type ScrapyGenerator struct { - Spider model.Spider - ConfigData entity.ConfigSpiderData -} - -// 生成爬虫文件 -func (g ScrapyGenerator) Generate() error { - // 生成 items.py - if err := g.ProcessItems(); err != nil { - return err - } - - // 生成 spider.py - if err := g.ProcessSpider(); err != nil { - return err - } - return nil -} - -// 生成 items.py -func (g ScrapyGenerator) ProcessItems() error { - // 待处理文件名 - src := g.Spider.Src - filePath := filepath.Join(src, "config_spider", "items.py") - - // 获取所有字段 - fields := g.GetAllFields() - - // 字段名列表(包含默认字段名) - fieldNames := []string{ - "_id", - "task_id", - "ts", - } - - // 加入字段 - for _, field := range fields { - fieldNames = append(fieldNames, field.Name) - } - - // 将字段名转化为python代码 - str := "" - for _, fieldName := range fieldNames { - line := g.PadCode(fmt.Sprintf("%s = scrapy.Field()", fieldName), 1) - str += line - } - - // 将占位符替换为代码 - if err := utils.SetFileVariable(filePath, constants.AnchorItems, str); err != nil { - return err - } - - return nil -} - -// 生成 spider.py -func (g ScrapyGenerator) ProcessSpider() error { - // 待处理文件名 - src := g.Spider.Src - filePath := filepath.Join(src, "config_spider", "spiders", "spider.py") - - // 替换 start_stage - if err := utils.SetFileVariable(filePath, constants.AnchorStartStage, "parse_"+GetStartStageName(g.ConfigData)); err != nil { - return err - } - - // 替换 start_url - if err := utils.SetFileVariable(filePath, constants.AnchorStartUrl, g.ConfigData.StartUrl); err != nil { - return err - } - - // 替换 parsers - strParser := "" - for _, stage := range g.ConfigData.Stages { - stageName := stage.Name - stageStr := g.GetParserString(stageName, stage) - strParser += stageStr - } - if err := utils.SetFileVariable(filePath, constants.AnchorParsers, strParser); err != nil { - return err - } - - return nil -} - -func (g ScrapyGenerator) GetParserString(stageName string, stage entity.Stage) string { - // 构造函数定义行 - strDef := g.PadCode(fmt.Sprintf("def parse_%s(self, response):", stageName), 1) - - strParse := "" - if stage.IsList { - // 列表逻辑 - strParse = g.GetListParserString(stageName, stage) - } else { - // 非列表逻辑 - strParse = g.GetNonListParserString(stageName, stage) - } - - // 构造 - str := fmt.Sprintf(`%s%s`, strDef, strParse) - - return str -} - -func (g ScrapyGenerator) PadCode(str string, num int) string { - res := "" - for i := 0; i < num; i++ { - res += " " - } - res += str - res += "\n" - return res -} - -func (g ScrapyGenerator) GetNonListParserString(stageName string, stage entity.Stage) string { - str := "" - - // 获取或构造item - str += g.PadCode("item = Item() if response.meta.get('item') is None else response.meta.get('item')", 2) - - // 遍历字段列表 - for _, f := range stage.Fields { - line := fmt.Sprintf(`item['%s'] = response.%s.extract_first()`, f.Name, g.GetExtractStringFromField(f)) - line = g.PadCode(line, 2) - str += line - } - - // next stage 字段 - if f, err := g.GetNextStageField(stage); err == nil { - // 如果找到 next stage 字段,进行下一个回调 - str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url="get_real_url(response, item['%s'])", callback=self.parse_%s, meta={'item': item})`, f.Name, f.NextStage), 2) - } else { - // 如果没找到 next stage 字段,返回 item - str += g.PadCode(fmt.Sprintf(`yield item`), 2) - } - - // 加入末尾换行 - str += g.PadCode("", 0) - - return str -} - -func (g ScrapyGenerator) GetListParserString(stageName string, stage entity.Stage) string { - str := "" - - // 获取前一个 stage 的 item - str += g.PadCode(`prev_item = response.meta.get('item')`, 2) - - // for 循环遍历列表 - str += g.PadCode(fmt.Sprintf(`for elem in response.%s:`, g.GetListString(stage)), 2) - - // 构造item - str += g.PadCode(`item = Item()`, 3) - - // 遍历字段列表 - for _, f := range stage.Fields { - line := fmt.Sprintf(`item['%s'] = elem.%s.extract_first()`, f.Name, g.GetExtractStringFromField(f)) - line = g.PadCode(line, 3) - str += line - } - - // 把前一个 stage 的 item 值赋给当前 item - str += g.PadCode(`if prev_item is not None:`, 3) - str += g.PadCode(`for key, value in prev_item.items():`, 4) - str += g.PadCode(`item[key] = value`, 5) - - // next stage 字段 - if f, err := g.GetNextStageField(stage); err == nil { - // 如果 url 为空,则不进入下一个 stage - str += g.PadCode(fmt.Sprintf(`if not item['%s']:`, f.Name), 3) - str += g.PadCode(`continue`, 4) - - // 如果找到 next stage 字段,进行下一个回调 - str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, item['%s']), callback=self.parse_%s, meta={'item': item})`, f.Name, f.NextStage), 3) - } else { - // 如果没找到 next stage 字段,返回 item - str += g.PadCode(fmt.Sprintf(`yield item`), 3) - } - - // 分页 - if stage.PageCss != "" || stage.PageXpath != "" { - str += g.PadCode(fmt.Sprintf(`next_url = response.%s.extract_first()`, g.GetExtractStringFromStage(stage)), 2) - str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_%s, meta={'item': prev_item})`, stageName), 2) - } - - // 加入末尾换行 - str += g.PadCode("", 0) - - return str -} - -// 获取所有字段 -func (g ScrapyGenerator) GetAllFields() []entity.Field { - return GetAllFields(g.ConfigData) -} - -// 获取包含 next stage 的字段 -func (g ScrapyGenerator) GetNextStageField(stage entity.Stage) (entity.Field, error) { - for _, field := range stage.Fields { - if field.NextStage != "" { - return field, nil - } - } - return entity.Field{}, errors.New("cannot find next stage field") -} - -func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string { - if f.Css != "" { - // 如果为CSS - if f.Attr == "" { - // 文本 - return fmt.Sprintf(`css('%s::text')`, f.Css) - } else { - // 属性 - return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr) - } - } else { - // 如果为XPath - if f.Attr == "" { - // 文本 - return fmt.Sprintf(`xpath('string(%s)')`, f.Xpath) - } else { - // 属性 - return fmt.Sprintf(`xpath('%s/@%s')`, f.Xpath, f.Attr) - } - } -} - -func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string { - // 分页元素属性,默认为 href - pageAttr := "href" - if stage.PageAttr != "" { - pageAttr = stage.PageAttr - } - - if stage.PageCss != "" { - // 如果为CSS - return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr) - } else { - // 如果为XPath - return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr) - } -} - -func (g ScrapyGenerator) GetListString(stage entity.Stage) string { - if stage.ListCss != "" { - return fmt.Sprintf(`css('%s')`, stage.ListCss) - } else { - return fmt.Sprintf(`xpath('%s')`, stage.ListXpath) - } -} diff --git a/backend/model/file.go b/backend/model/file.go deleted file mode 100644 index a2ad34eb..00000000 --- a/backend/model/file.go +++ /dev/null @@ -1,78 +0,0 @@ -package model - -import ( - "crawlab/database" - "crawlab/utils" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "os" - "runtime/debug" - "time" -) - -type GridFs struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - ChunkSize int32 `json:"chunk_size" bson:"chunkSize"` - UploadDate time.Time `json:"upload_date" bson:"uploadDate"` - Length int32 `json:"length" bson:"length"` - Md5 string `json:"md_5" bson:"md5"` - Filename string `json:"filename" bson:"filename"` -} - -type File struct { - Name string `json:"name"` - Path string `json:"path"` - RelativePath string `json:"relative_path"` - IsDir bool `json:"is_dir"` - Size int64 `json:"size"` - Children []File `json:"children"` - Label string `json:"label"` -} - -func (f *GridFs) Remove() { - s, gf := database.GetGridFs("files") - defer s.Close() - if err := gf.RemoveId(f.Id); err != nil { - log.Errorf("remove file id error: %s, id: %s", err.Error(), f.Id.Hex()) - debug.PrintStack() - } -} - -func GetAllGridFs() []*GridFs { - s, gf := database.GetGridFs("files") - defer s.Close() - - var files []*GridFs - if err := gf.Find(nil).All(&files); err != nil { - log.Errorf("get all files error: {}", err.Error()) - debug.PrintStack() - return nil - } - return files -} - -func GetGridFs(id bson.ObjectId) *GridFs { - s, gf := database.GetGridFs("files") - defer s.Close() - - var gfFile GridFs - err := gf.Find(bson.M{"_id": id}).One(&gfFile) - if err != nil { - log.Errorf("get gf file error: %s, file_id: %s", err.Error(), id.Hex()) - debug.PrintStack() - return nil - } - return &gfFile -} - -func RemoveFile(path string) error { - if !utils.Exists(path) { - log.Info("file not found: " + path) - debug.PrintStack() - return nil - } - if err := os.RemoveAll(path); err != nil { - return err - } - return nil -} diff --git a/backend/model/log.go b/backend/model/log.go deleted file mode 100644 index fecf7def..00000000 --- a/backend/model/log.go +++ /dev/null @@ -1,167 +0,0 @@ -package model - -import ( - "crawlab/database" - "crawlab/utils" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "os" - "runtime/debug" - "time" -) - -type LogItem struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - Message string `json:"msg" bson:"msg"` - TaskId string `json:"task_id" bson:"task_id"` - Seq int64 `json:"seq" bson:"seq"` - Ts time.Time `json:"ts" bson:"ts"` - ExpireTs time.Time `json:"expire_ts" bson:"expire_ts"` -} - -type ErrorLogItem struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - TaskId string `json:"task_id" bson:"task_id"` - Message string `json:"msg" bson:"msg"` - LogId bson.ObjectId `json:"log_id" bson:"log_id"` - Seq int64 `json:"seq" bson:"seq"` - Ts time.Time `json:"ts" bson:"ts"` - ExpireTs time.Time `json:"expire_ts" bson:"expire_ts"` -} - -// 获取本地日志 -func GetLocalLog(logPath string) (fileBytes []byte, err error) { - - f, err := os.Open(logPath) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return nil, err - } - fi, err := f.Stat() - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return nil, err - } - defer utils.Close(f) - - const bufLen = 2 * 1024 * 1024 - logBuf := make([]byte, bufLen) - - off := int64(0) - if fi.Size() > int64(len(logBuf)) { - off = fi.Size() - int64(len(logBuf)) - } - n, err := f.ReadAt(logBuf, off) - - //到文件结尾会有EOF标识 - if err != nil && err.Error() != "EOF" { - log.Error(err.Error()) - debug.PrintStack() - return nil, err - } - logBuf = logBuf[:n] - return logBuf, nil -} - -func AddLogItem(l LogItem) error { - s, c := database.GetCol("logs") - defer s.Close() - if err := c.Insert(l); err != nil { - log.Errorf("insert log error: " + err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func AddLogItems(ls []LogItem) error { - if len(ls) == 0 { - return nil - } - s, c := database.GetCol("logs") - defer s.Close() - var docs []interface{} - for _, l := range ls { - docs = append(docs, l) - } - if err := c.Insert(docs...); err != nil { - log.Errorf("insert log error: " + err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func AddErrorLogItem(e ErrorLogItem) error { - s, c := database.GetCol("error_logs") - defer s.Close() - var l LogItem - err := c.FindId(bson.M{"log_id": e.LogId}).One(&l) - if err != nil && err == mgo.ErrNotFound { - if err := c.Insert(e); err != nil { - log.Errorf("insert log error: " + err.Error()) - debug.PrintStack() - return err - } - } - return nil -} - -func GetLogItemList(query bson.M, keyword string, skip int, limit int, sortStr string) ([]LogItem, error) { - s, c := database.GetCol("logs") - defer s.Close() - - filter := query - - var logItems []LogItem - if keyword == "" { - filter["seq"] = bson.M{ - "$gte": skip, - "$lt": skip + limit, - } - if err := c.Find(filter).Sort(sortStr).All(&logItems); err != nil { - debug.PrintStack() - return logItems, err - } - } else { - filter["msg"] = bson.M{ - "$regex": bson.RegEx{ - Pattern: keyword, - Options: "i", - }, - } - if err := c.Find(filter).Sort(sortStr).Skip(skip).Limit(limit).All(&logItems); err != nil { - debug.PrintStack() - return logItems, err - } - } - - return logItems, nil -} - -func GetLogItemTotal(query bson.M, keyword string) (int, error) { - s, c := database.GetCol("logs") - defer s.Close() - - filter := query - - if keyword != "" { - filter["msg"] = bson.M{ - "$regex": bson.RegEx{ - Pattern: keyword, - Options: "i", - }, - } - } - - total, err := c.Find(filter).Count() - if err != nil { - debug.PrintStack() - return total, err - } - - return total, nil -} diff --git a/backend/model/market/repo.go b/backend/model/market/repo.go deleted file mode 100644 index 33697879..00000000 --- a/backend/model/market/repo.go +++ /dev/null @@ -1,4 +0,0 @@ -package market - -type Repo struct { -} diff --git a/backend/model/node.go b/backend/model/node.go deleted file mode 100644 index 2b7f193a..00000000 --- a/backend/model/node.go +++ /dev/null @@ -1,232 +0,0 @@ -package model - -import ( - "crawlab/constants" - "crawlab/database" - "errors" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "github.com/spf13/viper" - "runtime/debug" - "time" -) - -type Node struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - Name string `json:"name" bson:"name"` - Status string `json:"status" bson:"status"` - Ip string `json:"ip" bson:"ip"` - Port string `json:"port" bson:"port"` - Mac string `json:"mac" bson:"mac"` - Hostname string `json:"hostname" bson:"hostname"` - Description string `json:"description" bson:"description"` - // 用于唯一标识节点,可能是mac地址,可能是ip地址 - Key string `json:"key" bson:"key"` - - // 前端展示 - IsMaster bool `json:"is_master" bson:"is_master"` - - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTsUnix int64 `json:"update_ts_unix" bson:"update_ts_unix"` -} - -const ( - Yes = "Y" -) - -// 当前节点是否为主节点 -func IsMaster() bool { - return viper.GetString("server.master") == Yes -} - -func (n *Node) Save() error { - s, c := database.GetCol("nodes") - defer s.Close() - n.UpdateTs = time.Now() - if err := c.UpdateId(n.Id, n); err != nil { - return err - } - return nil -} - -func (n *Node) Add() error { - s, c := database.GetCol("nodes") - defer s.Close() - n.Id = bson.NewObjectId() - n.UpdateTs = time.Now() - n.UpdateTsUnix = time.Now().Unix() - n.CreateTs = time.Now() - if err := c.Insert(&n); err != nil { - debug.PrintStack() - return err - } - return nil -} - -func (n *Node) Delete() error { - s, c := database.GetCol("nodes") - defer s.Close() - if err := c.RemoveId(n.Id); err != nil { - debug.PrintStack() - return err - } - return nil -} - -func (n *Node) GetTasks() ([]Task, error) { - tasks, err := GetTaskList(bson.M{"node_id": n.Id}, 0, 10, "-create_ts") - //tasks, err := GetTaskList(nil, 0, 10, "-create_ts") - if err != nil { - debug.PrintStack() - return []Task{}, err - } - - return tasks, nil -} - -// 节点列表 -func GetNodeList(filter interface{}) ([]Node, error) { - s, c := database.GetCol("nodes") - defer s.Close() - - var results []Node - if err := c.Find(filter).All(&results); err != nil { - log.Error("get node list error: " + err.Error()) - debug.PrintStack() - return results, err - } - return results, nil -} - -// 节点信息 -func GetNode(id bson.ObjectId) (Node, error) { - var node Node - - if id.Hex() == "" { - log.Infof("id is empty") - debug.PrintStack() - return node, errors.New("id is empty") - } - - s, c := database.GetCol("nodes") - defer s.Close() - - if err := c.FindId(id).One(&node); err != nil { - //log.Errorf("get node error: %s, id: %s", err.Error(), id.Hex()) - //debug.PrintStack() - return node, err - } - return node, nil -} - -// 节点信息 -func GetNodeByKey(key string) (Node, error) { - s, c := database.GetCol("nodes") - defer s.Close() - - var node Node - if err := c.Find(bson.M{"key": key}).One(&node); err != nil { - if err != mgo.ErrNotFound { - log.Errorf(err.Error()) - debug.PrintStack() - } - return node, err - } - return node, nil -} - -// 更新节点 -func UpdateNode(id bson.ObjectId, item Node) error { - s, c := database.GetCol("nodes") - defer s.Close() - - var node Node - if err := c.FindId(id).One(&node); err != nil { - return err - } - - if err := item.Save(); err != nil { - return err - } - return nil -} - -// 任务列表 -func GetNodeTaskList(id bson.ObjectId) ([]Task, error) { - node, err := GetNode(id) - if err != nil { - return []Task{}, err - } - tasks, err := node.GetTasks() - if err != nil { - return []Task{}, err - } - return tasks, nil -} - -// 节点数 -func GetNodeCount(query interface{}) (int, error) { - s, c := database.GetCol("nodes") - defer s.Close() - - count, err := c.Find(query).Count() - if err != nil { - return 0, err - } - - return count, nil -} - -// 根据redis的key值,重置node节点为offline -func ResetNodeStatusToOffline(list []string) { - nodes, _ := GetNodeList(nil) - for _, node := range nodes { - hasNode := false - for _, key := range list { - if key == node.Key { - hasNode = true - break - } - } - if !hasNode || node.Status == "" { - node.Status = constants.StatusOffline - if err := node.Save(); err != nil { - log.Errorf(err.Error()) - return - } - continue - } - } -} - -func UpdateMasterNodeInfo(key string, ip string, mac string, hostname string) error { - s, c := database.GetCol("nodes") - defer s.Close() - c.UpdateAll(bson.M{ - "is_master": true, - }, bson.M{ - "is_master": false, - }) - _, err := c.Upsert(bson.M{ - "key": key, - }, bson.M{ - "$set": bson.M{ - "ip": ip, - "port": "8000", - "mac": mac, - "hostname": hostname, - "is_master": true, - "update_ts": time.Now(), - "update_ts_unix": time.Now().Unix(), - }, - "$setOnInsert": bson.M{ - "key": key, - "name": key, - "create_ts": time.Now(), - "_id": bson.NewObjectId(), - }, - }) - return err -} diff --git a/backend/model/node_test.go b/backend/model/node_test.go deleted file mode 100644 index ba3f4aaa..00000000 --- a/backend/model/node_test.go +++ /dev/null @@ -1,50 +0,0 @@ -package model - -import ( - "crawlab/config" - "crawlab/constants" - "crawlab/database" - "github.com/apex/log" - . "github.com/smartystreets/goconvey/convey" - "runtime/debug" - "testing" -) - -func TestAddNode(t *testing.T) { - Convey("Test AddNode", t, func() { - if err := config.InitConfig("../conf/config.yml"); err != nil { - log.Error("init config error:" + err.Error()) - panic(err) - } - log.Info("初始化配置成功") - - // 初始化Mongodb数据库 - if err := database.InitMongo(); err != nil { - log.Error("init mongodb error:" + err.Error()) - debug.PrintStack() - panic(err) - } - log.Info("初始化Mongodb数据库成功") - - // 初始化Redis数据库 - if err := database.InitRedis(); err != nil { - log.Error("init redis error:" + err.Error()) - debug.PrintStack() - panic(err) - } - - var node = Node{ - Key: "c4:b3:01:bd:b5:e7", - Name: "10.27.238.101", - Ip: "10.27.238.101", - Port: "8000", - Mac: "c4:b3:01:bd:b5:e7", - Status: constants.StatusOnline, - IsMaster: true, - } - if err := node.Add(); err != nil { - log.Error("add node error:" + err.Error()) - panic(err) - } - }) -} diff --git a/backend/model/project.go b/backend/model/project.go deleted file mode 100644 index 2889d6aa..00000000 --- a/backend/model/project.go +++ /dev/null @@ -1,167 +0,0 @@ -package model - -import ( - "crawlab/constants" - "crawlab/database" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "runtime/debug" - "time" -) - -type Project struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - Name string `json:"name" bson:"name"` - Description string `json:"description" bson:"description"` - Tags []string `json:"tags" bson:"tags"` - - // 前端展示 - Spiders []Spider `json:"spiders" bson:"spiders"` - Username string `json:"username" bson:"username"` - - UserId bson.ObjectId `json:"user_id" bson:"user_id"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -func (p *Project) Save() error { - s, c := database.GetCol("projects") - defer s.Close() - - p.UpdateTs = time.Now() - - if err := c.UpdateId(p.Id, p); err != nil { - debug.PrintStack() - return err - } - return nil -} - -func (p *Project) Add() error { - s, c := database.GetCol("projects") - defer s.Close() - - p.Id = bson.NewObjectId() - p.UpdateTs = time.Now() - p.CreateTs = time.Now() - if err := c.Insert(p); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func (p *Project) GetSpiders() ([]Spider, error) { - s, c := database.GetCol("spiders") - defer s.Close() - - var query interface{} - if p.Id.Hex() == constants.ObjectIdNull { - query = bson.M{ - "$or": []bson.M{ - {"project_id": p.Id}, - {"project_id": bson.M{"$exists": false}}, - }, - } - } else { - query = bson.M{"project_id": p.Id} - } - - var spiders []Spider - if err := c.Find(query).All(&spiders); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return spiders, err - } - - return spiders, nil -} - -func GetProject(id bson.ObjectId) (Project, error) { - s, c := database.GetCol("projects") - defer s.Close() - var p Project - if err := c.Find(bson.M{"_id": id}).One(&p); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return p, err - } - return p, nil -} - -func GetProjectList(filter interface{}, sortKey string) ([]Project, error) { - s, c := database.GetCol("projects") - defer s.Close() - - var projects []Project - if err := c.Find(filter).Sort(sortKey).All(&projects); err != nil { - debug.PrintStack() - return projects, err - } - - for i, p := range projects { - // 获取用户名称 - user, _ := GetUser(p.UserId) - projects[i].Username = user.Username - } - return projects, nil -} - -func GetProjectListTotal(filter interface{}) (int, error) { - s, c := database.GetCol("projects") - defer s.Close() - - var result int - result, err := c.Find(filter).Count() - if err != nil { - return result, err - } - return result, nil -} - -func UpdateProject(id bson.ObjectId, item Project) error { - s, c := database.GetCol("projects") - defer s.Close() - - var result Project - if err := c.FindId(id).One(&result); err != nil { - debug.PrintStack() - return err - } - - if err := item.Save(); err != nil { - return err - } - return nil -} - -func RemoveProject(id bson.ObjectId) error { - s, c := database.GetCol("projects") - defer s.Close() - - var result User - if err := c.FindId(id).One(&result); err != nil { - return err - } - - if err := c.RemoveId(id); err != nil { - return err - } - - return nil -} - -func GetProjectCount(filter interface{}) (int, error) { - s, c := database.GetCol("projects") - defer s.Close() - - count, err := c.Find(filter).Count() - if err != nil { - return 0, err - } - - return count, nil -} - diff --git a/backend/model/schedule.go b/backend/model/schedule.go deleted file mode 100644 index 5d0da286..00000000 --- a/backend/model/schedule.go +++ /dev/null @@ -1,177 +0,0 @@ -package model - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/lib/cron" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "runtime/debug" - "time" -) - -type Schedule struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - Name string `json:"name" bson:"name"` - Description string `json:"description" bson:"description"` - SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"` - Cron string `json:"cron" bson:"cron"` - EntryId cron.EntryID `json:"entry_id" bson:"entry_id"` - Param string `json:"param" bson:"param"` - RunType string `json:"run_type" bson:"run_type"` - NodeIds []bson.ObjectId `json:"node_ids" bson:"node_ids"` - Status string `json:"status" bson:"status"` - Enabled bool `json:"enabled" bson:"enabled"` - UserId bson.ObjectId `json:"user_id" bson:"user_id"` - ScrapySpider string `json:"scrapy_spider" bson:"scrapy_spider"` - ScrapyLogLevel string `json:"scrapy_log_level" bson:"scrapy_log_level"` - - // 前端展示 - SpiderName string `json:"spider_name" bson:"spider_name"` - Username string `json:"user_name" bson:"user_name"` - Nodes []Node `json:"nodes" bson:"nodes"` - Message string `json:"message" bson:"message"` - - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -func (sch *Schedule) Save() error { - s, c := database.GetCol("schedules") - defer s.Close() - sch.UpdateTs = time.Now() - if err := c.UpdateId(sch.Id, sch); err != nil { - return err - } - return nil -} - -func (sch *Schedule) Delete() error { - s, c := database.GetCol("schedules") - defer s.Close() - return c.RemoveId(sch.Id) -} - -func GetScheduleList(filter interface{}) ([]Schedule, error) { - s, c := database.GetCol("schedules") - defer s.Close() - - var schedules []Schedule - if err := c.Find(filter).All(&schedules); err != nil { - return schedules, err - } - - var schs []Schedule - for _, schedule := range schedules { - // 获取节点名称 - schedule.Nodes = []Node{} - if schedule.RunType == constants.RunTypeSelectedNodes { - for _, nodeId := range schedule.NodeIds { - // 选择单一节点 - node, err := GetNode(nodeId) - if err != nil { - continue - } - schedule.Nodes = append(schedule.Nodes, node) - } - } - - // 获取爬虫名称 - spider, err := GetSpider(schedule.SpiderId) - if err != nil { - log.Errorf("get spider by id: %s, error: %s", schedule.SpiderId.Hex(), err.Error()) - schedule.Status = constants.ScheduleStatusError - if err == mgo.ErrNotFound { - schedule.Message = constants.ScheduleStatusErrorNotFoundSpider - } else { - schedule.Message = err.Error() - } - } else { - schedule.SpiderName = spider.Name - } - - // 获取用户名称 - user, _ := GetUser(schedule.UserId) - schedule.Username = user.Username - - schs = append(schs, schedule) - } - return schs, nil -} - -func GetSchedule(id bson.ObjectId) (Schedule, error) { - s, c := database.GetCol("schedules") - defer s.Close() - - var schedule Schedule - if err := c.FindId(id).One(&schedule); err != nil { - return schedule, err - } - - // 获取用户名称 - user, _ := GetUser(schedule.UserId) - schedule.Username = user.Username - - return schedule, nil -} - -func UpdateSchedule(id bson.ObjectId, item Schedule) error { - s, c := database.GetCol("schedules") - defer s.Close() - - var result Schedule - if err := c.FindId(id).One(&result); err != nil { - return err - } - - item.UpdateTs = time.Now() - if err := item.Save(); err != nil { - return err - } - return nil -} - -func AddSchedule(item Schedule) error { - s, c := database.GetCol("schedules") - defer s.Close() - - item.Id = bson.NewObjectId() - item.CreateTs = time.Now() - item.UpdateTs = time.Now() - - if err := c.Insert(&item); err != nil { - debug.PrintStack() - log.Errorf(err.Error()) - return err - } - return nil -} - -func RemoveSchedule(id bson.ObjectId) error { - s, c := database.GetCol("schedules") - defer s.Close() - - var result Schedule - if err := c.FindId(id).One(&result); err != nil { - return err - } - - if err := c.RemoveId(id); err != nil { - return err - } - - return nil -} - -func GetScheduleCount(filter interface{}) (int, error) { - s, c := database.GetCol("schedules") - defer s.Close() - - count, err := c.Find(filter).Count() - if err != nil { - return 0, err - } - - return count, nil -} diff --git a/backend/model/setting.go b/backend/model/setting.go deleted file mode 100644 index 3b546f68..00000000 --- a/backend/model/setting.go +++ /dev/null @@ -1,45 +0,0 @@ -package model - -import ( - "crawlab/database" - "github.com/globalsign/mgo/bson" - "time" -) - -type Setting struct { - Keyword string - Document bson.Raw -} - -func GetRawSetting(keyword string, pointer interface{}) error { - s, col := database.GetCol("settings") - defer s.Close() - var setting Setting - err := col.Find(bson.M{"keyword": keyword}).One(&setting) - if err != nil { - return err - } - return setting.Document.Unmarshal(pointer) -} - -type DocumentMeta struct { - DocumentVersion int - DocStructVersion int - UpdateTime time.Time - CreateTime time.Time - DeleteTime time.Time -} - -//demo -type SecuritySetting struct { - EnableRegister bool - EnableInvitation bool - DocumentMeta `bson:"inline" json:"inline"` -} - -func GetSecuritySetting() (SecuritySetting, error) { - var app SecuritySetting - err := GetRawSetting("security", &app) - return app, err - -} diff --git a/backend/model/spider.go b/backend/model/spider.go deleted file mode 100644 index d6ff72d4..00000000 --- a/backend/model/spider.go +++ /dev/null @@ -1,414 +0,0 @@ -package model - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/utils" - "errors" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "gopkg.in/yaml.v2" - "io/ioutil" - "path/filepath" - "runtime/debug" - "time" -) - -type Env struct { - Name string `json:"name" bson:"name"` - Value string `json:"value" bson:"value"` -} - -type Spider struct { - Id bson.ObjectId `json:"_id" bson:"_id"` // 爬虫ID - Name string `json:"name" bson:"name"` // 爬虫名称(唯一) - DisplayName string `json:"display_name" bson:"display_name"` // 爬虫显示名称 - Type string `json:"type" bson:"type"` // 爬虫类别 - FileId bson.ObjectId `json:"file_id" bson:"file_id"` // GridFS文件ID - Col string `json:"col" bson:"col"` // 结果储存位置 - Site string `json:"site" bson:"site"` // 爬虫网站 - Envs []Env `json:"envs" bson:"envs"` // 环境变量 - Remark string `json:"remark" bson:"remark"` // 备注 - Src string `json:"src" bson:"src"` // 源码位置 - ProjectId bson.ObjectId `json:"project_id" bson:"project_id"` // 项目ID - IsPublic bool `json:"is_public" bson:"is_public"` // 是否公开 - - // 自定义爬虫 - Cmd string `json:"cmd" bson:"cmd"` // 执行命令 - - // Scrapy 爬虫(属于自定义爬虫) - IsScrapy bool `json:"is_scrapy" bson:"is_scrapy"` // 是否为 Scrapy 爬虫 - SpiderNames []string `json:"spider_names" bson:"spider_names"` // 爬虫名称列表 - - // 可配置爬虫 - Template string `json:"template" bson:"template"` // Spiderfile模版 - - // Git 设置 - IsGit bool `json:"is_git" bson:"is_git"` // 是否为 Git - GitUrl string `json:"git_url" bson:"git_url"` // Git URL - GitBranch string `json:"git_branch" bson:"git_branch"` // Git 分支 - GitHasCredential bool `json:"git_has_credential" bson:"git_has_credential"` // Git 是否加密 - GitUsername string `json:"git_username" bson:"git_username"` // Git 用户名 - GitPassword string `json:"git_password" bson:"git_password"` // Git 密码 - GitAutoSync bool `json:"git_auto_sync" bson:"git_auto_sync"` // Git 是否自动同步 - GitSyncFrequency string `json:"git_sync_frequency" bson:"git_sync_frequency"` // Git 同步频率 - GitSyncError string `json:"git_sync_error" bson:"git_sync_error"` // Git 同步错误 - - // 长任务 - IsLongTask bool `json:"is_long_task" bson:"is_long_task"` // 是否为长任务 - - // 去重 - IsDedup bool `json:"is_dedup" bson:"is_dedup"` // 是否去重 - DedupField string `json:"dedup_field" bson:"dedup_field"` // 去重字段 - DedupMethod string `json:"dedup_method" bson:"dedup_method"` // 去重方式 - - // Web Hook - IsWebHook bool `json:"is_web_hook" bson:"is_web_hook"` // 是否开启 Web Hook - WebHookUrl string `json:"web_hook_url" bson:"web_hook_url"` // Web Hook URL - - // 前端展示 - LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间 - LastStatus string `json:"last_status"` // 最后执行状态 - Config entity.ConfigSpiderData `json:"config"` // 可配置爬虫配置 - LatestTasks []Task `json:"latest_tasks"` // 最近任务列表 - Username string `json:"username"` // 用户名称 - ProjectName string `json:"project_name"` // 项目名称 - - // 时间 - UserId bson.ObjectId `json:"user_id" bson:"user_id"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -// 更新爬虫 -func (spider *Spider) Save() error { - s, c := database.GetCol("spiders") - defer s.Close() - - spider.UpdateTs = time.Now() - - // 兼容没有项目ID的爬虫 - if spider.ProjectId.Hex() == "" { - spider.ProjectId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - if err := c.UpdateId(spider.Id, spider); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - return nil -} - -// 新增爬虫 -func (spider *Spider) Add() error { - s, c := database.GetCol("spiders") - defer s.Close() - - spider.Id = bson.NewObjectId() - spider.CreateTs = time.Now() - spider.UpdateTs = time.Now() - - if !spider.ProjectId.Valid() { - spider.ProjectId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - if err := c.Insert(&spider); err != nil { - return err - } - return nil -} - -// 获取爬虫的任务 -func (spider *Spider) GetTasks() ([]Task, error) { - tasks, err := GetTaskList(bson.M{"spider_id": spider.Id}, 0, 10, "-create_ts") - if err != nil { - return tasks, err - } - return tasks, nil -} - -// 爬虫最新的任务 -func (spider *Spider) GetLastTask() (Task, error) { - tasks, err := GetTaskList(bson.M{"spider_id": spider.Id}, 0, 1, "-create_ts") - if err != nil { - return Task{}, err - } - if tasks == nil { - return Task{}, nil - } - return tasks[0], nil -} - -// 爬虫正在运行的任务 -func (spider *Spider) GetLatestTasks(latestN int) (tasks []Task, err error) { - tasks, err = GetTaskList(bson.M{"spider_id": spider.Id}, 0, latestN, "-create_ts") - if err != nil { - return tasks, err - } - if tasks == nil { - return tasks, err - } - return tasks, nil -} - -// 删除爬虫 -func (spider *Spider) Delete() error { - s, c := database.GetCol("spiders") - defer s.Close() - return c.RemoveId(spider.Id) -} - -// 获取爬虫列表 -func GetSpiderList(filter interface{}, skip int, limit int, sortStr string) ([]Spider, int, error) { - s, c := database.GetCol("spiders") - defer s.Close() - - // 获取爬虫列表 - var spiders []Spider - if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortStr).All(&spiders); err != nil { - debug.PrintStack() - return spiders, 0, err - } - - if spiders == nil { - spiders = []Spider{} - } - - // 遍历爬虫列表 - for i, spider := range spiders { - // 获取最后一次任务 - task, err := spider.GetLastTask() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } - - // 获取正在运行的爬虫 - latestTasks, err := spider.GetLatestTasks(50) // TODO: latestN 暂时写死,后面加入数据库 - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } - - // 获取用户 - var user User - if spider.UserId.Valid() && spider.UserId.Hex() != constants.ObjectIdNull { - user, err = GetUser(spider.UserId) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } - } - - // 获取项目 - var project Project - if spider.ProjectId.Valid() && spider.ProjectId.Hex() != constants.ObjectIdNull { - project, err = GetProject(spider.ProjectId) - if err != nil { - if err != mgo.ErrNotFound { - log.Errorf(err.Error()) - debug.PrintStack() - } - } - } - - // 赋值 - spiders[i].LastRunTs = task.CreateTs - spiders[i].LastStatus = task.Status - spiders[i].LatestTasks = latestTasks - spiders[i].Username = user.Username - spiders[i].ProjectName = project.Name - } - - count, _ := c.Find(filter).Count() - - return spiders, count, nil -} - -// 获取所有爬虫列表 -func GetSpiderAllList(filter interface{}) (spiders []Spider, err error) { - spiders, _, err = GetSpiderList(filter, 0, constants.Infinite, "_id") - if err != nil { - return spiders, err - } - return spiders, nil -} - -// 获取爬虫(根据FileId) -func GetSpiderByFileId(fileId bson.ObjectId) *Spider { - s, c := database.GetCol("spiders") - defer s.Close() - - var result *Spider - if err := c.Find(bson.M{"file_id": fileId}).One(&result); err != nil { - log.Errorf("get spider error: %s, file_id: %s", err.Error(), fileId.Hex()) - debug.PrintStack() - return nil - } - return result -} - -// 获取爬虫(根据名称) -func GetSpiderByName(name string) Spider { - s, c := database.GetCol("spiders") - defer s.Close() - - var spider Spider - if err := c.Find(bson.M{"name": name}).One(&spider); err != nil && err != mgo.ErrNotFound { - log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name) - //debug.PrintStack() - return spider - } - - // 获取用户 - var user User - if spider.UserId.Valid() { - user, _ = GetUser(spider.UserId) - } - spider.Username = user.Username - - return spider -} - -// 获取爬虫(根据ID) -func GetSpider(id bson.ObjectId) (Spider, error) { - s, c := database.GetCol("spiders") - defer s.Close() - - // 获取爬虫 - var spider Spider - if err := c.FindId(id).One(&spider); err != nil { - if err != mgo.ErrNotFound { - log.Errorf("get spider error: %s, id: %id", err.Error(), id.Hex()) - debug.PrintStack() - } - return spider, err - } - - // 如果为可配置爬虫,获取爬虫配置 - if spider.Type == constants.Configurable && utils.Exists(filepath.Join(spider.Src, "Spiderfile")) { - config, err := GetConfigSpiderData(spider) - if err != nil { - return spider, err - } - spider.Config = config - } - - // 获取用户名称 - var user User - if spider.UserId.Valid() { - user, _ = GetUser(spider.UserId) - } - spider.Username = user.Username - - return spider, nil -} - -// 更新爬虫 -func UpdateSpider(id bson.ObjectId, item Spider) error { - s, c := database.GetCol("spiders") - defer s.Close() - - var result Spider - if err := c.FindId(id).One(&result); err != nil { - debug.PrintStack() - return err - } - - if err := item.Save(); err != nil { - return err - } - return nil -} - -// 删除爬虫 -func RemoveSpider(id bson.ObjectId) error { - s, c := database.GetCol("spiders") - defer s.Close() - - var result Spider - if err := c.FindId(id).One(&result); err != nil { - log.Errorf("find spider error: %s, id:%s", err.Error(), id.Hex()) - debug.PrintStack() - return err - } - - if err := c.RemoveId(id); err != nil { - log.Errorf("remove spider error: %s, id:%s", err.Error(), id.Hex()) - debug.PrintStack() - return err - } - - // gf上的文件 - s, gf := database.GetGridFs("files") - defer s.Close() - if result.FileId.Hex() != constants.ObjectIdNull { - if err := gf.RemoveId(result.FileId); err != nil { - log.Error("remove file error, id:" + result.FileId.Hex()) - debug.PrintStack() - } - } - - return nil -} - -// 删除所有爬虫 -func RemoveAllSpider() error { - s, c := database.GetCol("spiders") - defer s.Close() - - var spiders []Spider - err := c.Find(nil).All(&spiders) - if err != nil { - log.Error("get all spiders error:" + err.Error()) - return err - } - for _, spider := range spiders { - if err := RemoveSpider(spider.Id); err != nil { - log.Error("remove spider error:" + err.Error()) - } - } - return nil -} - -// 获取爬虫总数 -func GetSpiderCount(filter interface{}) (int, error) { - s, c := database.GetCol("spiders") - defer s.Close() - - count, err := c.Find(filter).Count() - if err != nil { - return 0, err - } - return count, nil -} - -// 获取爬虫定时任务 -func GetConfigSpiderData(spider Spider) (entity.ConfigSpiderData, error) { - // 构造配置数据 - configData := entity.ConfigSpiderData{} - - // 校验爬虫类别 - if spider.Type != constants.Configurable { - return configData, errors.New("not a configurable spider") - } - - // Spiderfile 目录 - sfPath := filepath.Join(spider.Src, "Spiderfile") - - // 读取YAML文件 - yamlFile, err := ioutil.ReadFile(sfPath) - if err != nil { - return configData, err - } - - // 反序列化 - if err := yaml.Unmarshal(yamlFile, &configData); err != nil { - return configData, err - } - - return configData, nil -} diff --git a/backend/model/system.go b/backend/model/system.go deleted file mode 100644 index 5c2f5997..00000000 --- a/backend/model/system.go +++ /dev/null @@ -1,98 +0,0 @@ -package model - -import ( - "crawlab/entity" - "github.com/apex/log" - "io/ioutil" - "os" - "path/filepath" - "runtime" - "runtime/debug" - "strings" -) - -var executableNameMap = map[string]string{ - // python - "python": "Python", - "python2": "Python 2", - "python2.7": "Python 2.7", - "python3": "Python 3", - "python3.5": "Python 3.5", - "python3.6": "Python 3.6", - "python3.7": "Python 3.7", - "python3.8": "Python 3.8", - // java - "java": "Java", - // go - "go": "Go", - // node - "node": "NodeJS", - // php - "php": "PHP", - // windows command - "cmd": "Windows Command Prompt", - // linux shell - "sh": "Shell", - "bash": "bash", -} - -func GetLocalSystemInfo() (sysInfo entity.SystemInfo, err error) { - executables, err := GetExecutables() - if err != nil { - return sysInfo, err - } - hostname, err := os.Hostname() - if err != nil { - debug.PrintStack() - return sysInfo, err - } - - return entity.SystemInfo{ - ARCH: runtime.GOARCH, - OS: runtime.GOOS, - NumCpu: runtime.GOMAXPROCS(0), - Hostname: hostname, - Executables: executables, - }, nil -} - -func GetSystemEnv(key string) string { - return os.Getenv(key) -} - -func GetPathValues() (paths []string) { - pathEnv := GetSystemEnv("PATH") - return strings.Split(pathEnv, ":") -} - -func GetExecutables() (executables []entity.Executable, err error) { - pathValues := GetPathValues() - - cache := map[string]string{} - - for _, path := range pathValues { - fileList, err := ioutil.ReadDir(path) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - - for _, file := range fileList { - displayName := executableNameMap[file.Name()] - filePath := filepath.Join(path, file.Name()) - - if cache[filePath] == "" { - if displayName != "" { - executables = append(executables, entity.Executable{ - Path: filePath, - FileName: file.Name(), - DisplayName: displayName, - }) - } - cache[filePath] = filePath - } - } - } - return executables, nil -} diff --git a/backend/model/task.go b/backend/model/task.go deleted file mode 100644 index 789bfafb..00000000 --- a/backend/model/task.go +++ /dev/null @@ -1,534 +0,0 @@ -package model - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/utils" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "runtime/debug" - "time" -) - -type Task struct { - Id string `json:"_id" bson:"_id"` - SpiderId bson.ObjectId `json:"spider_id" bson:"spider_id"` - StartTs time.Time `json:"start_ts" bson:"start_ts"` - FinishTs time.Time `json:"finish_ts" bson:"finish_ts"` - Status string `json:"status" bson:"status"` - NodeId bson.ObjectId `json:"node_id" bson:"node_id"` - LogPath string `json:"log_path" bson:"log_path"` - Cmd string `json:"cmd" bson:"cmd"` - Param string `json:"param" bson:"param"` - Error string `json:"error" bson:"error"` - ResultCount int `json:"result_count" bson:"result_count"` - ErrorLogCount int `json:"error_log_count" bson:"error_log_count"` - WaitDuration float64 `json:"wait_duration" bson:"wait_duration"` - RuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"` - TotalDuration float64 `json:"total_duration" bson:"total_duration"` - Pid int `json:"pid" bson:"pid"` - RunType string `json:"run_type" bson:"run_type"` - ScheduleId bson.ObjectId `json:"schedule_id" bson:"schedule_id"` - Type string `json:"type" bson:"type"` - - // 前端数据 - SpiderName string `json:"spider_name"` - NodeName string `json:"node_name"` - Username string `json:"username"` - NodeIds []string `json:"node_ids"` - - UserId bson.ObjectId `json:"user_id" bson:"user_id"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -type TaskDailyItem struct { - Date string `json:"date" bson:"_id"` - TaskCount int `json:"task_count" bson:"task_count"` - AvgRuntimeDuration float64 `json:"avg_runtime_duration" bson:"avg_runtime_duration"` -} - -func (t *Task) GetSpider() (Spider, error) { - spider, err := GetSpider(t.SpiderId) - if err != nil { - return spider, err - } - return spider, nil -} - -func (t *Task) GetNode() (Node, error) { - node, err := GetNode(t.NodeId) - if err != nil { - return node, err - } - return node, nil -} - -func (t *Task) Save() error { - s, c := database.GetCol("tasks") - defer s.Close() - t.UpdateTs = time.Now() - if err := c.UpdateId(t.Id, t); err != nil { - log.Errorf("update task error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func (t *Task) Delete() error { - s, c := database.GetCol("tasks") - defer s.Close() - if err := c.RemoveId(t.Id); err != nil { - return err - } - return nil -} - -func (t *Task) GetResults(pageNum int, pageSize int) (results []interface{}, total int, err error) { - spider, err := t.GetSpider() - if err != nil { - log.Errorf(err.Error()) - return - } - - col := utils.GetSpiderCol(spider.Col, spider.Name) - - s, c := database.GetCol(col) - defer s.Close() - - query := bson.M{ - "task_id": t.Id, - } - if err = c.Find(query).Skip((pageNum - 1) * pageSize).Limit(pageSize).All(&results); err != nil { - return - } - - if total, err = c.Find(query).Count(); err != nil { - return - } - - return -} - -func (t *Task) GetLogItems(keyword string, page int, pageSize int) (logItems []LogItem, logTotal int, err error) { - query := bson.M{ - "task_id": t.Id, - } - - logTotal, err = GetLogItemTotal(query, keyword) - if err != nil { - return logItems, logTotal, err - } - - logItems, err = GetLogItemList(query, keyword, (page-1)*pageSize, pageSize, "+_id") - if err != nil { - return logItems, logTotal, err - } - - return logItems, logTotal, nil -} - -func (t *Task) GetErrorLogItems(n int) (errLogItems []ErrorLogItem, err error) { - s, c := database.GetCol("error_logs") - defer s.Close() - - query := bson.M{ - "task_id": t.Id, - } - - if err := c.Find(query).Limit(n).All(&errLogItems); err != nil { - log.Errorf("find error logs error: " + err.Error()) - debug.PrintStack() - return errLogItems, err - } - - return errLogItems, nil -} - -func GetTaskList(filter interface{}, skip int, limit int, sortKey string) ([]Task, error) { - s, c := database.GetCol("tasks") - defer s.Close() - - var tasks []Task - if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&tasks); err != nil { - debug.PrintStack() - return tasks, err - } - - for i, task := range tasks { - // 获取爬虫名称 - if spider, err := task.GetSpider(); err == nil { - tasks[i].SpiderName = spider.DisplayName - } - - // 获取节点名称 - if node, err := task.GetNode(); err == nil { - tasks[i].NodeName = node.Name - } - - // 获取用户名称 - user, _ := GetUser(task.UserId) - task.Username = user.Username - } - return tasks, nil -} - -func GetTaskListTotal(filter interface{}) (int, error) { - s, c := database.GetCol("tasks") - defer s.Close() - - var result int - result, err := c.Find(filter).Count() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return result, err - } - return result, nil -} - -func GetTask(id string) (Task, error) { - s, c := database.GetCol("tasks") - defer s.Close() - - var task Task - if err := c.FindId(id).One(&task); err != nil { - log.Infof("get task error: %s, id: %s", err.Error(), id) - debug.PrintStack() - return task, err - } - - // 获取用户名称 - user, _ := GetUser(task.UserId) - task.Username = user.Username - - return task, nil -} - -func AddTask(item Task) error { - s, c := database.GetCol("tasks") - defer s.Close() - - item.CreateTs = time.Now() - item.UpdateTs = time.Now() - - if err := c.Insert(&item); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func RemoveTask(id string) error { - s, c := database.GetCol("tasks") - defer s.Close() - - var result Task - if err := c.FindId(id).One(&result); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - if err := c.RemoveId(id); err != nil { - return err - } - - return nil -} - -func RemoveTaskByStatus(status string) error { - tasks, err := GetTaskList(bson.M{"status": status}, 0, constants.Infinite, "-create_ts") - if err != nil { - log.Error("get tasks error:" + err.Error()) - } - for _, task := range tasks { - if err := RemoveTask(task.Id); err != nil { - log.Error("remove task error:" + err.Error()) - continue - } - } - return nil -} - -// 删除task by spider_id -func RemoveTaskBySpiderId(id bson.ObjectId) error { - tasks, err := GetTaskList(bson.M{"spider_id": id}, 0, constants.Infinite, "-create_ts") - if err != nil { - log.Error("get tasks error:" + err.Error()) - } - - for _, task := range tasks { - if err := RemoveTask(task.Id); err != nil { - log.Error("remove task error:" + err.Error()) - continue - } - } - return nil -} - -// task 总数 -func GetTaskCount(query interface{}) (int, error) { - s, c := database.GetCol("tasks") - defer s.Close() - - count, err := c.Find(query).Count() - if err != nil { - return 0, err - } - - return count, nil -} - -func GetDailyTaskStats(query bson.M) ([]TaskDailyItem, error) { - s, c := database.GetCol("tasks") - defer s.Close() - - // 起始日期 - startDate := time.Now().Add(-30 * 24 * time.Hour) - endDate := time.Now() - - // query - query["create_ts"] = bson.M{ - "$gte": startDate, - "$lt": endDate, - } - - // match - op1 := bson.M{ - "$match": query, - } - - // project - op2 := bson.M{ - "$project": bson.M{ - "date": bson.M{ - "$dateToString": bson.M{ - "format": "%Y%m%d", - "date": "$create_ts", - "timezone": "Asia/Shanghai", - }, - }, - "success_count": bson.M{ - "$cond": []interface{}{ - bson.M{ - "$eq": []string{ - "$status", - constants.StatusFinished, - }, - }, - 1, - 0, - }, - }, - "runtime_duration": "$runtime_duration", - }, - } - - // group - op3 := bson.M{ - "$group": bson.M{ - "_id": "$date", - "task_count": bson.M{"$sum": 1}, - "runtime_duration": bson.M{"$sum": "$runtime_duration"}, - }, - } - - op4 := bson.M{ - "$project": bson.M{ - "task_count": "$task_count", - "date": "$date", - "avg_runtime_duration": bson.M{ - "$divide": []string{"$runtime_duration", "$task_count"}, - }, - }, - } - - // run aggregation - var items []TaskDailyItem - if err := c.Pipe([]bson.M{op1, op2, op3, op4}).All(&items); err != nil { - return items, err - } - - // 缓存每日数据 - dict := make(map[string]TaskDailyItem) - for _, item := range items { - dict[item.Date] = item - } - - // 遍历日期 - var dailyItems []TaskDailyItem - for date := startDate; endDate.Sub(date) > 0; date = date.Add(24 * time.Hour) { - dateStr := date.Format("20060102") - dailyItems = append(dailyItems, TaskDailyItem{ - Date: dateStr, - TaskCount: dict[dateStr].TaskCount, - AvgRuntimeDuration: dict[dateStr].AvgRuntimeDuration, - }) - } - - return dailyItems, nil -} - -// 更新task的结果数 -func UpdateTaskResultCount(id string) (err error) { - // 获取任务 - task, err := GetTask(id) - if err != nil { - log.Errorf(err.Error()) - return err - } - - // 获取爬虫 - spider, err := GetSpider(task.SpiderId) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // default results collection - col := utils.GetSpiderCol(spider.Col, spider.Name) - - // 获取结果数量 - s, c := database.GetCol(col) - defer s.Close() - resultCount, err := c.Find(bson.M{"task_id": task.Id}).Count() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 保存结果数量 - task.ResultCount = resultCount - if err := task.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - return nil -} - -// update error log count -func UpdateErrorLogCount(id string) (err error) { - s, c := database.GetCol("error_logs") - defer s.Close() - - query := bson.M{ - "task_id": id, - } - count, err := c.Find(query).Count() - if err != nil { - log.Errorf("update error log count error: " + err.Error()) - debug.PrintStack() - return err - } - - st, ct := database.GetCol("tasks") - defer st.Close() - - task, err := GetTask(id) - if err != nil { - log.Errorf(err.Error()) - return err - } - task.ErrorLogCount = count - - if err := ct.UpdateId(id, task); err != nil { - log.Errorf("update error log count error: " + err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -// convert all running tasks to abnormal tasks -func UpdateTaskToAbnormal(nodeId bson.ObjectId) error { - s, c := database.GetCol("tasks") - defer s.Close() - - selector := bson.M{ - "node_id": nodeId, - "status": bson.M{ - "$in": []string{ - constants.StatusPending, - constants.StatusRunning, - }, - }, - } - update := bson.M{ - "$set": bson.M{ - "status": constants.StatusAbnormal, - }, - } - _, err := c.UpdateAll(selector, update) - if err != nil { - log.Errorf("update task to abnormal error: %s, node_id : %s", err.Error(), nodeId.Hex()) - debug.PrintStack() - return err - } - return nil -} - -// update task error logs -func UpdateTaskErrorLogs(taskId string, errorRegexPattern string) error { - s, c := database.GetCol("logs") - defer s.Close() - - if errorRegexPattern == "" { - errorRegexPattern = constants.ErrorRegexPattern - } - - query := bson.M{ - "task_id": taskId, - "msg": bson.M{ - "$regex": bson.RegEx{ - Pattern: errorRegexPattern, - Options: "i", - }, - }, - } - var logs []LogItem - if err := c.Find(query).All(&logs); err != nil { - log.Errorf("find error logs error: " + err.Error()) - debug.PrintStack() - return err - } - - for _, l := range logs { - e := ErrorLogItem{ - Id: bson.NewObjectId(), - TaskId: l.TaskId, - Message: l.Message, - LogId: l.Id, - Seq: l.Seq, - Ts: time.Now(), - } - if err := AddErrorLogItem(e); err != nil { - return err - } - } - - return nil -} - -func GetTaskByFilter(filter bson.M) (t Task, err error) { - s, c := database.GetCol("tasks") - defer s.Close() - - if err := c.Find(filter).One(&t); err != nil { - if err != mgo.ErrNotFound { - log.Errorf("find task by filter error: " + err.Error()) - debug.PrintStack() - return t, err - } - return t, err - } - - return t, nil -} diff --git a/backend/model/token.go b/backend/model/token.go deleted file mode 100644 index b5763866..00000000 --- a/backend/model/token.go +++ /dev/null @@ -1,80 +0,0 @@ -package model - -import ( - "crawlab/database" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "runtime/debug" - "time" -) - -type Token struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - Token string `json:"token" bson:"token"` - UserId bson.ObjectId `json:"user_id" bson:"user_id"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -func (t *Token) Add() error { - s, c := database.GetCol("tokens") - defer s.Close() - - if err := c.Insert(t); err != nil { - log.Errorf("insert token error: " + err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func (t *Token) Delete() error { - s, c := database.GetCol("tokens") - defer s.Close() - - if err := c.RemoveId(t.Id); err != nil { - log.Errorf("insert token error: " + err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func GetTokenById(id bson.ObjectId) (t Token, err error) { - s, c := database.GetCol("tokens") - defer s.Close() - - if err = c.FindId(id).One(&t); err != nil { - return t, err - } - - return t, nil -} - -func GetTokensByUserId(uid bson.ObjectId) (tokens []Token, err error) { - s, c := database.GetCol("tokens") - defer s.Close() - - if err = c.Find(bson.M{"user_id": uid}).All(&tokens); err != nil { - log.Errorf("find tokens error: " + err.Error()) - debug.PrintStack() - return tokens, err - } - - return tokens, nil -} - -func DeleteTokenById(id bson.ObjectId) error { - t, err := GetTokenById(id) - if err != nil { - return err - } - - if err := t.Delete(); err != nil { - return err - } - - return nil -} diff --git a/backend/model/user.go b/backend/model/user.go deleted file mode 100644 index feb801cb..00000000 --- a/backend/model/user.go +++ /dev/null @@ -1,166 +0,0 @@ -package model - -import ( - "crawlab/database" - "crawlab/utils" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "github.com/pkg/errors" - "runtime/debug" - "time" -) - -type User struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - Username string `json:"username" bson:"username"` - Password string `json:"password" bson:"password"` - Role string `json:"role" bson:"role"` - Email string `json:"email" bson:"email"` - Setting UserSetting `json:"setting" bson:"setting"` - - UserId bson.ObjectId `json:"user_id" bson:"user_id"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` -} - -type UserSetting struct { - NotificationTrigger string `json:"notification_trigger" bson:"notification_trigger"` - DingTalkRobotWebhook string `json:"ding_talk_robot_webhook" bson:"ding_talk_robot_webhook"` - WechatRobotWebhook string `json:"wechat_robot_webhook" bson:"wechat_robot_webhook"` - EnabledNotifications []string `json:"enabled_notifications" bson:"enabled_notifications"` - ErrorRegexPattern string `json:"error_regex_pattern" bson:"error_regex_pattern"` - MaxErrorLog int `json:"max_error_log" bson:"max_error_log"` - LogExpireDuration int64 `json:"log_expire_duration" bson:"log_expire_duration"` -} - -func (user *User) Save() error { - s, c := database.GetCol("users") - defer s.Close() - - user.UpdateTs = time.Now() - - if err := c.UpdateId(user.Id, user); err != nil { - debug.PrintStack() - return err - } - return nil -} - -func (user *User) Add() error { - s, c := database.GetCol("users") - defer s.Close() - - // 如果存在用户名相同的用户,抛错 - user2, err := GetUserByUsername(user.Username) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - if user2.Username == user.Username { - return errors.New("username already exists") - } - - user.Id = bson.NewObjectId() - user.UpdateTs = time.Now() - user.CreateTs = time.Now() - if err := c.Insert(user); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func GetUser(id bson.ObjectId) (User, error) { - s, c := database.GetCol("users") - defer s.Close() - var user User - if err := c.Find(bson.M{"_id": id}).One(&user); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return user, err - } - return user, nil -} - -func GetUserByUsername(username string) (User, error) { - s, c := database.GetCol("users") - defer s.Close() - - var user User - if err := c.Find(bson.M{"username": username}).One(&user); err != nil { - if err != mgo.ErrNotFound { - log.Errorf(err.Error()) - debug.PrintStack() - return user, err - } - } - - return user, nil -} - -func GetUserList(filter interface{}, skip int, limit int, sortKey string) ([]User, error) { - s, c := database.GetCol("users") - defer s.Close() - - var users []User - if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&users); err != nil { - debug.PrintStack() - return users, err - } - return users, nil -} - -func GetUserListTotal(filter interface{}) (int, error) { - s, c := database.GetCol("users") - defer s.Close() - - var result int - result, err := c.Find(filter).Count() - if err != nil { - return result, err - } - return result, nil -} - -func UpdateUser(id bson.ObjectId, item User) error { - s, c := database.GetCol("users") - defer s.Close() - - var result User - if err := c.FindId(id).One(&result); err != nil { - debug.PrintStack() - return err - } - - if item.Password == "" { - item.Password = result.Password - } else { - item.Password = utils.EncryptPassword(item.Password) - } - - if err := item.Save(); err != nil { - return err - } - return nil -} - -func RemoveUser(id bson.ObjectId) error { - s, c := database.GetCol("users") - defer s.Close() - - var result User - if err := c.FindId(id).One(&result); err != nil { - return err - } - - if err := c.RemoveId(id); err != nil { - return err - } - - return nil -} diff --git a/backend/model/variable.go b/backend/model/variable.go deleted file mode 100644 index 3af2188e..00000000 --- a/backend/model/variable.go +++ /dev/null @@ -1,97 +0,0 @@ -package model - -import ( - "crawlab/database" - "errors" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "runtime/debug" -) - -/** -全局变量 -*/ - -type Variable struct { - Id bson.ObjectId `json:"_id" bson:"_id"` - Key string `json:"key" bson:"key"` - Value string `json:"value" bson:"value"` - Remark string `json:"remark" bson:"remark"` -} - -func (model *Variable) Save() error { - s, c := database.GetCol("variable") - defer s.Close() - - if err := c.UpdateId(model.Id, model); err != nil { - log.Errorf("update variable error: %s", err.Error()) - return err - } - return nil -} - -func (model *Variable) Add() error { - s, c := database.GetCol("variable") - defer s.Close() - - // key 去重 - _, err := GetByKey(model.Key) - if err == nil { - return errors.New("key already exists") - } - - model.Id = bson.NewObjectId() - if err := c.Insert(model); err != nil { - log.Errorf("add variable error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func (model *Variable) Delete() error { - s, c := database.GetCol("variable") - defer s.Close() - - if err := c.RemoveId(model.Id); err != nil { - log.Errorf("remove variable error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} - -func GetByKey(key string) (Variable, error) { - s, c := database.GetCol("variable") - defer s.Close() - - var model Variable - if err := c.Find(bson.M{"key": key}).One(&model); err != nil { - log.Errorf("variable found error: %s, key: %s", err.Error(), key) - return model, err - } - return model, nil -} - -func GetVariable(id bson.ObjectId) (Variable, error) { - s, c := database.GetCol("variable") - defer s.Close() - - var model Variable - if err := c.FindId(id).One(&model); err != nil { - log.Errorf("variable found error: %s", err.Error()) - return model, err - } - return model, nil -} - -func GetVariableList() []Variable { - s, c := database.GetCol("variable") - defer s.Close() - - var list []Variable - if err := c.Find(nil).All(&list); err != nil { - - } - return list -} diff --git a/backend/routes/action.go b/backend/routes/action.go deleted file mode 100644 index 19144a4e..00000000 --- a/backend/routes/action.go +++ /dev/null @@ -1,118 +0,0 @@ -package routes - -import ( - "crawlab/model" - "crawlab/services" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" -) - -func GetAction(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - user, err := model.GetAction(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: user, - }) -} - -func GetActionList(c *gin.Context) { - pageNum := c.GetInt("page_num") - pageSize := c.GetInt("page_size") - - users, err := model.GetActionList(nil, (pageNum-1)*pageSize, pageSize, "-create_ts") - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - total, err := model.GetActionListTotal(nil) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Data: users, - Total: total, - }) -} - -func PutAction(c *gin.Context) { - // 绑定请求数据 - var action model.Action - if err := c.ShouldBindJSON(&action); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - action.UserId = services.GetCurrentUserId(c) - - if err := action.Add(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func PostAction(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - var item model.Action - if err := c.ShouldBindJSON(&item); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - if err := model.UpdateAction(bson.ObjectIdHex(id), item); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func DeleteAction(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - // 从数据库中删除该爬虫 - if err := model.RemoveAction(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/routes/base.go b/backend/routes/base.go deleted file mode 100644 index 0204b0ea..00000000 --- a/backend/routes/base.go +++ /dev/null @@ -1,24 +0,0 @@ -package routes - -type Response struct { - Status string `json:"status"` - Message string `json:"message"` - Data interface{} `json:"data"` - Error string `json:"error"` -} - -type ListResponse struct { - Status string `json:"status"` - Message string `json:"message"` - Total int `json:"total"` - Data interface{} `json:"data"` - Error string `json:"error"` -} - -type ListRequestData struct { - PageNum int `form:"page_num" json:"page_num"` - PageSize int `form:"page_size" json:"page_size"` - SortKey string `form:"sort_key" json:"sort_key"` - Status string `form:"status" json:"status"` - Keyword string `form:"keyword" json:"keyword"` -} diff --git a/backend/routes/challenge.go b/backend/routes/challenge.go deleted file mode 100644 index 1f03654a..00000000 --- a/backend/routes/challenge.go +++ /dev/null @@ -1,45 +0,0 @@ -package routes - -import ( - "crawlab/constants" - "crawlab/model" - "crawlab/services" - "crawlab/services/challenge" - "github.com/gin-gonic/gin" - "net/http" -) - -func GetChallengeList(c *gin.Context) { - // 获取列表 - users, err := model.GetChallengeListWithAchieved(nil, 0, constants.Infinite, "create_ts", services.GetCurrentUserId(c)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取总数 - total, err := model.GetChallengeListTotal(nil) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Data: users, - Total: total, - }) -} - -func CheckChallengeList(c *gin.Context) { - uid := services.GetCurrentUserId(c) - if err := challenge.CheckChallengeAndUpdateAll(uid); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/routes/config_spider.go b/backend/routes/config_spider.go deleted file mode 100644 index 93b5c7f9..00000000 --- a/backend/routes/config_spider.go +++ /dev/null @@ -1,411 +0,0 @@ -package routes - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/model" - "crawlab/services" - "crawlab/utils" - "fmt" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "github.com/spf13/viper" - "gopkg.in/yaml.v2" - "io" - "io/ioutil" - "net/http" - "os" - "path/filepath" - "strings" -) - -// 添加可配置爬虫 - -// @Summary Put config spider -// @Description Put config spider -// @Tags config spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param spider body model.Spider true "spider item" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /config_spiders [put] -func PutConfigSpider(c *gin.Context) { - var spider model.Spider - if err := c.ShouldBindJSON(&spider); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 爬虫名称不能为空 - if spider.Name == "" { - HandleErrorF(http.StatusBadRequest, c, "spider name should not be empty") - return - } - - // 模版名不能为空 - if spider.Template == "" { - HandleErrorF(http.StatusBadRequest, c, "spider template should not be empty") - return - } - - // 判断爬虫是否存在 - if spider := model.GetSpiderByName(spider.Name); spider.Name != "" { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("spider for '%s' already exists", spider.Name)) - return - } - - // 设置爬虫类别 - spider.Type = constants.Configurable - - // 将FileId置空 - spider.FileId = bson.ObjectIdHex(constants.ObjectIdNull) - - // UserId - spider.UserId = services.GetCurrentUserId(c) - - // 创建爬虫目录 - spiderDir := filepath.Join(viper.GetString("spider.path"), spider.Name) - if utils.Exists(spiderDir) { - if err := os.RemoveAll(spiderDir); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - if err := os.MkdirAll(spiderDir, 0777); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - spider.Src = spiderDir - - // 复制Spiderfile模版 - contentByte, err := ioutil.ReadFile("./template/spiderfile/Spiderfile." + spider.Template) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - f, err := os.Create(filepath.Join(spider.Src, "Spiderfile")) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - defer f.Close() - if _, err := f.Write(contentByte); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 添加爬虫到数据库 - if err := spider.Add(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: spider, - }) -} - -// 更改可配置爬虫 -func PostConfigSpider(c *gin.Context) { - PostSpider(c) -} - -// 上传可配置爬虫Spiderfile - -// @Summary Upload config spider -// @Description Upload config spider -// @Tags config spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param spider body model.Spider true "spider item" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /config_spiders/{id}/upload [post] -func UploadConfigSpider(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 获取爬虫 - var spider model.Spider - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id)) - return - } - - // UserId - spider.UserId = services.GetCurrentUserId(c) - - // 获取上传文件 - file, header, err := c.Request.FormFile("file") - if err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 文件名称必须为Spiderfile - filename := header.Filename - if filename != "Spiderfile" && filename != "Spiderfile.yaml" && filename != "Spiderfile.yml" { - HandleErrorF(http.StatusBadRequest, c, "filename must be 'Spiderfile(.yaml|.yml)'") - return - } - - // 爬虫目录 - spiderDir := filepath.Join(viper.GetString("spider.path"), spider.Name) - - // 爬虫Spiderfile文件路径 - sfPath := filepath.Join(spiderDir, filename) - - // 创建(如果不存在)或打开Spiderfile(如果存在) - var f *os.File - if utils.Exists(sfPath) { - f, err = os.OpenFile(sfPath, os.O_WRONLY, 0777) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } else { - f, err = os.Create(sfPath) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 将上传的文件拷贝到爬虫Spiderfile文件 - _, err = io.Copy(f, file) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 关闭Spiderfile文件 - _ = f.Close() - - // 构造配置数据 - configData := entity.ConfigSpiderData{} - - // 读取YAML文件 - yamlFile, err := ioutil.ReadFile(sfPath) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 反序列化 - if err := yaml.Unmarshal(yamlFile, &configData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 根据序列化后的数据处理爬虫文件 - if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Post config spider -// @Description Post config spider -// @Tags config spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /config_spiders/{id}/spiderfile [post] -func PostConfigSpiderSpiderfile(c *gin.Context) { - type Body struct { - Content string `json:"content"` - } - - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 文件内容 - var reqBody Body - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - content := reqBody.Content - - // 获取爬虫 - var spider model.Spider - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id)) - return - } - - // UserId - if !spider.UserId.Valid() { - spider.UserId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - // 反序列化 - var configData entity.ConfigSpiderData - if err := yaml.Unmarshal([]byte(content), &configData); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 校验configData - if err := services.ValidateSpiderfile(configData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 写文件 - if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), []byte(content), os.ModePerm); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 根据序列化后的数据处理爬虫文件 - if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Post config spider config -// @Description Post config spider config -// @Tags config spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param spider body model.Spider true "spider item" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /config_spiders/{id}/config [post] -func PostConfigSpiderConfig(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 获取爬虫 - var spider model.Spider - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id)) - return - } - - // UserId - if !spider.UserId.Valid() { - spider.UserId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - // 反序列化配置数据 - var configData entity.ConfigSpiderData - if err := c.ShouldBindJSON(&configData); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 校验configData - if err := services.ValidateSpiderfile(configData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 替换Spiderfile文件 - if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 根据序列化后的数据处理爬虫文件 - if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Get config spider -// @Description Get config spider -// @Tags config spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /config_spiders/{id}/config [get] -func GetConfigSpiderConfig(c *gin.Context) { - id := c.Param("id") - - // 校验ID - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - // 获取爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: spider.Config, - }) -} - -// 获取模版名称列表 - -// @Summary Get config spider template list -// @Description Get config spider template list -// @Tags config spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /config_spiders_templates [get] -func GetConfigSpiderTemplateList(c *gin.Context) { - var data []string - for _, fInfo := range utils.ListDir("./template/spiderfile") { - templateName := strings.Replace(fInfo.Name(), "Spiderfile.", "", -1) - data = append(data, templateName) - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: data, - }) -} diff --git a/backend/routes/doc.go b/backend/routes/doc.go deleted file mode 100644 index f38c5431..00000000 --- a/backend/routes/doc.go +++ /dev/null @@ -1,33 +0,0 @@ -package routes - -import ( - "crawlab/services" - "github.com/apex/log" - "github.com/gin-gonic/gin" - "net/http" - "runtime/debug" -) - -// @Summary Get docs -// @Description Get docs -// @Tags docs -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /docs [get] -func GetDocs(c *gin.Context) { - type ResData struct { - String string `json:"string"` - } - data, err := services.GetDocs() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: ResData{String:data}, - }) -} diff --git a/backend/routes/file.go b/backend/routes/file.go deleted file mode 100644 index 4c9f8576..00000000 --- a/backend/routes/file.go +++ /dev/null @@ -1,29 +0,0 @@ -package routes - -import ( - "crawlab/utils" - "github.com/gin-gonic/gin" - "io/ioutil" - "net/http" -) - -// @Summary Get file -// @Description Get file -// @Tags file -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /file [get] -func GetFile(c *gin.Context) { - path := c.Query("path") - fileBytes, err := ioutil.ReadFile(path) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: utils.BytesToString(fileBytes), - }) -} diff --git a/backend/routes/git.go b/backend/routes/git.go deleted file mode 100644 index aa889be2..00000000 --- a/backend/routes/git.go +++ /dev/null @@ -1,84 +0,0 @@ -package routes - -import ( - "crawlab/model" - "crawlab/services" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" -) - -func GetGitRemoteBranches(c *gin.Context) { - url := c.Query("url") - username := c.Query("username") - password := c.Query("password") - branches, err := services.GetGitRemoteBranchesPlain(url, username, password) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: branches, - }) -} - -func GetGitSshPublicKey(c *gin.Context) { - content := services.GetGitSshPublicKey() - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: content, - }) -} - -func GetGitCommits(c *gin.Context) { - spiderId := c.Query("spider_id") - if spiderId == "" || !bson.IsObjectIdHex(spiderId) { - HandleErrorF(http.StatusInternalServerError, c, "invalid request") - return - } - spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - commits, err := services.GetGitCommits(spider) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: commits, - }) -} - -func PostGitCheckout(c *gin.Context) { - type ReqBody struct { - SpiderId string `json:"spider_id"` - Hash string `json:"hash"` - } - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - } - if reqBody.SpiderId == "" || !bson.IsObjectIdHex(reqBody.SpiderId) { - HandleErrorF(http.StatusInternalServerError, c, "invalid request") - return - } - spider, err := model.GetSpider(bson.ObjectIdHex(reqBody.SpiderId)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - if err := services.GitCheckout(spider, reqBody.Hash); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/routes/market.go b/backend/routes/market.go deleted file mode 100644 index 0db51ae5..00000000 --- a/backend/routes/market.go +++ /dev/null @@ -1 +0,0 @@ -package routes diff --git a/backend/routes/node.go b/backend/routes/node.go deleted file mode 100644 index 434e1b33..00000000 --- a/backend/routes/node.go +++ /dev/null @@ -1,198 +0,0 @@ -package routes - -import ( - "crawlab/model" - "crawlab/services" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" -) - -// @Summary Get nodes -// @Description Get nodes -// @Tags node -// @Produce json -// @Param Authorization header string true "With the bearer started" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes [get] -func GetNodeList(c *gin.Context) { - nodes, err := model.GetNodeList(nil) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - //for i, node := range nodes { - // nodes[i].IsMaster = services.IsMasterNode(node.Id.Hex()) - //} - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: nodes, - }) -} - -// @Summary Get node -// @Description Get node -// @Tags node -// @Produce json -// @Param Authorization header string true "With the bearer started" -// @Param id path string true "id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id} [get] -func GetNode(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - result, err := model.GetNode(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: result, - }) -} - -func Ping(c *gin.Context) { - data, err := services.GetNodeData() - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: data, - }) -} - -// @Summary Post node -// @Description Post node -// @Tags node -// @Accept json -// @Produce json -// @Param Authorization header string true "With the bearer started" -// @Param id path string true "post node" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /nodes/{id} [post] -func PostNode(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - item, err := model.GetNode(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - var newItem model.Node - if err := c.ShouldBindJSON(&newItem); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - newItem.Id = item.Id - - if err := model.UpdateNode(bson.ObjectIdHex(id), newItem); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Get tasks on node -// @Description Get tasks on node -// @Tags node -// @Produce json -// @Param Authorization header string true "With the bearer started" -// @Param id path string true "node id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id}/tasks [get] -func GetNodeTaskList(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - tasks, err := model.GetNodeTaskList(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: tasks, - }) -} - -// @Summary Get system info -// @Description Get system info -// @Tags node -// @Produce json -// @Param Authorization header string true "With the bearer started" -// @Param id path string true "node id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id}/system [get] -func GetSystemInfo(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - sysInfo, _ := services.GetSystemInfo(id) - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: sysInfo, - }) -} - -// @Summary Delete node -// @Description Delete node -// @Tags node -// @Produce json -// @Param Authorization header string true "With the bearer started" -// @Param id path string true "node id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id} [delete] -func DeleteNode(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - node, err := model.GetNode(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - err = node.Delete() - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/routes/project.go b/backend/routes/project.go deleted file mode 100644 index 7a62d31a..00000000 --- a/backend/routes/project.go +++ /dev/null @@ -1,245 +0,0 @@ -package routes - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/model" - "crawlab/services" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" -) - -// @Summary Get projects -// @Description Get projects -// @Tags project -// @Produce json -// @Param Authorization header string true "With the bearer started" -// @Param tag query string true "projects" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /projects [get] -func GetProjectList(c *gin.Context) { - tag := c.Query("tag") - - // 筛选条件 - query := bson.M{} - if tag != "" { - query["tags"] = tag - } - - // 获取校验 - query = services.GetAuthQuery(query, c) - - // 获取列表 - projects, err := model.GetProjectList(query, "+_id") - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取总数 - total, err := model.GetProjectListTotal(query) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取每个项目的爬虫列表 - for i, p := range projects { - spiders, err := p.GetSpiders() - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - projects[i].Spiders = spiders - } - - // 获取未被分配的爬虫数量 - if tag == "" { - noProject := model.Project{ - Id: bson.ObjectIdHex(constants.ObjectIdNull), - Name: "No Project", - Description: "Not assigned to any project", - } - spiders, err := noProject.GetSpiders() - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - noProject.Spiders = spiders - projects = append(projects, noProject) - } - - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Data: projects, - Total: total, - }) -} - -// @Summary Put project -// @Description Put project -// @Tags project -// @Accept json -// @Produce json -// @Param Authorization header string true "With the bearer started" -// @Param p body model.Project true "post project" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /projects [put] -func PutProject(c *gin.Context) { - // 绑定请求数据 - var p model.Project - if err := c.ShouldBindJSON(&p); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // UserId - p.UserId = services.GetCurrentUserId(c) - - if err := p.Add(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Post project -// @Description Post project -// @Tags project -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "project id" -// @Param item body model.Project true "project item" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /projects/{id} [post] -func PostProject(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - var item model.Project - if err := c.ShouldBindJSON(&item); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - if err := model.UpdateProject(bson.ObjectIdHex(id), item); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Delete project -// @Description Delete project -// @Tags project -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "project id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /projects/{id} [delete] -func DeleteProject(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - // 从数据库中删除该爬虫 - if err := model.RemoveProject(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取相关的爬虫 - var spiders []model.Spider - s, col := database.GetCol("spiders") - defer s.Close() - if err := col.Find(bson.M{"project_id": bson.ObjectIdHex(id)}).All(&spiders); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 将爬虫的项目ID置空 - for _, spider := range spiders { - spider.ProjectId = bson.ObjectIdHex(constants.ObjectIdNull) - if err := spider.Save(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Get project tags -// @Description Get projects tags -// @Tags project -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /projects/tags [get] -func GetProjectTags(c *gin.Context) { - type Result struct { - Tag string `json:"tag" bson:"tag"` - } - - s, col := database.GetCol("projects") - defer s.Close() - - pipeline := []bson.M{ - { - "$unwind": "$tags", - }, - { - "$group": bson.M{ - "_id": "$tags", - }, - }, - { - "$sort": bson.M{ - "_id": 1, - }, - }, - { - "$addFields": bson.M{ - "tag": "$_id", - }, - }, - } - - var items []Result - if err := col.Pipe(pipeline).All(&items); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: items, - }) -} diff --git a/backend/routes/repos.go b/backend/routes/repos.go deleted file mode 100644 index 00a4e847..00000000 --- a/backend/routes/repos.go +++ /dev/null @@ -1,81 +0,0 @@ -package routes - -import ( - "crawlab/services" - "fmt" - "github.com/apex/log" - "github.com/gin-gonic/gin" - "github.com/imroc/req" - "github.com/spf13/viper" - "net/http" - "runtime/debug" -) - -func GetRepoList(c *gin.Context) { - var data ListRequestData - if err := c.ShouldBindQuery(&data); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - params := req.Param{ - "page_num": data.PageNum, - "page_size": data.PageSize, - "keyword": data.Keyword, - "sort_key": data.SortKey, - } - res, err := req.Get(fmt.Sprintf("%s/public/repos", viper.GetString("repo.apiUrl")), params) - if err != nil { - log.Error("get repos error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - var resJson interface{} - if err := res.ToJSON(&resJson); err != nil { - log.Error("to json error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, resJson) -} - -func GetRepoSubDirList(c *gin.Context) { - params := req.Param{ - "full_name": c.Query("full_name"), - } - res, err := req.Get(fmt.Sprintf("%s/public/repos/sub-dir", viper.GetString("repo.apiUrl")), params) - if err != nil { - log.Error("get repo sub-dir error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - var resJson interface{} - if err := res.ToJSON(&resJson); err != nil { - log.Error("to json error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, resJson) -} - -func DownloadRepo(c *gin.Context) { - type RequestData struct { - FullName string `json:"full_name"` - } - var reqData RequestData - if err := c.ShouldBindJSON(&reqData); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - if err := services.DownloadRepo(reqData.FullName, services.GetCurrentUserId(c)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/routes/schedule.go b/backend/routes/schedule.go deleted file mode 100644 index 5da735e0..00000000 --- a/backend/routes/schedule.go +++ /dev/null @@ -1,323 +0,0 @@ -package routes - -import ( - "crawlab/model" - "crawlab/services" - "github.com/apex/log" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" - "runtime/debug" -) - -// @Summary Get schedule list -// @Description Get schedule list -// @Tags schedule -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /schedules [get] -func GetScheduleList(c *gin.Context) { - query := bson.M{} - - // 获取校验 - query = services.GetAuthQuery(query, c) - - results, err := model.GetScheduleList(query) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccessData(c, results) -} - -// @Summary Get schedule by id -// @Description Get schedule by id -// @Tags schedule -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /schedules/{id} [get] -func GetSchedule(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - result, err := model.GetSchedule(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - HandleSuccessData(c, result) -} - -// @Summary Post schedule -// @Description Post schedule -// @Tags schedule -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Param newItem body model.Schedule true "schedule item" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /schedules/{id} [post] -func PostSchedule(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 绑定数据模型 - var newItem model.Schedule - if err := c.ShouldBindJSON(&newItem); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 验证cron表达式 - if err := services.ParserCron(newItem.Cron); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - newItem.Id = bson.ObjectIdHex(id) - // 更新数据库 - if err := model.UpdateSchedule(bson.ObjectIdHex(id), newItem); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 更新定时任务 - if err := services.Sched.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - HandleSuccess(c) -} - -// @Summary Put schedule -// @Description Put schedule -// @Tags schedule -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param item body model.Schedule true "schedule item" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /schedules [put] -func PutSchedule(c *gin.Context) { - var item model.Schedule - - // 绑定数据模型 - if err := c.ShouldBindJSON(&item); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 验证cron表达式 - if err := services.ParserCron(item.Cron); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 加入用户ID - item.UserId = services.GetCurrentUserId(c) - - // 更新数据库 - if err := model.AddSchedule(item); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 更新定时任务 - if err := services.Sched.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - HandleSuccess(c) -} - -// @Summary Delete schedule -// @Description Delete schedule -// @Tags schedule -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /schedules/{id} [delete] -func DeleteSchedule(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 删除定时任务 - if err := model.RemoveSchedule(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 更新定时任务 - if err := services.Sched.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - HandleSuccess(c) -} - -// 停止定时任务 -// @Summary disable schedule -// @Description disable schedule -// @Tags schedule -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /schedules/{id}/disable [post] -func DisableSchedule(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - if err := services.Sched.Disable(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccess(c) -} - -// 运行定时任务 -// @Summary enable schedule -// @Description enable schedule -// @Tags schedule -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /schedules/{id}/enable [post] -func EnableSchedule(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - if err := services.Sched.Enable(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccess(c) -} - -func PutBatchSchedules(c *gin.Context) { - var schedules []model.Schedule - if err := c.ShouldBindJSON(&schedules); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - for _, s := range schedules { - // 验证cron表达式 - if err := services.ParserCron(s.Cron); err != nil { - log.Errorf("parse cron error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 添加 UserID - s.UserId = services.GetCurrentUserId(c) - - // 默认启用 - s.Enabled = true - - // 添加定时任务 - if err := model.AddSchedule(s); err != nil { - log.Errorf("add schedule error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 更新定时任务 - if err := services.Sched.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - HandleSuccess(c) -} - -func DeleteBatchSchedules(c *gin.Context) { - ids := make(map[string][]string) - if err := c.ShouldBindJSON(&ids); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - list := ids["ids"] - for _, id := range list { - if err := model.RemoveSchedule(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 更新定时任务 - if err := services.Sched.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - HandleSuccess(c) -} - -func SetEnabledSchedules(c *gin.Context) { - type ReqBody struct { - ScheduleIds []bson.ObjectId `json:"schedule_ids"` - Enabled bool `json:"enabled"` - } - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - for _, id := range reqBody.ScheduleIds { - s, err := model.GetSchedule(id) - if err != nil { - log.Errorf("get schedule error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - s.Enabled = reqBody.Enabled - if err := s.Save(); err != nil { - log.Errorf("save schedule error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 更新定时任务 - if err := services.Sched.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - HandleSuccess(c) -} diff --git a/backend/routes/setting.go b/backend/routes/setting.go deleted file mode 100644 index 5faea750..00000000 --- a/backend/routes/setting.go +++ /dev/null @@ -1,55 +0,0 @@ -package routes - -import ( - "github.com/gin-gonic/gin" - "github.com/spf13/viper" - "net/http" -) - -type SettingBody struct { - AllowRegister string `json:"allow_register"` - EnableTutorial string `json:"enable_tutorial"` - RunOnMaster string `json:"run_on_master"` - EnableDemoSpiders string `json:"enable_demo_spiders"` -} - -// @Summary Get version -// @Description Get version -// @Tags setting -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /version [get] -func GetVersion(c *gin.Context) { - version := viper.GetString("version") - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: version, - }) -} - -// @Summary Get setting -// @Description Get setting -// @Tags setting -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /setting [get] -func GetSetting(c *gin.Context) { - body := SettingBody{ - AllowRegister: viper.GetString("setting.allowRegister"), - EnableTutorial: viper.GetString("setting.enableTutorial"), - RunOnMaster: viper.GetString("setting.runOnMaster"), - EnableDemoSpiders: viper.GetString("setting.enableDemoSpiders"), - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: body, - }) -} diff --git a/backend/routes/spider.go b/backend/routes/spider.go deleted file mode 100644 index 87ec7c26..00000000 --- a/backend/routes/spider.go +++ /dev/null @@ -1,1910 +0,0 @@ -package routes - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/model" - "crawlab/services" - "crawlab/utils" - "fmt" - "github.com/apex/log" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "github.com/pkg/errors" - "github.com/satori/go.uuid" - "github.com/spf13/viper" - "io/ioutil" - "net/http" - "os" - "path" - "path/filepath" - "runtime/debug" - "strconv" - "strings" - "time" -) - -// ======== 爬虫管理 ======== - -// @Summary Get spider list -// @Description Get spider list -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param page_num query string false "page num" -// @Param page_size query string false "page size" -// @Param keyword query string false "keyword" -// @Param project_id query string false "project_id" -// @Param type query string false "type" -// @Param sort_key query string false "sort_key" -// @Param sort_direction query string false "sort_direction" -// @Param owner_type query string false "owner_type" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /schedules [get] -func GetSpiderList(c *gin.Context) { - pageNum := c.Query("page_num") - pageSize := c.Query("page_size") - keyword := c.Query("keyword") - pid := c.Query("project_id") - t := c.Query("type") - sortKey := c.Query("sort_key") - sortDirection := c.Query("sort_direction") - ownerType := c.Query("owner_type") - - // 筛选-名称 - filter := bson.M{ - "name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}}, - } - - // 筛选-类型 - if t != "" && t != "all" { - filter["type"] = t - } - - // 筛选-是否为长任务 - if t == "long-task" { - delete(filter, "type") - filter["is_long_task"] = true - } - - // 筛选-项目 - if pid == "" { - // do nothing - } else if pid == constants.ObjectIdNull { - filter["$or"] = []bson.M{ - {"project_id": bson.ObjectIdHex(pid)}, - {"project_id": bson.M{"$exists": false}}, - } - } else { - filter["project_id"] = bson.ObjectIdHex(pid) - } - - // 筛选-用户 - if ownerType == constants.OwnerTypeAll { - user := services.GetCurrentUser(c) - if user.Role == constants.RoleNormal { - filter["$or"] = []bson.M{ - {"user_id": services.GetCurrentUserId(c)}, - {"is_public": true}, - } - } - } else if ownerType == constants.OwnerTypeMe { - filter["user_id"] = services.GetCurrentUserId(c) - } else if ownerType == constants.OwnerTypePublic { - filter["is_public"] = true - } - - // 排序 - sortStr := "-_id" - if sortKey != "" && sortDirection != "" { - if sortDirection == constants.DESCENDING { - sortStr = "-" + sortKey - } else if sortDirection == constants.ASCENDING { - sortStr = "+" + sortKey - } else { - HandleErrorF(http.StatusBadRequest, c, "invalid sort_direction") - return - } - } - - // 分页 - page := &entity.Page{} - page.GetPage(pageNum, pageSize) - - results, count, err := model.GetSpiderList(filter, page.Skip, page.Limit, sortStr) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: bson.M{"list": results, "total": count}, - }) -} - -// @Summary Get spider by id -// @Description Get spider by id -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id} [get] -func GetSpider(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: spider, - }) -} - -// @Summary Post spider -// @Description Post spider -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Param item body model.Spider true "spider item" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders/{id} [post] -func PostSpider(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - var item model.Spider - if err := c.ShouldBindJSON(&item); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // UserId - if !item.UserId.Valid() { - item.UserId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - if err := model.UpdateSpider(bson.ObjectIdHex(id), item); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 更新 GitCron - if err := services.GitCron.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 去重处理 - if err := services.UpdateSpiderDedup(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Publish spider -// @Description Publish spider -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders/{id}/publish [post] -func PublishSpider(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - services.PublishSpider(spider) - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Put spider -// @Description Put spider -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param spider body model.Spider true "spider item" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders [put] -func PutSpider(c *gin.Context) { - var spider model.Spider - if err := c.ShouldBindJSON(&spider); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 爬虫名称不能为空 - if spider.Name == "" { - HandleErrorF(http.StatusBadRequest, c, "spider name should not be empty") - return - } - - // 判断爬虫是否存在 - if spider := model.GetSpiderByName(spider.Name); spider.Name != "" { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("spider for '%s' already exists", spider.Name)) - return - } - - // 设置爬虫类别 - spider.Type = constants.Customized - - // 将FileId置空 - spider.FileId = bson.ObjectIdHex(constants.ObjectIdNull) - - // UserId - spider.UserId = services.GetCurrentUserId(c) - - // 爬虫目录 - spiderDir := filepath.Join(viper.GetString("spider.path"), spider.Name) - - // 赋值到爬虫实例 - spider.Src = spiderDir - - // 移除已有爬虫目录 - if utils.Exists(spiderDir) { - if err := os.RemoveAll(spiderDir); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 生成爬虫目录 - if err := os.MkdirAll(spiderDir, 0777); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 如果为 Scrapy 项目,生成 Scrapy 项目 - if spider.IsScrapy { - if err := services.CreateScrapyProject(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 添加爬虫到数据库 - if err := spider.Add(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 同步到GridFS - if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 更新 GitCron - if err := services.GitCron.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: spider, - }) -} - -// @Summary Copy spider -// @Description Copy spider -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "schedule id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders/{id}/copy [post] -func CopySpider(c *gin.Context) { - type ReqBody struct { - Name string `json:"name"` - } - - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 检查新爬虫名称是否存在 - // 如果存在,则返回错误 - s := model.GetSpiderByName(reqBody.Name) - if s.Name != "" { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("spider name '%s' already exists", reqBody.Name)) - return - } - - // 被复制爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // UserId - spider.UserId = services.GetCurrentUserId(c) - - // 复制爬虫 - if err := services.CopySpider(spider, reqBody.Name); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Upload spider -// @Description Upload spider -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param file formData file true "spider file to upload" -// @Param name formData string true "spider name" -// @Param display_name formData string true "display name" -// @Param col formData string true "col" -// @Param cmd formData string true "cmd" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders [post] -func UploadSpider(c *gin.Context) { - // 从body中获取文件 - uploadFile, err := c.FormFile("file") - if err != nil { - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取参数 - name := c.PostForm("name") - displayName := c.PostForm("display_name") - col := c.PostForm("col") - cmd := c.PostForm("cmd") - - // 如果不为zip文件,返回错误 - if !strings.HasSuffix(uploadFile.Filename, ".zip") { - HandleError(http.StatusBadRequest, c, errors.New("not a valid zip file")) - return - } - - // 以防tmp目录不存在 - tmpPath := viper.GetString("other.tmppath") - if !utils.Exists(tmpPath) { - if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil { - log.Error("mkdir other.tmppath dir error:" + err.Error()) - debug.PrintStack() - HandleError(http.StatusBadRequest, c, errors.New("mkdir other.tmppath dir error")) - return - } - } - - // 保存到本地临时文件 - randomId := uuid.NewV4() - tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - if err := c.SaveUploadedFile(uploadFile, tmpFilePath); err != nil { - log.Error("save upload file error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取 GridFS 实例 - s, gf := database.GetGridFs("files") - defer s.Close() - - // 判断文件是否已经存在 - var gfFile model.GridFs - if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil { - // 已经存在文件,则删除 - if err := gf.RemoveId(gfFile.Id); err != nil { - log.Errorf("remove grid fs error: %s", err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 上传到GridFs - fid, err := services.RetryUploadToGridFs(uploadFile.Filename, tmpFilePath) - if err != nil { - log.Errorf("upload to grid fs error: %s", err.Error()) - debug.PrintStack() - return - } - - idx := strings.LastIndex(uploadFile.Filename, "/") - targetFilename := uploadFile.Filename[idx+1:] - - // 判断爬虫是否存在 - spiderName := strings.Replace(targetFilename, ".zip", "", 1) - if name != "" { - spiderName = name - } - spider := model.GetSpiderByName(spiderName) - if spider.Name == "" { - // 保存爬虫信息 - srcPath := viper.GetString("spider.path") - spider := model.Spider{ - Name: spiderName, - DisplayName: spiderName, - Type: constants.Customized, - Src: filepath.Join(srcPath, spiderName), - FileId: fid, - ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), - UserId: services.GetCurrentUserId(c), - } - if name != "" { - spider.Name = name - } - if displayName != "" { - spider.DisplayName = displayName - } - if col != "" { - spider.Col = col - } - if cmd != "" { - spider.Cmd = cmd - } - if err := spider.Add(); err != nil { - log.Error("add spider error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - } else { - if name != "" { - spider.Name = name - } - if displayName != "" { - spider.DisplayName = displayName - } - if col != "" { - spider.Col = col - } - if cmd != "" { - spider.Cmd = cmd - } - // 更新file_id - spider.FileId = fid - if err := spider.Save(); err != nil { - log.Error("add spider error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 获取爬虫 - spider = model.GetSpiderByName(spiderName) - - // 发起同步 - services.PublishSpider(spider) - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: spider, - }) -} - -// @Summary Upload spider by id -// @Description Upload spider by id -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param file formData file true "spider file to upload" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders/{id}/upload [post] -func UploadSpiderFromId(c *gin.Context) { - // TODO: 与 UploadSpider 部分逻辑重复,需要优化代码 - // 爬虫ID - spiderId := c.Param("id") - if !bson.IsObjectIdHex(spiderId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 获取爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) - if err != nil { - if err == mgo.ErrNotFound { - HandleErrorF(http.StatusNotFound, c, "cannot find spider") - } else { - HandleError(http.StatusInternalServerError, c, err) - } - return - } - - // 从body中获取文件 - uploadFile, err := c.FormFile("file") - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 如果不为zip文件,返回错误 - if !strings.HasSuffix(uploadFile.Filename, ".zip") { - debug.PrintStack() - HandleError(http.StatusBadRequest, c, errors.New("Not a valid zip file")) - return - } - - // 以防tmp目录不存在 - tmpPath := viper.GetString("other.tmppath") - if !utils.Exists(tmpPath) { - if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil { - log.Error("mkdir other.tmppath dir error:" + err.Error()) - debug.PrintStack() - HandleError(http.StatusBadRequest, c, errors.New("Mkdir other.tmppath dir error")) - return - } - } - - // 保存到本地临时文件 - randomId := uuid.NewV4() - tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - if err := c.SaveUploadedFile(uploadFile, tmpFilePath); err != nil { - log.Error("save upload file error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取 GridFS 实例 - s, gf := database.GetGridFs("files") - defer s.Close() - - // 判断文件是否已经存在 - var gfFile model.GridFs - if err := gf.Find(bson.M{"filename": spider.Name}).One(&gfFile); err == nil { - // 已经存在文件,则删除 - if err := gf.RemoveId(gfFile.Id); err != nil { - log.Errorf("remove grid fs error: " + err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 上传到GridFs - fid, err := services.RetryUploadToGridFs(spider.Name, tmpFilePath) - if err != nil { - log.Errorf("upload to grid fs error: %s", err.Error()) - debug.PrintStack() - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 更新file_id - spider.FileId = fid - if err := spider.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 发起同步 - services.PublishSpider(spider) - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Delete spider by id -// @Description Delete spider by id -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id} [delete] -func DeleteSpider(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - if err := services.RemoveSpider(id); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 更新 GitCron - if err := services.GitCron.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary delete spider -// @Description delete spider -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders [post] -func DeleteSelectedSpider(c *gin.Context) { - type ReqBody struct { - SpiderIds []string `json:"spider_ids"` - } - - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - - for _, spiderId := range reqBody.SpiderIds { - if err := services.RemoveSpider(spiderId); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 更新 GitCron - if err := services.GitCron.Update(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary cancel spider -// @Description cancel spider -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders-cancel [post] -func CancelSelectedSpider(c *gin.Context) { - type ReqBody struct { - SpiderIds []string `json:"spider_ids"` - } - - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - - for _, spiderId := range reqBody.SpiderIds { - if err := services.CancelSpider(spiderId); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary run spider -// @Description run spider -// @Tags spider -// @Accept json -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 500 json string Response -// @Router /spiders-run [post] -func RunSelectedSpider(c *gin.Context) { - type TaskParam struct { - SpiderId bson.ObjectId `json:"spider_id"` - Param string `json:"param"` - } - type ReqBody struct { - RunType string `json:"run_type"` - NodeIds []bson.ObjectId `json:"node_ids"` - TaskParams []TaskParam `json:"task_params"` - } - - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - - // 任务ID - var taskIds []string - - // 遍历爬虫 - // TODO: 优化此部分代码,与 routes.PutTask 有重合部分 - for _, taskParam := range reqBody.TaskParams { - if reqBody.RunType == constants.RunTypeAllNodes { - // 所有节点 - nodes, err := model.GetNodeList(nil) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - for _, node := range nodes { - t := model.Task{ - SpiderId: taskParam.SpiderId, - NodeId: node.Id, - Param: taskParam.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeAllNodes, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - taskIds = append(taskIds, id) - } - } else if reqBody.RunType == constants.RunTypeRandom { - // 随机 - t := model.Task{ - SpiderId: taskParam.SpiderId, - Param: taskParam.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeRandom, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } else if reqBody.RunType == constants.RunTypeSelectedNodes { - // 指定节点 - for _, nodeId := range reqBody.NodeIds { - t := model.Task{ - SpiderId: taskParam.SpiderId, - NodeId: nodeId, - Param: taskParam.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeSelectedNodes, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } - } else { - HandleErrorF(http.StatusInternalServerError, c, "invalid run_type") - return - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: taskIds, - }) -} - -func SetProjectsSelectedSpider(c *gin.Context) { - type ReqBody struct { - ProjectId bson.ObjectId `json:"project_id"` - SpiderIds []bson.ObjectId `json:"spider_ids"` - } - - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - - for _, spiderId := range reqBody.SpiderIds { - spider, err := model.GetSpider(spiderId) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - spider.ProjectId = reqBody.ProjectId - if err := spider.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Get task list -// @Description Get task list -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/tasks [get] -func GetSpiderTasks(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - tasks, err := spider.GetTasks() - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: tasks, - }) -} - -// @Summary Get spider stats -// @Description Get spider stats -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/stats [get] -func GetSpiderStats(c *gin.Context) { - type Overview struct { - TaskCount int `json:"task_count" bson:"task_count"` - ResultCount int `json:"result_count" bson:"result_count"` - SuccessCount int `json:"success_count" bson:"success_count"` - SuccessRate float64 `json:"success_rate"` - TotalWaitDuration float64 `json:"wait_duration" bson:"wait_duration"` - TotalRuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"` - AvgWaitDuration float64 `json:"avg_wait_duration"` - AvgRuntimeDuration float64 `json:"avg_runtime_duration"` - } - - type Data struct { - Overview Overview `json:"overview"` - Daily []model.TaskDailyItem `json:"daily"` - } - - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - log.Errorf(err.Error()) - HandleError(http.StatusInternalServerError, c, err) - return - } - - s, col := database.GetCol("tasks") - defer s.Close() - - // 起始日期 - startDate := time.Now().Add(-time.Hour * 24 * 30) - endDate := time.Now() - - // match - op1 := bson.M{ - "$match": bson.M{ - "spider_id": spider.Id, - "create_ts": bson.M{ - "$gte": startDate, - "$lt": endDate, - }, - }, - } - - // project - op2 := bson.M{ - "$project": bson.M{ - "success_count": bson.M{ - "$cond": []interface{}{ - bson.M{ - "$eq": []string{ - "$status", - constants.StatusFinished, - }, - }, - 1, - 0, - }, - }, - "result_count": "$result_count", - "wait_duration": "$wait_duration", - "runtime_duration": "$runtime_duration", - }, - } - - // group - op3 := bson.M{ - "$group": bson.M{ - "_id": nil, - "task_count": bson.M{"$sum": 1}, - "success_count": bson.M{"$sum": "$success_count"}, - "result_count": bson.M{"$sum": "$result_count"}, - "wait_duration": bson.M{"$sum": "$wait_duration"}, - "runtime_duration": bson.M{"$sum": "$runtime_duration"}, - }, - } - - // run aggregation pipeline - var overview Overview - if err := col.Pipe([]bson.M{op1, op2, op3}).One(&overview); err != nil { - if err == mgo.ErrNotFound { - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: Data{ - Overview: overview, - Daily: []model.TaskDailyItem{}, - }, - }) - return - } - log.Errorf(err.Error()) - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 后续处理 - successCount, _ := strconv.ParseFloat(strconv.Itoa(overview.SuccessCount), 64) - taskCount, _ := strconv.ParseFloat(strconv.Itoa(overview.TaskCount), 64) - overview.SuccessRate = successCount / taskCount - overview.AvgWaitDuration = overview.TotalWaitDuration / taskCount - overview.AvgRuntimeDuration = overview.TotalRuntimeDuration / taskCount - - items, err := model.GetDailyTaskStats(bson.M{"spider_id": spider.Id, "user_id": services.GetCurrentUserId(c)}) - if err != nil { - log.Errorf(err.Error()) - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: Data{ - Overview: overview, - Daily: items, - }, - }) -} - -// @Summary Get schedules -// @Description Get schedules -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/schedules [get] -func GetSpiderSchedules(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - // 获取定时任务 - list, err := model.GetScheduleList(bson.M{"spider_id": bson.ObjectIdHex(id)}) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: list, - }) -} - -// ======== ./爬虫管理 ======== - -// ======== 爬虫文件管理 ======== - -// @Summary Get spider dir -// @Description Get spider dir -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param path query string true "path" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/dir [get] -func GetSpiderDir(c *gin.Context) { - // 爬虫ID - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 目录相对路径 - path := c.Query("path") - - // 获取爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取目录下文件列表 - spiderPath := viper.GetString("spider.path") - f, err := ioutil.ReadDir(filepath.Join(spiderPath, spider.Name, path)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 遍历文件列表 - var fileList []model.File - for _, file := range f { - fileList = append(fileList, model.File{ - Name: file.Name(), - IsDir: file.IsDir(), - Size: file.Size(), - Path: filepath.Join(path, file.Name()), - }) - } - - // 返回结果 - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: fileList, - }) -} - -type SpiderFileReqBody struct { - Path string `json:"path"` - Content string `json:"content"` - NewPath string `json:"new_path"` -} - -// @Summary Get spider file -// @Description Get spider file -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param path query string true "path" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/file [get] -func GetSpiderFile(c *gin.Context) { - // 爬虫ID - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 文件相对路径 - path := c.Query("path") - - // 获取爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 读取文件 - fileBytes, err := ioutil.ReadFile(filepath.Join(spider.Src, path)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - } - - // 返回结果 - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: utils.BytesToString(fileBytes), - }) -} - -// @Summary Get spider dir -// @Description Get spider dir -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/file/tree [get] -func GetSpiderFileTree(c *gin.Context) { - // 爬虫ID - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 获取爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取目录下文件列表 - spiderPath := viper.GetString("spider.path") - spiderFilePath := filepath.Join(spiderPath, spider.Name) - - // 获取文件目录树 - fileNodeTree, err := services.GetFileNodeTree(spiderFilePath, 0) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 返回结果 - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: fileNodeTree, - }) -} - -// @Summary Post spider file -// @Description Post spider file -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param reqBody body routes.SpiderFileReqBody true "path" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/file [post] -func PostSpiderFile(c *gin.Context) { - // 爬虫ID - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - // 文件相对路径 - var reqBody SpiderFileReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 获取爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 写文件 - if err := ioutil.WriteFile(filepath.Join(spider.Src, reqBody.Path), []byte(reqBody.Content), os.ModePerm); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 同步到GridFS - if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 返回结果 - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Put spider file -// @Description Put spider file -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param reqBody body routes.SpiderFileReqBody true "path" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/file [post] -func PutSpiderFile(c *gin.Context) { - spiderId := c.Param("id") - if !bson.IsObjectIdHex(spiderId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var reqBody SpiderFileReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 文件路径 - filePath := path.Join(spider.Src, reqBody.Path) - - // 如果文件已存在,则报错 - if utils.Exists(filePath) { - HandleErrorF(http.StatusInternalServerError, c, fmt.Sprintf(`%s already exists`, filePath)) - return - } - - // 写入文件 - if err := ioutil.WriteFile(filePath, []byte(reqBody.Content), 0777); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 同步到GridFS - if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Post spider dir -// @Description Post spider dir -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param reqBody body routes.SpiderFileReqBody true "path" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/file [put] -func PutSpiderDir(c *gin.Context) { - spiderId := c.Param("id") - if !bson.IsObjectIdHex(spiderId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var reqBody SpiderFileReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 文件路径 - filePath := path.Join(spider.Src, reqBody.Path) - - // 如果文件已存在,则报错 - if utils.Exists(filePath) { - HandleErrorF(http.StatusInternalServerError, c, fmt.Sprintf(`%s already exists`, filePath)) - return - } - - // 创建文件夹 - if err := os.MkdirAll(filePath, 0777); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 同步到GridFS - if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Delete spider file -// @Description Delete spider file -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param reqBody body routes.SpiderFileReqBody true "path" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/file [delete] -func DeleteSpiderFile(c *gin.Context) { - spiderId := c.Param("id") - if !bson.IsObjectIdHex(spiderId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var reqBody SpiderFileReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - filePath := path.Join(spider.Src, reqBody.Path) - if err := os.RemoveAll(filePath); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 同步到GridFS - if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Rename spider file -// @Description Rename spider file -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param reqBody body routes.SpiderFileReqBody true "path" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/file/rename [post] -func RenameSpiderFile(c *gin.Context) { - spiderId := c.Param("id") - - if !bson.IsObjectIdHex(spiderId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var reqBody SpiderFileReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - } - spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 原文件路径 - filePath := path.Join(spider.Src, reqBody.Path) - newFilePath := path.Join(path.Join(path.Dir(filePath), reqBody.NewPath)) - - // 如果新文件已存在,则报错 - if utils.Exists(newFilePath) { - HandleErrorF(http.StatusInternalServerError, c, fmt.Sprintf(`%s already exists`, newFilePath)) - return - } - - // 重命名 - if err := os.Rename(filePath, newFilePath); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 删除原文件 - if err := os.RemoveAll(filePath); err != nil { - HandleError(http.StatusInternalServerError, c, err) - } - - // 同步到GridFS - if err := services.UploadSpiderToGridFsFromMaster(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// ======== 爬虫文件管理 ======== - -// ======== Scrapy 部分 ======== - -// @Summary Get scrapy spider file -// @Description Get scrapy spider file -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/scrapy/spiders [get] -func GetSpiderScrapySpiders(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - spiderNames, err := services.GetScrapySpiderNames(spider) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: spiderNames, - }) -} - -// @Summary Put scrapy spider file -// @Description Put scrapy spider file -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/scrapy/spiders [put] -func PutSpiderScrapySpiders(c *gin.Context) { - type ReqBody struct { - Name string `json:"name"` - Domain string `json:"domain"` - Template string `json:"template"` - } - - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - if err := services.CreateScrapySpider(spider, reqBody.Name, reqBody.Domain, reqBody.Template); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Get scrapy spider settings -// @Description Get scrapy spider settings -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/scrapy/settings [get] -func GetSpiderScrapySettings(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - data, err := services.GetScrapySettings(spider) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: data, - }) -} - -// @Summary Get scrapy spider file -// @Description Get scrapy spider file -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param reqData body []entity.ScrapySettingParam true "req data" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/scrapy/settings [post] -func PostSpiderScrapySettings(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - var reqData []entity.ScrapySettingParam - if err := c.ShouldBindJSON(&reqData); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - if err := services.SaveScrapySettings(spider, reqData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Get scrapy spider items -// @Description Get scrapy spider items -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/scrapy/items [get] -func GetSpiderScrapyItems(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - data, err := services.GetScrapyItems(spider) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: data, - }) -} - -// @Summary Post scrapy spider items -// @Description Post scrapy spider items -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Param reqData body []entity.ScrapyItem true "req data" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/scrapy/items [post] -func PostSpiderScrapyItems(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - var reqData []entity.ScrapyItem - if err := c.ShouldBindJSON(&reqData); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - if err := services.SaveScrapyItems(spider, reqData); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Get scrapy spider pipelines -// @Description Get scrapy spider pipelines -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/scrapy/pipelines [get] -func GetSpiderScrapyPipelines(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - data, err := services.GetScrapyPipelines(spider) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: data, - }) -} - -// @Summary Get scrapy spider file path -// @Description Get scrapy spider file path -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/scrapy/spider/filepath [get] -func GetSpiderScrapySpiderFilepath(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - spiderName := c.Query("spider_name") - if spiderName == "" { - HandleErrorF(http.StatusBadRequest, c, "spider_name is empty") - return - } - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - data, err := services.GetScrapySpiderFilepath(spider, spiderName) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: data, - }) -} - -// ======== ./Scrapy 部分 ======== - -// ======== Git 部分 ======== - -// @Summary Post spider sync git -// @Description Post spider sync git -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/git/sync [post] -func PostSpiderSyncGit(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - if err := services.SyncSpiderGit(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Post spider reset git -// @Description Post spider reset git -// @Tags spider -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "spider id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /spiders/{id}/git/reset [post] -func PostSpiderResetGit(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid") - return - } - - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - if err := services.ResetSpiderGit(spider); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// ======== ./Git 部分 ======== diff --git a/backend/routes/stats.go b/backend/routes/stats.go deleted file mode 100644 index 02e0993e..00000000 --- a/backend/routes/stats.go +++ /dev/null @@ -1,90 +0,0 @@ -package routes - -import ( - "crawlab/constants" - "crawlab/model" - "crawlab/services" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" -) - -// @Summary Get home stats -// @Description Get home stats -// @Tags version -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /stats/home [get] -func GetHomeStats(c *gin.Context) { - type DataOverview struct { - TaskCount int `json:"task_count"` - SpiderCount int `json:"spider_count"` - ActiveNodeCount int `json:"active_node_count"` - ScheduleCount int `json:"schedule_count"` - ProjectCount int `json:"project_count"` - } - - type Data struct { - Overview DataOverview `json:"overview"` - Daily []model.TaskDailyItem `json:"daily"` - } - - // 任务总数 - taskCount, err := model.GetTaskCount(bson.M{"user_id": services.GetCurrentUserId(c)}) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 在线节点总数 - activeNodeCount, err := model.GetNodeCount(bson.M{"status": constants.StatusOnline}) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 爬虫总数 - spiderCount, err := model.GetSpiderCount(bson.M{"user_id": services.GetCurrentUserId(c)}) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 定时任务数 - scheduleCount, err := model.GetScheduleCount(bson.M{"user_id": services.GetCurrentUserId(c)}) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 项目数 - projectCount, err := model.GetProjectCount(bson.M{"user_id": services.GetCurrentUserId(c)}) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 每日任务数 - items, err := model.GetDailyTaskStats(bson.M{"user_id": services.GetCurrentUserId(c)}) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: Data{ - Overview: DataOverview{ - ActiveNodeCount: activeNodeCount, - TaskCount: taskCount, - SpiderCount: spiderCount, - ScheduleCount: scheduleCount, - ProjectCount: projectCount, - }, - Daily: items, - }, - }) -} diff --git a/backend/routes/system.go b/backend/routes/system.go deleted file mode 100644 index a94be822..00000000 --- a/backend/routes/system.go +++ /dev/null @@ -1,344 +0,0 @@ -package routes - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/services" - "crawlab/services/rpc" - "fmt" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" - "strings" -) - -// @Summary Get language list -// @Description Get language list -// @Tags system -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "node id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id}/langs [get] -func GetLangList(c *gin.Context) { - nodeId := c.Param("id") - if !bson.IsObjectIdHex(nodeId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: services.GetLangList(nodeId), - }) -} - -// @Summary Get dep list -// @Description Get dep list -// @Tags system -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "node id" -// @Param lang query string true "language" -// @Param dep_name query string true "dep name" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id}/deps [get] -func GetDepList(c *gin.Context) { - nodeId := c.Param("id") - lang := c.Query("lang") - depName := c.Query("dep_name") - if !bson.IsObjectIdHex(nodeId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var depList []entity.Dependency - if lang == constants.Python { - list, err := services.GetPythonDepList(nodeId, depName) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - depList = list - } else if lang == constants.Nodejs { - list, err := services.GetNodejsDepList(nodeId, depName) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - depList = list - } else { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("%s is not implemented", lang)) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: depList, - }) -} - -// @Summary Get installed dep list -// @Description Get installed dep list -// @Tags system -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "node id" -// @Param lang query string true "language" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id}/deps/installed [get] -func GetInstalledDepList(c *gin.Context) { - nodeId := c.Param("id") - lang := c.Query("lang") - - if !bson.IsObjectIdHex(nodeId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var depList []entity.Dependency - if services.IsMasterNode(nodeId) { - list, err := rpc.GetInstalledDepsLocal(lang) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - depList = list - } else { - list, err := rpc.GetInstalledDepsRemote(nodeId, lang) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - depList = list - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: depList, - }) -} - -// @Summary Get all dep list -// @Description Get all dep list -// @Tags system -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param lang path string true "language" -// @Param dep_nane query string true "dep name" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /system/deps/:lang [get] -func GetAllDepList(c *gin.Context) { - lang := c.Param("lang") - depName := c.Query("dep_name") - - // 获取所有依赖列表 - var list []string - if lang == constants.Python { - _list, err := services.GetPythonDepListFromRedis() - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - list = _list - } else { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("%s is not implemented", lang)) - return - } - - // 过滤依赖列表 - var depList []string - for _, name := range list { - if strings.HasPrefix(strings.ToLower(name), strings.ToLower(depName)) { - depList = append(depList, name) - } - } - - // 只取前20 - var returnList []string - for i, name := range depList { - if i >= 10 { - break - } - returnList = append(returnList, name) - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: returnList, - }) -} - -// @Summary Install dep -// @Description Install dep -// @Tags system -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "node id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id}/deps/install [Post] -func InstallDep(c *gin.Context) { - type ReqBody struct { - Lang string `json:"lang"` - DepName string `json:"dep_name"` - } - - nodeId := c.Param("id") - if !bson.IsObjectIdHex(nodeId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - if services.IsMasterNode(nodeId) { - if err := rpc.InstallDepLocal(reqBody.Lang, reqBody.DepName); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } else { - if err := rpc.InstallDepRemote(nodeId, reqBody.Lang, reqBody.DepName); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Uninstall dep -// @Description Uninstall dep -// @Tags system -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "node id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id}/deps/uninstall [Post] -func UninstallDep(c *gin.Context) { - type ReqBody struct { - Lang string `json:"lang"` - DepName string `json:"dep_name"` - } - - nodeId := c.Param("id") - if !bson.IsObjectIdHex(nodeId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - } - - if services.IsMasterNode(nodeId) { - if err := rpc.UninstallDepLocal(reqBody.Lang, reqBody.DepName); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } else { - if err := rpc.UninstallDepRemote(nodeId, reqBody.Lang, reqBody.DepName); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Get dep json -// @Description Get dep json -// @Tags system -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param lang path string true "language" -// @Param dep_name path string true "dep name" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /system/deps/{lang}/{dep_name}/json [get] -func GetDepJson(c *gin.Context) { - depName := c.Param("dep_name") - lang := c.Param("lang") - - var dep entity.Dependency - if lang == constants.Python { - _dep, err := services.FetchPythonDepInfo(depName) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - dep = _dep - } else { - HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("%s is not implemented", lang)) - return - } - - c.Header("Cache-Control", "max-age=86400") - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: dep, - }) -} - -// @Summary Install language -// @Description Install language -// @Tags system -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "node id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /nodes/{id}/langs/install [Post] -func InstallLang(c *gin.Context) { - type ReqBody struct { - Lang string `json:"lang"` - } - - nodeId := c.Param("id") - if !bson.IsObjectIdHex(nodeId) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var reqBody ReqBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - if services.IsMasterNode(nodeId) { - _, err := rpc.InstallLangLocal(reqBody.Lang) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } else { - _, err := rpc.InstallLangRemote(nodeId, reqBody.Lang) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // TODO: check if install is successful - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/routes/system_tasks.go b/backend/routes/system_tasks.go deleted file mode 100644 index 087a2fb4..00000000 --- a/backend/routes/system_tasks.go +++ /dev/null @@ -1,118 +0,0 @@ -package routes - -import ( - "crawlab/constants" - "crawlab/model" - "crawlab/services" - "crawlab/utils" - "fmt" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" -) - -func GetSystemScripts(c *gin.Context) { - HandleSuccessData(c, utils.GetSystemScripts()) -} - -func PutSystemTask(c *gin.Context) { - type TaskRequestBody struct { - RunType string `json:"run_type"` - NodeIds []bson.ObjectId `json:"node_ids"` - Script string `json:"script"` - } - - // 绑定数据 - var reqBody TaskRequestBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 校验脚本参数不为空 - if reqBody.Script == "" { - HandleErrorF(http.StatusBadRequest, c, "script cannot be empty") - return - } - - // 校验脚本参数是否存在 - var allScripts = utils.GetSystemScripts() - if !utils.StringArrayContains(allScripts, reqBody.Script) { - HandleErrorF(http.StatusBadRequest, c, "script does not exist") - return - } - - // TODO: 校验脚本是否正在运行 - - // 获取执行命令 - cmd := fmt.Sprintf("sh %s", utils.GetSystemScriptPath(reqBody.Script)) - - // 任务ID - var taskIds []string - - if reqBody.RunType == constants.RunTypeAllNodes { - // 所有节点 - nodes, err := model.GetNodeList(nil) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - for _, node := range nodes { - t := model.Task{ - SpiderId: bson.ObjectIdHex(constants.ObjectIdNull), - NodeId: node.Id, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeAllNodes, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSystem, - Cmd: cmd, - } - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } - } else if reqBody.RunType == constants.RunTypeRandom { - // 随机 - t := model.Task{ - SpiderId: bson.ObjectIdHex(constants.ObjectIdNull), - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeRandom, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSystem, - Cmd: cmd, - } - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } else if reqBody.RunType == constants.RunTypeSelectedNodes { - // 指定节点 - for _, nodeId := range reqBody.NodeIds { - t := model.Task{ - SpiderId: bson.ObjectIdHex(constants.ObjectIdNull), - NodeId: nodeId, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeSelectedNodes, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSystem, - Cmd: cmd, - } - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } - } else { - HandleErrorF(http.StatusInternalServerError, c, "invalid run_type") - return - } - - HandleSuccessData(c, taskIds) -} diff --git a/backend/routes/task.go b/backend/routes/task.go deleted file mode 100644 index 2a74f4e8..00000000 --- a/backend/routes/task.go +++ /dev/null @@ -1,621 +0,0 @@ -package routes - -import ( - "bytes" - "crawlab/constants" - "crawlab/model" - "crawlab/services" - "crawlab/utils" - "encoding/csv" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" -) - -type TaskListRequestData struct { - PageNum int `form:"page_num"` - PageSize int `form:"page_size"` - NodeId string `form:"node_id"` - SpiderId string `form:"spider_id"` - ScheduleId string `form:"schedule_id"` - Status string `form:"status"` - Type string `form:"type"` -} - -type TaskResultsRequestData struct { - PageNum int `form:"page_num"` - PageSize int `form:"page_size"` -} - -// @Summary Get task list -// @Description Get task list -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param data body routes.TaskListRequestData true "req data" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks [get] -func GetTaskList(c *gin.Context) { - // 绑定数据 - data := TaskListRequestData{} - if err := c.ShouldBindQuery(&data); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - if data.PageNum == 0 { - data.PageNum = 1 - } - if data.PageSize == 0 { - data.PageSize = 10 - } - - // 过滤条件 - query := bson.M{} - if data.NodeId != "" { - query["node_id"] = bson.ObjectIdHex(data.NodeId) - } - if data.SpiderId != "" { - query["spider_id"] = bson.ObjectIdHex(data.SpiderId) - } - // 根据任务状态获取task列表 - if data.Status != "" { - query["status"] = data.Status - } - if data.ScheduleId != "" { - query["schedule_id"] = bson.ObjectIdHex(data.ScheduleId) - } - if data.Type != "" { - query["type"] = data.Type - } - - // 获取校验 - query = services.GetAuthQuery(query, c) - - // 获取任务列表 - tasks, err := model.GetTaskList(query, (data.PageNum-1)*data.PageSize, data.PageSize, "-create_ts") - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取总任务数 - total, err := model.GetTaskListTotal(query) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Total: total, - Data: tasks, - }) -} - -// @Summary Get task -// @Description Get task -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "task id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks/{id} [get] -func GetTask(c *gin.Context) { - id := c.Param("id") - result, err := model.GetTask(id) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccessData(c, result) -} - -// @Summary Put task -// @Description Put task -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks [put] -func PutTask(c *gin.Context) { - type TaskRequestBody struct { - SpiderId bson.ObjectId `json:"spider_id"` - RunType string `json:"run_type"` - NodeIds []bson.ObjectId `json:"node_ids"` - Param string `json:"param"` - } - - // 绑定数据 - var reqBody TaskRequestBody - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 任务ID - var taskIds []string - - if reqBody.RunType == constants.RunTypeAllNodes { - // 所有节点 - nodes, err := model.GetNodeList(nil) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - for _, node := range nodes { - t := model.Task{ - SpiderId: reqBody.SpiderId, - NodeId: node.Id, - Param: reqBody.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeAllNodes, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - taskIds = append(taskIds, id) - } - } else if reqBody.RunType == constants.RunTypeRandom { - // 随机 - t := model.Task{ - SpiderId: reqBody.SpiderId, - Param: reqBody.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeRandom, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } else if reqBody.RunType == constants.RunTypeSelectedNodes { - // 指定节点 - for _, nodeId := range reqBody.NodeIds { - t := model.Task{ - SpiderId: reqBody.SpiderId, - NodeId: nodeId, - Param: reqBody.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeSelectedNodes, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } - } else { - HandleErrorF(http.StatusInternalServerError, c, "invalid run_type") - return - } - - HandleSuccessData(c, taskIds) -} - -func PutBatchTasks(c *gin.Context) { - var tasks []model.Task - if err := c.ShouldBindJSON(&tasks); err != nil { - HandleError(http.StatusOK, c, err) - return - } - var taskIds []string - for _, t := range tasks { - if t.RunType == constants.RunTypeAllNodes { - // 所有节点 - nodes, err := model.GetNodeList(nil) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - for _, node := range nodes { - t := model.Task{ - SpiderId: t.SpiderId, - NodeId: node.Id, - Param: t.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeAllNodes, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } - } else if t.RunType == constants.RunTypeRandom { - // 随机 - t := model.Task{ - SpiderId: t.SpiderId, - Param: t.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeRandom, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } else if t.RunType == constants.RunTypeSelectedNodes { - // 指定节点 - for _, nodeId := range t.NodeIds { - t := model.Task{ - SpiderId: t.SpiderId, - NodeId: bson.ObjectIdHex(nodeId), - Param: t.Param, - UserId: services.GetCurrentUserId(c), - RunType: constants.RunTypeSelectedNodes, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: constants.TaskTypeSpider, - } - - id, err := services.AddTask(t) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - taskIds = append(taskIds, id) - } - } else { - HandleErrorF(http.StatusInternalServerError, c, "invalid run_type") - return - } - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: taskIds, - }) -} - -// @Summary Delete task -// @Description Delete task -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param status query string true "task status" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks_by_status [delete] -func DeleteTaskByStatus(c *gin.Context) { - status := c.Query("status") - - //删除相应的日志文件 - if err := services.RemoveLogByTaskStatus(status); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - //删除该状态下的task - if err := model.RemoveTaskByStatus(status); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - HandleSuccess(c) -} - -// 删除多个任务 - -// @Summary Delete tasks -// @Description Delete tasks -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks [delete] -func DeleteSelectedTask(c *gin.Context) { - ids := make(map[string][]string) - if err := c.ShouldBindJSON(&ids); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - list := ids["ids"] - for _, id := range list { - if err := services.RemoveLogByTaskId(id); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - if err := model.RemoveTask(id); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - HandleSuccess(c) -} - -// 删除单个任务 - -// @Summary Delete task -// @Description Delete task -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "task id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /task/{id} [delete] -func DeleteTask(c *gin.Context) { - id := c.Param("id") - // 删除日志文件 - if err := services.RemoveLogByTaskId(id); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - // 删除task - if err := model.RemoveTask(id); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccess(c) -} - -func CancelSelectedTask(c *gin.Context) { - ids := make(map[string][]string) - if err := c.ShouldBindJSON(&ids); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - list := ids["ids"] - for _, id := range list { - if err := services.CancelTask(id); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - HandleSuccess(c) -} - -func RestartSelectedTask(c *gin.Context) { - ids := make(map[string][]string) - if err := c.ShouldBindJSON(&ids); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - list := ids["ids"] - for _, id := range list { - if err := services.RestartTask(id, services.GetCurrentUserId(c)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - HandleSuccess(c) -} - -// @Summary Get task log -// @Description Get task log -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "task id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks/{id}/log [delete] -func GetTaskLog(c *gin.Context) { - type RequestData struct { - PageNum int `form:"page_num"` - PageSize int `form:"page_size"` - Keyword string `form:"keyword"` - } - id := c.Param("id") - var reqData RequestData - if err := c.ShouldBindQuery(&reqData); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - logItems, logTotal, err := services.GetTaskLog(id, reqData.Keyword, reqData.PageNum, reqData.PageSize) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Data: logItems, - Total: logTotal, - }) -} - -// @Summary Get task error log -// @Description Get task error log -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "task id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks/{id}/error-log [delete] -func GetTaskErrorLog(c *gin.Context) { - id := c.Param("id") - u := services.GetCurrentUser(c) - errLogItems, err := services.GetTaskErrorLog(id, u.Setting.MaxErrorLog) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: errLogItems, - }) -} - -// @Summary Get task list -// @Description Get task list -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param data body routes.TaskResultsRequestData true "req data" -// @Param id path string true "task id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks/{id}/results [get] -func GetTaskResults(c *gin.Context) { - id := c.Param("id") - // 绑定数据 - data := TaskResultsRequestData{} - if err := c.ShouldBindQuery(&data); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取任务 - task, err := model.GetTask(id) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取结果 - results, total, err := task.GetResults(data.PageNum, data.PageSize) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Data: results, - Total: total, - }) -} - -// @Summary Get task results -// @Description Get task results -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "task id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks/{id}/results/download [get] -func DownloadTaskResultsCsv(c *gin.Context) { - id := c.Param("id") - // 获取任务 - task, err := model.GetTask(id) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取结果 - results, _, err := task.GetResults(1, constants.Infinite) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 字段列表 - var columns []string - if len(results) == 0 { - columns = []string{} - } else { - item := results[0].(bson.M) - for key := range item { - columns = append(columns, key) - } - } - - // 缓冲 - bytesBuffer := &bytes.Buffer{} - - // 写入UTF-8 BOM,避免使用Microsoft Excel打开乱码 - bytesBuffer.WriteString("\xEF\xBB\xBF") - - writer := csv.NewWriter(bytesBuffer) - - // 写入表头 - if err := writer.Write(columns); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 写入内容 - for _, result := range results { - // 将result转换为[]string - item := result.(bson.M) - var values []string - for _, col := range columns { - value := utils.InterfaceToString(item[col]) - values = append(values, value) - } - - // 写入数据 - if err := writer.Write(values); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - } - - // 此时才会将缓冲区数据写入 - writer.Flush() - - // 设置下载的文件名 - c.Writer.Header().Set("Content-Disposition", "attachment;filename=data.csv") - - // 设置文件类型以及输出数据 - c.Data(http.StatusOK, "text/csv", bytesBuffer.Bytes()) -} - -// @Summary Cancel task -// @Description Cancel task -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "task id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks/{id}/cancel [post] -func CancelTask(c *gin.Context) { - id := c.Param("id") - if err := services.CancelTask(id); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccess(c) -} - -// @Summary Restart task -// @Description Restart task -// @Tags task -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "task id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tasks/{id}/restart [post] -func RestartTask(c *gin.Context) { - id := c.Param("id") - uid := services.GetCurrentUserId(c) - - if err := services.RestartTask(id, uid); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccess(c) -} diff --git a/backend/routes/token.go b/backend/routes/token.go deleted file mode 100644 index f62680a0..00000000 --- a/backend/routes/token.go +++ /dev/null @@ -1,96 +0,0 @@ -package routes - -import ( - "crawlab/model" - "crawlab/services" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" - "time" -) - -// @Summary Get token -// @Description token -// @Tags token -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tokens [get] -func GetTokens(c *gin.Context) { - u := services.GetCurrentUser(c) - - tokens, err := model.GetTokensByUserId(u.Id) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: tokens, - }) -} - -// @Summary Put token -// @Description token -// @Tags token -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tokens [put] -func PutToken(c *gin.Context) { - u := services.GetCurrentUser(c) - - tokenStr, err := services.MakeToken(u) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - t := model.Token{ - Id: bson.NewObjectId(), - Token: tokenStr, - UserId: u.Id, - CreateTs: time.Now(), - UpdateTs: time.Now(), - } - - if err := t.Add(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Delete token -// @Description Delete token -// @Tags token -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "token id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /tokens/{id} [delete] -func DeleteToken(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - if err := model.DeleteTokenById(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/routes/user.go b/backend/routes/user.go deleted file mode 100644 index abb77170..00000000 --- a/backend/routes/user.go +++ /dev/null @@ -1,340 +0,0 @@ -package routes - -import ( - "crawlab/constants" - "crawlab/model" - "crawlab/services" - "crawlab/services/context" - "crawlab/utils" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "github.com/pkg/errors" - "net/http" - "strings" -) - -type UserListRequestData struct { - PageNum int `form:"page_num"` - PageSize int `form:"page_size"` -} - -type UserRequestData struct { - Username string `json:"username"` - Password string `json:"password"` - Role string `json:"role"` - Email string `json:"email"` -} - -// @Summary Get user -// @Description user -// @Tags user -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "user id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /users/{id} [get] -func GetUser(c *gin.Context) { - id := c.Param("id") - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - user, err := model.GetUser(bson.ObjectIdHex(id)) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: user, - }) -} - -// @Summary Get user list -// @Description Get user list -// @Tags token -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param data body routes.UserListRequestData true "data body" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /users [get] -func GetUserList(c *gin.Context) { - // 绑定数据 - data := UserListRequestData{} - if err := c.ShouldBindQuery(&data); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - if data.PageNum == 0 { - data.PageNum = 1 - } - if data.PageSize == 0 { - data.PageNum = 10 - } - - // 获取用户列表 - users, err := model.GetUserList(nil, (data.PageNum-1)*data.PageSize, data.PageSize, "-create_ts") - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 获取总用户数 - total, err := model.GetUserListTotal(nil) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - // 去除密码 - for i := range users { - users[i].Password = "" - } - - c.JSON(http.StatusOK, ListResponse{ - Status: "ok", - Message: "success", - Data: users, - Total: total, - }) -} - -// @Summary Put user -// @Description Put user -// @Tags user -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param reqData body routes.UserRequestData true "reqData body" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /users [put] -func PutUser(c *gin.Context) { - // 绑定请求数据 - var reqData UserRequestData - if err := c.ShouldBindJSON(&reqData); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - // 默认为正常用户 - if reqData.Role == "" { - reqData.Role = constants.RoleNormal - } - - // UserId - uid := services.GetCurrentUserId(c) - - // 空 UserId 处理 - if uid == "" { - uid = bson.ObjectIdHex(constants.ObjectIdNull) - } - - // 添加用户 - if err := services.CreateNewUser(reqData.Username, reqData.Password, reqData.Role, reqData.Email, uid); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Post user -// @Description Post user -// @Tags user -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param item body model.User true "user body" -// @Param id path string true "user id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /users/{id} [post] -func PostUser(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - var item model.User - if err := c.ShouldBindJSON(&item); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - - if item.UserId.Hex() == "" { - item.UserId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - if err := model.UpdateUser(bson.ObjectIdHex(id), item); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -// @Summary Delete user -// @Description Delete user -// @Tags user -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "user id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /users/{id} [delete] -func DeleteUser(c *gin.Context) { - id := c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - - // 从数据库中删除该爬虫 - if err := model.RemoveUser(bson.ObjectIdHex(id)); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func Login(c *gin.Context) { - // 绑定请求数据 - var reqData UserRequestData - if err := c.ShouldBindJSON(&reqData); err != nil { - HandleError(http.StatusUnauthorized, c, errors.New("not authorized")) - return - } - - // 获取用户 - user, err := model.GetUserByUsername(strings.ToLower(reqData.Username)) - if err != nil { - HandleError(http.StatusUnauthorized, c, errors.New("not authorized")) - return - } - - // 校验密码 - encPassword := utils.EncryptPassword(reqData.Password) - if user.Password != encPassword { - HandleError(http.StatusUnauthorized, c, errors.New("not authorized")) - return - } - - // 获取token - tokenStr, err := services.MakeToken(&user) - if err != nil { - HandleError(http.StatusUnauthorized, c, errors.New("not authorized")) - return - } - - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: tokenStr, - }) -} - -func GetMe(c *gin.Context) { - ctx := context.WithGinContext(c) - user := ctx.User() - if user == nil { - ctx.FailedWithError(constants.ErrorUserNotFound, http.StatusUnauthorized) - return - } - ctx.Success(struct { - *model.User - Password string `json:"password,omitempty"` - }{ - User: user, - }, nil) -} - -func PostMe(c *gin.Context) { - ctx := context.WithGinContext(c) - user := ctx.User() - if user == nil { - ctx.FailedWithError(constants.ErrorUserNotFound, http.StatusUnauthorized) - return - } - var reqBody model.User - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - if reqBody.Email != "" { - user.Email = reqBody.Email - } - if reqBody.Setting.NotificationTrigger != "" { - user.Setting.NotificationTrigger = reqBody.Setting.NotificationTrigger - } - if reqBody.Setting.DingTalkRobotWebhook != "" { - user.Setting.DingTalkRobotWebhook = reqBody.Setting.DingTalkRobotWebhook - } - if reqBody.Setting.WechatRobotWebhook != "" { - user.Setting.WechatRobotWebhook = reqBody.Setting.WechatRobotWebhook - } - user.Setting.EnabledNotifications = reqBody.Setting.EnabledNotifications - user.Setting.ErrorRegexPattern = reqBody.Setting.ErrorRegexPattern - if reqBody.Setting.MaxErrorLog != 0 { - user.Setting.MaxErrorLog = reqBody.Setting.MaxErrorLog - } - user.Setting.LogExpireDuration = reqBody.Setting.LogExpireDuration - - if user.UserId.Hex() == "" { - user.UserId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - if err := user.Save(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func PostMeChangePassword(c *gin.Context) { - ctx := context.WithGinContext(c) - user := ctx.User() - if user == nil { - ctx.FailedWithError(constants.ErrorUserNotFound, http.StatusUnauthorized) - return - } - var reqBody model.User - if err := c.ShouldBindJSON(&reqBody); err != nil { - HandleErrorF(http.StatusBadRequest, c, "invalid request") - return - } - if reqBody.Password == "" { - HandleErrorF(http.StatusBadRequest, c, "password is empty") - return - } - if user.UserId.Hex() == "" { - user.UserId = bson.ObjectIdHex(constants.ObjectIdNull) - } - user.Password = utils.EncryptPassword(reqBody.Password) - if err := user.Save(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} diff --git a/backend/routes/utils.go b/backend/routes/utils.go deleted file mode 100644 index dfa5420e..00000000 --- a/backend/routes/utils.go +++ /dev/null @@ -1,39 +0,0 @@ -package routes - -import ( - "github.com/gin-gonic/gin" - "net/http" - "runtime/debug" -) - -func HandleError(statusCode int, c *gin.Context, err error) { - c.AbortWithStatusJSON(statusCode, Response{ - Status: "error", - Message: "failure", - Error: err.Error(), - }) -} - -func HandleErrorF(statusCode int, c *gin.Context, err string) { - debug.PrintStack() - c.AbortWithStatusJSON(statusCode, Response{ - Status: "ok", - Message: "error", - Error: err, - }) -} - -func HandleSuccess(c *gin.Context) { - c.AbortWithStatusJSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - }) -} - -func HandleSuccessData(c *gin.Context, data interface{}) { - c.AbortWithStatusJSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: data, - }) -} diff --git a/backend/routes/variable.go b/backend/routes/variable.go deleted file mode 100644 index 8b837538..00000000 --- a/backend/routes/variable.go +++ /dev/null @@ -1,111 +0,0 @@ -package routes - -import ( - "crawlab/model" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "net/http" -) - -// 新增 - -// @Summary Put variable -// @Description Put variable -// @Tags variable -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param variable body model.Variable true "reqData body" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /variable [put] -func PutVariable(c *gin.Context) { - var variable model.Variable - if err := c.ShouldBindJSON(&variable); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - if err := variable.Add(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccess(c) -} - -// 修改 - -// @Summary Post variable -// @Description Post variable -// @Tags variable -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param variable body model.Variable true "reqData body" -// @Param id path string true "variable id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /variable/{id} [post] -func PostVariable(c *gin.Context) { - var id = c.Param("id") - - if !bson.IsObjectIdHex(id) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var variable model.Variable - if err := c.ShouldBindJSON(&variable); err != nil { - HandleError(http.StatusBadRequest, c, err) - return - } - variable.Id = bson.ObjectIdHex(id) - if err := variable.Save(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccess(c) -} - -// 删除 - -// @Summary Delete variable -// @Description Delete variable -// @Tags variable -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Param id path string true "variable id" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /variable/{id} [delete] -func DeleteVariable(c *gin.Context) { - var idStr = c.Param("id") - if !bson.IsObjectIdHex(idStr) { - HandleErrorF(http.StatusBadRequest, c, "invalid id") - return - } - var id = bson.ObjectIdHex(idStr) - variable, err := model.GetVariable(id) - if err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - variable.Id = id - if err := variable.Delete(); err != nil { - HandleError(http.StatusInternalServerError, c, err) - return - } - HandleSuccess(c) - -} - -// 列表 - -// @Summary Get variable list -// @Description Get variable list -// @Tags variable -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /variables [get] -func GetVariableList(c *gin.Context) { - list := model.GetVariableList() - HandleSuccessData(c, list) -} diff --git a/backend/routes/version.go b/backend/routes/version.go deleted file mode 100644 index 8974e7fa..00000000 --- a/backend/routes/version.go +++ /dev/null @@ -1,30 +0,0 @@ -package routes - -import ( - "crawlab/services" - "github.com/apex/log" - "github.com/gin-gonic/gin" - "net/http" - "runtime/debug" -) - -// @Summary Get latest release -// @Description Get latest release -// @Tags version -// @Produce json -// @Param Authorization header string true "Authorization token" -// @Success 200 json string Response -// @Failure 400 json string Response -// @Router /releases/latest [get] -func GetLatestRelease(c *gin.Context) { - latestRelease, err := services.GetLatestRelease() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } - c.JSON(http.StatusOK, Response{ - Status: "ok", - Message: "success", - Data: latestRelease, - }) -} diff --git a/backend/scripts/install-chromedriver.sh b/backend/scripts/install-chromedriver.sh deleted file mode 100644 index c2e86939..00000000 --- a/backend/scripts/install-chromedriver.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -# lock global -touch /tmp/install.lock - -# lock -touch /tmp/install-chromedriver.lock - -export DEBIAN_FRONTEND=noninteractive -apt-get update -apt-get install unzip -DL=https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb -curl -sL "$DL" > /tmp/chrome.deb -apt install --no-install-recommends --no-install-suggests -y /tmp/chrome.deb -CHROMIUM_FLAGS='--no-sandbox --disable-dev-shm-usage' -sed -i '${s/$/'" $CHROMIUM_FLAGS"'/}' /opt/google/chrome/google-chrome -BASE_URL=https://chromedriver.storage.googleapis.com -VERSION=$(curl -sL "$BASE_URL/LATEST_RELEASE") -curl -sL "$BASE_URL/$VERSION/chromedriver_linux64.zip" -o /tmp/driver.zip -unzip /tmp/driver.zip -chmod 755 chromedriver -mv chromedriver /usr/local/bin - -# unlock global -rm /tmp/install.lock - -# unlock -rm /tmp/install-chromedriver.lock \ No newline at end of file diff --git a/backend/scripts/install-dotnet.sh b/backend/scripts/install-dotnet.sh deleted file mode 100755 index 73f3d0cb..00000000 --- a/backend/scripts/install-dotnet.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -# lock global -touch /tmp/install.lock - -# lock -touch /tmp/install-dotnet.lock - -wget -q https://packages.microsoft.com/config/ubuntu/16.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb -dpkg -i packages-microsoft-prod.deb -apt-get install -y apt-transport-https -apt-get update -apt-get install -y dotnet-sdk-2.1 dotnet-runtime-2.1 aspnetcore-runtime-2.1 - -# unlock global -rm /tmp/install.lock - -# unlock -rm /tmp/install-dotnet.lock diff --git a/backend/scripts/install-firefox.sh b/backend/scripts/install-firefox.sh deleted file mode 100644 index f316b3db..00000000 --- a/backend/scripts/install-firefox.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -# lock global -touch /tmp/install.lock - -# lock -touch /tmp/install-firefox.lock - -apt-get update -apt-get -y install firefox ttf-wqy-microhei ttf-wqy-zenhei xfonts-wqy -apt-get -y install libcanberra-gtk3-module - -# unlock global -rm /tmp/install.lock - -# unlock -rm /tmp/install-firefox.lock \ No newline at end of file diff --git a/backend/scripts/install-go.sh b/backend/scripts/install-go.sh deleted file mode 100644 index 44d43744..00000000 --- a/backend/scripts/install-go.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -# lock global -touch /tmp/install.lock - -# lock -touch /tmp/install-go.lock - -# install golang -apt-get update -apt-get install -y golang - -# environment variables -export GOPROXY=https://goproxy.cn -export GOPATH=/opt/go - -# unlock global -rm /tmp/install.lock - -# unlock -rm /tmp/install-go.lock \ No newline at end of file diff --git a/backend/scripts/install-java.sh b/backend/scripts/install-java.sh deleted file mode 100755 index ab797cb9..00000000 --- a/backend/scripts/install-java.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -# lock global -touch /tmp/install.lock - -# lock -touch /tmp/install-java.lock - -# install java -apt-get clean -apt-get update --fix-missing -apt-get install -y --fix-missing default-jdk -ln -s /usr/bin/java /usr/local/bin/java - -# unlock -rm /tmp/install-java.lock - -# unlock global -rm /tmp/install.lock diff --git a/backend/scripts/install-nodejs.sh b/backend/scripts/install-nodejs.sh deleted file mode 100644 index 61b4d778..00000000 --- a/backend/scripts/install-nodejs.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -# lock global -touch /tmp/install.lock - -# lock -touch /tmp/install-nodejs.lock - -# install node.js -curl -sL https://deb.nodesource.com/setup_12.x | bash - -apt-get update && apt install -y nodejs - -# install chromium -# See https://crbug.com/795759 -apt-get update && apt-get install -yq libgconf-2-4 - -# Install latest chrome dev package and fonts to support major -# charsets (Chinese, Japanese, Arabic, Hebrew, Thai and a few others) -# Note: this installs the necessary libs to make the bundled version -# of Chromium that Puppeteer -# installs, work. -apt-get update \ - && apt-get install -y wget gnupg \ - && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ - && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ - && apt-get update \ - && apt-get -y install xvfb gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 \ - libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 \ - libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 \ - libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 \ - libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget \ - && rm -rf /var/lib/apt/lists/* - -# install default dependencies -PUPPETEER_DOWNLOAD_HOST=https://npm.taobao.org/mirrors -npm config set puppeteer_download_host=https://npm.taobao.org/mirrors -npm install puppeteer-chromium-resolver crawlab-sdk -g --unsafe-perm=true --registry=https://registry.npm.taobao.org - -# unlock -rm /tmp/install-nodejs.lock - -# unlock global -rm /tmp/install.lock diff --git a/backend/scripts/install-nvm.sh b/backend/scripts/install-nvm.sh deleted file mode 100755 index d02f884c..00000000 --- a/backend/scripts/install-nvm.sh +++ /dev/null @@ -1,428 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -{ # this ensures the entire script is downloaded # - -nvm_has() { - type "$1" > /dev/null 2>&1 -} - -nvm_default_install_dir() { - [ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm" -} - -nvm_install_dir() { - if [ -n "$NVM_DIR" ]; then - printf %s "${NVM_DIR}" - else - nvm_default_install_dir - fi -} - -nvm_latest_version() { - echo "v0.35.2" -} - -nvm_profile_is_bash_or_zsh() { - local TEST_PROFILE - TEST_PROFILE="${1-}" - case "${TEST_PROFILE-}" in - *"/.bashrc" | *"/.bash_profile" | *"/.zshrc") - return - ;; - *) - return 1 - ;; - esac -} - -# -# Outputs the location to NVM depending on: -# * The availability of $NVM_SOURCE -# * The method used ("script" or "git" in the script, defaults to "git") -# NVM_SOURCE always takes precedence unless the method is "script-nvm-exec" -# -nvm_source() { - local NVM_METHOD - NVM_METHOD="$1" - local NVM_SOURCE_URL - NVM_SOURCE_URL="$NVM_SOURCE" - if [ "_$NVM_METHOD" = "_script-nvm-exec" ]; then - NVM_SOURCE_URL="https://raw.githubusercontent.com/nvm-sh/nvm/$(nvm_latest_version)/nvm-exec" - elif [ "_$NVM_METHOD" = "_script-nvm-bash-completion" ]; then - NVM_SOURCE_URL="https://raw.githubusercontent.com/nvm-sh/nvm/$(nvm_latest_version)/bash_completion" - elif [ -z "$NVM_SOURCE_URL" ]; then - if [ "_$NVM_METHOD" = "_script" ]; then - NVM_SOURCE_URL="https://raw.githubusercontent.com/nvm-sh/nvm/$(nvm_latest_version)/nvm.sh" - elif [ "_$NVM_METHOD" = "_git" ] || [ -z "$NVM_METHOD" ]; then - NVM_SOURCE_URL="https://github.com/nvm-sh/nvm.git" - else - echo >&2 "Unexpected value \"$NVM_METHOD\" for \$NVM_METHOD" - return 1 - fi - fi - echo "$NVM_SOURCE_URL" -} - -# -# Node.js version to install -# -nvm_node_version() { - echo "$NODE_VERSION" -} - -nvm_download() { - if nvm_has "curl"; then - curl --compressed -q "$@" - elif nvm_has "wget"; then - # Emulate curl with wget - ARGS=$(echo "$*" | command sed -e 's/--progress-bar /--progress=bar /' \ - -e 's/-L //' \ - -e 's/--compressed //' \ - -e 's/-I /--server-response /' \ - -e 's/-s /-q /' \ - -e 's/-o /-O /' \ - -e 's/-C - /-c /') - # shellcheck disable=SC2086 - eval wget $ARGS - fi -} - -install_nvm_from_git() { - local INSTALL_DIR - INSTALL_DIR="$(nvm_install_dir)" - - if [ -d "$INSTALL_DIR/.git" ]; then - echo "=> nvm is already installed in $INSTALL_DIR, trying to update using git" - command printf '\r=> ' - command git --git-dir="$INSTALL_DIR"/.git --work-tree="$INSTALL_DIR" fetch origin tag "$(nvm_latest_version)" --depth=1 2> /dev/null || { - echo >&2 "Failed to update nvm, run 'git fetch' in $INSTALL_DIR yourself." - exit 1 - } - else - # Cloning to $INSTALL_DIR - echo "=> Downloading nvm from git to '$INSTALL_DIR'" - command printf '\r=> ' - mkdir -p "${INSTALL_DIR}" - if [ "$(ls -A "${INSTALL_DIR}")" ]; then - command git init "${INSTALL_DIR}" || { - echo >&2 'Failed to initialize nvm repo. Please report this!' - exit 2 - } - command git --git-dir="${INSTALL_DIR}/.git" remote add origin "$(nvm_source)" 2> /dev/null \ - || command git --git-dir="${INSTALL_DIR}/.git" remote set-url origin "$(nvm_source)" || { - echo >&2 'Failed to add remote "origin" (or set the URL). Please report this!' - exit 2 - } - command git --git-dir="${INSTALL_DIR}/.git" fetch origin tag "$(nvm_latest_version)" --depth=1 || { - echo >&2 'Failed to fetch origin with tags. Please report this!' - exit 2 - } - else - command git -c advice.detachedHead=false clone "$(nvm_source)" -b "$(nvm_latest_version)" --depth=1 "${INSTALL_DIR}" || { - echo >&2 'Failed to clone nvm repo. Please report this!' - exit 2 - } - fi - fi - command git -c advice.detachedHead=false --git-dir="$INSTALL_DIR"/.git --work-tree="$INSTALL_DIR" checkout -f --quiet "$(nvm_latest_version)" - if [ -n "$(command git --git-dir="$INSTALL_DIR"/.git --work-tree="$INSTALL_DIR" show-ref refs/heads/master)" ]; then - if command git --git-dir="$INSTALL_DIR"/.git --work-tree="$INSTALL_DIR" branch --quiet 2>/dev/null; then - command git --git-dir="$INSTALL_DIR"/.git --work-tree="$INSTALL_DIR" branch --quiet -D master >/dev/null 2>&1 - else - echo >&2 "Your version of git is out of date. Please update it!" - command git --git-dir="$INSTALL_DIR"/.git --work-tree="$INSTALL_DIR" branch -D master >/dev/null 2>&1 - fi - fi - - echo "=> Compressing and cleaning up git repository" - if ! command git --git-dir="$INSTALL_DIR"/.git --work-tree="$INSTALL_DIR" reflog expire --expire=now --all; then - echo >&2 "Your version of git is out of date. Please update it!" - fi - if ! command git --git-dir="$INSTALL_DIR"/.git --work-tree="$INSTALL_DIR" gc --auto --aggressive --prune=now ; then - echo >&2 "Your version of git is out of date. Please update it!" - fi - return -} - -# -# Automatically install Node.js -# -nvm_install_node() { - local NODE_VERSION_LOCAL - NODE_VERSION_LOCAL="$(nvm_node_version)" - - if [ -z "$NODE_VERSION_LOCAL" ]; then - return 0 - fi - - echo "=> Installing Node.js version $NODE_VERSION_LOCAL" - nvm install "$NODE_VERSION_LOCAL" - local CURRENT_NVM_NODE - - CURRENT_NVM_NODE="$(nvm_version current)" - if [ "$(nvm_version "$NODE_VERSION_LOCAL")" == "$CURRENT_NVM_NODE" ]; then - echo "=> Node.js version $NODE_VERSION_LOCAL has been successfully installed" - else - echo >&2 "Failed to install Node.js $NODE_VERSION_LOCAL" - fi -} - -install_nvm_as_script() { - local INSTALL_DIR - INSTALL_DIR="$(nvm_install_dir)" - local NVM_SOURCE_LOCAL - NVM_SOURCE_LOCAL="$(nvm_source script)" - local NVM_EXEC_SOURCE - NVM_EXEC_SOURCE="$(nvm_source script-nvm-exec)" - local NVM_BASH_COMPLETION_SOURCE - NVM_BASH_COMPLETION_SOURCE="$(nvm_source script-nvm-bash-completion)" - - # Downloading to $INSTALL_DIR - mkdir -p "$INSTALL_DIR" - if [ -f "$INSTALL_DIR/nvm.sh" ]; then - echo "=> nvm is already installed in $INSTALL_DIR, trying to update the script" - else - echo "=> Downloading nvm as script to '$INSTALL_DIR'" - fi - nvm_download -s "$NVM_SOURCE_LOCAL" -o "$INSTALL_DIR/nvm.sh" || { - echo >&2 "Failed to download '$NVM_SOURCE_LOCAL'" - return 1 - } & - nvm_download -s "$NVM_EXEC_SOURCE" -o "$INSTALL_DIR/nvm-exec" || { - echo >&2 "Failed to download '$NVM_EXEC_SOURCE'" - return 2 - } & - nvm_download -s "$NVM_BASH_COMPLETION_SOURCE" -o "$INSTALL_DIR/bash_completion" || { - echo >&2 "Failed to download '$NVM_BASH_COMPLETION_SOURCE'" - return 2 - } & - for job in $(jobs -p | command sort) - do - wait "$job" || return $? - done - chmod a+x "$INSTALL_DIR/nvm-exec" || { - echo >&2 "Failed to mark '$INSTALL_DIR/nvm-exec' as executable" - return 3 - } -} - -nvm_try_profile() { - if [ -z "${1-}" ] || [ ! -f "${1}" ]; then - return 1 - fi - echo "${1}" -} - -# -# Detect profile file if not specified as environment variable -# (eg: PROFILE=~/.myprofile) -# The echo'ed path is guaranteed to be an existing file -# Otherwise, an empty string is returned -# -nvm_detect_profile() { - if [ "${PROFILE-}" = '/dev/null' ]; then - # the user has specifically requested NOT to have nvm touch their profile - return - fi - - if [ -n "${PROFILE}" ] && [ -f "${PROFILE}" ]; then - echo "${PROFILE}" - return - fi - - local DETECTED_PROFILE - DETECTED_PROFILE='' - - if [ -n "${BASH_VERSION-}" ]; then - if [ -f "$HOME/.bashrc" ]; then - DETECTED_PROFILE="$HOME/.bashrc" - elif [ -f "$HOME/.bash_profile" ]; then - DETECTED_PROFILE="$HOME/.bash_profile" - fi - elif [ -n "${ZSH_VERSION-}" ]; then - DETECTED_PROFILE="$HOME/.zshrc" - fi - - if [ -z "$DETECTED_PROFILE" ]; then - for EACH_PROFILE in ".profile" ".bashrc" ".bash_profile" ".zshrc" - do - if DETECTED_PROFILE="$(nvm_try_profile "${HOME}/${EACH_PROFILE}")"; then - break - fi - done - fi - - if [ -n "$DETECTED_PROFILE" ]; then - echo "$DETECTED_PROFILE" - fi -} - -# -# Check whether the user has any globally-installed npm modules in their system -# Node, and warn them if so. -# -nvm_check_global_modules() { - command -v npm >/dev/null 2>&1 || return 0 - - local NPM_VERSION - NPM_VERSION="$(npm --version)" - NPM_VERSION="${NPM_VERSION:--1}" - [ "${NPM_VERSION%%[!-0-9]*}" -gt 0 ] || return 0 - - local NPM_GLOBAL_MODULES - NPM_GLOBAL_MODULES="$( - npm list -g --depth=0 | - command sed -e '/ npm@/d' -e '/ (empty)$/d' - )" - - local MODULE_COUNT - MODULE_COUNT="$( - command printf %s\\n "$NPM_GLOBAL_MODULES" | - command sed -ne '1!p' | # Remove the first line - wc -l | command tr -d ' ' # Count entries - )" - - if [ "${MODULE_COUNT}" != '0' ]; then - # shellcheck disable=SC2016 - echo '=> You currently have modules installed globally with `npm`. These will no' - # shellcheck disable=SC2016 - echo '=> longer be linked to the active version of Node when you install a new node' - # shellcheck disable=SC2016 - echo '=> with `nvm`; and they may (depending on how you construct your `$PATH`)' - # shellcheck disable=SC2016 - echo '=> override the binaries of modules installed with `nvm`:' - echo - - command printf %s\\n "$NPM_GLOBAL_MODULES" - echo '=> If you wish to uninstall them at a later point (or re-install them under your' - # shellcheck disable=SC2016 - echo '=> `nvm` Nodes), you can remove them from the system Node as follows:' - echo - echo ' $ nvm use system' - echo ' $ npm uninstall -g a_module' - echo - fi -} - -nvm_do_install() { - if [ -n "${NVM_DIR-}" ] && ! [ -d "${NVM_DIR}" ]; then - if [ -e "${NVM_DIR}" ]; then - echo >&2 "File \"${NVM_DIR}\" has the same name as installation directory." - exit 1 - fi - - if [ "${NVM_DIR}" = "$(nvm_default_install_dir)" ]; then - mkdir "${NVM_DIR}" - else - echo >&2 "You have \$NVM_DIR set to \"${NVM_DIR}\", but that directory does not exist. Check your profile files and environment." - exit 1 - fi - fi - if [ -z "${METHOD}" ]; then - # Autodetect install method - if nvm_has git; then - install_nvm_from_git - elif nvm_has nvm_download; then - install_nvm_as_script - else - echo >&2 'You need git, curl, or wget to install nvm' - exit 1 - fi - elif [ "${METHOD}" = 'git' ]; then - if ! nvm_has git; then - echo >&2 "You need git to install nvm" - exit 1 - fi - install_nvm_from_git - elif [ "${METHOD}" = 'script' ]; then - if ! nvm_has nvm_download; then - echo >&2 "You need curl or wget to install nvm" - exit 1 - fi - install_nvm_as_script - else - echo >&2 "The environment variable \$METHOD is set to \"${METHOD}\", which is not recognized as a valid installation method." - exit 1 - fi - - echo - - local NVM_PROFILE - NVM_PROFILE="$(nvm_detect_profile)" - local PROFILE_INSTALL_DIR - PROFILE_INSTALL_DIR="$(nvm_install_dir | command sed "s:^$HOME:\$HOME:")" - - SOURCE_STR="\\nexport NVM_DIR=\"${PROFILE_INSTALL_DIR}\"\\n[ -s \"\$NVM_DIR/nvm.sh\" ] && \\. \"\$NVM_DIR/nvm.sh\" # This loads nvm\\n" - - # shellcheck disable=SC2016 - COMPLETION_STR='[ -s "$NVM_DIR/bash_completion" ] && \. "$NVM_DIR/bash_completion" # This loads nvm bash_completion\n' - BASH_OR_ZSH=false - - if [ -z "${NVM_PROFILE-}" ] ; then - local TRIED_PROFILE - if [ -n "${PROFILE}" ]; then - TRIED_PROFILE="${NVM_PROFILE} (as defined in \$PROFILE), " - fi - echo "=> Profile not found. Tried ${TRIED_PROFILE-}~/.bashrc, ~/.bash_profile, ~/.zshrc, and ~/.profile." - echo "=> Create one of them and run this script again" - echo " OR" - echo "=> Append the following lines to the correct file yourself:" - command printf "${SOURCE_STR}" - echo - else - if nvm_profile_is_bash_or_zsh "${NVM_PROFILE-}"; then - BASH_OR_ZSH=true - fi - if ! command grep -qc '/nvm.sh' "$NVM_PROFILE"; then - echo "=> Appending nvm source string to $NVM_PROFILE" - command printf "${SOURCE_STR}" >> "$NVM_PROFILE" - else - echo "=> nvm source string already in ${NVM_PROFILE}" - fi - # shellcheck disable=SC2016 - if ${BASH_OR_ZSH} && ! command grep -qc '$NVM_DIR/bash_completion' "$NVM_PROFILE"; then - echo "=> Appending bash_completion source string to $NVM_PROFILE" - command printf "$COMPLETION_STR" >> "$NVM_PROFILE" - else - echo "=> bash_completion source string already in ${NVM_PROFILE}" - fi - fi - if ${BASH_OR_ZSH} && [ -z "${NVM_PROFILE-}" ] ; then - echo "=> Please also append the following lines to the if you are using bash/zsh shell:" - command printf "${COMPLETION_STR}" - fi - - # Source nvm - # shellcheck source=/dev/null - \. "$(nvm_install_dir)/nvm.sh" - - nvm_check_global_modules - - nvm_install_node - - nvm_reset - - echo "=> Close and reopen your terminal to start using nvm or run the following to use it now:" - command printf "${SOURCE_STR}" - if ${BASH_OR_ZSH} ; then - command printf "${COMPLETION_STR}" - fi -} - -# -# Unsets the various functions defined -# during the execution of the install script -# -nvm_reset() { - unset -f nvm_has nvm_install_dir nvm_latest_version nvm_profile_is_bash_or_zsh \ - nvm_source nvm_node_version nvm_download install_nvm_from_git nvm_install_node \ - install_nvm_as_script nvm_try_profile nvm_detect_profile nvm_check_global_modules \ - nvm_do_install nvm_reset nvm_default_install_dir -} - -[ "_$NVM_ENV" = "_testing" ] || nvm_do_install - -} # this ensures the entire script is downloaded # \ No newline at end of file diff --git a/backend/scripts/install-php.sh b/backend/scripts/install-php.sh deleted file mode 100755 index d3d392f4..00000000 --- a/backend/scripts/install-php.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -# lock global -touch /tmp/install.lock - -# lock -touch /tmp/install-php.lock - -apt-get install -y php - -# unlock global -rm /tmp/install.lock - -# unlock -rm /tmp/install-php.lock diff --git a/backend/scripts/install-ruby.sh b/backend/scripts/install-ruby.sh deleted file mode 100755 index b6bc6fed..00000000 --- a/backend/scripts/install-ruby.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -apt-get update -apt-get install -y curl - -curl -sSL https://get.rvm.io | bash -s stable -source /etc/profile.d/rvm.sh - -echo `rvm list known` -rvm install 2.6.1 - -echo `ruby -v` diff --git a/backend/scripts/install.sh b/backend/scripts/install.sh deleted file mode 100644 index 68c5b3ac..00000000 --- a/backend/scripts/install.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# fail immediately if error -set -e - -# install node.js -if [ "${CRAWLAB_SERVER_LANG_NODE}" = "Y" ]; -then - echo "installing node.js" - /bin/sh /app/backend/scripts/install-nodejs.sh - echo "installed node.js" -fi - -# install java -if [ "${CRAWLAB_SERVER_LANG_JAVA}" = "Y" ]; -then - echo "installing java" - /bin/sh /app/backend/scripts/install-java.sh - echo "installed java" -fi - -# install dotnet -if [ "${CRAWLAB_SERVER_LANG_DOTNET}" = "Y" ]; -then - echo "installing dotnet" - /bin/sh /app/backend/scripts/install-dotnet.sh - echo "installed dotnet" -fi - -# install php -if [ "${CRAWLAB_SERVER_LANG_PHP}" = "Y" ]; -then - echo "installing php" - /bin/sh /app/backend/scripts/install-php.sh - echo "installed php" -fi - -# install go -if [ "${CRAWLAB_SERVER_LANG_GO}" = "Y" ]; -then - echo "installing go" - /bin/sh /app/backend/scripts/install-go.sh - echo "installed go" -fi diff --git a/backend/services/auth.go b/backend/services/auth.go deleted file mode 100644 index 096d9f14..00000000 --- a/backend/services/auth.go +++ /dev/null @@ -1,20 +0,0 @@ -package services - -import ( - "crawlab/constants" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" -) - -func GetAuthQuery(query bson.M, c *gin.Context) bson.M { - user := GetCurrentUser(c) - if user.Role == constants.RoleAdmin { - // 获得所有数据 - return query - } else { - // 只获取自己的数据 - query["user_id"] = user.Id - return query - } -} - diff --git a/backend/services/challenge/base.go b/backend/services/challenge/base.go deleted file mode 100644 index a7758708..00000000 --- a/backend/services/challenge/base.go +++ /dev/null @@ -1,138 +0,0 @@ -package challenge - -import ( - "crawlab/constants" - "crawlab/model" - "encoding/json" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "io/ioutil" - "path" - "runtime/debug" -) - -type Service interface { - Check() (bool, error) -} - -func GetService(name string, uid bson.ObjectId) Service { - switch name { - case constants.ChallengeLogin7d: - return &Login7dService{UserId: uid} - case constants.ChallengeLogin30d: - return &Login30dService{UserId: uid} - case constants.ChallengeLogin90d: - return &Login90dService{UserId: uid} - case constants.ChallengeLogin180d: - return &Login180dService{UserId: uid} - case constants.ChallengeCreateCustomizedSpider: - return &CreateCustomizedSpiderService{UserId: uid} - case constants.ChallengeCreateConfigurableSpider: - return &CreateConfigurableSpiderService{UserId: uid} - case constants.ChallengeCreateSchedule: - return &CreateScheduleService{UserId: uid} - case constants.ChallengeCreateNodes: - return &CreateNodesService{UserId: uid} - case constants.ChallengeRunRandom: - return &RunRandomService{UserId: uid} - case constants.ChallengeScrape1k: - return &Scrape1kService{UserId: uid} - case constants.ChallengeScrape10k: - return &Scrape10kService{UserId: uid} - case constants.ChallengeScrape100k: - return &Scrape100kService{UserId: uid} - case constants.ChallengeInstallDep: - return &InstallDepService{UserId: uid} - case constants.ChallengeInstallLang: - return &InstallLangService{UserId: uid} - case constants.ChallengeViewDisclaimer: - return &ViewDisclaimerService{UserId: uid} - case constants.ChallengeCreateUser: - return &CreateUserService{UserId: uid} - } - return nil -} - -func AddChallengeAchievement(name string, uid bson.ObjectId) error { - ch, err := model.GetChallengeByName(name) - if err != nil { - return err - } - ca := model.ChallengeAchievement{ - ChallengeId: ch.Id, - UserId: uid, - } - if err := ca.Add(); err != nil { - return err - } - return nil -} - -func CheckChallengeAndUpdate(ch model.Challenge, uid bson.ObjectId) error { - svc := GetService(ch.Name, uid) - achieved, err := svc.Check() - if err != nil { - return err - } - if achieved && !ch.Achieved { - if err := AddChallengeAchievement(ch.Name, uid); err != nil { - return err - } - } - return nil -} - -func CheckChallengeAndUpdateAll(uid bson.ObjectId) error { - challenges, err := model.GetChallengeListWithAchieved(nil, 0, constants.Infinite, "-_id", uid) - if err != nil { - return err - } - for _, ch := range challenges { - if err := CheckChallengeAndUpdate(ch, uid); err != nil { - continue - } - } - return nil -} - -func InitChallengeService() error { - // 读取文件 - contentBytes, err := ioutil.ReadFile(path.Join("data", "challenge_data.json")) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 反序列化 - var challenges []model.Challenge - if err := json.Unmarshal(contentBytes, &challenges); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - for _, ch := range challenges { - chDb, err := model.GetChallengeByName(ch.Name) - if err != nil { - continue - } - if chDb.Name == "" { - if err := ch.Add(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - } else { - ch.Id = chDb.Id - ch.CreateTs = chDb.CreateTs - if err := ch.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - } - } - - return nil -} diff --git a/backend/services/challenge/create_configurable_spider.go b/backend/services/challenge/create_configurable_spider.go deleted file mode 100644 index 45e969f7..00000000 --- a/backend/services/challenge/create_configurable_spider.go +++ /dev/null @@ -1,23 +0,0 @@ -package challenge - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type CreateConfigurableSpiderService struct { - UserId bson.ObjectId -} - -func (s *CreateConfigurableSpiderService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "type": constants.Configurable, - } - _, count, err := model.GetSpiderList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return count > 0, nil -} diff --git a/backend/services/challenge/create_customized_spider.go b/backend/services/challenge/create_customized_spider.go deleted file mode 100644 index 6c61318f..00000000 --- a/backend/services/challenge/create_customized_spider.go +++ /dev/null @@ -1,23 +0,0 @@ -package challenge - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type CreateCustomizedSpiderService struct { - UserId bson.ObjectId -} - -func (s *CreateCustomizedSpiderService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "type": constants.Customized, - } - _, count, err := model.GetSpiderList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return count > 0, nil -} diff --git a/backend/services/challenge/create_nodes.go b/backend/services/challenge/create_nodes.go deleted file mode 100644 index 42ec25f7..00000000 --- a/backend/services/challenge/create_nodes.go +++ /dev/null @@ -1,22 +0,0 @@ -package challenge - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type CreateNodesService struct { - UserId bson.ObjectId -} - -func (s *CreateNodesService) Check() (bool, error) { - query := bson.M{ - "status": constants.StatusOnline, - } - list, err := model.GetScheduleList(query) - if err != nil { - return false, err - } - return len(list) >= 3, nil -} diff --git a/backend/services/challenge/create_schedule.go b/backend/services/challenge/create_schedule.go deleted file mode 100644 index 3e0ce0e1..00000000 --- a/backend/services/challenge/create_schedule.go +++ /dev/null @@ -1,21 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type CreateScheduleService struct { - UserId bson.ObjectId -} - -func (s *CreateScheduleService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - } - list, err := model.GetScheduleList(query) - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/challenge/create_user.go b/backend/services/challenge/create_user.go deleted file mode 100644 index e0272801..00000000 --- a/backend/services/challenge/create_user.go +++ /dev/null @@ -1,21 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type CreateUserService struct { - UserId bson.ObjectId -} - -func (s *CreateUserService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - } - list, err := model.GetUserList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/challenge/install_dep.go b/backend/services/challenge/install_dep.go deleted file mode 100644 index 4730249e..00000000 --- a/backend/services/challenge/install_dep.go +++ /dev/null @@ -1,23 +0,0 @@ -package challenge - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type InstallDepService struct { - UserId bson.ObjectId -} - -func (s *InstallDepService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "type": constants.ActionTypeInstallDep, - } - list, err := model.GetActionList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/challenge/install_lang.go b/backend/services/challenge/install_lang.go deleted file mode 100644 index 15732a2f..00000000 --- a/backend/services/challenge/install_lang.go +++ /dev/null @@ -1,23 +0,0 @@ -package challenge - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type InstallLangService struct { - UserId bson.ObjectId -} - -func (s *InstallLangService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "type": constants.ActionTypeInstallLang, - } - list, err := model.GetActionList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/challenge/login_180d.go b/backend/services/challenge/login_180d.go deleted file mode 100644 index 96cc9e26..00000000 --- a/backend/services/challenge/login_180d.go +++ /dev/null @@ -1,18 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type Login180dService struct { - UserId bson.ObjectId -} - -func (s *Login180dService) Check() (bool, error) { - days, err := model.GetVisitDays(s.UserId) - if err != nil { - return false, err - } - return days >= 180, nil -} diff --git a/backend/services/challenge/login_30d.go b/backend/services/challenge/login_30d.go deleted file mode 100644 index 5234d5fe..00000000 --- a/backend/services/challenge/login_30d.go +++ /dev/null @@ -1,18 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type Login30dService struct { - UserId bson.ObjectId -} - -func (s *Login30dService) Check() (bool, error) { - days, err := model.GetVisitDays(s.UserId) - if err != nil { - return false, err - } - return days >= 30, nil -} diff --git a/backend/services/challenge/login_7d.go b/backend/services/challenge/login_7d.go deleted file mode 100644 index 91540423..00000000 --- a/backend/services/challenge/login_7d.go +++ /dev/null @@ -1,18 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type Login7dService struct { - UserId bson.ObjectId -} - -func (s *Login7dService) Check() (bool, error) { - days, err := model.GetVisitDays(s.UserId) - if err != nil { - return false, err - } - return days >= 7, nil -} diff --git a/backend/services/challenge/login_90d.go b/backend/services/challenge/login_90d.go deleted file mode 100644 index a8526b87..00000000 --- a/backend/services/challenge/login_90d.go +++ /dev/null @@ -1,18 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type Login90dService struct { - UserId bson.ObjectId -} - -func (s *Login90dService) Check() (bool, error) { - days, err := model.GetVisitDays(s.UserId) - if err != nil { - return false, err - } - return days >= 90, nil -} diff --git a/backend/services/challenge/run_random.go b/backend/services/challenge/run_random.go deleted file mode 100644 index 30c63f0c..00000000 --- a/backend/services/challenge/run_random.go +++ /dev/null @@ -1,25 +0,0 @@ -package challenge - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type RunRandomService struct { - UserId bson.ObjectId -} - -func (s *RunRandomService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "run_type": constants.RunTypeRandom, - "status": constants.StatusFinished, - "schedule_id": bson.ObjectIdHex(constants.ObjectIdNull), - } - list, err := model.GetTaskList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/challenge/scrape_100k.go b/backend/services/challenge/scrape_100k.go deleted file mode 100644 index 68a90eda..00000000 --- a/backend/services/challenge/scrape_100k.go +++ /dev/null @@ -1,24 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type Scrape100kService struct { - UserId bson.ObjectId -} - -func (s *Scrape100kService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "result_count": bson.M{ - "$gte": 100000, - }, - } - list, err := model.GetTaskList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/challenge/scrape_10k.go b/backend/services/challenge/scrape_10k.go deleted file mode 100644 index ae70b450..00000000 --- a/backend/services/challenge/scrape_10k.go +++ /dev/null @@ -1,24 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type Scrape10kService struct { - UserId bson.ObjectId -} - -func (s *Scrape10kService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "result_count": bson.M{ - "$gte": 10000, - }, - } - list, err := model.GetTaskList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/challenge/scrape_1k.go b/backend/services/challenge/scrape_1k.go deleted file mode 100644 index cad2469f..00000000 --- a/backend/services/challenge/scrape_1k.go +++ /dev/null @@ -1,24 +0,0 @@ -package challenge - -import ( - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type Scrape1kService struct { - UserId bson.ObjectId -} - -func (s *Scrape1kService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "result_count": bson.M{ - "$gte": 1000, - }, - } - list, err := model.GetTaskList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/challenge/view_disclaimer.go b/backend/services/challenge/view_disclaimer.go deleted file mode 100644 index fc9fe21c..00000000 --- a/backend/services/challenge/view_disclaimer.go +++ /dev/null @@ -1,23 +0,0 @@ -package challenge - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/globalsign/mgo/bson" -) - -type ViewDisclaimerService struct { - UserId bson.ObjectId -} - -func (s *ViewDisclaimerService) Check() (bool, error) { - query := bson.M{ - "user_id": s.UserId, - "type": constants.ActionTypeViewDisclaimer, - } - list, err := model.GetActionList(query, 0, 1, "-_id") - if err != nil { - return false, err - } - return len(list) > 0, nil -} diff --git a/backend/services/clean.go b/backend/services/clean.go deleted file mode 100644 index bbd3571d..00000000 --- a/backend/services/clean.go +++ /dev/null @@ -1,122 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/model" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "runtime/debug" -) - -func InitTaskCleanUserIds() { - adminUser, err := GetAdminUser() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - tasks, err := model.GetTaskList(nil, 0, constants.Infinite, "+_id") - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - for _, t := range tasks { - if !t.ScheduleId.Valid() { - t.ScheduleId = bson.ObjectIdHex(constants.ObjectIdNull) - if err := t.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - } - - if !t.UserId.Valid() { - t.UserId = adminUser.Id - if err := t.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - } - } -} - -func InitProjectCleanUserIds() { - adminUser, err := GetAdminUser() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - projects, err := model.GetProjectList(nil, "+_id") - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - for _, p := range projects { - if !p.UserId.Valid() { - p.UserId = adminUser.Id - if err := p.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - } - } -} - -func InitSpiderCleanUserIds() { - adminUser, err := GetAdminUser() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - spiders, _ := model.GetSpiderAllList(nil) - for _, s := range spiders { - if !s.UserId.Valid() { - s.UserId = adminUser.Id - if err := s.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - } - } -} - -func InitScheduleCleanUserIds() { - adminUser, err := GetAdminUser() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - schedules, _ := model.GetScheduleList(nil) - for _, s := range schedules { - if !s.UserId.Valid() { - s.UserId = adminUser.Id - if err := s.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - continue - } - } - } -} - -func InitCleanService() error { - if model.IsMaster() { - // 清理任务UserIds - InitTaskCleanUserIds() - // 清理项目UserIds - InitProjectCleanUserIds() - // 清理爬虫UserIds - InitSpiderCleanUserIds() - // 清理定时任务UserIds - InitScheduleCleanUserIds() - } - return nil -} diff --git a/backend/services/config_spider.go b/backend/services/config_spider.go deleted file mode 100644 index c2135bed..00000000 --- a/backend/services/config_spider.go +++ /dev/null @@ -1,278 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/model" - "crawlab/model/config_spider" - "crawlab/services/spider_handler" - "crawlab/utils" - "errors" - "fmt" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - uuid "github.com/satori/go.uuid" - "github.com/spf13/viper" - "gopkg.in/yaml.v2" - "os" - "path/filepath" - "runtime/debug" - "strings" -) - -func GenerateConfigSpiderFiles(spider model.Spider, configData entity.ConfigSpiderData) error { - // 校验Spiderfile正确性 - if err := ValidateSpiderfile(configData); err != nil { - return err - } - - // 构造代码生成器 - generator := config_spider.ScrapyGenerator{ - Spider: spider, - ConfigData: configData, - } - - // 生成代码 - if err := generator.Generate(); err != nil { - return err - } - - return nil -} - -// 验证Spiderfile -func ValidateSpiderfile(configData entity.ConfigSpiderData) error { - // 获取所有字段 - fields := config_spider.GetAllFields(configData) - - // 校验是否存在 start_url - if configData.StartUrl == "" { - return errors.New("spiderfile invalid: start_url is empty") - } - - // 校验是否存在 start_stage - if configData.StartStage == "" { - return errors.New("spiderfile invalid: start_stage is empty") - } - - // 校验是否存在 stages - if len(configData.Stages) == 0 { - return errors.New("spiderfile invalid: stages is empty") - } - - // 校验stages - dict := map[string]int{} - for _, stage := range configData.Stages { - stageName := stage.Name - - // stage 名称不能为空 - if stageName == "" { - return errors.New("spiderfile invalid: stage name is empty") - } - - // stage 名称不能为保留字符串 - // NOTE: 如果有其他Engine,可以扩展,默认为Scrapy - if configData.Engine == "" || configData.Engine == constants.EngineScrapy { - if strings.Contains(constants.ScrapyProtectedStageNames, stageName) { - return errors.New(fmt.Sprintf("spiderfile invalid: stage name '%s' is protected", stageName)) - } - } else { - return errors.New(fmt.Sprintf("spiderfile invalid: engine '%s' is not implemented", configData.Engine)) - } - - // stage 名称不能重复 - if dict[stageName] == 1 { - return errors.New(fmt.Sprintf("spiderfile invalid: stage name '%s' is duplicated", stageName)) - } - dict[stageName] = 1 - - // stage 字段不能为空 - if len(stage.Fields) == 0 { - return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has no fields", stageName)) - } - - // 是否包含 next_stage - hasNextStage := false - - // 遍历字段列表 - for _, field := range stage.Fields { - // stage 的 next stage 只能有一个 - if field.NextStage != "" { - if hasNextStage { - return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has more than 1 next_stage", stageName)) - } - hasNextStage = true - } - - // 字段里 css 和 xpath 只能包含一个 - if field.Css != "" && field.Xpath != "" { - return errors.New(fmt.Sprintf("spiderfile invalid: field '%s' in stage '%s' has both css and xpath set which is prohibited", field.Name, stageName)) - } - } - - // stage 里 page_css 和 page_xpath 只能包含一个 - if stage.PageCss != "" && stage.PageXpath != "" { - return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has both page_css and page_xpath set which is prohibited", stageName)) - } - - // stage 里 list_css 和 list_xpath 只能包含一个 - if stage.ListCss != "" && stage.ListXpath != "" { - return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has both list_css and list_xpath set which is prohibited", stageName)) - } - - // 如果 stage 的 is_list 为 true 但 list_css 为空,报错 - if stage.IsList && (stage.ListCss == "" && stage.ListXpath == "") { - return errors.New("spiderfile invalid: stage with is_list = true should have either list_css or list_xpath being set") - } - } - - // 校验字段唯一性 - if !IsUniqueConfigSpiderFields(fields) { - return errors.New("spiderfile invalid: fields not unique") - } - - // 字段名称不能为保留字符串 - for _, field := range fields { - if strings.Contains(constants.ScrapyProtectedFieldNames, field.Name) { - return errors.New(fmt.Sprintf("spiderfile invalid: field name '%s' is protected", field.Name)) - } - } - - return nil -} - -func IsUniqueConfigSpiderFields(fields []entity.Field) bool { - dict := map[string]int{} - for _, field := range fields { - if dict[field.Name] == 1 { - return false - } - dict[field.Name] = 1 - } - return true -} - -func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error { - spiderDir := spider.Src - - // 删除已有的爬虫文件 - for _, fInfo := range utils.ListDir(spiderDir) { - // 不删除Spiderfile - if fInfo.Name() == "Spiderfile" { - continue - } - - // 删除其他文件 - if err := os.RemoveAll(filepath.Join(spiderDir, fInfo.Name())); err != nil { - return err - } - } - - // 拷贝爬虫文件 - tplDir := "./template/scrapy" - for _, fInfo := range utils.ListDir(tplDir) { - // 跳过Spiderfile - if fInfo.Name() == "Spiderfile" { - continue - } - - srcPath := filepath.Join(tplDir, fInfo.Name()) - if fInfo.IsDir() { - dirPath := filepath.Join(spiderDir, fInfo.Name()) - if err := utils.CopyDir(srcPath, dirPath); err != nil { - return err - } - } else { - if err := utils.CopyFile(srcPath, filepath.Join(spiderDir, fInfo.Name())); err != nil { - return err - } - } - } - - // 更改爬虫文件 - if err := GenerateConfigSpiderFiles(spider, configData); err != nil { - return err - } - - // 打包为 zip 文件 - files, err := utils.GetFilesFromDir(spiderDir) - if err != nil { - return err - } - randomId := uuid.NewV4() - tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), spider.Name+"."+randomId.String()+".zip") - spiderZipFileName := spider.Name + ".zip" - if err := utils.Compress(files, tmpFilePath); err != nil { - return err - } - - // 获取 GridFS 实例 - s, gf := database.GetGridFs("files") - defer s.Close() - - // 判断文件是否已经存在 - var gfFile model.GridFs - if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil { - // 已经存在文件,则删除 - if err := gf.RemoveId(gfFile.Id); err != nil { - log.Errorf("remove grid fs error: %s", err.Error()) - debug.PrintStack() - return err - } - } - - // 上传到GridFs - fid, err := RetryUploadToGridFs(spiderZipFileName, tmpFilePath) - if err != nil { - log.Errorf("upload to grid fs error: %s", err.Error()) - return err - } - - // 保存爬虫 FileId - spider.FileId = fid - _ = spider.Save() - - // 获取爬虫同步实例 - spiderSync := spider_handler.SpiderSync{ - Spider: spider, - } - - // 获取gfFile - gfFile2 := model.GetGridFs(spider.FileId) - - // 生成MD5 - spiderSync.CreateMd5File(gfFile2.Md5) - - return nil -} - -func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error { - // Spiderfile 路径 - sfPath := filepath.Join(spider.Src, "Spiderfile") - - // 生成Yaml内容 - sfContentByte, err := yaml.Marshal(configData) - if err != nil { - return err - } - - // 打开文件 - var f *os.File - if utils.Exists(sfPath) { - f, err = os.OpenFile(sfPath, os.O_WRONLY|os.O_TRUNC, 0777) - } else { - f, err = os.OpenFile(sfPath, os.O_CREATE, 0777) - } - if err != nil { - return err - } - defer f.Close() - - // 写入内容 - if _, err := f.Write(sfContentByte); err != nil { - return err - } - - return nil -} diff --git a/backend/services/context/context.go b/backend/services/context/context.go deleted file mode 100644 index 760be3d7..00000000 --- a/backend/services/context/context.go +++ /dev/null @@ -1,99 +0,0 @@ -package context - -import ( - "crawlab/constants" - "crawlab/errors" - "crawlab/model" - "fmt" - "github.com/apex/log" - "github.com/gin-gonic/gin" - "github.com/go-playground/validator/v10" - errors2 "github.com/pkg/errors" - "net/http" - "runtime/debug" -) - -type Context struct { - *gin.Context -} - -func (c *Context) User() *model.User { - userIfe, exists := c.Get(constants.ContextUser) - if !exists { - return nil - } - user, ok := userIfe.(*model.User) - if !ok { - return nil - } - return user -} -func (c *Context) Success(data interface{}, metas ...interface{}) { - var meta interface{} - if len(metas) == 0 { - meta = gin.H{} - } else { - meta = metas[0] - } - if data == nil { - data = gin.H{} - } - c.JSON(http.StatusOK, gin.H{ - "status": "ok", - "message": "success", - "data": data, - "meta": meta, - "error": "", - }) -} -func (c *Context) Failed(err error, variables ...interface{}) { - c.failed(err, http.StatusOK, variables...) -} -func (c *Context) failed(err error, httpCode int, variables ...interface{}) { - errStr := err.Error() - if len(variables) > 0 { - errStr = fmt.Sprintf(errStr, variables...) - } - log.Errorf("handle error:" + errStr) - debug.PrintStack() - causeError := errors2.Cause(err) - switch causeError.(type) { - case errors.OPError: - opError := causeError.(errors.OPError) - - c.AbortWithStatusJSON(opError.HttpCode, gin.H{ - "status": "ok", - "message": "error", - "error": errStr, - }) - - case validator.ValidationErrors: - validatorErrors := causeError.(validator.ValidationErrors) - //firstError := validatorErrors[0].(validator.FieldError) - c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{ - "status": "ok", - "message": "error", - "error": validatorErrors.Error(), - }) - default: - fmt.Println("deprecated....") - c.AbortWithStatusJSON(httpCode, gin.H{ - "status": "ok", - "message": "error", - "error": errStr, - }) - } -} -func (c *Context) FailedWithError(err error, httpCode ...int) { - - var code = 200 - if len(httpCode) > 0 { - code = httpCode[0] - } - c.failed(err, code) - -} - -func WithGinContext(context *gin.Context) *Context { - return &Context{Context: context} -} diff --git a/backend/services/doc.go b/backend/services/doc.go deleted file mode 100644 index 572e5cb4..00000000 --- a/backend/services/doc.go +++ /dev/null @@ -1,27 +0,0 @@ -package services - -import ( - "github.com/apex/log" - "github.com/imroc/req" - "runtime/debug" -) - -func GetDocs() (data string, err error) { - // 获取远端数据 - res, err := req.Get("https://docs.crawlab.cn/search_plus_index.json") - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return data, err - } - - // 反序列化 - data, err = res.ToString() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return data, err - } - - return data, nil -} diff --git a/backend/services/file.go b/backend/services/file.go deleted file mode 100644 index d126fcab..00000000 --- a/backend/services/file.go +++ /dev/null @@ -1,65 +0,0 @@ -package services - -import ( - "crawlab/model" - "github.com/apex/log" - "os" - "path" - "runtime/debug" - "strings" -) - -func GetFileNodeTree(dstPath string, level int) (f model.File, err error) { - return getFileNodeTree(dstPath, level, dstPath) -} - -func getFileNodeTree(dstPath string, level int, rootPath string) (f model.File, err error) { - dstF, err := os.Open(dstPath) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return f, err - } - defer dstF.Close() - fileInfo, err := dstF.Stat() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return f, nil - } - if !fileInfo.IsDir() { //如果dstF是文件 - return model.File{ - Label: fileInfo.Name(), - Name: fileInfo.Name(), - Path: strings.Replace(dstPath, rootPath, "", -1), - IsDir: false, - Size: fileInfo.Size(), - Children: nil, - }, nil - } else { //如果dstF是文件夹 - dir, err := dstF.Readdir(0) //获取文件夹下各个文件或文件夹的fileInfo - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return f, nil - } - f = model.File{ - Label: path.Base(dstPath), - Name: path.Base(dstPath), - Path: strings.Replace(dstPath, rootPath, "", -1), - IsDir: true, - Size: 0, - Children: nil, - } - for _, subFileInfo := range dir { - subFileNode, err := getFileNodeTree(path.Join(dstPath, subFileInfo.Name()), level+1, rootPath) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return f, err - } - f.Children = append(f.Children, subFileNode) - } - return f, nil - } -} diff --git a/backend/services/git.go b/backend/services/git.go deleted file mode 100644 index 97d4ba52..00000000 --- a/backend/services/git.go +++ /dev/null @@ -1,603 +0,0 @@ -package services - -import ( - "crawlab/lib/cron" - "crawlab/model" - "crawlab/services/spider_handler" - "crawlab/utils" - "fmt" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "gopkg.in/src-d/go-git.v4" - "gopkg.in/src-d/go-git.v4/config" - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/object" - "gopkg.in/src-d/go-git.v4/plumbing/transport/ssh" - "gopkg.in/src-d/go-git.v4/storage/memory" - "io/ioutil" - "net/url" - "os" - "path" - "regexp" - "runtime/debug" - "strings" - "time" -) - -var GitCron *GitCronScheduler - -type GitCronScheduler struct { - cron *cron.Cron -} - -type GitBranch struct { - Hash string `json:"hash"` - Name string `json:"name"` - Label string `json:"label"` -} - -type GitTag struct { - Hash string `json:"hash"` - Name string `json:"name"` - Label string `json:"label"` -} - -type GitCommit struct { - Hash string `json:"hash"` - TreeHash string `json:"tree_hash"` - Author string `json:"author"` - Email string `json:"email"` - Message string `json:"message"` - IsHead bool `json:"is_head"` - Ts time.Time `json:"ts"` - Branches []GitBranch `json:"branches"` - RemoteBranches []GitBranch `json:"remote_branches"` - Tags []GitTag `json:"tags"` -} - -func (g *GitCronScheduler) Start() error { - c := cron.New(cron.WithSeconds()) - - // 启动cron服务 - g.cron.Start() - - // 更新任务列表 - if err := g.Update(); err != nil { - log.Errorf("update scheduler error: %s", err.Error()) - debug.PrintStack() - return err - } - - // 每30秒更新一次任务列表 - spec := "*/30 * * * * *" - if _, err := c.AddFunc(spec, UpdateGitCron); err != nil { - log.Errorf("add func update schedulers error: %s", err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func (g *GitCronScheduler) RemoveAll() { - entries := g.cron.Entries() - for i := 0; i < len(entries); i++ { - g.cron.Remove(entries[i].ID) - } -} - -func (g *GitCronScheduler) Update() error { - // 删除所有定时任务 - g.RemoveAll() - - // 获取开启 Git 自动同步的爬虫 - spiders, err := model.GetSpiderAllList(bson.M{"git_auto_sync": true}) - if err != nil { - log.Errorf("get spider list error: %s", err.Error()) - debug.PrintStack() - return err - } - - // 遍历任务列表 - for _, s := range spiders { - // 添加到定时任务 - if err := g.AddJob(s); err != nil { - log.Errorf("add job error: %s, job: %s, cron: %s", err.Error(), s.Name, s.GitSyncFrequency) - debug.PrintStack() - return err - } - } - - return nil -} - -func (g *GitCronScheduler) AddJob(s model.Spider) error { - spec := s.GitSyncFrequency - - // 添加定时任务 - _, err := g.cron.AddFunc(spec, AddGitCronJob(s)) - if err != nil { - log.Errorf("add func task error: %s", err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -// 保存爬虫Git同步错误 -func SaveSpiderGitSyncError(s model.Spider, errMsg string) { - s, _ = model.GetSpider(s.Id) - s.GitSyncError = errMsg - if err := s.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } -} - -// 获得Git分支 -func GetGitRemoteBranchesPlain(gitUrl string, username string, password string) (branches []string, err error) { - storage := memory.NewStorage() - u, _ := url.Parse(gitUrl) - var listOptions git.ListOptions - if strings.HasPrefix(gitUrl, "http") { - gitUrl = fmt.Sprintf( - "%s://%s:%s@%s%s", - u.Scheme, - username, - password, - u.Hostname(), - u.Path, - ) - } else { - auth, err := ssh.NewPublicKeysFromFile(username, path.Join(os.Getenv("HOME"), ".ssh", "id_rsa"), "") - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return branches, err - } - listOptions = git.ListOptions{ - Auth: auth, - } - } - remote := git.NewRemote(storage, &config.RemoteConfig{ - URLs: []string{ - gitUrl, - }}) - rfs, err := remote.List(&listOptions) - if err != nil { - return - } - for _, rf := range rfs { - if rf.Type() == plumbing.SymbolicReference { - continue - } - regex := regexp.MustCompile("refs/heads/(.*)$") - res := regex.FindStringSubmatch(rf.String()) - if len(res) > 1 { - branches = append(branches, res[1]) - } - } - - return branches, nil -} - -// 重置爬虫Git -func ResetSpiderGit(s model.Spider) (err error) { - // 删除文件夹 - if err := os.RemoveAll(s.Src); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 创建空文件夹 - if err := os.MkdirAll(s.Src, os.ModePerm); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 同步到GridFS - if err := UploadSpiderToGridFsFromMaster(s); err != nil { - return err - } - - return nil -} - -// 同步爬虫Git -func SyncSpiderGit(s model.Spider) (err error) { - // 如果 .git 不存在,初始化一个仓库 - if !utils.Exists(path.Join(s.Src, ".git")) { - _, err := git.PlainInit(s.Src, false) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - } - - // 打开 repo - repo, err := git.PlainOpen(s.Src) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - SaveSpiderGitSyncError(s, err.Error()) - return err - } - - // 生成 URL - gitUrl := s.GitUrl - if s.GitUsername != "" && s.GitPassword != "" { - u, err := url.Parse(s.GitUrl) - if err != nil { - SaveSpiderGitSyncError(s, err.Error()) - return err - } - gitUrl = fmt.Sprintf( - "%s://%s:%s@%s%s", - u.Scheme, - s.GitUsername, - s.GitPassword, - u.Hostname(), - u.Path, - ) - } - - // 创建 remote - _ = repo.DeleteRemote("origin") - _, err = repo.CreateRemote(&config.RemoteConfig{ - Name: "origin", - URLs: []string{gitUrl}, - }) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - SaveSpiderGitSyncError(s, err.Error()) - return err - } - - // 生成验证信息 - var auth ssh.AuthMethod - if !strings.HasPrefix(s.GitUrl, "http") { - // 为 SSH - regex := regexp.MustCompile("^(?:ssh://?)?([0-9a-zA-Z_]+)@") - res := regex.FindStringSubmatch(s.GitUrl) - username := s.GitUsername - if username == "" { - if len(res) > 1 { - username = res[1] - } else { - username = "git" - } - } - auth, err = ssh.NewPublicKeysFromFile(username, path.Join(os.Getenv("HOME"), ".ssh", "id_rsa"), "") - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - SaveSpiderGitSyncError(s, err.Error()) - return err - } - } - - // 获取 repo - _ = repo.Fetch(&git.FetchOptions{ - RemoteName: "origin", - Force: true, - Auth: auth, - Tags: git.AllTags, - }) - - // 获得 WorkTree - wt, err := repo.Worktree() - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - SaveSpiderGitSyncError(s, err.Error()) - return err - } - - // 拉取 repo - if err := wt.Pull(&git.PullOptions{ - RemoteName: "origin", - Auth: auth, - ReferenceName: plumbing.HEAD, - SingleBranch: false, - }); err != nil { - if err.Error() == "already up-to-date" { - // 检查是否为 Scrapy - sync := spider_handler.SpiderSync{Spider: s} - sync.CheckIsScrapy() - - // 同步到GridFS - if err := UploadSpiderToGridFsFromMaster(s); err != nil { - SaveSpiderGitSyncError(s, err.Error()) - return err - } - - // 如果没有错误,则保存空字符串 - SaveSpiderGitSyncError(s, "") - - return nil - } - log.Error(err.Error()) - debug.PrintStack() - SaveSpiderGitSyncError(s, err.Error()) - return err - } - - // 切换分支 - if err := wt.Checkout(&git.CheckoutOptions{ - Branch: plumbing.NewBranchReferenceName(s.GitBranch), - }); err != nil { - log.Error(err.Error()) - debug.PrintStack() - SaveSpiderGitSyncError(s, err.Error()) - return err - } - - // 同步到GridFS - if err := UploadSpiderToGridFsFromMaster(s); err != nil { - SaveSpiderGitSyncError(s, err.Error()) - return err - } - - // 获取更新后的爬虫 - s, err = model.GetSpider(s.Id) - if err != nil { - SaveSpiderGitSyncError(s, err.Error()) - return err - } - - // 检查是否为 Scrapy - sync := spider_handler.SpiderSync{Spider: s} - sync.CheckIsScrapy() - - // 如果没有错误,则保存空字符串 - SaveSpiderGitSyncError(s, "") - - return nil -} - -// 添加Git定时任务 -func AddGitCronJob(s model.Spider) func() { - return func() { - if err := SyncSpiderGit(s); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - } -} - -// 更新Git定时任务 -func UpdateGitCron() { - if err := GitCron.Update(); err != nil { - log.Errorf(err.Error()) - return - } -} - -// 获取SSH公钥 -func GetGitSshPublicKey() string { - if !utils.Exists(path.Join(os.Getenv("HOME"), ".ssh")) || - !utils.Exists(path.Join(os.Getenv("HOME"), ".ssh", "id_rsa")) || - !utils.Exists(path.Join(os.Getenv("HOME"), ".ssh", "id_rsa.pub")) { - log.Errorf("no ssh public key") - debug.PrintStack() - return "" - } - content, err := ioutil.ReadFile(path.Join(os.Getenv("HOME"), ".ssh", "id_rsa.pub")) - if err != nil { - return "" - } - return string(content) -} - -// 获取Git分支 -func GetGitBranches(s model.Spider) (branches []GitBranch, err error) { - // 打开 repo - repo, err := git.PlainOpen(s.Src) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return branches, err - } - - iter, err := repo.Branches() - if iter == nil { - return branches, nil - } - if err := iter.ForEach(func(reference *plumbing.Reference) error { - branches = append(branches, GitBranch{ - Hash: reference.Hash().String(), - Name: reference.Name().String(), - Label: reference.Name().Short(), - }) - return nil - }); err != nil { - return branches, err - } - - return branches, nil -} - -// 获取Git Tags -func GetGitTags(s model.Spider) (tags []GitTag, err error) { - // 打开 repo - repo, err := git.PlainOpen(s.Src) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return tags, err - } - - iter, err := repo.Tags() - if iter == nil { - return tags, nil - } - if err := iter.ForEach(func(reference *plumbing.Reference) error { - tags = append(tags, GitTag{ - Hash: reference.Hash().String(), - Name: reference.Name().String(), - Label: reference.Name().Short(), - }) - return nil - }); err != nil { - return tags, err - } - - return tags, nil -} - -// 获取Git Head Hash -func GetGitHeadHash(repo *git.Repository) string { - head, _ := repo.Head() - return head.Hash().String() -} - -// 获取Git远端分支 -func GetGitRemoteBranches(s model.Spider) (branches []GitBranch, err error) { - // 打开 repo - repo, err := git.PlainOpen(s.Src) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return branches, err - } - - iter, err := repo.References() - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return branches, err - } - if err := iter.ForEach(func(reference *plumbing.Reference) error { - if reference.Name().IsRemote() { - log.Infof(reference.Hash().String()) - log.Infof(reference.Name().String()) - branches = append(branches, GitBranch{ - Hash: reference.Hash().String(), - Name: reference.Name().String(), - Label: reference.Name().Short(), - }) - } - return nil - }); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return branches, err - } - return branches, err -} - -// 获取Git Commits -func GetGitCommits(s model.Spider) (commits []GitCommit, err error) { - // 打开 repo - repo, err := git.PlainOpen(s.Src) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return commits, err - } - - // 获取分支列表 - branches, err := GetGitBranches(s) - branchesDict := map[string][]GitBranch{} - for _, b := range branches { - branchesDict[b.Hash] = append(branchesDict[b.Hash], b) - } - - // 获取分支列表 - remoteBranches, err := GetGitRemoteBranches(s) - remoteBranchesDict := map[string][]GitBranch{} - for _, b := range remoteBranches { - remoteBranchesDict[b.Hash] = append(remoteBranchesDict[b.Hash], b) - } - - // 获取标签列表 - tags, err := GetGitTags(s) - tagsDict := map[string][]GitTag{} - for _, t := range tags { - tagsDict[t.Hash] = append(tagsDict[t.Hash], t) - } - - // 获取日志遍历器 - iter, err := repo.Log(&git.LogOptions{ - All: true, - }) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return commits, err - } - - // 遍历日志 - if err := iter.ForEach(func(commit *object.Commit) error { - gc := GitCommit{ - Hash: commit.Hash.String(), - TreeHash: commit.TreeHash.String(), - Message: commit.Message, - Author: commit.Author.Name, - Email: commit.Author.Email, - Ts: commit.Author.When, - IsHead: commit.Hash.String() == GetGitHeadHash(repo), - Branches: branchesDict[commit.Hash.String()], - RemoteBranches: remoteBranchesDict[commit.Hash.String()], - Tags: tagsDict[commit.Hash.String()], - } - commits = append(commits, gc) - return nil - }); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return commits, err - } - - return commits, nil -} - -func GitCheckout(s model.Spider, hash string) (err error) { - // 打开 repo - repo, err := git.PlainOpen(s.Src) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - - // 获取worktree - wt, err := repo.Worktree() - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - //判断远程origin路径是否和当前的GitUrl是同一个,如果不是删掉原来的路径,重新拉取远程代码 - remote, err := repo.Remote("origin") - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - if remote.String() != s.GitUrl { - utils.RemoveFiles(s.Src) - return SyncSpiderGit(s) - } - - // Checkout - if err := wt.Checkout(&git.CheckoutOptions{ - Hash: plumbing.NewHash(hash), - Create: false, - Force: true, - Keep: false, - }); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return err - } - - return nil -} diff --git a/backend/services/local_node/local_node.go b/backend/services/local_node/local_node.go deleted file mode 100644 index ad1321ca..00000000 --- a/backend/services/local_node/local_node.go +++ /dev/null @@ -1,25 +0,0 @@ -package local_node - -import ( - "crawlab/model" - "github.com/spf13/viper" -) - -func GetLocalNode() *LocalNode { - return localNode -} -func CurrentNode() *model.Node { - return GetLocalNode().Current() -} - -func InitLocalNode() (node *LocalNode, err error) { - registerType := viper.GetString("server.register.type") - ip := viper.GetString("server.register.ip") - customNodeName := viper.GetString("server.register.customNodeName") - - localNode, err = NewLocalNode(ip, customNodeName, registerType) - if err != nil { - return nil, err - } - return localNode, err -} diff --git a/backend/services/local_node/mongo_info.go b/backend/services/local_node/mongo_info.go deleted file mode 100644 index d8d4df06..00000000 --- a/backend/services/local_node/mongo_info.go +++ /dev/null @@ -1,62 +0,0 @@ -package local_node - -import ( - "crawlab/model" - "github.com/apex/log" - "github.com/cenkalti/backoff/v4" - "go.uber.org/atomic" - "sync" - "time" -) - -var locker atomic.Int32 - -type mongo struct { - node *model.Node - sync.RWMutex -} - -func (n *mongo) load(retry bool) (err error) { - n.Lock() - defer n.Unlock() - var node model.Node - if retry { - b := backoff.NewConstantBackOff(1 * time.Second) - err = backoff.Retry(func() error { - node, err = model.GetNodeByKey(GetLocalNode().Identify) - if err != nil { - log.WithError(err).Warnf("Get current node info from database failed. Will after %f seconds, try again.", b.NextBackOff().Seconds()) - } - return err - }, b) - } else { - node, err = model.GetNodeByKey(localNode.Identify) - } - - if err != nil { - return - } - n.node = &node - return nil -} -func (n *mongo) watch() { - timer := time.NewTicker(time.Second * 5) - for range timer.C { - if locker.CAS(0, 1) { - - err := n.load(false) - - if err != nil { - log.WithError(err).Errorf("load current node from database failed") - } - locker.Store(0) - } - continue - } -} - -func (n *mongo) Current() *model.Node { - n.RLock() - defer n.RUnlock() - return n.node -} diff --git a/backend/services/local_node/node_info.go b/backend/services/local_node/node_info.go deleted file mode 100644 index d037da6d..00000000 --- a/backend/services/local_node/node_info.go +++ /dev/null @@ -1,74 +0,0 @@ -package local_node - -import ( - "errors" - "github.com/hashicorp/go-sockaddr" - "os" -) - -var localNode *LocalNode - -type IdentifyType string - -const ( - Ip = IdentifyType("ip") - Mac = IdentifyType("mac") - Hostname = IdentifyType("hostname") -) - -type local struct { - Ip string - Mac string - Hostname string - Identify string - IdentifyType IdentifyType -} -type LocalNode struct { - local - mongo -} - -func (l *LocalNode) Ready() error { - err := localNode.load(true) - if err != nil { - return err - } - go localNode.watch() - return nil -} - -func NewLocalNode(ip string, identify string, identifyTypeString string) (node *LocalNode, err error) { - addrs, err := sockaddr.GetPrivateInterfaces() - if ip == "" { - if err != nil { - return node, err - } - if len(addrs) == 0 { - return node, errors.New("address not found") - } - ipaddr := *sockaddr.ToIPAddr(addrs[0].SockAddr) - ip = ipaddr.NetIP().String() - } - - mac := addrs[0].HardwareAddr.String() - hostname, err := os.Hostname() - if err != nil { - return node, err - } - local := local{Ip: ip, Mac: mac, Hostname: hostname} - switch IdentifyType(identifyTypeString) { - case Ip: - local.Identify = local.Ip - local.IdentifyType = Ip - case Mac: - local.Identify = local.Mac - local.IdentifyType = Mac - case Hostname: - local.Identify = local.Hostname - local.IdentifyType = Hostname - default: - local.Identify = identify - local.IdentifyType = IdentifyType(identifyTypeString) - } - return &LocalNode{local: local, mongo: mongo{}}, nil -} diff --git a/backend/services/log.go b/backend/services/log.go deleted file mode 100644 index 3e95a3d8..00000000 --- a/backend/services/log.go +++ /dev/null @@ -1,188 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/lib/cron" - "crawlab/model" - "crawlab/utils" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "github.com/spf13/viper" - "io/ioutil" - "os" - "path/filepath" - "runtime/debug" - "time" -) - -// 任务日志频道映射 -var TaskLogChanMap = utils.NewChanMap() - -// 定时删除日志 -func DeleteLogPeriodically() { - logDir := viper.GetString("log.path") - if !utils.Exists(logDir) { - log.Error("Can Not Set Delete Logs Periodically,No Log Dir") - return - } - rd, err := ioutil.ReadDir(logDir) - if err != nil { - log.Error("Read Log Dir Failed") - return - } - - for _, fi := range rd { - if fi.IsDir() { - log.Info(filepath.Join(logDir, fi.Name())) - _ = os.RemoveAll(filepath.Join(logDir, fi.Name())) - log.Info("Delete Log File Success") - } - } - -} - -// 删除本地日志 -func RemoveLocalLog(path string) error { - if err := model.RemoveFile(path); err != nil { - log.Error("remove local file error: " + err.Error()) - return err - } - return nil -} - -// 删除远程日志 -func RemoveRemoteLog(task model.Task) error { - msg := entity.NodeMessage{ - Type: constants.MsgTypeRemoveLog, - LogPath: task.LogPath, - TaskId: task.Id, - } - // 发布获取日志消息 - channel := "nodes:" + task.NodeId.Hex() - if _, err := database.RedisClient.Publish(channel, utils.GetJson(msg)); err != nil { - log.Errorf("publish redis error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} - -// 删除日志文件 -func RemoveLogByTaskId(id string) error { - t, err := model.GetTask(id) - if err != nil { - log.Error("get task error:" + err.Error()) - return err - } - removeLog(t) - - return nil -} - -func RemoveLogByTaskStatus(status string) error { - tasks, err := model.GetTaskList(bson.M{"status": status}, 0, constants.Infinite, "-create_ts") - if err != nil { - log.Error("get tasks error:" + err.Error()) - return err - } - for _, task := range tasks { - RemoveLogByTaskId(task.Id) - } - return nil -} - -func removeLog(t model.Task) { - if err := RemoveLocalLog(t.LogPath); err != nil { - log.Errorf("remove local log error: %s", err.Error()) - debug.PrintStack() - } - if err := RemoveRemoteLog(t); err != nil { - log.Errorf("remove remote log error: %s", err.Error()) - debug.PrintStack() - } -} - -// 删除日志文件 -func RemoveLogBySpiderId(id bson.ObjectId) error { - tasks, err := model.GetTaskList(bson.M{"spider_id": id}, 0, constants.Infinite, "-create_ts") - if err != nil { - log.Errorf("get tasks error: %s", err.Error()) - debug.PrintStack() - } - for _, task := range tasks { - removeLog(task) - } - return nil -} - -// 初始化定时删除日志 -func InitDeleteLogPeriodically() error { - c := cron.New(cron.WithSeconds()) - if _, err := c.AddFunc(viper.GetString("log.deleteFrequency"), DeleteLogPeriodically); err != nil { - return err - } - - c.Start() - return nil -} - -func InitLogIndexes() error { - s, c := database.GetCol("logs") - defer s.Close() - se, ce := database.GetCol("error_logs") - defer se.Close() - - _ = c.EnsureIndex(mgo.Index{ - Key: []string{"task_id", "seq"}, - }) - _ = c.EnsureIndex(mgo.Index{ - Key: []string{"task_id", "msg"}, - }) - _ = c.EnsureIndex(mgo.Index{ - Key: []string{"expire_ts"}, - Sparse: true, - ExpireAfter: 1 * time.Second, - }) - _ = ce.EnsureIndex(mgo.Index{ - Key: []string{"task_id"}, - }) - _ = ce.EnsureIndex(mgo.Index{ - Key: []string{"log_id"}, - }) - _ = ce.EnsureIndex(mgo.Index{ - Key: []string{"expire_ts"}, - Sparse: true, - ExpireAfter: 1 * time.Second, - }) - - return nil -} - -func InitLogService() error { - logLevel := viper.GetString("log.level") - if logLevel != "" { - log.SetLevelFromString(logLevel) - } - log.Info("initialized log config successfully") - if viper.GetString("log.isDeletePeriodically") == "Y" { - if err := InitDeleteLogPeriodically(); err != nil { - log.Error("init DeletePeriodically failed") - return err - } - log.Info("initialized periodically cleaning log successfully") - } else { - log.Info("periodically cleaning log is switched off") - } - - if model.IsMaster() { - if err := InitLogIndexes(); err != nil { - log.Errorf(err.Error()) - return err - } - } - - return nil -} diff --git a/backend/services/log_test.go b/backend/services/log_test.go deleted file mode 100644 index 1e3f76d4..00000000 --- a/backend/services/log_test.go +++ /dev/null @@ -1,41 +0,0 @@ -package services - -import ( - "crawlab/config" - "crawlab/utils" - "fmt" - "github.com/apex/log" - . "github.com/smartystreets/goconvey/convey" - "github.com/spf13/viper" - "os" - "testing" -) - -func TestDeleteLogPeriodically(t *testing.T) { - Convey("Test DeleteLogPeriodically", t, func() { - err := config.InitConfig("../conf/config.yml") - So(err, ShouldBeNil) - log.Info("初始化配置成功") - logDir := viper.GetString("log.path") - log.Info(logDir) - DeleteLogPeriodically() - }) -} - -func TestGetLocalLog(t *testing.T) { - //create a log file for test - logPath := "../logs/crawlab/test.log" - f, err := os.Create(logPath) - defer utils.Close(f) - if err != nil { - fmt.Println(err) - - } else { - _, err = f.WriteString("This is for test") - fmt.Println(err) - } - - //delete the test log file - _ = os.Remove(logPath) - -} diff --git a/backend/services/msg_handler/handler.go b/backend/services/msg_handler/handler.go deleted file mode 100644 index bee4113c..00000000 --- a/backend/services/msg_handler/handler.go +++ /dev/null @@ -1,37 +0,0 @@ -package msg_handler - -import ( - "crawlab/constants" - "crawlab/entity" - "github.com/apex/log" -) - -type Handler interface { - Handle() error -} - -func GetMsgHandler(msg entity.NodeMessage) Handler { - log.Debugf("received msg , type is : %s", msg.Type) - //if msg.Type == constants.MsgTypeGetLog || msg.Type == constants.MsgTypeRemoveLog { - // // 日志相关 - // return &Log{ - // msg: msg, - // } - //} else if msg.Type == constants.MsgTypeCancelTask { - // // 任务相关 - // return &Task{ - // msg: msg, - // } - if msg.Type == constants.MsgTypeGetSystemInfo { - // 系统信息相关 - return &SystemInfo{ - msg: msg, - } - } else if msg.Type == constants.MsgTypeRemoveSpider { - // 爬虫相关 - return &Spider{ - SpiderId: msg.SpiderId, - } - } - return nil -} diff --git a/backend/services/msg_handler/msg_log.go b/backend/services/msg_handler/msg_log.go deleted file mode 100644 index 2a17ed99..00000000 --- a/backend/services/msg_handler/msg_log.go +++ /dev/null @@ -1,54 +0,0 @@ -package msg_handler - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/model" - "crawlab/utils" - "github.com/apex/log" - "runtime/debug" -) - -type Log struct { - msg entity.NodeMessage -} - -func (g *Log) Handle() error { - if g.msg.Type == constants.MsgTypeGetLog { - return g.get() - } else if g.msg.Type == constants.MsgTypeRemoveLog { - return g.remove() - } - return nil -} - -func (g *Log) get() error { - // 发出的消息 - msgSd := entity.NodeMessage{ - Type: constants.MsgTypeGetLog, - TaskId: g.msg.TaskId, - } - // 获取本地日志 - logStr, err := model.GetLocalLog(g.msg.LogPath) - if err != nil { - log.Errorf("get node local log error: %s", err.Error()) - debug.PrintStack() - msgSd.Error = err.Error() - msgSd.Log = err.Error() - } else { - msgSd.Log = utils.BytesToString(logStr) - } - // 发布消息给主节点 - if err := database.Pub(constants.ChannelMasterNode, msgSd); err != nil { - log.Errorf("pub log to master node error: %s", err.Error()) - debug.PrintStack() - return err - } - log.Infof(msgSd.Log) - return nil -} - -func (g *Log) remove() error { - return model.RemoveFile(g.msg.LogPath) -} diff --git a/backend/services/msg_handler/msg_spider.go b/backend/services/msg_handler/msg_spider.go deleted file mode 100644 index dcd6ce06..00000000 --- a/backend/services/msg_handler/msg_spider.go +++ /dev/null @@ -1,24 +0,0 @@ -package msg_handler - -import ( - "crawlab/model" - "crawlab/utils" - "github.com/globalsign/mgo/bson" - "github.com/spf13/viper" - "path/filepath" -) - -type Spider struct { - SpiderId string -} - -func (s *Spider) Handle() error { - // 移除本地的爬虫目录 - spider, err := model.GetSpider(bson.ObjectIdHex(s.SpiderId)) - if err != nil { - return err - } - path := filepath.Join(viper.GetString("spider.path"), spider.Name) - utils.RemoveFiles(path) - return nil -} diff --git a/backend/services/msg_handler/msg_system_info.go b/backend/services/msg_handler/msg_system_info.go deleted file mode 100644 index 9de5c74a..00000000 --- a/backend/services/msg_handler/msg_system_info.go +++ /dev/null @@ -1,29 +0,0 @@ -package msg_handler - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/model" -) - -type SystemInfo struct { - msg entity.NodeMessage -} - -func (s *SystemInfo) Handle() error { - // 获取环境信息 - sysInfo, err := model.GetLocalSystemInfo() - if err != nil { - return err - } - msgSd := entity.NodeMessage{ - Type: constants.MsgTypeGetSystemInfo, - NodeId: s.msg.NodeId, - SysInfo: sysInfo, - } - if err := database.Pub(constants.ChannelMasterNode, msgSd); err != nil { - return err - } - return nil -} diff --git a/backend/services/msg_handler/msg_task.go b/backend/services/msg_handler/msg_task.go deleted file mode 100644 index 21b95430..00000000 --- a/backend/services/msg_handler/msg_task.go +++ /dev/null @@ -1,40 +0,0 @@ -package msg_handler - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/model" - "crawlab/utils" - "github.com/apex/log" - "runtime/debug" - "time" -) - -type Task struct { - msg entity.NodeMessage -} - -func (t *Task) Handle() error { - log.Infof("received cancel task msg, task_id: %s", t.msg.TaskId) - // 取消任务 - ch := utils.TaskExecChanMap.ChanBlocked(t.msg.TaskId) - if ch != nil { - ch <- constants.TaskCancel - } else { - log.Infof("chan is empty, update status to abnormal") - // 节点可能被重启,找不到chan - task, err := model.GetTask(t.msg.TaskId) - if err != nil { - log.Errorf("task not found, task_id: %s", t.msg.TaskId) - debug.PrintStack() - return err - } - task.Status = constants.StatusAbnormal - task.FinishTs = time.Now() - if err := task.Save(); err != nil { - debug.PrintStack() - log.Infof("cancel task error: %s", err.Error()) - } - } - return nil -} diff --git a/backend/services/node.go b/backend/services/node.go deleted file mode 100644 index e3ef237a..00000000 --- a/backend/services/node.go +++ /dev/null @@ -1,255 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/model" - "crawlab/services/local_node" - "crawlab/utils" - "encoding/json" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "runtime/debug" - "time" -) - -type Data struct { - Key string `json:"key"` - Mac string `json:"mac"` - Ip string `json:"ip"` - Hostname string `json:"hostname"` - Name string `json:"name"` - NameType string `json:"name_type"` - - Master bool `json:"master"` - UpdateTs time.Time `json:"update_ts"` - UpdateTsUnix int64 `json:"update_ts_unix"` -} - -// 所有调用IsMasterNode的方法,都永远会在master节点执行,所以GetCurrentNode方法返回永远是master节点 -// 该ID的节点是否为主节点 -func IsMasterNode(id string) bool { - curNode := local_node.CurrentNode() - //curNode, _ := model.GetCurrentNode() - node, _ := model.GetNode(bson.ObjectIdHex(id)) - return curNode.Id == node.Id -} - -// 获取节点数据 -func GetNodeData() (Data, error) { - localNode := local_node.GetLocalNode() - key := localNode.Identify - if key == "" { - return Data{}, nil - } - - value, err := database.RedisClient.HGet("nodes", key) - data := Data{} - if err := json.Unmarshal([]byte(value), &data); err != nil { - return data, err - } - return data, err -} -func GetRedisNode(key string) (*Data, error) { - // 获取节点数据 - value, err := database.RedisClient.HGet("nodes", key) - if err != nil { - log.Errorf(err.Error()) - return nil, err - } - - // 解析节点列表数据 - var data Data - if err := json.Unmarshal([]byte(value), &data); err != nil { - log.Errorf(err.Error()) - return nil, err - } - return &data, nil -} - -// 更新所有节点状态 -func UpdateNodeStatus() { - // 从Redis获取节点keys - list, err := database.RedisClient.HScan("nodes") - if err != nil { - log.Errorf("get redis node keys error: %s", err.Error()) - return - } - var offlineKeys []string - // 遍历节点keys - for _, dataStr := range list { - var data Data - if err := json.Unmarshal([]byte(dataStr), &data); err != nil { - log.Errorf(err.Error()) - continue - } - // 如果记录的更新时间超过60秒,该节点被认为离线 - if time.Now().Unix()-data.UpdateTsUnix > 60 { - offlineKeys = append(offlineKeys, data.Key) - // 在Redis中删除该节点 - if err := database.RedisClient.HDel("nodes", data.Key); err != nil { - log.Errorf("delete redis node key error:%s, key:%s", err.Error(), data.Key) - } - continue - } - - // 处理node信息 - if err = UpdateNodeInfo(&data); err != nil { - log.Errorf(err.Error()) - continue - } - } - if len(offlineKeys) > 0 { - s, c := database.GetCol("nodes") - defer s.Close() - _, err = c.UpdateAll(bson.M{ - "key": bson.M{ - "$in": offlineKeys, - }, - }, bson.M{ - "$set": bson.M{ - "status": constants.StatusOffline, - "update_ts": time.Now(), - "update_ts_unix": time.Now().Unix(), - }, - }) - if err != nil { - log.Errorf(err.Error()) - } - } -} - -// 处理节点信息 -func UpdateNodeInfo(data *Data) (err error) { - // 更新节点信息到数据库 - s, c := database.GetCol("nodes") - defer s.Close() - - _, err = c.Upsert(bson.M{"key": data.Key}, bson.M{ - "$set": bson.M{ - "status": constants.StatusOnline, - "key": data.Key, - "name_type": data.NameType, - "ip": data.Ip, - "port": "8000", - "mac": data.Mac, - "is_master": data.Master, - "update_ts": time.Now(), - "update_ts_unix": time.Now().Unix(), - }, - "$setOnInsert": bson.M{ - "name": data.Name, - "_id": bson.NewObjectId(), - }, - }) - return err -} - -// 更新节点数据 -func UpdateNodeData() { - localNode := local_node.GetLocalNode() - key := localNode.Identify - // 构造节点数据 - data := Data{ - Key: key, - Mac: localNode.Mac, - Ip: localNode.Ip, - Hostname: localNode.Hostname, - Name: localNode.Identify, - NameType: string(localNode.IdentifyType), - Master: model.IsMaster(), - UpdateTs: time.Now(), - UpdateTsUnix: time.Now().Unix(), - } - - // 注册节点到Redis - dataBytes, err := json.Marshal(&data) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - if err := database.RedisClient.HSet("nodes", key, utils.BytesToString(dataBytes)); err != nil { - log.Errorf(err.Error()) - return - } -} - -// 发送心跳信息到Redis,每5秒发送一次 -func SendHeartBeat() { - for { - UpdateNodeData() - time.Sleep(5 * time.Second) - } -} - -// 每10秒刷新一次节点信息 -func UpdateNodeStatusPeriodically() { - for { - UpdateNodeStatus() - time.Sleep(10 * time.Second) - } -} - -// 每60秒更新异常节点信息 -func UpdateOfflineNodeTaskToAbnormalPeriodically() { - for { - nodes, err := model.GetNodeList(bson.M{"status": constants.StatusOffline}) - if err != nil { - log.Errorf("get nodes error: " + err.Error()) - debug.PrintStack() - continue - } - for _, n := range nodes { - if err := model.UpdateTaskToAbnormal(n.Id); err != nil { - log.Errorf("update task to abnormal error: " + err.Error()) - debug.PrintStack() - continue - } - } - time.Sleep(60 * time.Second) - } -} - -// 初始化节点服务 -func InitNodeService() error { - node, err := local_node.InitLocalNode() - if err != nil { - return err - } - - // 每5秒更新一次本节点信息 - go SendHeartBeat() - - // 首次更新节点数据(注册到Redis) - UpdateNodeData() - if model.IsMaster() { - err = model.UpdateMasterNodeInfo(node.Identify, node.Ip, node.Mac, node.Hostname) - if err != nil { - return err - } - } - - // 节点准备完毕 - if err = node.Ready(); err != nil { - return err - } - - // 如果为主节点 - if model.IsMaster() { - // 每10秒刷新所有节点信息 - go UpdateNodeStatusPeriodically() - - // 每60秒更新离线节点任务为异常 - go UpdateOfflineNodeTaskToAbnormalPeriodically() - } - - // 更新在当前节点执行中的任务状态为:abnormal - if err := model.UpdateTaskToAbnormal(node.Current().Id); err != nil { - debug.PrintStack() - return err - } - - return nil -} diff --git a/backend/services/notification/mail.go b/backend/services/notification/mail.go deleted file mode 100644 index 2231151b..00000000 --- a/backend/services/notification/mail.go +++ /dev/null @@ -1,138 +0,0 @@ -package notification - -import ( - "errors" - "github.com/apex/log" - "github.com/matcornic/hermes" - "gopkg.in/gomail.v2" - "net/mail" - "os" - "runtime/debug" - "strconv" -) - -func SendMail(toEmail string, toName string, subject string, content string) error { - // hermes instance - h := hermes.Hermes{ - Theme: new(hermes.Default), - Product: hermes.Product{ - Name: "Crawlab Team", - Copyright: "© 2019 Crawlab, Made by Crawlab-Team", - }, - } - - // config - port, _ := strconv.Atoi(os.Getenv("CRAWLAB_NOTIFICATION_MAIL_PORT")) - password := os.Getenv("CRAWLAB_NOTIFICATION_MAIL_SMTP_PASSWORD") - SMTPUser := os.Getenv("CRAWLAB_NOTIFICATION_MAIL_SMTP_USER") - smtpConfig := smtpAuthentication{ - Server: os.Getenv("CRAWLAB_NOTIFICATION_MAIL_SERVER"), - Port: port, - SenderEmail: os.Getenv("CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL"), - SenderIdentity: os.Getenv("CRAWLAB_NOTIFICATION_MAIL_SENDERIDENTITY"), - SMTPPassword: password, - SMTPUser: SMTPUser, - } - options := sendOptions{ - To: toEmail, - Subject: subject, - } - - // email instance - email := hermes.Email{ - Body: hermes.Body{ - Name: toName, - FreeMarkdown: hermes.Markdown(content + GetFooter()), - }, - } - - // generate html - html, err := h.GenerateHTML(email) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // generate text - text, err := h.GeneratePlainText(email) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // send the email - if err := send(smtpConfig, options, html, text); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -type smtpAuthentication struct { - Server string - Port int - SenderEmail string - SenderIdentity string - SMTPUser string - SMTPPassword string -} - -// sendOptions are options for sending an email -type sendOptions struct { - To string - Subject string -} - -// send sends the email -func send(smtpConfig smtpAuthentication, options sendOptions, htmlBody string, txtBody string) error { - - if smtpConfig.Server == "" { - return errors.New("SMTP server config is empty") - } - if smtpConfig.Port == 0 { - return errors.New("SMTP port config is empty") - } - - if smtpConfig.SMTPUser == "" { - return errors.New("SMTP user is empty") - } - - if smtpConfig.SenderIdentity == "" { - return errors.New("SMTP sender identity is empty") - } - - if smtpConfig.SenderEmail == "" { - return errors.New("SMTP sender email is empty") - } - - if options.To == "" { - return errors.New("no receiver emails configured") - } - - from := mail.Address{ - Name: smtpConfig.SenderIdentity, - Address: smtpConfig.SenderEmail, - } - - m := gomail.NewMessage() - m.SetHeader("From", from.String()) - m.SetHeader("To", options.To) - m.SetHeader("Subject", options.Subject) - - m.SetBody("text/plain", txtBody) - m.AddAlternative("text/html", htmlBody) - - d := gomail.NewPlainDialer(smtpConfig.Server, smtpConfig.Port, smtpConfig.SMTPUser, smtpConfig.SMTPPassword) - - return d.DialAndSend(m) -} - -func GetFooter() string { - return ` -[Github](https://github.com/crawlab-team/crawlab) | [Documentation](http://docs.crawlab.cn) | [Docker](https://hub.docker.com/r/tikazyq/crawlab) -` -} diff --git a/backend/services/notification/mobile.go b/backend/services/notification/mobile.go deleted file mode 100644 index e140ecc5..00000000 --- a/backend/services/notification/mobile.go +++ /dev/null @@ -1,59 +0,0 @@ -package notification - -import ( - "errors" - "github.com/apex/log" - "github.com/imroc/req" - "runtime/debug" -) - -func SendMobileNotification(webhook string, title string, content string) error { - type ResBody struct { - ErrCode int `json:"errcode"` - ErrMsg string `json:"errmsg"` - } - - // 请求头 - header := req.Header{ - "Content-Type": "application/json; charset=utf-8", - } - - // 请求数据 - data := req.Param{ - "msgtype": "markdown", - "markdown": req.Param{ - "title": title, - "text": content, - "content": content, - }, - "at": req.Param{ - "atMobiles": []string{}, - "isAtAll": false, - }, - } - - // 发起请求 - res, err := req.Post(webhook, header, req.BodyJSON(&data)) - if err != nil { - log.Errorf("dingtalk notification error: " + err.Error()) - debug.PrintStack() - return err - } - - // 解析响应 - var resBody ResBody - if err := res.ToJSON(&resBody); err != nil { - log.Errorf("dingtalk notification error: " + err.Error()) - debug.PrintStack() - return err - } - - // 判断响应是否报错 - if resBody.ErrCode != 0 { - log.Errorf("dingtalk notification error: " + resBody.ErrMsg) - debug.PrintStack() - return errors.New(resBody.ErrMsg) - } - - return nil -} diff --git a/backend/services/repo.go b/backend/services/repo.go deleted file mode 100644 index a2afb8cd..00000000 --- a/backend/services/repo.go +++ /dev/null @@ -1,93 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/model" - "crawlab/utils" - "fmt" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "github.com/imroc/req" - uuid "github.com/satori/go.uuid" - "github.com/spf13/viper" - "path" - "path/filepath" - "runtime/debug" - "strings" -) - -func DownloadRepo(fullName string, userId bson.ObjectId) (err error) { - // 下载 zip 文件 - url := fmt.Sprintf("%s/%s.zip", viper.GetString("repo.ossUrl"), fullName) - progress := func(current, total int64) { - fmt.Println(float32(current)/float32(total)*100, "%") - } - res, err := req.Get(url, req.DownloadProgress(progress)) - if err != nil { - log.Errorf("download repo error: " + err.Error()) - debug.PrintStack() - return err - } - spiderName := strings.Replace(fullName, "/", "_", -1) - randomId := uuid.NewV4() - tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), spiderName+"."+randomId.String()+".zip") - if err := res.ToFile(tmpFilePath); err != nil { - log.Errorf("to file error: " + err.Error()) - debug.PrintStack() - return err - } - - // 解压 zip 文件 - tmpFile := utils.OpenFile(tmpFilePath) - if err := utils.DeCompress(tmpFile, viper.GetString("other.tmppath")); err != nil { - log.Errorf("de-compress error: " + err.Error()) - debug.PrintStack() - return err - } - - // 拷贝文件 - spiderPath := path.Join(viper.GetString("spider.path"), spiderName) - srcDirPath := fmt.Sprintf("%s/data/github.com/%s", viper.GetString("other.tmppath"), fullName) - if err := utils.CopyDir(srcDirPath, spiderPath); err != nil { - log.Errorf("copy error: " + err.Error()) - debug.PrintStack() - return err - } - - // 创建爬虫 - spider := model.GetSpiderByName(spiderName) - if spider.Name == "" { - // 新增 - spider = model.Spider{ - Id: bson.NewObjectId(), - Name: spiderName, - DisplayName: spiderName, - Type: constants.Customized, - Src: spiderPath, - ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), - FileId: bson.ObjectIdHex(constants.ObjectIdNull), - UserId: userId, - } - if err := spider.Add(); err != nil { - log.Error("add spider error: " + err.Error()) - debug.PrintStack() - return err - } - } else { - // 更新 - if err := spider.Save(); err != nil { - log.Error("save spider error: " + err.Error()) - debug.PrintStack() - return err - } - } - - // 上传爬虫 - if err := UploadSpiderToGridFsFromMaster(spider); err != nil { - log.Error("upload spider error: " + err.Error()) - debug.PrintStack() - return err - } - - return nil -} diff --git a/backend/services/rpc/base.go b/backend/services/rpc/base.go deleted file mode 100644 index 866fe48b..00000000 --- a/backend/services/rpc/base.go +++ /dev/null @@ -1,146 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/model" - "crawlab/services/local_node" - "crawlab/utils" - "encoding/json" - "errors" - "fmt" - "github.com/apex/log" - "github.com/cenkalti/backoff/v4" - "github.com/gomodule/redigo/redis" - uuid "github.com/satori/go.uuid" - "runtime/debug" -) - -// RPC服务基础类 -type Service interface { - ServerHandle() (entity.RpcMessage, error) - ClientHandle() (interface{}, error) -} - -// 客户端处理消息函数 -func ClientFunc(msg entity.RpcMessage) func() (entity.RpcMessage, error) { - return func() (replyMsg entity.RpcMessage, err error) { - // 请求ID - msg.Id = uuid.NewV4().String() - - // 发送RPC消息 - msgStr := utils.ObjectToString(msg) - if err := database.RedisClient.LPush(fmt.Sprintf("rpc:%s", msg.NodeId), msgStr); err != nil { - log.Errorf("RpcClientFunc error: " + err.Error()) - debug.PrintStack() - return replyMsg, err - } - - // 获取RPC回复消息 - dataStr, err := database.RedisClient.BRPop(fmt.Sprintf("rpc:%s:%s", msg.NodeId, msg.Id), msg.Timeout) - if err != nil { - log.Errorf("RpcClientFunc error: " + err.Error()) - debug.PrintStack() - return replyMsg, err - } - - // 反序列化消息 - if err := json.Unmarshal([]byte(dataStr), &replyMsg); err != nil { - log.Errorf("RpcClientFunc error: " + err.Error()) - debug.PrintStack() - return replyMsg, err - } - - // 如果返回消息有错误,返回错误 - if replyMsg.Error != "" { - return replyMsg, errors.New(replyMsg.Error) - } - - return - } -} - -// 获取RPC服务 -func GetService(msg entity.RpcMessage) Service { - switch msg.Method { - case constants.RpcInstallLang: - return &InstallLangService{msg: msg} - case constants.RpcInstallDep: - return &InstallDepService{msg: msg} - case constants.RpcUninstallDep: - return &UninstallDepService{msg: msg} - case constants.RpcGetLang: - return &GetLangService{msg: msg} - case constants.RpcGetInstalledDepList: - return &GetInstalledDepsService{msg: msg} - case constants.RpcCancelTask: - return &CancelTaskService{msg: msg} - case constants.RpcGetSystemInfoService: - return &GetSystemInfoService{msg: msg} - } - return nil -} - -// 处理RPC消息 -func handleMsg(msgStr string, node *model.Node) { - // 反序列化消息 - var msg entity.RpcMessage - if err := json.Unmarshal([]byte(msgStr), &msg); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 获取service - service := GetService(msg) - - // 根据Method调用本地方法 - replyMsg, err := service.ServerHandle() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 发送返回消息 - if err := database.RedisClient.LPush(fmt.Sprintf("rpc:%s:%s", node.Id.Hex(), replyMsg.Id), utils.ObjectToString(replyMsg)); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } -} - -// 初始化服务端RPC服务 -func InitRpcService() error { - go func() { - node := local_node.CurrentNode() - for { - // 获取当前节点 - //node, err := model.GetCurrentNode() - //if err != nil { - // log.Errorf(err.Error()) - // debug.PrintStack() - // continue - //} - b := backoff.NewExponentialBackOff() - bp := backoff.WithMaxRetries(b, 10) - var msgStr string - var err error - err = backoff.Retry(func() error { - msgStr, err = database.RedisClient.BRPop(fmt.Sprintf("rpc:%s", node.Id.Hex()), 0) - - if err != nil && err != redis.ErrNil { - log.WithError(err).Warnf("waiting for redis pool active connection. will after %f seconds try again.", b.NextBackOff().Seconds()) - return err - } - return err - }, bp) - if err != nil { - continue - } - // 处理消息 - go handleMsg(msgStr, node) - } - }() - return nil -} diff --git a/backend/services/rpc/cancel_task.go b/backend/services/rpc/cancel_task.go deleted file mode 100644 index 0e1d4617..00000000 --- a/backend/services/rpc/cancel_task.go +++ /dev/null @@ -1,63 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/model" - "crawlab/utils" - "errors" - "fmt" - "github.com/globalsign/mgo/bson" -) - -type CancelTaskService struct { - msg entity.RpcMessage -} - -func (s *CancelTaskService) ServerHandle() (entity.RpcMessage, error) { - taskId := utils.GetRpcParam("task_id", s.msg.Params) - nodeId := utils.GetRpcParam("node_id", s.msg.Params) - if err := CancelTaskLocal(taskId, nodeId); err != nil { - s.msg.Error = err.Error() - return s.msg, err - } - s.msg.Result = "success" - return s.msg, nil -} - -func (s *CancelTaskService) ClientHandle() (o interface{}, err error) { - // 发起 RPC 请求,获取服务端数据 - _, err = ClientFunc(s.msg)() - if err != nil { - return - } - - return -} - -func CancelTaskLocal(taskId string, nodeId string) error { - if !utils.TaskExecChanMap.HasChanKey(taskId) { - _ = model.UpdateTaskToAbnormal(bson.ObjectIdHex(nodeId)) - return errors.New(fmt.Sprintf("task id (%s) does not exist", taskId)) - } - ch := utils.TaskExecChanMap.ChanBlocked(taskId) - ch <- constants.TaskCancel - return nil -} - -func CancelTaskRemote(taskId string, nodeId string) (err error) { - params := make(map[string]string) - params["task_id"] = taskId - params["node_id"] = nodeId - s := GetService(entity.RpcMessage{ - NodeId: nodeId, - Method: constants.RpcCancelTask, - Params: params, - Timeout: 60, - }) - _, err = s.ClientHandle() - if err != nil { - return - } - return -} diff --git a/backend/services/rpc/get_installed_deps.go b/backend/services/rpc/get_installed_deps.go deleted file mode 100644 index 9017fa64..00000000 --- a/backend/services/rpc/get_installed_deps.go +++ /dev/null @@ -1,123 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/utils" - "encoding/json" - "os/exec" - "regexp" - "runtime/debug" - "strings" -) - -type GetInstalledDepsService struct { - msg entity.RpcMessage -} - -func (s *GetInstalledDepsService) ServerHandle() (entity.RpcMessage, error) { - lang := utils.GetRpcParam("lang", s.msg.Params) - deps, err := GetInstalledDepsLocal(lang) - if err != nil { - s.msg.Error = err.Error() - return s.msg, err - } - resultStr, _ := json.Marshal(deps) - s.msg.Result = string(resultStr) - return s.msg, nil -} - -func (s *GetInstalledDepsService) ClientHandle() (o interface{}, err error) { - // 发起 RPC 请求,获取服务端数据 - s.msg, err = ClientFunc(s.msg)() - if err != nil { - return o, err - } - - // 反序列化 - var output []entity.Dependency - if err := json.Unmarshal([]byte(s.msg.Result), &output); err != nil { - return o, err - } - o = output - - return -} - -// 获取本地已安装依赖列表 -func GetInstalledDepsLocal(lang string) (deps []entity.Dependency, err error) { - if lang == constants.Python { - deps, err = GetPythonInstalledDepListLocal() - } else if lang == constants.Nodejs { - deps, err = GetNodejsInstalledDepListLocal() - } - return deps, err -} - -// 获取Python本地已安装依赖列表 -func GetPythonInstalledDepListLocal() ([]entity.Dependency, error) { - var list []entity.Dependency - - cmd := exec.Command("pip", "freeze") - outputBytes, err := cmd.Output() - if err != nil { - debug.PrintStack() - return list, err - } - - for _, line := range strings.Split(string(outputBytes), "\n") { - arr := strings.Split(line, "==") - if len(arr) < 2 { - continue - } - dep := entity.Dependency{ - Name: strings.ToLower(arr[0]), - Version: arr[1], - Installed: true, - } - list = append(list, dep) - } - - return list, nil -} - -// 获取Node.js本地已安装依赖列表 -func GetNodejsInstalledDepListLocal() ([]entity.Dependency, error) { - var list []entity.Dependency - - cmd := exec.Command("npm", "ls", "-g", "--depth", "0") - outputBytes, _ := cmd.Output() - - regex := regexp.MustCompile("\\s(.*)@(.*)") - for _, line := range strings.Split(string(outputBytes), "\n") { - arr := regex.FindStringSubmatch(line) - if len(arr) < 3 { - continue - } - dep := entity.Dependency{ - Name: strings.ToLower(arr[1]), - Version: arr[2], - Installed: true, - } - list = append(list, dep) - } - - return list, nil -} - -func GetInstalledDepsRemote(nodeId string, lang string) (deps []entity.Dependency, err error) { - params := make(map[string]string) - params["lang"] = lang - s := GetService(entity.RpcMessage{ - NodeId: nodeId, - Method: constants.RpcGetInstalledDepList, - Params: params, - Timeout: 60, - }) - o, err := s.ClientHandle() - if err != nil { - return - } - deps = o.([]entity.Dependency) - return -} diff --git a/backend/services/rpc/get_lang.go b/backend/services/rpc/get_lang.go deleted file mode 100644 index 93d8baf1..00000000 --- a/backend/services/rpc/get_lang.go +++ /dev/null @@ -1,82 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/utils" - "encoding/json" -) - -type GetLangService struct { - msg entity.RpcMessage -} - -func (s *GetLangService) ServerHandle() (entity.RpcMessage, error) { - langName := utils.GetRpcParam("lang", s.msg.Params) - lang := utils.GetLangFromLangNamePlain(langName) - l := GetLangLocal(lang) - lang.InstallStatus = l.InstallStatus - - // 序列化 - resultStr, _ := json.Marshal(lang) - s.msg.Result = string(resultStr) - return s.msg, nil -} - -func (s *GetLangService) ClientHandle() (o interface{}, err error) { - // 发起 RPC 请求,获取服务端数据 - s.msg, err = ClientFunc(s.msg)() - if err != nil { - return o, err - } - - var output entity.Lang - if err := json.Unmarshal([]byte(s.msg.Result), &output); err != nil { - return o, err - } - o = output - - return -} - -func GetLangLocal(lang entity.Lang) entity.Lang { - // 检查是否存在执行路径 - for _, p := range lang.ExecutablePaths { - if utils.Exists(p) { - lang.InstallStatus = constants.InstallStatusInstalled - return lang - } - } - - //// 检查是否正在安装 - //if utils.Exists(lang.LockPath) { - // lang.InstallStatus = constants.InstallStatusInstalling - // return lang - //} - // - //// 检查其他语言是否在安装 - //if utils.Exists("/tmp/install.lock") { - // lang.InstallStatus = constants.InstallStatusInstallingOther - // return lang - //} - - lang.InstallStatus = constants.InstallStatusNotInstalled - return lang -} - -func GetLangRemote(nodeId string, lang entity.Lang) (l entity.Lang, err error) { - params := make(map[string]string) - params["lang"] = lang.ExecutableName - s := GetService(entity.RpcMessage{ - NodeId: nodeId, - Method: constants.RpcGetLang, - Params: params, - Timeout: 60, - }) - o, err := s.ClientHandle() - if err != nil { - return - } - l = o.(entity.Lang) - return -} diff --git a/backend/services/rpc/get_system_info.go b/backend/services/rpc/get_system_info.go deleted file mode 100644 index 7e290656..00000000 --- a/backend/services/rpc/get_system_info.go +++ /dev/null @@ -1,67 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/model" - "encoding/json" -) - -type GetSystemInfoService struct { - msg entity.RpcMessage -} - -func (s *GetSystemInfoService) ServerHandle() (entity.RpcMessage, error) { - sysInfo, err := GetSystemInfoServiceLocal() - if err != nil { - s.msg.Error = err.Error() - return s.msg, err - } - - // 序列化 - resultStr, _ := json.Marshal(sysInfo) - s.msg.Result = string(resultStr) - return s.msg, nil -} - -func (s *GetSystemInfoService) ClientHandle() (o interface{}, err error) { - // 发起 RPC 请求,获取服务端数据 - s.msg, err = ClientFunc(s.msg)() - if err != nil { - return o, err - } - - var output entity.SystemInfo - if err := json.Unmarshal([]byte(s.msg.Result), &output); err != nil { - return o, err - } - o = output - - return -} - -func GetSystemInfoServiceLocal() (sysInfo entity.SystemInfo, err error) { - // 获取环境信息 - sysInfo, err = model.GetLocalSystemInfo() - if err != nil { - return sysInfo, err - } - return sysInfo, nil -} - -func GetSystemInfoServiceRemote(nodeId string) (sysInfo entity.SystemInfo, err error) { - params := make(map[string]string) - params["node_id"] = nodeId - s := GetService(entity.RpcMessage{ - NodeId: nodeId, - Method: constants.RpcGetSystemInfoService, - Params: params, - Timeout: 60, - }) - o, err := s.ClientHandle() - if err != nil { - return - } - sysInfo = o.(entity.SystemInfo) - return -} diff --git a/backend/services/rpc/install_dep.go b/backend/services/rpc/install_dep.go deleted file mode 100644 index e615688a..00000000 --- a/backend/services/rpc/install_dep.go +++ /dev/null @@ -1,100 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/utils" - "errors" - "fmt" - "github.com/apex/log" - "os/exec" - "runtime/debug" -) - -type InstallDepService struct { - msg entity.RpcMessage -} - -func (s *InstallDepService) ServerHandle() (entity.RpcMessage, error) { - lang := utils.GetRpcParam("lang", s.msg.Params) - depName := utils.GetRpcParam("dep_name", s.msg.Params) - if err := InstallDepLocal(lang, depName); err != nil { - s.msg.Error = err.Error() - return s.msg, err - } - s.msg.Result = "success" - return s.msg, nil -} - -func (s *InstallDepService) ClientHandle() (o interface{}, err error) { - // 发起 RPC 请求,获取服务端数据 - _, err = ClientFunc(s.msg)() - if err != nil { - return - } - - return -} - -func InstallDepLocal(lang string, depName string) error { - if lang == constants.Python { - _, err := InstallPythonDepLocal(depName) - if err != nil { - return err - } - } else if lang == constants.Nodejs { - _, err := InstallNodejsDepLocal(depName) - if err != nil { - return err - } - } else { - return errors.New(fmt.Sprintf("%s is not implemented", lang)) - } - return nil -} - -// 安装Python本地依赖 -func InstallPythonDepLocal(depName string) (string, error) { - // 依赖镜像URL - url := "https://pypi.tuna.tsinghua.edu.cn/simple" - - cmd := exec.Command("pip", "install", depName, "-i", url) - outputBytes, err := cmd.Output() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return fmt.Sprintf("error: %s", err.Error()), err - } - return string(outputBytes), nil -} - -func InstallNodejsDepLocal(depName string) (string, error) { - // 依赖镜像URL - url := "https://registry.npm.taobao.org" - - cmd := exec.Command("npm", "install", depName, "-g", "--registry", url) - outputBytes, err := cmd.Output() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return fmt.Sprintf("error: %s", err.Error()), err - } - return string(outputBytes), nil -} - -func InstallDepRemote(nodeId string, lang string, depName string) (err error) { - params := make(map[string]string) - params["lang"] = lang - params["dep_name"] = depName - s := GetService(entity.RpcMessage{ - NodeId: nodeId, - Method: constants.RpcInstallDep, - Params: params, - Timeout: 300, - }) - _, err = s.ClientHandle() - if err != nil { - return - } - return -} diff --git a/backend/services/rpc/install_lang.go b/backend/services/rpc/install_lang.go deleted file mode 100644 index 39f5a6d8..00000000 --- a/backend/services/rpc/install_lang.go +++ /dev/null @@ -1,73 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/utils" - "errors" - "fmt" - "github.com/apex/log" - "os/exec" - "path" - "runtime/debug" -) - -type InstallLangService struct { - msg entity.RpcMessage -} - -func (s *InstallLangService) ServerHandle() (entity.RpcMessage, error) { - lang := utils.GetRpcParam("lang", s.msg.Params) - output, err := InstallLangLocal(lang) - s.msg.Result = output - if err != nil { - s.msg.Error = err.Error() - return s.msg, err - } - return s.msg, nil -} - -func (s *InstallLangService) ClientHandle() (o interface{}, err error) { - // 发起 RPC 请求,获取服务端数据 - go func() { - _, err := ClientFunc(s.msg)() - if err != nil { - return - } - }() - - return -} - -// 本地安装语言 -func InstallLangLocal(lang string) (o string, err error) { - l := utils.GetLangFromLangNamePlain(lang) - if l.Name == "" || l.InstallScript == "" { - return "", errors.New(fmt.Sprintf("%s is not implemented", lang)) - } - cmd := exec.Command("/bin/sh", path.Join("scripts", l.InstallScript)) - output, err := cmd.Output() - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return string(output), err - } - return -} - -// 远端安装语言 -func InstallLangRemote(nodeId string, lang string) (o string, err error) { - params := make(map[string]string) - params["lang"] = lang - s := GetService(entity.RpcMessage{ - NodeId: nodeId, - Method: constants.RpcInstallLang, - Params: params, - Timeout: 60, - }) - _, err = s.ClientHandle() - if err != nil { - return - } - return -} diff --git a/backend/services/rpc/remove_spider.go b/backend/services/rpc/remove_spider.go deleted file mode 100644 index 1d8ff90c..00000000 --- a/backend/services/rpc/remove_spider.go +++ /dev/null @@ -1,62 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/model" - "crawlab/utils" - "github.com/globalsign/mgo/bson" - "github.com/spf13/viper" - "path/filepath" -) - -type RemoveSpiderService struct { - msg entity.RpcMessage -} - -func (s *RemoveSpiderService) ServerHandle() (entity.RpcMessage, error) { - spiderId := utils.GetRpcParam("spider_id", s.msg.Params) - if err := RemoveSpiderServiceLocal(spiderId); err != nil { - s.msg.Error = err.Error() - return s.msg, err - } - s.msg.Result = "success" - return s.msg, nil -} - -func (s *RemoveSpiderService) ClientHandle() (o interface{}, err error) { - // 发起 RPC 请求,获取服务端数据 - _, err = ClientFunc(s.msg)() - if err != nil { - return - } - - return -} - -func RemoveSpiderServiceLocal(spiderId string) error { - // 移除本地的爬虫目录 - spider, err := model.GetSpider(bson.ObjectIdHex(spiderId)) - if err != nil { - return err - } - path := filepath.Join(viper.GetString("spider.path"), spider.Name) - utils.RemoveFiles(path) - return nil -} - -func RemoveSpiderServiceRemote(spiderId string, nodeId string) (err error) { - params := make(map[string]string) - params["spider_id"] = spiderId - s := GetService(entity.RpcMessage{ - NodeId: nodeId, - Method: constants.RpcRemoveSpider, - Params: params, - Timeout: 60, - }) - _, err = s.ClientHandle() - if err != nil { - return - } - return -} diff --git a/backend/services/rpc/uninstall_dep.go b/backend/services/rpc/uninstall_dep.go deleted file mode 100644 index 1b8b8ecb..00000000 --- a/backend/services/rpc/uninstall_dep.go +++ /dev/null @@ -1,96 +0,0 @@ -package rpc - -import ( - "crawlab/constants" - "crawlab/entity" - "crawlab/utils" - "errors" - "fmt" - "github.com/apex/log" - "os/exec" - "runtime/debug" -) - -type UninstallDepService struct { - msg entity.RpcMessage -} - -func (s *UninstallDepService) ServerHandle() (entity.RpcMessage, error) { - lang := utils.GetRpcParam("lang", s.msg.Params) - depName := utils.GetRpcParam("dep_name", s.msg.Params) - if err := UninstallDepLocal(lang, depName); err != nil { - s.msg.Error = err.Error() - return s.msg, err - } - s.msg.Result = "success" - return s.msg, nil -} - -func (s *UninstallDepService) ClientHandle() (o interface{}, err error) { - // 发起 RPC 请求,获取服务端数据 - _, err = ClientFunc(s.msg)() - if err != nil { - return - } - - return -} - -func UninstallDepLocal(lang string, depName string) error { - if lang == constants.Python { - output, err := UninstallPythonDepLocal(depName) - if err != nil { - log.Debugf(output) - return err - } - } else if lang == constants.Nodejs { - output, err := UninstallNodejsDepLocal(depName) - if err != nil { - log.Debugf(output) - return err - } - } else { - return errors.New(fmt.Sprintf("%s is not implemented", lang)) - } - return nil -} - -func UninstallPythonDepLocal(depName string) (string, error) { - cmd := exec.Command("pip", "uninstall", "-y", depName) - outputBytes, err := cmd.Output() - if err != nil { - log.Errorf(string(outputBytes)) - log.Errorf(err.Error()) - debug.PrintStack() - return fmt.Sprintf("error: %s", err.Error()), err - } - return string(outputBytes), nil -} - -func UninstallNodejsDepLocal(depName string) (string, error) { - cmd := exec.Command("npm", "uninstall", depName, "-g") - outputBytes, err := cmd.Output() - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return fmt.Sprintf("error: %s", err.Error()), err - } - return string(outputBytes), nil -} - -func UninstallDepRemote(nodeId string, lang string, depName string) (err error) { - params := make(map[string]string) - params["lang"] = lang - params["dep_name"] = depName - s := GetService(entity.RpcMessage{ - NodeId: nodeId, - Method: constants.RpcUninstallDep, - Params: params, - Timeout: 300, - }) - _, err = s.ClientHandle() - if err != nil { - return - } - return -} diff --git a/backend/services/schedule.go b/backend/services/schedule.go deleted file mode 100644 index 017fba7d..00000000 --- a/backend/services/schedule.go +++ /dev/null @@ -1,274 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/lib/cron" - "crawlab/model" - "errors" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - uuid "github.com/satori/go.uuid" - "runtime/debug" -) - -var Sched *Scheduler - -type Scheduler struct { - cron *cron.Cron -} - -func AddScheduleTask(s model.Schedule) func() { - return func() { - // 生成任务ID - id := uuid.NewV4() - - // 参数 - var param string - - // 爬虫 - spider, err := model.GetSpider(s.SpiderId) - if err != nil { - return - } - - // scrapy 爬虫 - if spider.IsScrapy { - if s.ScrapySpider == "" { - log.Errorf("scrapy spider is not set") - debug.PrintStack() - return - } - param = s.ScrapySpider + " -L " + s.ScrapyLogLevel + " " + s.Param - } else { - param = s.Param - } - - if s.RunType == constants.RunTypeAllNodes { - // 所有节点 - nodes, err := model.GetNodeList(nil) - if err != nil { - return - } - for _, node := range nodes { - t := model.Task{ - Id: id.String(), - SpiderId: s.SpiderId, - NodeId: node.Id, - Param: param, - UserId: s.UserId, - RunType: constants.RunTypeAllNodes, - ScheduleId: s.Id, - Type: constants.TaskTypeSpider, - } - - if _, err := AddTask(t); err != nil { - return - } - } - } else if s.RunType == constants.RunTypeRandom { - // 随机 - t := model.Task{ - Id: id.String(), - SpiderId: s.SpiderId, - Param: param, - UserId: s.UserId, - RunType: constants.RunTypeRandom, - ScheduleId: s.Id, - Type: constants.TaskTypeSpider, - } - if _, err := AddTask(t); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - } else if s.RunType == constants.RunTypeSelectedNodes { - // 指定节点 - for _, nodeId := range s.NodeIds { - t := model.Task{ - Id: id.String(), - SpiderId: s.SpiderId, - NodeId: nodeId, - Param: param, - UserId: s.UserId, - RunType: constants.RunTypeSelectedNodes, - ScheduleId: s.Id, - Type: constants.TaskTypeSpider, - } - - if _, err := AddTask(t); err != nil { - return - } - } - } else { - return - } - } -} - -func UpdateSchedules() { - if err := Sched.Update(); err != nil { - log.Errorf(err.Error()) - return - } -} - -func (s *Scheduler) Start() error { - exec := cron.New(cron.WithSeconds()) - - // 启动cron服务 - s.cron.Start() - - // 更新任务列表 - if err := s.Update(); err != nil { - log.Errorf("update scheduler error: %s", err.Error()) - debug.PrintStack() - return err - } - - // 每30秒更新一次任务列表 - spec := "*/30 * * * * *" - if _, err := exec.AddFunc(spec, UpdateSchedules); err != nil { - log.Errorf("add func update schedulers error: %s", err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func (s *Scheduler) AddJob(job model.Schedule) error { - spec := job.Cron - - // 添加定时任务 - eid, err := s.cron.AddFunc(spec, AddScheduleTask(job)) - if err != nil { - log.Errorf("add func task error: %s", err.Error()) - debug.PrintStack() - return err - } - - // 更新EntryID - job.EntryId = eid - - // 更新状态 - job.Status = constants.ScheduleStatusRunning - job.Enabled = true - - // 保存定时任务 - if err := job.Save(); err != nil { - log.Errorf("job save error: %s", err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func (s *Scheduler) RemoveAll() { - entries := s.cron.Entries() - for i := 0; i < len(entries); i++ { - s.cron.Remove(entries[i].ID) - } -} - -// 验证cron表达式是否正确 -func ParserCron(spec string) error { - parser := cron.NewParser( - cron.Second | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor, - ) - - if _, err := parser.Parse(spec); err != nil { - return err - } - return nil -} - -// 禁用定时任务 -func (s *Scheduler) Disable(id bson.ObjectId) error { - schedule, err := model.GetSchedule(id) - if err != nil { - return err - } - if schedule.EntryId == 0 { - return errors.New("entry id not found") - } - - // 从cron服务中删除该任务 - s.cron.Remove(schedule.EntryId) - - // 更新状态 - schedule.Status = constants.ScheduleStatusStop - schedule.Enabled = false - - if err = schedule.Save(); err != nil { - return err - } - return nil -} - -// 启用定时任务 -func (s *Scheduler) Enable(id bson.ObjectId) error { - schedule, err := model.GetSchedule(id) - if err != nil { - return err - } - if err := s.AddJob(schedule); err != nil { - return err - } - return nil -} - -func (s *Scheduler) Update() error { - // 删除所有定时任务 - s.RemoveAll() - - // 获取所有定时任务 - sList, err := model.GetScheduleList(bson.M{"enabled": true}) - if err != nil { - log.Errorf("get scheduler list error: %s", err.Error()) - debug.PrintStack() - return err - } - - user, err := model.GetUserByUsername("admin") - if err != nil { - log.Errorf("get admin user error: %s", err.Error()) - return err - } - - // 遍历任务列表 - for i := 0; i < len(sList); i++ { - // 单个任务 - job := sList[i] - - if job.Status == constants.ScheduleStatusStop { - continue - } - - // 兼容以前版本 - if job.UserId.Hex() == "" { - job.UserId = user.Id - } - - // 添加到定时任务 - if err := s.AddJob(job); err != nil { - log.Errorf("add job error: %s, job: %s, cron: %s", err.Error(), job.Name, job.Cron) - debug.PrintStack() - return err - } - } - - return nil -} - -func InitScheduler() error { - Sched = &Scheduler{ - cron: cron.New(cron.WithSeconds()), - } - if err := Sched.Start(); err != nil { - log.Errorf("start scheduler error: %s", err.Error()) - debug.PrintStack() - return err - } - return nil -} diff --git a/backend/services/scrapy.go b/backend/services/scrapy.go deleted file mode 100644 index eee7893d..00000000 --- a/backend/services/scrapy.go +++ /dev/null @@ -1,285 +0,0 @@ -package services - -import ( - "bytes" - "crawlab/constants" - "crawlab/entity" - "crawlab/model" - "encoding/json" - "errors" - "fmt" - "github.com/Unknwon/goconfig" - "github.com/apex/log" - "io/ioutil" - "os" - "os/exec" - "path" - "runtime/debug" - "strconv" - "strings" -) - -func GetScrapySpiderNames(s model.Spider) ([]string, error) { - var stdout bytes.Buffer - var stderr bytes.Buffer - - cmd := exec.Command("scrapy", "list") - cmd.Dir = s.Src - cmd.Stdout = &stdout - cmd.Stderr = &stderr - if err := cmd.Run(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return []string{}, errors.New(stderr.String()) - } - - spiderNames := strings.Split(stdout.String(), "\n") - - var res []string - for _, sn := range spiderNames { - if sn != "" { - res = append(res, sn) - } - } - - return res, nil -} - -func GetScrapySettings(s model.Spider) (res []map[string]interface{}, err error) { - var stdout bytes.Buffer - var stderr bytes.Buffer - - cmd := exec.Command("crawlab", "settings") - cmd.Dir = s.Src - cmd.Stdout = &stdout - cmd.Stderr = &stderr - if err := cmd.Run(); err != nil { - log.Errorf(err.Error()) - log.Errorf(stderr.String()) - debug.PrintStack() - return res, errors.New(stderr.String()) - } - - if err := json.Unmarshal([]byte(stdout.String()), &res); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return res, err - } - - return res, nil -} - -func SaveScrapySettings(s model.Spider, settingsData []entity.ScrapySettingParam) (err error) { - // 读取 scrapy.cfg - cfg, err := goconfig.LoadConfigFile(path.Join(s.Src, "scrapy.cfg")) - if err != nil { - return - } - modName, err := cfg.GetValue("settings", "default") - if err != nil { - return - } - - // 定位到 settings.py 文件 - arr := strings.Split(modName, ".") - dirName := arr[0] - fileName := arr[1] - filePath := fmt.Sprintf("%s/%s/%s.py", s.Src, dirName, fileName) - - // 生成文件内容 - content := "" - for _, param := range settingsData { - var line string - switch param.Type { - case constants.String: - line = fmt.Sprintf("%s = '%s'", param.Key, param.Value) - case constants.Number: - n := int64(param.Value.(float64)) - s := strconv.FormatInt(n, 10) - line = fmt.Sprintf("%s = %s", param.Key, s) - case constants.Boolean: - if param.Value.(bool) { - line = fmt.Sprintf("%s = %s", param.Key, "True") - } else { - line = fmt.Sprintf("%s = %s", param.Key, "False") - } - case constants.Array: - arr := param.Value.([]interface{}) - var arrStr []string - for _, s := range arr { - arrStr = append(arrStr, s.(string)) - } - line = fmt.Sprintf("%s = ['%s']", param.Key, strings.Join(arrStr, "','")) - case constants.Object: - value := param.Value.(map[string]interface{}) - var arr []string - for k, v := range value { - n := int64(v.(float64)) - s := strconv.FormatInt(n, 10) - arr = append(arr, fmt.Sprintf("'%s': %s", k, s)) - } - line = fmt.Sprintf("%s = {%s}", param.Key, strings.Join(arr, ",")) - } - content += line + "\n" - } - - // 写到 settings.py - if err := ioutil.WriteFile(filePath, []byte(content), os.ModePerm); err != nil { - return err - } - - // 同步到GridFS - if err := UploadSpiderToGridFsFromMaster(s); err != nil { - return err - } - - return -} - -func GetScrapyItems(s model.Spider) (res []map[string]interface{}, err error) { - var stdout bytes.Buffer - var stderr bytes.Buffer - - cmd := exec.Command("crawlab", "items") - cmd.Dir = s.Src - cmd.Stdout = &stdout - cmd.Stderr = &stderr - if err := cmd.Run(); err != nil { - log.Errorf(err.Error()) - log.Errorf(stderr.String()) - debug.PrintStack() - return res, errors.New(stderr.String()) - } - - if err := json.Unmarshal([]byte(stdout.String()), &res); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return res, err - } - - return res, nil -} - -func SaveScrapyItems(s model.Spider, itemsData []entity.ScrapyItem) (err error) { - // 读取 scrapy.cfg - cfg, err := goconfig.LoadConfigFile(path.Join(s.Src, "scrapy.cfg")) - if err != nil { - return - } - modName, err := cfg.GetValue("settings", "default") - if err != nil { - return - } - - // 定位到 settings.py 文件 - arr := strings.Split(modName, ".") - dirName := arr[0] - fileName := "items" - filePath := fmt.Sprintf("%s/%s/%s.py", s.Src, dirName, fileName) - - // 生成文件内容 - content := "" - content += "import scrapy\n" - content += "\n\n" - for _, item := range itemsData { - content += fmt.Sprintf("class %s(scrapy.Item):\n", item.Name) - for _, field := range item.Fields { - content += fmt.Sprintf(" %s = scrapy.Field()\n", field) - } - content += "\n\n" - } - - // 写到 settings.py - if err := ioutil.WriteFile(filePath, []byte(content), os.ModePerm); err != nil { - return err - } - - // 同步到GridFS - if err := UploadSpiderToGridFsFromMaster(s); err != nil { - return err - } - - return -} - -func GetScrapyPipelines(s model.Spider) (res []string, err error) { - var stdout bytes.Buffer - var stderr bytes.Buffer - - cmd := exec.Command("crawlab", "pipelines") - cmd.Dir = s.Src - cmd.Stdout = &stdout - cmd.Stderr = &stderr - if err := cmd.Run(); err != nil { - log.Errorf(err.Error()) - log.Errorf(stderr.String()) - debug.PrintStack() - return res, errors.New(stderr.String()) - } - - if err := json.Unmarshal([]byte(stdout.String()), &res); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return res, err - } - - return res, nil -} - -func GetScrapySpiderFilepath(s model.Spider, spiderName string) (res string, err error) { - var stdout bytes.Buffer - var stderr bytes.Buffer - - cmd := exec.Command("crawlab", "find_spider_filepath", "-n", spiderName) - cmd.Dir = s.Src - cmd.Stdout = &stdout - cmd.Stderr = &stderr - if err := cmd.Run(); err != nil { - log.Errorf(err.Error()) - log.Errorf(stderr.String()) - debug.PrintStack() - return res, err - } - - res = strings.Replace(stdout.String(), "\n", "", 1) - - return res, nil -} - -func CreateScrapySpider(s model.Spider, name string, domain string, template string) (err error) { - var stdout bytes.Buffer - var stderr bytes.Buffer - - cmd := exec.Command("scrapy", "genspider", name, domain, "-t", template) - cmd.Dir = s.Src - cmd.Stdout = &stdout - cmd.Stderr = &stderr - if err := cmd.Run(); err != nil { - log.Errorf(err.Error()) - log.Errorf("stdout: " + stdout.String()) - log.Errorf("stderr: " + stderr.String()) - debug.PrintStack() - return err - } - - return -} - -func CreateScrapyProject(s model.Spider) (err error) { - var stdout bytes.Buffer - var stderr bytes.Buffer - - cmd := exec.Command("scrapy", "startproject", s.Name, s.Src) - cmd.Dir = s.Src - cmd.Stdout = &stdout - cmd.Stderr = &stderr - if err := cmd.Run(); err != nil { - log.Errorf(err.Error()) - log.Errorf("stdout: " + stdout.String()) - log.Errorf("stderr: " + stderr.String()) - debug.PrintStack() - return err - } - - return -} diff --git a/backend/services/spider.go b/backend/services/spider.go deleted file mode 100644 index 59aa1fad..00000000 --- a/backend/services/spider.go +++ /dev/null @@ -1,624 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/lib/cron" - "crawlab/model" - "crawlab/services/spider_handler" - "crawlab/utils" - "errors" - "fmt" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "github.com/satori/go.uuid" - "github.com/spf13/viper" - "gopkg.in/yaml.v2" - "io" - "io/ioutil" - "os" - "path" - "path/filepath" - "runtime/debug" - "time" -) - -type SpiderFileData struct { - FileName string - File []byte -} - -type SpiderUploadMessage struct { - FileId string - FileName string - SpiderId string -} - -// 从主节点上传爬虫到GridFS -func UploadSpiderToGridFsFromMaster(spider model.Spider) error { - // 爬虫所在目录 - spiderDir := spider.Src - - // 打包为 zip 文件 - files, err := utils.GetFilesFromDir(spiderDir) - if err != nil { - return err - } - randomId := uuid.NewV4() - tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), spider.Name+"."+randomId.String()+".zip") - spiderZipFileName := spider.Name + ".zip" - if err := utils.Compress(files, tmpFilePath); err != nil { - return err - } - - // 获取 GridFS 实例 - s, gf := database.GetGridFs("files") - defer s.Close() - - // 判断文件是否已经存在 - var gfFile model.GridFs - if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil { - // 已经存在文件,则删除 - log.Errorf(gfFile.Id.Hex() + " already exists. removing...") - if err := gf.RemoveId(gfFile.Id); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - } - - // 上传到GridFs - fid, err := RetryUploadToGridFs(spiderZipFileName, tmpFilePath) - if err != nil { - log.Errorf("upload to grid fs error: %s", err.Error()) - } - - // 保存爬虫 FileId - spider.FileId = fid - if err := spider.Save(); err != nil { - return err - } - - // 获取爬虫同步实例 - spiderSync := spider_handler.SpiderSync{ - Spider: spider, - } - - // 获取gfFile - gfFile2 := model.GetGridFs(spider.FileId) - - // 生成MD5 - spiderSync.CreateMd5File(gfFile2.Md5) - - // 检查是否为 Scrapy 爬虫 - spiderSync.CheckIsScrapy() - - return nil -} - -// 上传zip文件到GridFS -func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) { - fid = "" - - // 获取MongoDB GridFS连接 - s, gf := database.GetGridFs("files") - defer s.Close() - - // 创建一个新GridFS文件 - f, err := gf.Create(fileName) - if err != nil { - log.Errorf("create file error: " + err.Error()) - debug.PrintStack() - return - } - - // 分片读取爬虫zip文件 - err = ReadFileByStep(filePath, WriteToGridFS, f) - if err != nil { - log.Errorf("read file by step error: " + err.Error()) - debug.PrintStack() - return "", err - } - - // 删除zip文件 - if err = os.Remove(filePath); err != nil { - log.Errorf("remove file error: " + err.Error()) - debug.PrintStack() - return - } - - // 关闭文件,提交写入 - if err = f.Close(); err != nil { - log.Errorf("close file error: " + err.Error()) - debug.PrintStack() - return "", err - } - - // 文件ID - fid = f.Id().(bson.ObjectId) - - return fid, nil -} - -// 带重试功能的上传至 GridFS -func RetryUploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err error) { - maxErrCount := 10 - errCount := 0 - for { - if errCount > maxErrCount { - break - } - fid, err = UploadToGridFs(fileName, filePath) - if err != nil { - errCount++ - log.Errorf("upload to grid fs error: %s", err.Error()) - time.Sleep(3 * time.Second) - continue - } - return fid, nil - } - return fid, errors.New("unable to upload to gridfs, please re-upload the spider") -} - -// 写入grid fs -func WriteToGridFS(content []byte, f *mgo.GridFile) { - if _, err := f.Write(content); err != nil { - debug.PrintStack() - return - } -} - -//分片读取大文件 -func ReadFileByStep(filePath string, handle func([]byte, *mgo.GridFile), fileCreate *mgo.GridFile) error { - f, err := os.OpenFile(filePath, os.O_RDONLY, 0777) - if err != nil { - log.Infof("can't opened this file") - return err - } - defer utils.Close(f) - s := make([]byte, 4096) - for { - switch nr, err := f.Read(s[:]); true { - case nr < 0: - _, _ = fmt.Fprintf(os.Stderr, "cat: error reading: %s\n", err.Error()) - debug.PrintStack() - case nr == 0: // EOF - return nil - case nr > 0: - handle(s[0:nr], fileCreate) - } - } -} - -// 发布所有爬虫 -func PublishAllSpiders() { - // 获取爬虫列表 - spiders, _, _ := model.GetSpiderList(nil, 0, constants.Infinite, "-_id") - if len(spiders) == 0 { - return - } - log.Infof("start sync spider to local, total: %d", len(spiders)) - // 遍历爬虫列表 - for _, spider := range spiders { - // 异步发布爬虫 - go func(s model.Spider) { - PublishSpider(s) - }(spider) - } -} - -// 发布爬虫 -func PublishSpider(spider model.Spider) { - var gfFile *model.GridFs - if spider.FileId.Hex() != constants.ObjectIdNull { - // 查询gf file,不存在则标记为爬虫文件不存在 - gfFile = model.GetGridFs(spider.FileId) - if gfFile == nil { - log.Errorf("get grid fs file error: cannot find grid fs file") - log.Errorf("grid fs file_id: " + spider.FileId.Hex()) - log.Errorf("spider_name: " + spider.Name) - debug.PrintStack() - //spider.FileId = constants.ObjectIdNull - //if err := spider.Save(); err != nil { - // return - //} - return - } - } - - // 如果FileId为空,表示还没有上传爬虫到GridFS,则跳过 - if spider.FileId == bson.ObjectIdHex(constants.ObjectIdNull) { - return - } - - // 获取爬虫同步实例 - spiderSync := spider_handler.SpiderSync{ - Spider: spider, - } - - // 安装依赖 - if viper.GetString("setting.autoInstall") == "Y" { - go spiderSync.InstallDeps() - } - - //目录不存在,则直接下载 - path := filepath.Join(viper.GetString("spider.path"), spider.Name) - if !utils.Exists(path) { - log.Infof("path not found: %s", path) - spiderSync.Download() - spiderSync.CreateMd5File(gfFile.Md5) - spiderSync.CheckIsScrapy() - return - } - - // md5文件不存在,则下载 - md5 := filepath.Join(path, spider_handler.Md5File) - if !utils.Exists(md5) { - log.Infof("md5 file not found: %s", md5) - spiderSync.RemoveDownCreate(gfFile.Md5) - return - } - - // md5值不一样,则下载 - md5Str := utils.GetSpiderMd5Str(md5) - if gfFile.Md5 != md5Str { - log.Infof("md5 is different, gf-md5:%s, file-md5:%s", gfFile.Md5, md5Str) - spiderSync.RemoveDownCreate(gfFile.Md5) - return - } -} - -func RemoveSpider(id string) error { - // 获取该爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - return err - } - - // 删除爬虫文件目录 - path := filepath.Join(viper.GetString("spider.path"), spider.Name) - utils.RemoveFiles(path) - - // 删除其他节点的爬虫目录 - //msg := entity.NodeMessage{ - // Type: constants.MsgTypeRemoveSpider, - // SpiderId: id, - //} - //if err := database.Pub(constants.ChannelAllNode, msg); err != nil { - // return err - //} - - // 从数据库中删除该爬虫 - if err := model.RemoveSpider(bson.ObjectIdHex(id)); err != nil { - return err - } - - // 删除日志文件 - if err := RemoveLogBySpiderId(spider.Id); err != nil { - return err - } - - // 删除爬虫对应的task任务 - if err := model.RemoveTaskBySpiderId(spider.Id); err != nil { - return err - } - - // TODO 定时任务如何处理 - return nil -} - -func CancelSpider(id string) error { - // 获取该爬虫 - spider, err := model.GetSpider(bson.ObjectIdHex(id)) - if err != nil { - return err - } - - // 获取该爬虫待定或运行中的任务列表 - query := bson.M{ - "spider_id": spider.Id, - "status": bson.M{ - "$in": []string{ - constants.StatusPending, - constants.StatusRunning, - }, - }, - } - tasks, err := model.GetTaskList(query, 0, constants.Infinite, "-create_ts") - if err != nil { - return err - } - - // 遍历任务列表,依次停止 - for _, task := range tasks { - if err := CancelTask(task.Id); err != nil { - return err - } - } - - return nil -} - -func cloneGridFsFile(spider model.Spider, newName string) (err error) { - // 构造新爬虫 - newSpider := spider - newSpider.Id = bson.NewObjectId() - newSpider.Name = newName - newSpider.DisplayName = newName - newSpider.Src = path.Join(path.Dir(spider.Src), newName) - newSpider.CreateTs = time.Now() - newSpider.UpdateTs = time.Now() - - // GridFS连接实例 - s, gf := database.GetGridFs("files") - defer s.Close() - - // 被克隆爬虫的GridFS文件 - f, err := gf.OpenId(spider.FileId) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 新爬虫的GridFS文件 - fNew, err := gf.Create(newSpider.Name + ".zip") - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 生成唯一ID - randomId := uuid.NewV4() - tmpPath := viper.GetString("other.tmppath") - if !utils.Exists(tmpPath) { - if err := os.MkdirAll(tmpPath, 0777); err != nil { - log.Errorf("mkdir other.tmppath error: %v", err.Error()) - return err - } - } - - // 创建临时文件 - tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - tmpFile := utils.OpenFile(tmpFilePath) - - // 拷贝到临时文件 - if _, err := io.Copy(tmpFile, f); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 关闭临时文件 - if err := tmpFile.Close(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 读取内容 - fContent, err := ioutil.ReadFile(tmpFilePath) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 写入GridFS文件 - if _, err := fNew.Write(fContent); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 关闭被克隆爬虫GridFS文件 - if err = f.Close(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 将新爬虫文件复制 - newSpider.FileId = fNew.Id().(bson.ObjectId) - - // 保存新爬虫 - if err := newSpider.Add(); err != nil { - return err - } - - // 关闭新爬虫GridFS文件 - if err := fNew.Close(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 删除临时文件 - if err := os.RemoveAll(tmpFilePath); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - // 同步爬虫 - PublishSpider(newSpider) - - return nil -} - -func CopySpider(spider model.Spider, newName string) error { - // 克隆GridFS文件 - if err := cloneGridFsFile(spider, newName); err != nil { - return err - } - - return nil -} - -func UpdateSpiderDedup(spider model.Spider) error { - col := utils.GetSpiderCol(spider.Col, spider.Name) - - s, c := database.GetCol(col) - defer s.Close() - - if !spider.IsDedup { - _ = c.DropIndex(spider.DedupField) - //if err := c.DropIndex(spider.DedupField); err != nil { - // return err - //} - return nil - } - - if err := c.EnsureIndex(mgo.Index{ - Key: []string{spider.DedupField}, - Unique: true, - }); err != nil { - return err - } - - return nil -} - -func InitDemoSpiders() { - // 添加Demo爬虫 - templateSpidersDir := "./template/spiders" - for _, info := range utils.ListDir(templateSpidersDir) { - if !info.IsDir() { - continue - } - spiderName := info.Name() - - // 如果爬虫在数据库中不存在,则添加 - spider := model.GetSpiderByName(spiderName) - if spider.Name != "" { - // 存在同名爬虫,跳过 - continue - } - - // 拷贝爬虫 - templateSpiderPath := path.Join(templateSpidersDir, spiderName) - spiderPath := path.Join(viper.GetString("spider.path"), spiderName) - if utils.Exists(spiderPath) { - utils.RemoveFiles(spiderPath) - } - if err := utils.CopyDir(templateSpiderPath, spiderPath); err != nil { - log.Errorf("copy error: " + err.Error()) - debug.PrintStack() - continue - } - - // 构造配置数据 - configData := entity.ConfigSpiderData{} - - // 读取YAML文件 - yamlFile, err := ioutil.ReadFile(path.Join(spiderPath, "Spiderfile")) - if err != nil { - log.Errorf("read yaml error: " + err.Error()) - //debug.PrintStack() - continue - } - - // 反序列化 - if err := yaml.Unmarshal(yamlFile, &configData); err != nil { - log.Errorf("unmarshal error: " + err.Error()) - debug.PrintStack() - continue - } - - if configData.Type == constants.Customized { - // 添加该爬虫到数据库 - spider = model.Spider{ - Id: bson.NewObjectId(), - Name: spiderName, - DisplayName: configData.DisplayName, - Type: constants.Customized, - Col: configData.Col, - Src: spiderPath, - Remark: configData.Remark, - ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), - FileId: bson.ObjectIdHex(constants.ObjectIdNull), - Cmd: configData.Cmd, - UserId: bson.ObjectIdHex(constants.ObjectIdNull), - } - if err := spider.Add(); err != nil { - log.Errorf("add spider error: " + err.Error()) - debug.PrintStack() - continue - } - - // 上传爬虫到GridFS - if err := UploadSpiderToGridFsFromMaster(spider); err != nil { - log.Errorf("upload spider error: " + err.Error()) - debug.PrintStack() - continue - } - } else if configData.Type == constants.Configurable || configData.Type == "config" { - // 添加该爬虫到数据库 - spider = model.Spider{ - Id: bson.NewObjectId(), - Name: configData.Name, - DisplayName: configData.DisplayName, - Type: constants.Configurable, - Col: configData.Col, - Src: spiderPath, - Remark: configData.Remark, - ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), - FileId: bson.ObjectIdHex(constants.ObjectIdNull), - Config: configData, - UserId: bson.ObjectIdHex(constants.ObjectIdNull), - } - if err := spider.Add(); err != nil { - log.Errorf("add spider error: " + err.Error()) - debug.PrintStack() - continue - } - - // 根据序列化后的数据处理爬虫文件 - if err := ProcessSpiderFilesFromConfigData(spider, configData); err != nil { - log.Errorf("add spider error: " + err.Error()) - debug.PrintStack() - continue - } - } - } - - // 发布所有爬虫 - PublishAllSpiders() -} - -// 启动爬虫服务 -func InitSpiderService() error { - // 构造定时任务执行器 - cPub := cron.New(cron.WithSeconds()) - if _, err := cPub.AddFunc("0 * * * * *", PublishAllSpiders); err != nil { - return err - } - - // 启动定时任务 - cPub.Start() - - if model.IsMaster() && viper.GetString("setting.demoSpiders") == "Y" { - // 初始化Demo爬虫 - InitDemoSpiders() - } - - if model.IsMaster() { - // 构造 Git 定时任务 - GitCron = &GitCronScheduler{ - cron: cron.New(cron.WithSeconds()), - } - - // 启动 Git 定时任务 - if err := GitCron.Start(); err != nil { - return err - } - - // 清理UserId - InitSpiderCleanUserIds() - } - - return nil -} diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go deleted file mode 100644 index 389ea6cd..00000000 --- a/backend/services/spider_handler/spider.go +++ /dev/null @@ -1,250 +0,0 @@ -package spider_handler - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/model" - "crawlab/services/local_node" - "crawlab/utils" - "fmt" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "github.com/satori/go.uuid" - "github.com/spf13/viper" - "io" - "os" - "os/exec" - "path" - "path/filepath" - "runtime/debug" - "strings" - "sync" -) - -const ( - Md5File = "md5.txt" -) - -type SpiderSync struct { - Spider model.Spider -} - -func (s *SpiderSync) CreateMd5File(md5 string) { - path := filepath.Join(viper.GetString("spider.path"), s.Spider.Name) - utils.CreateDirPath(path) - - fileName := filepath.Join(path, Md5File) - file := utils.OpenFile(fileName) - defer utils.Close(file) - if file != nil { - if _, err := file.WriteString(md5 + "\n"); err != nil { - log.Errorf("file write string error: %s", err.Error()) - debug.PrintStack() - } - } -} - -func (s *SpiderSync) CheckIsScrapy() { - if s.Spider.Type == constants.Configurable { - return - } - if viper.GetString("setting.checkScrapy") != "Y" { - return - } - s.Spider.IsScrapy = utils.Exists(path.Join(s.Spider.Src, "scrapy.cfg")) - if s.Spider.IsScrapy { - s.Spider.Cmd = "scrapy crawl" - } - if err := s.Spider.Save(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } -} - -func (s *SpiderSync) AfterRemoveDownCreate() { - if model.IsMaster() { - s.CheckIsScrapy() - } -} - -func (s *SpiderSync) RemoveDownCreate(md5 string) { - s.RemoveSpiderFile() - s.Download() - s.CreateMd5File(md5) - s.AfterRemoveDownCreate() -} - -// 获得下载锁的key -func (s *SpiderSync) GetLockDownloadKey(spiderId string) string { - //node, _ := model.GetCurrentNode() - node := local_node.CurrentNode() - - return node.Id.Hex() + "#" + spiderId -} - -// 删除本地文件 -func (s *SpiderSync) RemoveSpiderFile() { - path := filepath.Join( - viper.GetString("spider.path"), - s.Spider.Name, - ) - //爬虫文件有变化,先删除本地文件 - if err := os.RemoveAll(path); err != nil { - log.Errorf("remove spider files error: %s, path: %s", err.Error(), path) - debug.PrintStack() - } -} - -// 检测是否已经下载中 -func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, string) { - key := s.GetLockDownloadKey(spiderId) - key2, err := database.RedisClient.HGet("spider", key) - if err != nil { - return false, key2 - } - if key2 == "" { - return false, key2 - } - return true, key2 -} - -// 下载爬虫 -func (s *SpiderSync) Download() { - spiderId := s.Spider.Id.Hex() - fileId := s.Spider.FileId.Hex() - isDownloading, key := s.CheckDownLoading(spiderId, fileId) - if isDownloading { - log.Infof(fmt.Sprintf("spider is already being downloaded, spider id: %s", s.Spider.Id.Hex())) - return - } else { - _ = database.RedisClient.HSet("spider", key, key) - } - - session, gf := database.GetGridFs("files") - defer session.Close() - - f, err := gf.OpenId(bson.ObjectIdHex(fileId)) - defer utils.Close(f) - if err != nil { - log.Errorf("open file id: " + fileId + ", spider id:" + spiderId + ", error: " + err.Error()) - debug.PrintStack() - return - } - - // 生成唯一ID - randomId := uuid.NewV4() - tmpPath := viper.GetString("other.tmppath") - if !utils.Exists(tmpPath) { - if err := os.MkdirAll(tmpPath, 0777); err != nil { - log.Errorf("mkdir other.tmppath error: %v", err.Error()) - return - } - } - // 创建临时文件 - tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip") - tmpFile := utils.OpenFile(tmpFilePath) - - // 将该文件写入临时文件 - if _, err := io.Copy(tmpFile, f); err != nil { - log.Errorf("copy file error: %s, file_id: %s", err.Error(), f.Id()) - debug.PrintStack() - return - } - - // 解压缩临时文件到目标文件夹 - dstPath := filepath.Join( - viper.GetString("spider.path"), - s.Spider.Name, - ) - if err := utils.DeCompress(tmpFile, dstPath); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - //递归修改目标文件夹权限 - // 解决scrapy.setting中开启LOG_ENABLED 和 LOG_FILE时不能创建log文件的问题 - cmd := exec.Command("chmod", "-R", "777", dstPath) - if err := cmd.Run(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 关闭临时文件 - if err := tmpFile.Close(); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - // 删除临时文件 - if err := os.Remove(tmpFilePath); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - - _ = database.RedisClient.HDel("spider", key) -} - -// locks for dependency installation -var installLockMap sync.Map - -// install dependencies -func (s *SpiderSync) InstallDeps() { - langs := utils.GetLangList() - for _, l := range langs { - // no dep file name is found, skip - if l.DepFileName == "" { - continue - } - - // being locked, i.e. installation is running, skip - key := s.Spider.Name + "|" + l.Name - _, locked := installLockMap.Load(key) - if locked { - continue - } - - // no dep file found, skip - if !utils.Exists(path.Join(s.Spider.Src, l.DepFileName)) { - continue - } - - // no dep install executable found, skip - if !utils.Exists(l.DepExecutablePath) { - continue - } - - // lock - installLockMap.Store(key, true) - - // command to install dependencies - cmd := exec.Command(l.DepExecutablePath, strings.Split(l.InstallDepArgs, " ")...) - - // working directory - cmd.Dir = s.Spider.Src - - // compatibility with node.js - if l.ExecutableName == constants.Nodejs { - deps, err := utils.GetPackageJsonDeps(path.Join(s.Spider.Src, l.DepFileName)) - if err != nil { - continue - } - cmd = exec.Command(l.DepExecutablePath, strings.Split(l.InstallDepArgs+" "+strings.Join(deps, " "), " ")...) - } - - // start executing command - output, err := cmd.Output() - if err != nil { - log.Errorf("install dep error: " + err.Error()) - log.Errorf(string(output)) - debug.PrintStack() - } - - // unlock - installLockMap.Delete(key) - } -} diff --git a/backend/services/spider_handler/spider_test.go b/backend/services/spider_handler/spider_test.go deleted file mode 100644 index 66d47455..00000000 --- a/backend/services/spider_handler/spider_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package spider_handler - -import ( - "crawlab/config" - "crawlab/database" - "crawlab/model" - "github.com/apex/log" - "github.com/globalsign/mgo/bson" - "runtime/debug" - "testing" -) - -var s SpiderSync - -func init() { - if err := config.InitConfig("../../conf/config.yml"); err != nil { - log.Fatal("Init config failed") - } - log.Infof("初始化配置成功") - - // 初始化Mongodb数据库 - if err := database.InitMongo(); err != nil { - log.Error("init mongodb error:" + err.Error()) - debug.PrintStack() - panic(err) - } - log.Info("初始化Mongodb数据库成功") - - // 初始化Redis数据库 - if err := database.InitRedis(); err != nil { - log.Error("init redis error:" + err.Error()) - debug.PrintStack() - panic(err) - } - log.Info("初始化Redis数据库成功") - - s = SpiderSync{ - Spider: model.Spider{ - Id: bson.ObjectIdHex("5d8d8326bc3c4f000186e5df"), - Name: "scrapy-pre_sale", - FileId: bson.ObjectIdHex("5d8d8326bc3c4f000186e5db"), - Src: "/opt/crawlab/spiders/scrapy-pre_sale", - }, - } -} - -func TestSpiderSync_CreateMd5File(t *testing.T) { - s.CreateMd5File("this is md5") -} - -func TestSpiderSync_Download(t *testing.T) { - s.Download() -} diff --git a/backend/services/sys_exec/linux_mac.go b/backend/services/sys_exec/linux_mac.go deleted file mode 100644 index 394dce47..00000000 --- a/backend/services/sys_exec/linux_mac.go +++ /dev/null @@ -1,30 +0,0 @@ -// +build !windows - -package sys_exec - -import ( - "os/exec" - "syscall" -) - -func BuildCmd(cmdStr string) *exec.Cmd { - return exec.Command("sh", "-c", cmdStr) -} - -func Setpgid(cmd *exec.Cmd) { - if cmd == nil { - return - } - if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} - } else { - cmd.SysProcAttr.Setpgid = true - } -} - -func KillProcess(cmd *exec.Cmd) error { - if cmd == nil { - return nil - } - return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) -} diff --git a/backend/services/sys_exec/windows.go b/backend/services/sys_exec/windows.go deleted file mode 100644 index 63199b09..00000000 --- a/backend/services/sys_exec/windows.go +++ /dev/null @@ -1,24 +0,0 @@ -// +build windows - -package sys_exec - -import ( - "os/exec" -) - -func BuildCmd(cmdStr string) *exec.Cmd { - return exec.Command("cmd", "/C", cmdStr) -} - -func Setpgid(cmd *exec.Cmd) { - return -} - -func KillProcess(cmd *exec.Cmd) error { - if cmd != nil && cmd.Process != nil { - if err := cmd.Process.Kill(); err != nil { - return err - } - } - return nil -} diff --git a/backend/services/system.go b/backend/services/system.go deleted file mode 100644 index a4b19e98..00000000 --- a/backend/services/system.go +++ /dev/null @@ -1,393 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/database" - "crawlab/entity" - "crawlab/lib/cron" - "crawlab/model" - "crawlab/services/rpc" - "crawlab/utils" - "encoding/json" - "errors" - "fmt" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "github.com/imroc/req" - "os/exec" - "regexp" - "runtime/debug" - "sort" - "strings" - "sync" -) - -// 系统信息 chan 映射 -var SystemInfoChanMap = utils.NewChanMap() - -// 从远端获取系统信息 -func GetRemoteSystemInfo(nodeId string) (sysInfo entity.SystemInfo, err error) { - // 发送消息 - msg := entity.NodeMessage{ - Type: constants.MsgTypeGetSystemInfo, - NodeId: nodeId, - } - - // 序列化 - msgBytes, _ := json.Marshal(&msg) - if _, err := database.RedisClient.Publish("nodes:"+nodeId, utils.BytesToString(msgBytes)); err != nil { - return entity.SystemInfo{}, err - } - - // 通道 - ch := SystemInfoChanMap.ChanBlocked(nodeId) - - // 等待响应,阻塞 - sysInfoStr := <-ch - - // 反序列化 - if err := json.Unmarshal([]byte(sysInfoStr), &sysInfo); err != nil { - return sysInfo, err - } - - return sysInfo, nil -} - -// 获取系统信息 -func GetSystemInfo(nodeId string) (sysInfo entity.SystemInfo, err error) { - if IsMasterNode(nodeId) { - sysInfo, err = rpc.GetSystemInfoServiceLocal() - } else { - sysInfo, err = rpc.GetSystemInfoServiceRemote(nodeId) - } - return -} - -// 获取语言列表 -func GetLangList(nodeId string) []entity.Lang { - list := utils.GetLangList() - for i, lang := range list { - status, _ := GetLangInstallStatus(nodeId, lang) - list[i].InstallStatus = status - } - return list -} - -// 获取语言安装状态 -func GetLangInstallStatus(nodeId string, lang entity.Lang) (string, error) { - _, err := model.GetTaskByFilter(bson.M{ - "node_id": nodeId, - "cmd": fmt.Sprintf("sh %s", utils.GetSystemScriptPath(lang.InstallScript)), - "status": bson.M{ - "$in": []string{constants.StatusPending, constants.StatusRunning}, - }, - }) - if err == nil { - // 任务正在运行,正在安装 - return constants.InstallStatusInstalling, nil - } - if err != mgo.ErrNotFound { - // 发生错误 - return "", err - } - // 获取状态 - if IsMasterNode(nodeId) { - lang := rpc.GetLangLocal(lang) - return lang.InstallStatus, nil - } else { - lang, err := rpc.GetLangRemote(nodeId, lang) - if err != nil { - return "", err - } - return lang.InstallStatus, nil - } -} - -// 是否已安装该依赖 -func IsInstalledDep(installedDepList []entity.Dependency, dep entity.Dependency) bool { - for _, _dep := range installedDepList { - if strings.ToLower(_dep.Name) == strings.ToLower(dep.Name) { - return true - } - } - return false -} - -// ========Python======== - -// 初始化函数 -func InitDepsFetcher() error { - c := cron.New(cron.WithSeconds()) - c.Start() - if _, err := c.AddFunc("0 */5 * * * *", UpdatePythonDepList); err != nil { - return err - } - - go func() { - UpdatePythonDepList() - }() - return nil -} - -type PythonDepJsonData struct { - Info PythonDepJsonDataInfo `json:"info"` -} - -type PythonDepJsonDataInfo struct { - Name string `json:"name"` - Summary string `json:"summary"` - Version string `json:"version"` -} - -type PythonDepNameDict struct { - Name string `json:"name"` - Weight int `json:"weight"` -} - -type PythonDepNameDictSlice []PythonDepNameDict - -func (s PythonDepNameDictSlice) Len() int { return len(s) } -func (s PythonDepNameDictSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s PythonDepNameDictSlice) Less(i, j int) bool { return s[i].Weight > s[j].Weight } - -// 获取Python本地依赖列表 -func GetPythonDepList(nodeId string, searchDepName string) ([]entity.Dependency, error) { - var list []entity.Dependency - - // 先从 Redis 获取 - depList, err := GetPythonDepListFromRedis() - if err != nil { - return list, err - } - - // 过滤相似的依赖 - var depNameList PythonDepNameDictSlice - for _, depName := range depList { - if strings.HasPrefix(strings.ToLower(depName), strings.ToLower(searchDepName)) { - var weight int - if strings.ToLower(depName) == strings.ToLower(searchDepName) { - weight = 3 - } else if strings.HasPrefix(strings.ToLower(depName), strings.ToLower(searchDepName)) { - weight = 2 - } else { - weight = 1 - } - depNameList = append(depNameList, PythonDepNameDict{ - Name: depName, - Weight: weight, - }) - } - } - - // 获取已安装依赖列表 - var installedDepList []entity.Dependency - if IsMasterNode(nodeId) { - installedDepList, err = rpc.GetInstalledDepsLocal(constants.Python) - if err != nil { - return list, err - } - } else { - installedDepList, err = rpc.GetInstalledDepsRemote(nodeId, constants.Python) - if err != nil { - return list, err - } - } - - // 根据依赖名排序 - sort.Stable(depNameList) - - // 遍历依赖名列表,取前20个 - for i, depNameDict := range depNameList { - if i > 20 { - break - } - dep := entity.Dependency{ - Name: depNameDict.Name, - } - dep.Installed = IsInstalledDep(installedDepList, dep) - list = append(list, dep) - } - - // 从依赖源获取信息 - //list, err = GetPythonDepListWithInfo(list) - - return list, nil -} - -// 获取Python依赖的源数据信息 -func GetPythonDepListWithInfo(depList []entity.Dependency) ([]entity.Dependency, error) { - var goSync sync.WaitGroup - for i, dep := range depList { - if i > 10 { - break - } - goSync.Add(1) - go func(i int, dep entity.Dependency, depList []entity.Dependency, n *sync.WaitGroup) { - url := fmt.Sprintf("https://pypi.org/pypi/%s/json", dep.Name) - res, err := req.Get(url) - if err != nil { - n.Done() - return - } - var data PythonDepJsonData - if err := res.ToJSON(&data); err != nil { - n.Done() - return - } - depList[i].Version = data.Info.Version - depList[i].Description = data.Info.Summary - n.Done() - }(i, dep, depList, &goSync) - } - goSync.Wait() - return depList, nil -} - -func FetchPythonDepInfo(depName string) (entity.Dependency, error) { - url := fmt.Sprintf("https://pypi.org/pypi/%s/json", depName) - res, err := req.Get(url) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return entity.Dependency{}, err - } - var data PythonDepJsonData - if res.Response().StatusCode == 404 { - return entity.Dependency{}, errors.New("get depName from [https://pypi.org] error: 404") - } - if err := res.ToJSON(&data); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return entity.Dependency{}, err - } - dep := entity.Dependency{ - Name: depName, - Version: data.Info.Version, - Description: data.Info.Summary, - } - return dep, nil -} - -// 从Redis获取Python依赖列表 -func GetPythonDepListFromRedis() ([]string, error) { - var list []string - - // 从 Redis 获取字符串 - rawData, err := database.RedisClient.HGet("system", "deps:python") - if err != nil { - return list, err - } - - // 反序列化 - if err := json.Unmarshal([]byte(rawData), &list); err != nil { - return list, err - } - - // 如果为空,则从依赖源获取列表 - if len(list) == 0 { - UpdatePythonDepList() - } - - return list, nil -} - -// 从Python依赖源获取依赖列表并返回 -func FetchPythonDepList() ([]string, error) { - // 依赖URL - url := "https://pypi.tuna.tsinghua.edu.cn/simple" - - // 输出列表 - var list []string - - // 请求URL - res, err := req.Get(url) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return list, err - } - - // 获取响应数据 - text, err := res.ToString() - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return list, err - } - - // 从响应数据中提取依赖名 - regex := regexp.MustCompile("(.*)") - for _, line := range strings.Split(text, "\n") { - arr := regex.FindStringSubmatch(line) - if len(arr) < 2 { - continue - } - list = append(list, arr[1]) - } - - // 赋值给列表 - return list, nil -} - -// 更新Python依赖列表到Redis -func UpdatePythonDepList() { - // 从依赖源获取列表 - list, _ := FetchPythonDepList() - - // 序列化 - listBytes, err := json.Marshal(list) - if err != nil { - log.Error(err.Error()) - debug.PrintStack() - return - } - - // 设置Redis - if err := database.RedisClient.HSet("system", "deps:python", string(listBytes)); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return - } -} - -// ========./Python======== - -// ========Node.js======== - -// 获取Nodejs本地依赖列表 -func GetNodejsDepList(nodeId string, searchDepName string) (depList []entity.Dependency, err error) { - // 执行shell命令 - cmd := exec.Command("npm", "search", "--json", searchDepName) - outputBytes, _ := cmd.Output() - - // 获取已安装依赖列表 - var installedDepList []entity.Dependency - if IsMasterNode(nodeId) { - installedDepList, err = rpc.GetInstalledDepsLocal(constants.Nodejs) - if err != nil { - return depList, err - } - } else { - installedDepList, err = rpc.GetInstalledDepsRemote(nodeId, constants.Nodejs) - if err != nil { - return depList, err - } - } - - // 反序列化 - if err := json.Unmarshal(outputBytes, &depList); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return depList, err - } - - // 遍历安装列表 - for i, dep := range depList { - depList[i].Installed = IsInstalledDep(installedDepList, dep) - } - - return depList, nil -} - -// ========./Node.js======== diff --git a/backend/services/task.go b/backend/services/task.go deleted file mode 100644 index 78eb576a..00000000 --- a/backend/services/task.go +++ /dev/null @@ -1,1051 +0,0 @@ -package services - -import ( - "bufio" - "crawlab/constants" - "crawlab/database" - "crawlab/lib/cron" - "crawlab/model" - "crawlab/services/local_node" - "crawlab/services/notification" - "crawlab/services/rpc" - "crawlab/services/spider_handler" - "crawlab/services/sys_exec" - "crawlab/utils" - "encoding/json" - "errors" - "fmt" - "github.com/apex/log" - "github.com/globalsign/mgo" - "github.com/globalsign/mgo/bson" - "github.com/imroc/req" - "github.com/satori/go.uuid" - "github.com/spf13/viper" - "net/http" - "os" - "os/exec" - "path/filepath" - "runtime/debug" - "strconv" - "strings" - "sync" - "time" -) - -var Exec *Executor - -// 任务执行锁 -//Added by cloud: 2019/09/04,solve data race -var LockList sync.Map - -// 任务消息 -type TaskMessage struct { - Id string - Cmd string -} - -// 序列化任务消息 -func (m *TaskMessage) ToString() (string, error) { - data, err := json.Marshal(&m) - if err != nil { - return "", err - } - return utils.BytesToString(data), err -} - -// 任务执行器 -type Executor struct { - Cron *cron.Cron -} - -// 启动任务执行器 -func (ex *Executor) Start() error { - // 启动cron服务 - ex.Cron.Start() - - // 加入执行器到定时任务 - spec := "0/1 * * * * *" // 每秒执行一次 - for i := 0; i < viper.GetInt("task.workers"); i++ { - // WorkerID - id := i - - // 初始化任务锁 - LockList.Store(id, false) - - // 加入定时任务 - _, err := ex.Cron.AddFunc(spec, GetExecuteTaskFunc(id)) - if err != nil { - return err - } - } - - return nil -} - -// 派发任务 -func AssignTask(task model.Task) error { - // 生成任务信息 - msg := TaskMessage{ - Id: task.Id, - } - - // 序列化 - msgStr, err := msg.ToString() - if err != nil { - return err - } - - // 队列名称 - var queue string - if utils.IsObjectIdNull(task.NodeId) { - queue = "tasks:public" - } else { - queue = "tasks:node:" + task.NodeId.Hex() - } - - // 任务入队 - if err := database.RedisClient.RPush(queue, msgStr); err != nil { - return err - } - return nil -} - -// 设置环境变量 -func SetEnv(cmd *exec.Cmd, envs []model.Env, task model.Task, spider model.Spider) *exec.Cmd { - // 默认把Node.js的全局node_modules加入环境变量 - envPath := os.Getenv("PATH") - nodePath := "/usr/lib/node_modules" - if !strings.Contains(envPath, nodePath) { - _ = os.Setenv("PATH", nodePath+":"+envPath) - } - _ = os.Setenv("NODE_PATH", nodePath) - - // default results collection - col := utils.GetSpiderCol(spider.Col, spider.Name) - - // 默认环境变量 - cmd.Env = append(os.Environ(), "CRAWLAB_TASK_ID="+task.Id) - cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+col) - cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_HOST="+viper.GetString("mongo.host")) - cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_PORT="+viper.GetString("mongo.port")) - if viper.GetString("mongo.db") != "" { - cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_DB="+viper.GetString("mongo.db")) - } - if viper.GetString("mongo.username") != "" { - cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_USERNAME="+viper.GetString("mongo.username")) - } - if viper.GetString("mongo.password") != "" { - cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_PASSWORD="+viper.GetString("mongo.password")) - } - if viper.GetString("mongo.authSource") != "" { - cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_AUTHSOURCE="+viper.GetString("mongo.authSource")) - } - cmd.Env = append(cmd.Env, "PYTHONUNBUFFERED=0") - cmd.Env = append(cmd.Env, "PYTHONIOENCODING=utf-8") - cmd.Env = append(cmd.Env, "TZ=Asia/Shanghai") - cmd.Env = append(cmd.Env, "CRAWLAB_DEDUP_FIELD="+spider.DedupField) - cmd.Env = append(cmd.Env, "CRAWLAB_DEDUP_METHOD="+spider.DedupMethod) - if spider.IsDedup { - cmd.Env = append(cmd.Env, "CRAWLAB_IS_DEDUP=1") - } else { - cmd.Env = append(cmd.Env, "CRAWLAB_IS_DEDUP=0") - } - - //任务环境变量 - for _, env := range envs { - cmd.Env = append(cmd.Env, env.Name+"="+env.Value) - } - - // 全局环境变量 - variables := model.GetVariableList() - for _, variable := range variables { - cmd.Env = append(cmd.Env, variable.Key+"="+variable.Value) - } - return cmd -} - -func SetLogConfig(wg *sync.WaitGroup, cmd *exec.Cmd, t model.Task, u model.User) error { - - esChan := make(chan string, 1) - esClientStr := viper.GetString("setting.esClient") - spiderLogIndex := viper.GetString("setting.spiderLogIndex") - // get stdout reader - stdout, err := cmd.StdoutPipe() - if err != nil { - log.Errorf("get stdout error: %s", err.Error()) - debug.PrintStack() - return err - } - readerStdout := bufio.NewReader(stdout) - - // get stderr reader - stderr, err := cmd.StderrPipe() - if err != nil { - log.Errorf("get stdout error: %s", err.Error()) - debug.PrintStack() - return err - } - readerStderr := bufio.NewReader(stderr) - - var seq int64 - var logs []model.LogItem - isStdoutFinished := false - isStderrFinished := false - - // periodically (5 sec) insert log items - wg.Add(3) - go func() { - defer wg.Done() - for { - _ = model.AddLogItems(logs) - logs = []model.LogItem{} - if isStdoutFinished && isStderrFinished { - break - } - time.Sleep(5 * time.Second) - } - }() - - // expire duration (in seconds) - expireDuration := u.Setting.LogExpireDuration - if expireDuration == 0 { - // by default 1 day - expireDuration = 3600 * 24 - } - - // read stdout - go func() { - defer wg.Done() - for { - line, err := readerStdout.ReadString('\n') - if err != nil { - isStdoutFinished = true - break - } - line = strings.Replace(line, "\n", "", -1) - seq++ - l := model.LogItem{ - Id: bson.NewObjectId(), - Seq: seq, - Message: line, - TaskId: t.Id, - Ts: time.Now(), - ExpireTs: time.Now().Add(time.Duration(expireDuration) * time.Second), - } - - if esClientStr != "" { - esChan <- l.Message - go database.WriteMsgToES(time.Now(), esChan, spiderLogIndex) - } - - logs = append(logs, l) - } - }() - - // read stderr - go func() { - defer wg.Done() - for { - line, err := readerStderr.ReadString('\n') - if err != nil { - isStderrFinished = true - break - } - line = strings.Replace(line, "\n", "", -1) - seq++ - l := model.LogItem{ - Id: bson.NewObjectId(), - Seq: seq, - Message: line, - TaskId: t.Id, - Ts: time.Now(), - ExpireTs: time.Now().Add(time.Duration(expireDuration) * time.Second), - } - - if esClientStr != "" { - esChan <- l.Message - go database.WriteMsgToES(time.Now(), esChan, spiderLogIndex) - } - logs = append(logs, l) - } - }() - - wg.Wait() - return nil -} - -func FinishOrCancelTask(ch chan string, cmd *exec.Cmd, s model.Spider, t model.Task) { - // 传入信号,此处阻塞 - signal := <-ch - log.Infof("process received signal: %s", signal) - - if signal == constants.TaskCancel && cmd.Process != nil { - // 终止进程 - if err := sys_exec.KillProcess(cmd); err != nil { - log.Errorf("process kill error: %s", err.Error()) - debug.PrintStack() - - t.Error = "kill process error: " + err.Error() - t.Status = constants.StatusError - } else { - t.Error = "user kill the process ..." - t.Status = constants.StatusCancelled - } - } else { - // 保存任务 - t.Status = constants.StatusFinished - } - - t.FinishTs = time.Now() - _ = t.Save() - - go FinishUpTask(s, t) -} - -func StartTaskProcess(cmd *exec.Cmd, t model.Task) error { - if err := cmd.Start(); err != nil { - log.Errorf("start spider error:{}", err.Error()) - debug.PrintStack() - - t.Error = "start task error: " + err.Error() - t.Status = constants.StatusError - t.FinishTs = time.Now() - _ = t.Save() - return err - } - return nil -} - -func WaitTaskProcess(cmd *exec.Cmd, t model.Task, s model.Spider) error { - if err := cmd.Wait(); err != nil { - log.Errorf("wait process finish error: %s", err.Error()) - debug.PrintStack() - - if exitError, ok := err.(*exec.ExitError); ok { - exitCode := exitError.ExitCode() - log.Errorf("exit error, exit code: %d", exitCode) - - // 非kill 的错误类型 - if exitCode != -1 { - // 非手动kill保存为错误状态 - t.Error = err.Error() - t.FinishTs = time.Now() - t.Status = constants.StatusError - _ = t.Save() - - FinishUpTask(s, t) - } - } - - return err - } - - return nil -} - -// 执行shell命令 -func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider, u model.User) (err error) { - log.Infof("cwd: %s", cwd) - log.Infof("cmd: %s", cmdStr) - - wg := &sync.WaitGroup{} - - // 生成执行命令 - cmd := sys_exec.BuildCmd(cmdStr) - - // 工作目录 - cmd.Dir = cwd - - // 日志配置 - go SetLogConfig(wg, cmd, t, u) - - // 环境变量配置 - envs := s.Envs - if s.Type == constants.Configurable { - // 数据库配置 - envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_HOST", Value: viper.GetString("mongo.host")}) - envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PORT", Value: viper.GetString("mongo.port")}) - envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_DB", Value: viper.GetString("mongo.db")}) - envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_USERNAME", Value: viper.GetString("mongo.username")}) - envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PASSWORD", Value: viper.GetString("mongo.password")}) - envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_AUTHSOURCE", Value: viper.GetString("mongo.authSource")}) - - // 设置配置 - for envName, envValue := range s.Config.Settings { - envs = append(envs, model.Env{Name: "CRAWLAB_SETTING_" + envName, Value: envValue}) - } - } - cmd = SetEnv(cmd, envs, t, s) - - // 起一个goroutine来监控进程 - ch := utils.TaskExecChanMap.ChanBlocked(t.Id) - go FinishOrCancelTask(ch, cmd, s, t) - - // kill的时候,可以kill所有的子进程 - sys_exec.Setpgid(cmd) - - // 启动进程 - if err := StartTaskProcess(cmd, t); err != nil { - return err - } - - // 同步等待进程完成 - if err := WaitTaskProcess(cmd, t, s); err != nil { - return err - } - - // 如果返回值不为0,返回错误 - returnCode := cmd.ProcessState.ExitCode() - if returnCode != 0 { - log.Errorf(fmt.Sprintf("task returned code not zero: %d", returnCode)) - debug.PrintStack() - return errors.New(fmt.Sprintf("task returned code not zero: %d", returnCode)) - } - - ch <- constants.TaskFinish - return nil -} - -// 生成执行任务方法 -func GetExecuteTaskFunc(id int) func() { - return func() { - ExecuteTask(id) - } -} - -func GetWorkerPrefix(id int) string { - return "[Worker " + strconv.Itoa(id) + "] " -} - -// 统计任务结果数 -func SaveTaskResultCount(id string) func() { - return func() { - if err := model.UpdateTaskResultCount(id); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return - } - } -} - -// Scan Error Logs -func ScanErrorLogs(t model.Task) func() { - return func() { - u, err := model.GetUser(t.UserId) - if err != nil { - return - } - if err := model.UpdateTaskErrorLogs(t.Id, u.Setting.ErrorRegexPattern); err != nil { - return - } - if err := model.UpdateErrorLogCount(t.Id); err != nil { - return - } - } -} - -// 执行任务 -func ExecuteTask(id int) { - if flag, ok := LockList.Load(id); ok { - if flag.(bool) { - log.Debugf(GetWorkerPrefix(id) + "running tasks...") - return - } - } - - // 上锁 - LockList.Store(id, true) - - // 解锁(延迟执行) - defer func() { - LockList.Delete(id) - LockList.Store(id, false) - }() - - // 开始计时 - tic := time.Now() - - // 获取当前节点 - //node, err := model.GetCurrentNode() - //if err != nil { - // log.Errorf("execute task get current node error: %s", err.Error()) - // debug.PrintStack() - // return - //} - node := local_node.CurrentNode() - - // 节点队列 - queueCur := "tasks:node:" + node.Id.Hex() - - // 节点队列任务 - var msg string - var err error - if msg, err = database.RedisClient.LPop(queueCur); err != nil { - // 节点队列没有任务,获取公共队列任务 - queuePub := "tasks:public" - if msg, err = database.RedisClient.LPop(queuePub); err != nil { - } - } - - // 如果没有获取到任务,返回 - if msg == "" { - return - } - - // 反序列化 - tMsg := TaskMessage{} - if err := json.Unmarshal([]byte(msg), &tMsg); err != nil { - log.Errorf("json string to struct error: %s", err.Error()) - return - } - - // 获取任务 - t, err := model.GetTask(tMsg.Id) - if err != nil { - log.Errorf("execute task, get task error: %s", err.Error()) - return - } - - // 获取爬虫 - var spider model.Spider - if t.Type == constants.TaskTypeSpider { - spider, err = t.GetSpider() - if err != nil { - log.Errorf("execute task, get spider error: %s", err.Error()) - return - } - } - - // 工作目录 - cwd := filepath.Join( - viper.GetString("spider.path"), - spider.Name, - ) - - // 执行命令 - var cmd string - if t.Type == constants.TaskTypeSpider { - // 爬虫任务 - if spider.Type == constants.Configurable { - // 可配置爬虫命令 - cmd = "scrapy crawl config_spider" - } else { - // 自定义爬虫命令 - cmd = spider.Cmd - } - t.Cmd = cmd - } else if t.Type == constants.TaskTypeSystem { - // 系统任务 - cmd = t.Cmd - } - - // 加入参数 - if t.Param != "" { - cmd += " " + t.Param - } - - // 获得触发任务用户 - user, err := model.GetUser(t.UserId) - if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - return - } - - // 任务赋值 - t.NodeId = node.Id // 任务节点信息 - t.StartTs = time.Now() // 任务开始时间 - t.Status = constants.StatusRunning // 任务状态 - t.WaitDuration = t.StartTs.Sub(t.CreateTs).Seconds() // 等待时长 - - // 储存任务 - _ = t.Save() - - // 发送 Web Hook 请求 (任务开始) - go SendWebHookRequest(user, t, spider) - - // 爬虫任务专属逻辑 - if t.Type == constants.TaskTypeSpider { - // 文件检查 - if err := SpiderFileCheck(t, spider); err != nil { - log.Errorf("spider file check error: %s", err.Error()) - return - } - - // 开始执行任务 - log.Infof(GetWorkerPrefix(id) + "start task (id:" + t.Id + ")") - - // 创建结果集索引 - go func() { - col := utils.GetSpiderCol(spider.Col, spider.Name) - CreateResultsIndexes(col) - }() - - // 起一个cron执行器来统计任务结果数 - cronExec := cron.New(cron.WithSeconds()) - _, err = cronExec.AddFunc("*/5 * * * * *", SaveTaskResultCount(t.Id)) - if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - debug.PrintStack() - return - } - cronExec.Start() - defer cronExec.Stop() - - // 起一个cron来更新错误日志 - cronExecErrLog := cron.New(cron.WithSeconds()) - _, err = cronExecErrLog.AddFunc("*/30 * * * * *", ScanErrorLogs(t)) - if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - debug.PrintStack() - return - } - cronExecErrLog.Start() - defer cronExecErrLog.Stop() - } - - // 执行Shell命令 - if err := ExecuteShellCmd(cmd, cwd, t, spider, user); err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - - // 如果发生错误,则发送通知 - t, _ = model.GetTask(t.Id) - if user.Setting.NotificationTrigger == constants.NotificationTriggerOnTaskEnd || user.Setting.NotificationTrigger == constants.NotificationTriggerOnTaskError { - SendNotifications(user, t, spider) - } - - // 发送 Web Hook 请求 (任务开始) - go SendWebHookRequest(user, t, spider) - - return - } - - // 完成进程 - t, err = model.GetTask(t.Id) - if err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - return - } - - // 统计数据 - t.Status = constants.StatusFinished // 任务状态: 已完成 - t.FinishTs = time.Now() // 结束时间 - t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长 - t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长 - - // 发送 Web Hook 请求 (任务结束) - go SendWebHookRequest(user, t, spider) - - // 如果是任务结束时发送通知,则发送通知 - if user.Setting.NotificationTrigger == constants.NotificationTriggerOnTaskEnd { - SendNotifications(user, t, spider) - } - - // 保存任务 - if err := t.Save(); err != nil { - log.Errorf(GetWorkerPrefix(id) + err.Error()) - return - } - - // 完成任务收尾工作 - go FinishUpTask(spider, t) - - // 结束计时 - toc := time.Now() - - // 统计时长 - duration := toc.Sub(tic).Seconds() - durationStr := strconv.FormatFloat(duration, 'f', 6, 64) - log.Infof(GetWorkerPrefix(id) + "task (id:" + t.Id + ")" + " finished. elapsed:" + durationStr + " sec") -} - -func FinishUpTask(s model.Spider, t model.Task) { - // 更新任务结果数 - if t.Type == constants.TaskTypeSpider { - go func() { - if err := model.UpdateTaskResultCount(t.Id); err != nil { - return - } - }() - } - - // 更新任务错误日志 - go func() { - ScanErrorLogs(t)() - }() -} - -func SpiderFileCheck(t model.Task, spider model.Spider) error { - // 判断爬虫文件是否存在 - gfFile := model.GetGridFs(spider.FileId) - if gfFile == nil { - t.Error = "cannot find spider files, please re-upload" - t.Status = constants.StatusError - t.FinishTs = time.Now() // 结束时间 - t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长 - t.TotalDuration = t.FinishTs.Sub(t.CreateTs).Seconds() // 总时长 - _ = t.Save() - return errors.New(t.Error) - } - - // 判断md5值是否一致 - path := filepath.Join(viper.GetString("spider.path"), spider.Name) - md5File := filepath.Join(path, spider_handler.Md5File) - md5 := utils.GetSpiderMd5Str(md5File) - if gfFile.Md5 != md5 { - spiderSync := spider_handler.SpiderSync{Spider: spider} - spiderSync.RemoveDownCreate(gfFile.Md5) - } - return nil -} - -func GetTaskLog(id string, keyword string, page int, pageSize int) (logItems []model.LogItem, logTotal int, err error) { - task, err := model.GetTask(id) - if err != nil { - return - } - - logItems, logTotal, err = task.GetLogItems(keyword, page, pageSize) - if err != nil { - return logItems, logTotal, err - } - - return logItems, logTotal, nil -} - -func GetTaskErrorLog(id string, n int) (errLogItems []model.ErrorLogItem, err error) { - if n == 0 { - n = 1000 - } - - task, err := model.GetTask(id) - if err != nil { - return - } - errLogItems, err = task.GetErrorLogItems(n) - if err != nil { - return - } - return errLogItems, nil -} - -func CancelTask(id string) (err error) { - // 获取任务 - task, err := model.GetTask(id) - if err != nil { - log.Errorf("task not found, task id : %s, error: %s", id, err.Error()) - debug.PrintStack() - return err - } - - // 如果任务状态不为pending或running,返回错误 - if task.Status != constants.StatusPending && task.Status != constants.StatusRunning { - return errors.New("task is not cancellable") - } - - // 获取当前节点(默认当前节点为主节点) - //node, err := model.GetCurrentNode() - //if err != nil { - // log.Errorf("get current node error: %s", err.Error()) - // debug.PrintStack() - // return err - //} - node := local_node.CurrentNode() - - log.Infof("current node id is: %s", node.Id.Hex()) - log.Infof("task node id is: %s", task.NodeId.Hex()) - - if node.Id == task.NodeId { - // 任务节点为主节点 - if err := rpc.CancelTaskLocal(task.Id, task.NodeId.Hex()); err != nil { - return err - } - } else { - // 任务节点为工作节点 - if err := rpc.CancelTaskRemote(task.Id, task.NodeId.Hex()); err != nil { - return err - } - } - - return nil -} - -func RestartTask(id string, uid bson.ObjectId) (err error) { - // 获取任务 - oldTask, err := model.GetTask(id) - if err != nil { - log.Errorf("task not found, task id : %s, error: %s", id, err.Error()) - debug.PrintStack() - return err - } - - newTask := model.Task{ - SpiderId: oldTask.SpiderId, - NodeId: oldTask.NodeId, - Cmd: oldTask.Cmd, - Param: oldTask.Param, - UserId: uid, - RunType: oldTask.RunType, - ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), - Type: oldTask.Type, - } - - // 加入任务队列 - _, err = AddTask(newTask) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return err - } - - return nil -} - -func AddTask(t model.Task) (string, error) { - // 生成任务ID - id := uuid.NewV4() - t.Id = id.String() - - // 设置任务状态 - t.Status = constants.StatusPending - - // 如果没有传入node_id,则置为null - if t.NodeId.Hex() == "" { - t.NodeId = bson.ObjectIdHex(constants.ObjectIdNull) - } - - // 将任务存入数据库 - if err := model.AddTask(t); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return t.Id, err - } - - // 加入任务队列 - if err := AssignTask(t); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return t.Id, err - } - - return t.Id, nil -} - -func GetTaskEmailMarkdownContent(t model.Task, s model.Spider) string { - n, _ := model.GetNode(t.NodeId) - errMsg := "" - statusMsg := fmt.Sprintf(`%s`, t.Status) - if t.Status == constants.StatusError { - errMsg = " with errors" - statusMsg = fmt.Sprintf(`%s`, t.Status) - } - return fmt.Sprintf(` -Your task has finished%s. Please find the task info below. - - | ---: | :-- -**Task ID:** | %s -**Task Status:** | %s -**Task Param:** | %s -**Spider ID:** | %s -**Spider Name:** | %s -**Node:** | %s -**Create Time:** | %s -**Start Time:** | %s -**Finish Time:** | %s -**Wait Duration:** | %.0f sec -**Runtime Duration:** | %.0f sec -**Total Duration:** | %.0f sec -**Number of Results:** | %d -**Error:** | %s - -Please login to Crawlab to view the details. -`, - errMsg, - t.Id, - statusMsg, - t.Param, - s.Id.Hex(), - s.Name, - n.Name, - utils.GetLocalTimeString(t.CreateTs), - utils.GetLocalTimeString(t.StartTs), - utils.GetLocalTimeString(t.FinishTs), - t.WaitDuration, - t.RuntimeDuration, - t.TotalDuration, - t.ResultCount, - t.Error, - ) -} - -func GetTaskMarkdownContent(t model.Task, s model.Spider) string { - n, _ := model.GetNode(t.NodeId) - errMsg := "" - errLog := "-" - statusMsg := fmt.Sprintf(`%s`, t.Status) - if t.Status == constants.StatusError { - errMsg = `(有错误)` - errLog = fmt.Sprintf(`%s`, t.Error) - statusMsg = fmt.Sprintf(`%s`, t.Status) - } - return fmt.Sprintf(` -您的任务已完成%s,请查看任务信息如下。 - -> **任务ID:** %s -> **任务状态:** %s -> **任务参数:** %s -> **爬虫ID:** %s -> **爬虫名称:** %s -> **节点:** %s -> **创建时间:** %s -> **开始时间:** %s -> **完成时间:** %s -> **等待时间:** %.0f秒 -> **运行时间:** %.0f秒 -> **总时间:** %.0f秒 -> **结果数:** %d -> **错误:** %s - -请登录Crawlab查看详情。 -`, - errMsg, - t.Id, - statusMsg, - t.Param, - s.Id.Hex(), - s.Name, - n.Name, - utils.GetLocalTimeString(t.CreateTs), - utils.GetLocalTimeString(t.StartTs), - utils.GetLocalTimeString(t.FinishTs), - t.WaitDuration, - t.RuntimeDuration, - t.TotalDuration, - t.ResultCount, - errLog, - ) -} - -func CreateResultsIndexes(col string) { - s, c := database.GetCol(col) - defer s.Close() - - _ = c.EnsureIndex(mgo.Index{ - Key: []string{"task_id"}, - }) -} - -func SendTaskEmail(u model.User, t model.Task, s model.Spider) { - statusMsg := "has finished" - if t.Status == constants.StatusError { - statusMsg = "has an error" - } - title := fmt.Sprintf("[Crawlab] Task for \"%s\" %s", s.Name, statusMsg) - if err := notification.SendMail( - u.Email, - u.Username, - title, - GetTaskEmailMarkdownContent(t, s), - ); err != nil { - log.Errorf("mail error: " + err.Error()) - debug.PrintStack() - } -} - -func SendTaskDingTalk(u model.User, t model.Task, s model.Spider) { - statusMsg := "已完成" - if t.Status == constants.StatusError { - statusMsg = "发生错误" - } - title := fmt.Sprintf("[Crawlab] \"%s\" 任务%s", s.Name, statusMsg) - content := GetTaskMarkdownContent(t, s) - if err := notification.SendMobileNotification(u.Setting.DingTalkRobotWebhook, title, content); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } -} - -func SendTaskWechat(u model.User, t model.Task, s model.Spider) { - content := GetTaskMarkdownContent(t, s) - if err := notification.SendMobileNotification(u.Setting.WechatRobotWebhook, "", content); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - } -} - -func SendNotifications(u model.User, t model.Task, s model.Spider) { - if u.Email != "" && utils.StringArrayContains(u.Setting.EnabledNotifications, constants.NotificationTypeMail) { - go func() { - SendTaskEmail(u, t, s) - }() - } - - if u.Setting.DingTalkRobotWebhook != "" && utils.StringArrayContains(u.Setting.EnabledNotifications, constants.NotificationTypeDingTalk) { - go func() { - SendTaskDingTalk(u, t, s) - }() - } - - if u.Setting.WechatRobotWebhook != "" && utils.StringArrayContains(u.Setting.EnabledNotifications, constants.NotificationTypeWechat) { - go func() { - SendTaskWechat(u, t, s) - }() - } -} - -func SendWebHookRequest(u model.User, t model.Task, s model.Spider) { - type RequestBody struct { - Status string `json:"status"` - Task model.Task `json:"task"` - Spider model.Spider `json:"spider"` - UserName string `json:"user_name"` - } - - if s.IsWebHook && s.WebHookUrl != "" { - // request header - header := req.Header{ - "Content-Type": "application/json; charset=utf-8", - } - - // request body - reqBody := RequestBody{ - Status: t.Status, - UserName: u.Username, - Task: t, - Spider: s, - } - - // make POST http request - res, err := req.Post(s.WebHookUrl, header, req.BodyJSON(reqBody)) - if err != nil { - log.Errorf("sent web hook request with error: " + err.Error()) - debug.PrintStack() - return - } - if res.Response().StatusCode != http.StatusOK { - log.Errorf(fmt.Sprintf("sent web hook request with error http code: %d, task_id: %s, status: %s", res.Response().StatusCode, t.Id, t.Status)) - debug.PrintStack() - return - } - log.Infof(fmt.Sprintf("sent web hook request, task_id: %s, status: %s)", t.Id, t.Status)) - } -} - -func InitTaskExecutor() error { - // 构造任务执行器 - c := cron.New(cron.WithSeconds()) - Exec = &Executor{ - Cron: c, - } - - // 如果不允许主节点运行任务,则跳过 - if model.IsMaster() && viper.GetString("setting.runOnMaster") == "N" { - return nil - } - - // 运行定时任务 - if err := Exec.Start(); err != nil { - return err - } - return nil -} diff --git a/backend/services/user.go b/backend/services/user.go deleted file mode 100644 index fbad2c71..00000000 --- a/backend/services/user.go +++ /dev/null @@ -1,133 +0,0 @@ -package services - -import ( - "crawlab/constants" - "crawlab/model" - "crawlab/utils" - "errors" - "github.com/dgrijalva/jwt-go" - "github.com/gin-gonic/gin" - "github.com/globalsign/mgo/bson" - "github.com/spf13/viper" - "strings" - "time" -) - -func InitUserService() error { - _ = CreateNewUser("admin", "admin", constants.RoleAdmin, "", bson.ObjectIdHex(constants.ObjectIdNull)) - return nil -} - -func MakeToken(user *model.User) (tokenStr string, err error) { - token := jwt.NewWithClaims(jwt.SigningMethodHS256, jwt.MapClaims{ - "id": user.Id, - "username": user.Username, - "nbf": time.Now().Unix(), - }) - - return token.SignedString([]byte(viper.GetString("server.secret"))) - -} - -//func GetToken(username string) (tokenStr string, err error) { -// user, err := model.GetUserByUsername(username) -// if err != nil { -// log.Errorf(err.Error()) -// debug.PrintStack() -// return -// } -// -// token := jwt.NewWithClaims(jwt.SigningMethodHS256, jwt.MapClaims{ -// "id": user.Id, -// "username": user.Username, -// "nbf": time.Now().Unix(), -// }) -// -// tokenStr, err = token.SignedString([]byte(viper.GetString("server.secret"))) -// if err != nil { -// return -// } -// return -//} - -func SecretFunc() jwt.Keyfunc { - return func(token *jwt.Token) (interface{}, error) { - return []byte(viper.GetString("server.secret")), nil - } -} - -func CheckToken(tokenStr string) (user model.User, err error) { - token, err := jwt.Parse(tokenStr, SecretFunc()) - if err != nil { - return - } - - claim, ok := token.Claims.(jwt.MapClaims) - if !ok { - err = errors.New("cannot convert claim to mapclaim") - return - } - - //验证token,如果token被修改过则为false - if !token.Valid { - err = errors.New("token is invalid") - return - } - - id := bson.ObjectIdHex(claim["id"].(string)) - username := claim["username"].(string) - user, err = model.GetUser(id) - if err != nil { - err = errors.New("cannot get user") - return - } - - if username != user.Username { - err = errors.New("username does not match") - return - } - - return -} - -func CreateNewUser(username string, password string, role string, email string, uid bson.ObjectId) error { - user := model.User{ - Username: strings.ToLower(username), - Password: utils.EncryptPassword(password), - Role: role, - Email: email, - UserId: uid, - Setting: model.UserSetting{ - NotificationTrigger: constants.NotificationTriggerNever, - EnabledNotifications: []string{ - constants.NotificationTypeMail, - constants.NotificationTypeDingTalk, - constants.NotificationTypeWechat, - }, - }, - } - if err := user.Add(); err != nil { - return err - } - return nil -} - -func GetCurrentUser(c *gin.Context) *model.User { - data, _ := c.Get(constants.ContextUser) - if data == nil { - return &model.User{} - } - return data.(*model.User) -} - -func GetCurrentUserId(c *gin.Context) bson.ObjectId { - return GetCurrentUser(c).Id -} - -func GetAdminUser() (user *model.User, err error) { - u, err := model.GetUserByUsername("admin") - if err != nil { - return user, err - } - return &u, nil -} diff --git a/backend/services/version.go b/backend/services/version.go deleted file mode 100644 index 34df7b22..00000000 --- a/backend/services/version.go +++ /dev/null @@ -1,29 +0,0 @@ -package services - -import ( - "crawlab/entity" - "github.com/apex/log" - "github.com/imroc/req" - "runtime/debug" - "sort" -) - -func GetLatestRelease() (release entity.Release, err error) { - res, err := req.Get("https://api.github.com/repos/crawlab-team/crawlab/releases") - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return release, err - } - - var releaseDataList entity.ReleaseSlices - if err := res.ToJSON(&releaseDataList); err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return release, err - } - - sort.Sort(releaseDataList) - - return releaseDataList[len(releaseDataList)-1], nil -} diff --git a/backend/template/scrapy/config_spider/__init__.py b/backend/template/scrapy/config_spider/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/template/scrapy/config_spider/items.py b/backend/template/scrapy/config_spider/items.py deleted file mode 100644 index 16681a52..00000000 --- a/backend/template/scrapy/config_spider/items.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your scraped items -# -# See documentation in: -# https://docs.scrapy.org/en/latest/topics/items.html - -import scrapy - - -class Item(scrapy.Item): -###ITEMS### diff --git a/backend/template/scrapy/config_spider/middlewares.py b/backend/template/scrapy/config_spider/middlewares.py deleted file mode 100644 index e864bd0b..00000000 --- a/backend/template/scrapy/config_spider/middlewares.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your spider middleware -# -# See documentation in: -# https://docs.scrapy.org/en/latest/topics/spider-middleware.html - -from scrapy import signals - - -class ConfigSpiderSpiderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the spider middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_spider_input(self, response, spider): - # Called for each response that goes through the spider - # middleware and into the spider. - - # Should return None or raise an exception. - return None - - def process_spider_output(self, response, result, spider): - # Called with the results returned from the Spider, after - # it has processed the response. - - # Must return an iterable of Request, dict or Item objects. - for i in result: - yield i - - def process_spider_exception(self, response, exception, spider): - # Called when a spider or process_spider_input() method - # (from other spider middleware) raises an exception. - - # Should return either None or an iterable of Request, dict - # or Item objects. - pass - - def process_start_requests(self, start_requests, spider): - # Called with the start requests of the spider, and works - # similarly to the process_spider_output() method, except - # that it doesn’t have a response associated. - - # Must return only requests (not items). - for r in start_requests: - yield r - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) - - -class ConfigSpiderDownloaderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the downloader middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_request(self, request, spider): - # Called for each request that goes through the downloader - # middleware. - - # Must either: - # - return None: continue processing this request - # - or return a Response object - # - or return a Request object - # - or raise IgnoreRequest: process_exception() methods of - # installed downloader middleware will be called - return None - - def process_response(self, request, response, spider): - # Called with the response returned from the downloader. - - # Must either; - # - return a Response object - # - return a Request object - # - or raise IgnoreRequest - return response - - def process_exception(self, request, exception, spider): - # Called when a download handler or a process_request() - # (from other downloader middleware) raises an exception. - - # Must either: - # - return None: continue processing this exception - # - return a Response object: stops process_exception() chain - # - return a Request object: stops process_exception() chain - pass - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) diff --git a/backend/template/scrapy/config_spider/pipelines.py b/backend/template/scrapy/config_spider/pipelines.py deleted file mode 100644 index 69af4c85..00000000 --- a/backend/template/scrapy/config_spider/pipelines.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define your item pipelines here -# -# Don't forget to add your pipeline to the ITEM_PIPELINES setting -# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html - -import os -from pymongo import MongoClient - -mongo = MongoClient( - host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost', - port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017), - username=os.environ.get('CRAWLAB_MONGO_USERNAME'), - password=os.environ.get('CRAWLAB_MONGO_PASSWORD'), - authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin' -) -db = mongo[os.environ.get('CRAWLAB_MONGO_DB') or 'test'] -col = db[os.environ.get('CRAWLAB_COLLECTION') or 'test'] -task_id = os.environ.get('CRAWLAB_TASK_ID') - -class ConfigSpiderPipeline(object): - def process_item(self, item, spider): - item['task_id'] = task_id - if col is not None: - col.save(item) - return item diff --git a/backend/template/scrapy/config_spider/settings.py b/backend/template/scrapy/config_spider/settings.py deleted file mode 100644 index 4b0965f2..00000000 --- a/backend/template/scrapy/config_spider/settings.py +++ /dev/null @@ -1,111 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import re -import json - -# Scrapy settings for config_spider project -# -# For simplicity, this file contains only settings considered important or -# commonly used. You can find more settings consulting the documentation: -# -# https://docs.scrapy.org/en/latest/topics/settings.html -# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html -# https://docs.scrapy.org/en/latest/topics/spider-middleware.html - -BOT_NAME = 'Crawlab Configurable Spider' - -SPIDER_MODULES = ['config_spider.spiders'] -NEWSPIDER_MODULE = 'config_spider.spiders' - - -# Crawl responsibly by identifying yourself (and your website) on the user-agent -USER_AGENT = 'Crawlab Spider' - -# Obey robots.txt rules -ROBOTSTXT_OBEY = True - -# Configure maximum concurrent requests performed by Scrapy (default: 16) -#CONCURRENT_REQUESTS = 32 - -# Configure a delay for requests for the same website (default: 0) -# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay -# See also autothrottle settings and docs -#DOWNLOAD_DELAY = 3 -# The download delay setting will honor only one of: -#CONCURRENT_REQUESTS_PER_DOMAIN = 16 -#CONCURRENT_REQUESTS_PER_IP = 16 - -# Disable cookies (enabled by default) -#COOKIES_ENABLED = False - -# Disable Telnet Console (enabled by default) -#TELNETCONSOLE_ENABLED = False - -# Override the default request headers: -#DEFAULT_REQUEST_HEADERS = { -# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', -# 'Accept-Language': 'en', -#} - -# Enable or disable spider middlewares -# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html -#SPIDER_MIDDLEWARES = { -# 'config_spider.middlewares.ConfigSpiderSpiderMiddleware': 543, -#} - -# Enable or disable downloader middlewares -# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html -#DOWNLOADER_MIDDLEWARES = { -# 'config_spider.middlewares.ConfigSpiderDownloaderMiddleware': 543, -#} - -# Enable or disable extensions -# See https://docs.scrapy.org/en/latest/topics/extensions.html -#EXTENSIONS = { -# 'scrapy.extensions.telnet.TelnetConsole': None, -#} - -# Configure item pipelines -# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html -ITEM_PIPELINES = { - 'config_spider.pipelines.ConfigSpiderPipeline': 300, -} - -# Enable and configure the AutoThrottle extension (disabled by default) -# See https://docs.scrapy.org/en/latest/topics/autothrottle.html -#AUTOTHROTTLE_ENABLED = True -# The initial download delay -#AUTOTHROTTLE_START_DELAY = 5 -# The maximum download delay to be set in case of high latencies -#AUTOTHROTTLE_MAX_DELAY = 60 -# The average number of requests Scrapy should be sending in parallel to -# each remote server -#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 -# Enable showing throttling stats for every response received: -#AUTOTHROTTLE_DEBUG = False - -# Enable and configure HTTP caching (disabled by default) -# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings -#HTTPCACHE_ENABLED = True -#HTTPCACHE_EXPIRATION_SECS = 0 -#HTTPCACHE_DIR = 'httpcache' -#HTTPCACHE_IGNORE_HTTP_CODES = [] -#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' - -for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]: - setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '') - setting_value = os.environ.get(setting_env_name) - if setting_value.lower() == 'true': - setting_value = True - elif setting_value.lower() == 'false': - setting_value = False - elif re.search(r'^\d+$', setting_value) is not None: - setting_value = int(setting_value) - elif re.search(r'^\{.*\}$', setting_value.strip()) is not None: - setting_value = json.loads(setting_value) - elif re.search(r'^\[.*\]$', setting_value.strip()) is not None: - setting_value = json.loads(setting_value) - else: - pass - locals()[setting_name] = setting_value - diff --git a/backend/template/scrapy/config_spider/spiders/__init__.py b/backend/template/scrapy/config_spider/spiders/__init__.py deleted file mode 100644 index ebd689ac..00000000 --- a/backend/template/scrapy/config_spider/spiders/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# This package will contain the spiders of your Scrapy project -# -# Please refer to the documentation for information on how to create and manage -# your spiders. diff --git a/backend/template/scrapy/config_spider/spiders/spider.py b/backend/template/scrapy/config_spider/spiders/spider.py deleted file mode 100644 index 09dfdf5e..00000000 --- a/backend/template/scrapy/config_spider/spiders/spider.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- -import scrapy -import re -from config_spider.items import Item -from urllib.parse import urljoin, urlparse - -def get_real_url(response, url): - if re.search(r'^https?', url): - return url - elif re.search(r'^\/\/', url): - u = urlparse(response.url) - return u.scheme + ":" + url - return urljoin(response.url, url) - -class ConfigSpider(scrapy.Spider): - name = 'config_spider' - - def start_requests(self): - yield scrapy.Request(url='###START_URL###', callback=self.###START_STAGE###) - -###PARSERS### diff --git a/backend/template/scrapy/scrapy.cfg b/backend/template/scrapy/scrapy.cfg deleted file mode 100644 index a78d91e3..00000000 --- a/backend/template/scrapy/scrapy.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Automatically created by: scrapy startproject -# -# For more information about the [deploy] section see: -# https://scrapyd.readthedocs.io/en/latest/deploy.html - -[settings] -default = config_spider.settings - -[deploy] -#url = http://localhost:6800/ -project = config_spider diff --git a/backend/template/spiderfile/Spiderfile.163_news b/backend/template/spiderfile/Spiderfile.163_news deleted file mode 100644 index b87b8888..00000000 --- a/backend/template/spiderfile/Spiderfile.163_news +++ /dev/null @@ -1,19 +0,0 @@ -name: "toscrapy_books" -start_url: "http://news.163.com/special/0001386F/rank_news.html" -start_stage: "list" -engine: "scrapy" -stages: -- name: list - is_list: true - list_css: "table tr:not(:first-child)" - fields: - - name: "title" - css: "td:nth-child(1) > a" - - name: "url" - css: "td:nth-child(1) > a" - attr: "href" - - name: "clicks" - css: "td.cBlue" -settings: - ROBOTSTXT_OBEY: false - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 diff --git a/backend/template/spiderfile/Spiderfile.baidu b/backend/template/spiderfile/Spiderfile.baidu deleted file mode 100644 index 0259c64f..00000000 --- a/backend/template/spiderfile/Spiderfile.baidu +++ /dev/null @@ -1,21 +0,0 @@ -name: toscrapy_books -start_url: http://www.baidu.com/s?wd=crawlab -start_stage: list -engine: scrapy -stages: -- name: list - is_list: true - list_xpath: //*[contains(@class, "c-container")] - page_xpath: //*[@id="page"]//a[@class="n"][last()] - page_attr: href - fields: - - name: title - xpath: .//h3/a - - name: url - xpath: .//h3/a - attr: href - - name: abstract - xpath: .//*[@class="c-abstract"] -settings: - ROBOTSTXT_OBEY: false - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 diff --git a/backend/template/spiderfile/Spiderfile.toscrapy_books b/backend/template/spiderfile/Spiderfile.toscrapy_books deleted file mode 100644 index d9100e21..00000000 --- a/backend/template/spiderfile/Spiderfile.toscrapy_books +++ /dev/null @@ -1,27 +0,0 @@ -name: "toscrapy_books" -start_url: "http://books.toscrape.com" -start_stage: "list" -engine: "scrapy" -stages: -- name: list - is_list: true - list_css: "section article.product_pod" - page_css: "ul.pager li.next a" - page_attr: "href" - fields: - - name: "title" - css: "h3 > a" - - name: "url" - css: "h3 > a" - attr: "href" - next_stage: "detail" - - name: "price" - css: ".product_price > .price_color" -- name: detail - is_list: false - fields: - - name: "description" - css: "#product_description + p" -settings: - ROBOTSTXT_OBEY: true - AUTOTHROTTLE_ENABLED: true diff --git a/backend/template/spiders/amazon_config/Spiderfile b/backend/template/spiders/amazon_config/Spiderfile deleted file mode 100644 index eea8a538..00000000 --- a/backend/template/spiders/amazon_config/Spiderfile +++ /dev/null @@ -1,51 +0,0 @@ -name: "amazon_config" -display_name: "亚马逊中国(可配置)" -remark: "亚马逊中国搜索手机,列表+分页" -type: "configurable" -col: "results_amazon_config" -engine: scrapy -start_url: https://www.amazon.cn/s?k=%E6%89%8B%E6%9C%BA&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&ref=nb_sb_noss_2 -start_stage: list -stages: -- name: list - is_list: true - list_css: .s-result-item - list_xpath: "" - page_css: .a-last > a - page_xpath: "" - page_attr: href - fields: - - name: title - css: span.a-text-normal - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: url - css: .a-link-normal - xpath: "" - attr: href - next_stage: "" - remark: "" - - name: price - css: "" - xpath: .//*[@class="a-price-whole"] - attr: "" - next_stage: "" - remark: "" - - name: price_fraction - css: "" - xpath: .//*[@class="a-price-fraction"] - attr: "" - next_stage: "" - remark: "" - - name: img - css: .s-image-square-aspect > img - xpath: "" - attr: src - next_stage: "" - remark: "" -settings: - ROBOTSTXT_OBEY: "false" - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, - like Gecko) Chrome/78.0.3904.108 Safari/537.36 diff --git a/backend/template/spiders/autohome_config/Spiderfile b/backend/template/spiders/autohome_config/Spiderfile deleted file mode 100644 index e69880cb..00000000 --- a/backend/template/spiders/autohome_config/Spiderfile +++ /dev/null @@ -1,57 +0,0 @@ -name: "autohome_config" -display_name: "汽车之家(可配置)" -remark: "汽车之家文章,列表+详情+分页" -type: "configurable" -col: "results_autohome_config" -engine: scrapy -start_url: https://www.autohome.com.cn/all/ -start_stage: list -stages: -- name: list - is_list: true - list_css: ul.article > li - list_xpath: "" - page_css: a.page-item-next - page_xpath: "" - page_attr: href - fields: - - name: title - css: li > a > h3 - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: url - css: li > a - xpath: "" - attr: href - next_stage: "" - remark: "" - - name: abstract - css: li > a > p - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: time - css: li > a .fn-left - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: views - css: li > a .fn-right > em:first-child - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: comments - css: li > a .fn-right > em:last-child - xpath: "" - attr: "" - next_stage: "" - remark: "" -settings: - ROBOTSTXT_OBEY: "false" - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, - like Gecko) Chrome/78.0.3904.108 Safari/537.36 diff --git a/backend/template/spiders/baidu_config/Spiderfile b/backend/template/spiders/baidu_config/Spiderfile deleted file mode 100644 index a29d4acb..00000000 --- a/backend/template/spiders/baidu_config/Spiderfile +++ /dev/null @@ -1,39 +0,0 @@ -name: "baidu_config" -display_name: "百度搜索(可配置)" -remark: "百度搜索Crawlab,列表+分页" -type: "configurable" -col: "results_baidu_config" -engine: scrapy -start_url: http://www.baidu.com/s?wd=crawlab -start_stage: list -stages: -- name: list - is_list: true - list_css: ".result.c-container" - list_xpath: "" - page_css: "a.n" - page_xpath: "" - page_attr: href - fields: - - name: title - css: "" - xpath: .//h3/a - attr: "" - next_stage: "" - remark: "" - - name: url - css: "" - xpath: .//h3/a - attr: href - next_stage: "" - remark: "" - - name: abstract - css: "" - xpath: .//*[@class="c-abstract"] - attr: "" - next_stage: "" - remark: "" -settings: - ROBOTSTXT_OBEY: "false" - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, - like Gecko) Chrome/78.0.3904.108 Safari/537.36 diff --git a/backend/template/spiders/bing_general/Spiderfile b/backend/template/spiders/bing_general/Spiderfile deleted file mode 100644 index 614c135e..00000000 --- a/backend/template/spiders/bing_general/Spiderfile +++ /dev/null @@ -1,6 +0,0 @@ -name: "bing_general" -display_name: "必应搜索 (通用)" -remark: "必应搜索 Crawlab,列表+分页" -col: "results_bing_general" -type: "customized" -cmd: "python bing_spider.py" \ No newline at end of file diff --git a/backend/template/spiders/bing_general/bing_spider.py b/backend/template/spiders/bing_general/bing_spider.py deleted file mode 100644 index e982e4ee..00000000 --- a/backend/template/spiders/bing_general/bing_spider.py +++ /dev/null @@ -1,41 +0,0 @@ -import requests -from bs4 import BeautifulSoup as bs -from urllib.parse import urljoin, urlparse -import re -from crawlab import save_item - -s = requests.Session() - -def get_real_url(response, url): - if re.search(r'^https?', url): - return url - elif re.search(r'^\/\/', url): - u = urlparse(response.url) - return u.scheme + url - return urljoin(response.url, url) - -def start_requests(): - for i in range(0, 9): - fr = 'PERE' if not i else 'MORE' - url = f'https://cn.bing.com/search?q=crawlab&first={10 * i + 1}&FROM={fr}' - request_page(url) - -def request_page(url): - print(f'requesting {url}') - r = s.get(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}) - parse_list(r) - -def parse_list(response): - soup = bs(response.content.decode('utf-8')) - for el in list(soup.select('#b_results > li')): - try: - save_item({ - 'title': el.select_one('h2').text, - 'url': el.select_one('h2 a').attrs.get('href'), - 'abstract': el.select_one('.b_caption p').text, - }) - except: - pass - -if __name__ == '__main__': - start_requests() \ No newline at end of file diff --git a/backend/template/spiders/chinaz/Spiderfile b/backend/template/spiders/chinaz/Spiderfile deleted file mode 100644 index 2fb940bb..00000000 --- a/backend/template/spiders/chinaz/Spiderfile +++ /dev/null @@ -1,5 +0,0 @@ -name: "chinaz" -display_name: "站长之家 (Scrapy)" -col: "results_chinaz" -type: "customized" -cmd: "scrapy crawl chinaz_spider" \ No newline at end of file diff --git a/backend/template/spiders/chinaz/chinaz/__init__.py b/backend/template/spiders/chinaz/chinaz/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/template/spiders/chinaz/chinaz/items.py b/backend/template/spiders/chinaz/chinaz/items.py deleted file mode 100644 index 1fdcac1b..00000000 --- a/backend/template/spiders/chinaz/chinaz/items.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your scraped items -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/items.html - -import scrapy - - -class ChinazItem(scrapy.Item): - # define the fields for your item here like: - _id = scrapy.Field() - task_id = scrapy.Field() - name = scrapy.Field() - domain = scrapy.Field() - description = scrapy.Field() - rank = scrapy.Field() - main_category = scrapy.Field() - category = scrapy.Field() - location = scrapy.Field() diff --git a/backend/template/spiders/chinaz/chinaz/middlewares.py b/backend/template/spiders/chinaz/chinaz/middlewares.py deleted file mode 100644 index c98995d5..00000000 --- a/backend/template/spiders/chinaz/chinaz/middlewares.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your spider middleware -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -from scrapy import signals - - -class ChinazSpiderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the spider middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_spider_input(self, response, spider): - # Called for each response that goes through the spider - # middleware and into the spider. - - # Should return None or raise an exception. - return None - - def process_spider_output(self, response, result, spider): - # Called with the results returned from the Spider, after - # it has processed the response. - - # Must return an iterable of Request, dict or Item objects. - for i in result: - yield i - - def process_spider_exception(self, response, exception, spider): - # Called when a spider or process_spider_input() method - # (from other spider middleware) raises an exception. - - # Should return either None or an iterable of Response, dict - # or Item objects. - pass - - def process_start_requests(self, start_requests, spider): - # Called with the start requests of the spider, and works - # similarly to the process_spider_output() method, except - # that it doesn’t have a response associated. - - # Must return only requests (not items). - for r in start_requests: - yield r - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) - - -class ChinazDownloaderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the downloader middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_request(self, request, spider): - # Called for each request that goes through the downloader - # middleware. - - # Must either: - # - return None: continue processing this request - # - or return a Response object - # - or return a Request object - # - or raise IgnoreRequest: process_exception() methods of - # installed downloader middleware will be called - return None - - def process_response(self, request, response, spider): - # Called with the response returned from the downloader. - - # Must either; - # - return a Response object - # - return a Request object - # - or raise IgnoreRequest - return response - - def process_exception(self, request, exception, spider): - # Called when a download handler or a process_request() - # (from other downloader middleware) raises an exception. - - # Must either: - # - return None: continue processing this exception - # - return a Response object: stops process_exception() chain - # - return a Request object: stops process_exception() chain - pass - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) diff --git a/backend/template/spiders/chinaz/chinaz/pipelines.py b/backend/template/spiders/chinaz/chinaz/pipelines.py deleted file mode 100644 index b29f9eb7..00000000 --- a/backend/template/spiders/chinaz/chinaz/pipelines.py +++ /dev/null @@ -1,7 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define your item pipelines here -# -# Don't forget to add your pipeline to the ITEM_PIPELINES setting -# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html - diff --git a/backend/template/spiders/chinaz/chinaz/settings.py b/backend/template/spiders/chinaz/chinaz/settings.py deleted file mode 100644 index 932ec9ed..00000000 --- a/backend/template/spiders/chinaz/chinaz/settings.py +++ /dev/null @@ -1,90 +0,0 @@ -# -*- coding: utf-8 -*- - -# Scrapy settings for chinaz project -# -# For simplicity, this file contains only settings considered important or -# commonly used. You can find more settings consulting the documentation: -# -# https://doc.scrapy.org/en/latest/topics/settings.html -# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -BOT_NAME = 'chinaz' - -SPIDER_MODULES = ['chinaz.spiders'] -NEWSPIDER_MODULE = 'chinaz.spiders' - - -# Crawl responsibly by identifying yourself (and your website) on the user-agent -#USER_AGENT = 'chinaz (+http://www.yourdomain.com)' - -# Obey robots.txt rules -ROBOTSTXT_OBEY = True - -# Configure maximum concurrent requests performed by Scrapy (default: 16) -#CONCURRENT_REQUESTS = 32 - -# Configure a delay for requests for the same website (default: 0) -# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay -# See also autothrottle settings and docs -#DOWNLOAD_DELAY = 3 -# The download delay setting will honor only one of: -#CONCURRENT_REQUESTS_PER_DOMAIN = 16 -#CONCURRENT_REQUESTS_PER_IP = 16 - -# Disable cookies (enabled by default) -#COOKIES_ENABLED = False - -# Disable Telnet Console (enabled by default) -#TELNETCONSOLE_ENABLED = False - -# Override the default request headers: -#DEFAULT_REQUEST_HEADERS = { -# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', -# 'Accept-Language': 'en', -#} - -# Enable or disable spider middlewares -# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html -#SPIDER_MIDDLEWARES = { -# 'chinaz.middlewares.ChinazSpiderMiddleware': 543, -#} - -# Enable or disable downloader middlewares -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -#DOWNLOADER_MIDDLEWARES = { -# 'chinaz.middlewares.ChinazDownloaderMiddleware': 543, -#} - -# Enable or disable extensions -# See https://doc.scrapy.org/en/latest/topics/extensions.html -#EXTENSIONS = { -# 'scrapy.extensions.telnet.TelnetConsole': None, -#} - -# Configure item pipelines -# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html -ITEM_PIPELINES = { - 'crawlab.pipelines.CrawlabMongoPipeline': 300, -} - -# Enable and configure the AutoThrottle extension (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/autothrottle.html -#AUTOTHROTTLE_ENABLED = True -# The initial download delay -#AUTOTHROTTLE_START_DELAY = 5 -# The maximum download delay to be set in case of high latencies -#AUTOTHROTTLE_MAX_DELAY = 60 -# The average number of requests Scrapy should be sending in parallel to -# each remote server -#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 -# Enable showing throttling stats for every response received: -#AUTOTHROTTLE_DEBUG = False - -# Enable and configure HTTP caching (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings -#HTTPCACHE_ENABLED = True -#HTTPCACHE_EXPIRATION_SECS = 0 -#HTTPCACHE_DIR = 'httpcache' -#HTTPCACHE_IGNORE_HTTP_CODES = [] -#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' diff --git a/backend/template/spiders/chinaz/chinaz/spiders/__init__.py b/backend/template/spiders/chinaz/chinaz/spiders/__init__.py deleted file mode 100644 index ebd689ac..00000000 --- a/backend/template/spiders/chinaz/chinaz/spiders/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# This package will contain the spiders of your Scrapy project -# -# Please refer to the documentation for information on how to create and manage -# your spiders. diff --git a/backend/template/spiders/chinaz/chinaz/spiders/chinaz_spider.py b/backend/template/spiders/chinaz/chinaz/spiders/chinaz_spider.py deleted file mode 100644 index 28ad84e7..00000000 --- a/backend/template/spiders/chinaz/chinaz/spiders/chinaz_spider.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -import scrapy -from chinaz.items import ChinazItem - - -class ChinazSpiderSpider(scrapy.Spider): - name = 'chinaz_spider' - allowed_domains = ['chinaz.com'] - start_urls = ['http://top.chinaz.com/hangye/'] - - def parse(self, response): - for item in response.css('.listCentent > li'): - name = item.css('h3.rightTxtHead > a::text').extract_first() - href = item.css('h3.rightTxtHead > a::attr("href")').extract_first() - domain = item.css('h3.rightTxtHead > span::text').extract_first() - description = item.css('p.RtCInfo::text').extract_first() - rank = item.css('.RtCRateCent > strong::text').extract_first() - rank = int(rank) - item = ChinazItem( - _id=domain, - name=name, - domain=domain, - description=description, - rank=rank, - ) - yield scrapy.Request( - url='http://top.chinaz.com' + href, - callback=self.parse_item, - meta={ - 'item': item - } - ) - - # pagination - a_list = response.css('.ListPageWrap > a::attr("href")').extract() - url = 'http://top.chinaz.com/hangye/' + a_list[-1] - yield scrapy.Request(url=url, callback=self.parse) - - def parse_item(self, response): - item = response.meta['item'] - - # category info extraction - arr = response.css('.TopMainTag-show .SimSun') - res1 = arr[0].css('a::text').extract() - main_category = res1[0] - if len(res1) == 1: - category = '其他' - else: - category = res1[1] - - # location info extraction - res2 = arr[1].css('a::text').extract() - if len(res2) > 0: - location = res2[0] - else: - location = '其他' - - # assign values to item - item['main_category'] = main_category - item['category'] = category - item['location'] = location - - yield item diff --git a/backend/template/spiders/chinaz/scrapy.cfg b/backend/template/spiders/chinaz/scrapy.cfg deleted file mode 100644 index d3b44a1a..00000000 --- a/backend/template/spiders/chinaz/scrapy.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Automatically created by: scrapy startproject -# -# For more information about the [deploy] section see: -# https://scrapyd.readthedocs.io/en/latest/deploy.html - -[settings] -default = chinaz.settings - -[deploy] -#url = http://localhost:6800/ -project = chinaz diff --git a/backend/template/spiders/csdn/csdn_spider.js b/backend/template/spiders/csdn/csdn_spider.js deleted file mode 100644 index 0f65c0ad..00000000 --- a/backend/template/spiders/csdn/csdn_spider.js +++ /dev/null @@ -1,87 +0,0 @@ -const puppeteer = require('puppeteer'); -const MongoClient = require('mongodb').MongoClient; - -const MONGO_HOST = process.env.MONGO_HOST; -const MONGO_PORT = process.env.MONGO_PORT; -const MONGO_DB = process.env.MONGO_DB; - -(async () => { - // browser - const browser = await (puppeteer.launch({ - headless: true - })); - - // define start url - const url = 'https://www.csdn.net'; - - // start a new page - const page = await browser.newPage(); - - // navigate to url - try { - await page.goto(url, {waitUntil: 'domcontentloaded'}); - await page.waitFor(2000); - } catch (e) { - console.error(e); - - // close browser - browser.close(); - - // exit code 1 indicating an error happened - code = 1; - process.emit("exit "); - process.reallyExit(code); - - return - } - - // scroll down to fetch more data - for (let i = 0; i < 100; i++) { - console.log('Pressing PageDown...'); - await page.keyboard.press('PageDown', 200); - await page.waitFor(100); - } - - // scrape data - const results = await page.evaluate(() => { - let results = []; - document.querySelectorAll('#feedlist_id > li').forEach(el => { - const $a = el.querySelector('.title > h2 > a'); - if (!$a) return; - results.push({ - url: $a.getAttribute('href'), - title: $a.innerText - }); - }); - return results; - }); - - // open database connection - const client = await MongoClient.connect(`mongodb://${MONGO_HOST}:${MONGO_PORT}`); - let db = await client.db(MONGO_DB); - const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin'; - const taskId = process.env.CRAWLAB_TASK_ID; - const col = db.collection(colName); - - // save to database - for (let i = 0; i < results.length; i++) { - // de-duplication - const r = await col.findOne({url: results[i]}); - if (r) continue; - - // assign taskID - results[i].task_id = taskId; - results[i].source = 'csdn'; - - // insert row - await col.insertOne(results[i]); - } - - console.log(`results.length: ${results.length}`); - - // close database connection - client.close(); - - // shutdown browser - browser.close(); -})(); \ No newline at end of file diff --git a/backend/template/spiders/csdn_config/Spiderfile b/backend/template/spiders/csdn_config/Spiderfile deleted file mode 100644 index 67f4f8c5..00000000 --- a/backend/template/spiders/csdn_config/Spiderfile +++ /dev/null @@ -1,60 +0,0 @@ -name: "csdn_config" -display_name: "CSDN(可配置)" -remark: "CSDN Crawlab 文章,列表+详情+分页" -type: "configurable" -col: "results_csdn_config" -engine: scrapy -start_url: https://so.csdn.net/so/search/s.do?q=crawlab -start_stage: list -stages: -- name: list - is_list: true - list_css: .search-list-con > .search-list - list_xpath: "" - page_css: a.btn-next - page_xpath: "" - page_attr: href - fields: - - name: url - css: "" - xpath: .//*[@class="limit_width"]/a - attr: href - next_stage: detail - remark: "" -- name: detail - is_list: false - list_css: "" - list_xpath: "" - page_css: "" - page_xpath: "" - page_attr: "" - fields: - - name: content - css: "" - xpath: .//div[@id="content_views"] - attr: "" - next_stage: "" - remark: "" - - name: views - css: .read-count - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: title - css: .title-article - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: author - css: .follow-nickName - xpath: "" - attr: "" - next_stage: "" - remark: "" -settings: - AUTOTHROTTLE_ENABLED: "false" - ROBOTSTXT_OBEY: "false" - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, - like Gecko) Chrome/79.0.3945.117 Safari/537.36 diff --git a/backend/template/spiders/douban_config/Spiderfile b/backend/template/spiders/douban_config/Spiderfile deleted file mode 100644 index 84f0647a..00000000 --- a/backend/template/spiders/douban_config/Spiderfile +++ /dev/null @@ -1,57 +0,0 @@ -name: "douban_config" -display_name: "豆瓣读书(可配置)" -remark: "豆瓣读书新书推荐,列表" -type: "configurable" -col: "results_douban_config" -engine: scrapy -start_url: https://book.douban.com/latest -start_stage: list -stages: -- name: list - is_list: true - list_css: ul.cover-col-4 > li - list_xpath: "" - page_css: "" - page_xpath: "" - page_attr: "" - fields: - - name: title - css: h2 > a - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: url - css: h2 > a - xpath: "" - attr: href - next_stage: "" - remark: "" - - name: img - css: a.cover img - xpath: "" - attr: src - next_stage: "" - remark: "" - - name: rating - css: p.rating > .color-lightgray - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: abstract - css: p:last-child - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: info - css: .color-gray - xpath: "" - attr: "" - next_stage: "" - remark: "" -settings: - ROBOTSTXT_OBEY: "false" - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, - like Gecko) Chrome/78.0.3904.108 Safari/537.36 diff --git a/backend/template/spiders/jd/Spiderfile b/backend/template/spiders/jd/Spiderfile deleted file mode 100644 index d090472b..00000000 --- a/backend/template/spiders/jd/Spiderfile +++ /dev/null @@ -1,5 +0,0 @@ -name: "jd" -display_name: "京东 (Scrapy)" -col: "results_jd" -type: "customized" -cmd: "scrapy crawl jd_spider" \ No newline at end of file diff --git a/backend/template/spiders/jd/jd/__init__.py b/backend/template/spiders/jd/jd/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/template/spiders/jd/jd/items.py b/backend/template/spiders/jd/jd/items.py deleted file mode 100644 index b2c5e647..00000000 --- a/backend/template/spiders/jd/jd/items.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your scraped items -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/items.html - -import scrapy - - -class JdItem(scrapy.Item): - # define the fields for your item here like: - name = scrapy.Field() - price = scrapy.Field() - url = scrapy.Field() diff --git a/backend/template/spiders/jd/jd/middlewares.py b/backend/template/spiders/jd/jd/middlewares.py deleted file mode 100644 index 6fceded5..00000000 --- a/backend/template/spiders/jd/jd/middlewares.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your spider middleware -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -from scrapy import signals - - -class JdSpiderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the spider middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_spider_input(self, response, spider): - # Called for each response that goes through the spider - # middleware and into the spider. - - # Should return None or raise an exception. - return None - - def process_spider_output(self, response, result, spider): - # Called with the results returned from the Spider, after - # it has processed the response. - - # Must return an iterable of Request, dict or Item objects. - for i in result: - yield i - - def process_spider_exception(self, response, exception, spider): - # Called when a spider or process_spider_input() method - # (from other spider middleware) raises an exception. - - # Should return either None or an iterable of Response, dict - # or Item objects. - pass - - def process_start_requests(self, start_requests, spider): - # Called with the start requests of the spider, and works - # similarly to the process_spider_output() method, except - # that it doesn’t have a response associated. - - # Must return only requests (not items). - for r in start_requests: - yield r - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) - - -class JdDownloaderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the downloader middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_request(self, request, spider): - # Called for each request that goes through the downloader - # middleware. - - # Must either: - # - return None: continue processing this request - # - or return a Response object - # - or return a Request object - # - or raise IgnoreRequest: process_exception() methods of - # installed downloader middleware will be called - return None - - def process_response(self, request, response, spider): - # Called with the response returned from the downloader. - - # Must either; - # - return a Response object - # - return a Request object - # - or raise IgnoreRequest - return response - - def process_exception(self, request, exception, spider): - # Called when a download handler or a process_request() - # (from other downloader middleware) raises an exception. - - # Must either: - # - return None: continue processing this exception - # - return a Response object: stops process_exception() chain - # - return a Request object: stops process_exception() chain - pass - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) diff --git a/backend/template/spiders/jd/jd/pipelines.py b/backend/template/spiders/jd/jd/pipelines.py deleted file mode 100644 index 5a7d7cbf..00000000 --- a/backend/template/spiders/jd/jd/pipelines.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define your item pipelines here -# -# Don't forget to add your pipeline to the ITEM_PIPELINES setting -# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html diff --git a/backend/template/spiders/jd/jd/settings.py b/backend/template/spiders/jd/jd/settings.py deleted file mode 100644 index ef89ed0c..00000000 --- a/backend/template/spiders/jd/jd/settings.py +++ /dev/null @@ -1,90 +0,0 @@ -# -*- coding: utf-8 -*- - -# Scrapy settings for jd project -# -# For simplicity, this file contains only settings considered important or -# commonly used. You can find more settings consulting the documentation: -# -# https://doc.scrapy.org/en/latest/topics/settings.html -# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -BOT_NAME = 'jd' - -SPIDER_MODULES = ['jd.spiders'] -NEWSPIDER_MODULE = 'jd.spiders' - - -# Crawl responsibly by identifying yourself (and your website) on the user-agent -#USER_AGENT = 'jd (+http://www.yourdomain.com)' - -# Obey robots.txt rules -ROBOTSTXT_OBEY = False - -# Configure maximum concurrent requests performed by Scrapy (default: 16) -#CONCURRENT_REQUESTS = 32 - -# Configure a delay for requests for the same website (default: 0) -# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay -# See also autothrottle settings and docs -#DOWNLOAD_DELAY = 3 -# The download delay setting will honor only one of: -#CONCURRENT_REQUESTS_PER_DOMAIN = 16 -#CONCURRENT_REQUESTS_PER_IP = 16 - -# Disable cookies (enabled by default) -#COOKIES_ENABLED = False - -# Disable Telnet Console (enabled by default) -#TELNETCONSOLE_ENABLED = False - -# Override the default request headers: -#DEFAULT_REQUEST_HEADERS = { -# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', -# 'Accept-Language': 'en', -#} - -# Enable or disable spider middlewares -# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html -#SPIDER_MIDDLEWARES = { -# 'jd.middlewares.JdSpiderMiddleware': 543, -#} - -# Enable or disable downloader middlewares -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -#DOWNLOADER_MIDDLEWARES = { -# 'jd.middlewares.JdDownloaderMiddleware': 543, -#} - -# Enable or disable extensions -# See https://doc.scrapy.org/en/latest/topics/extensions.html -#EXTENSIONS = { -# 'scrapy.extensions.telnet.TelnetConsole': None, -#} - -# Configure item pipelines -# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html -ITEM_PIPELINES = { - 'crawlab.pipelines.CrawlabMongoPipeline': 300, -} - -# Enable and configure the AutoThrottle extension (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/autothrottle.html -#AUTOTHROTTLE_ENABLED = True -# The initial download delay -#AUTOTHROTTLE_START_DELAY = 5 -# The maximum download delay to be set in case of high latencies -#AUTOTHROTTLE_MAX_DELAY = 60 -# The average number of requests Scrapy should be sending in parallel to -# each remote server -#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 -# Enable showing throttling stats for every response received: -#AUTOTHROTTLE_DEBUG = False - -# Enable and configure HTTP caching (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings -#HTTPCACHE_ENABLED = True -#HTTPCACHE_EXPIRATION_SECS = 0 -#HTTPCACHE_DIR = 'httpcache' -#HTTPCACHE_IGNORE_HTTP_CODES = [] -#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' diff --git a/backend/template/spiders/jd/jd/spiders/__init__.py b/backend/template/spiders/jd/jd/spiders/__init__.py deleted file mode 100644 index ebd689ac..00000000 --- a/backend/template/spiders/jd/jd/spiders/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# This package will contain the spiders of your Scrapy project -# -# Please refer to the documentation for information on how to create and manage -# your spiders. diff --git a/backend/template/spiders/jd/jd/spiders/jd_spider.py b/backend/template/spiders/jd/jd/spiders/jd_spider.py deleted file mode 100644 index 4ec94fa9..00000000 --- a/backend/template/spiders/jd/jd/spiders/jd_spider.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- -import scrapy - -from jd.items import JdItem - - -class JdSpiderSpider(scrapy.Spider): - name = 'jd_spider' - allowed_domains = ['jd.com'] - - def start_requests(self): - for i in range(1, 50): - yield scrapy.Request(url=f'https://search.jd.com/Search?keyword=手机&enc=utf-8&page={i}') - - def parse(self, response): - for el in response.css('.gl-item'): - yield JdItem( - url=el.css('.p-name > a::attr("href")').extract_first(), - name=el.css('.p-name > a::attr("title")').extract_first(), - price=float(el.css('.p-price i::text').extract_first()), - ) diff --git a/backend/template/spiders/jd/scrapy.cfg b/backend/template/spiders/jd/scrapy.cfg deleted file mode 100644 index 87cf0280..00000000 --- a/backend/template/spiders/jd/scrapy.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Automatically created by: scrapy startproject -# -# For more information about the [deploy] section see: -# https://scrapyd.readthedocs.io/en/latest/deploy.html - -[settings] -default = jd.settings - -[deploy] -#url = http://localhost:6800/ -project = jd diff --git a/backend/template/spiders/jd_mask/Spiderfile b/backend/template/spiders/jd_mask/Spiderfile deleted file mode 100644 index b5134646..00000000 --- a/backend/template/spiders/jd_mask/Spiderfile +++ /dev/null @@ -1,5 +0,0 @@ -name: "jd_mask" -display_name: "京东口罩 (Puppeteer)" -col: "results_jd" -type: "customized" -cmd: "dumb-init -- node jd_mask_spider.js" \ No newline at end of file diff --git a/backend/template/spiders/jd_mask/jd_mask_spider.js b/backend/template/spiders/jd_mask/jd_mask_spider.js deleted file mode 100644 index dfa5c808..00000000 --- a/backend/template/spiders/jd_mask/jd_mask_spider.js +++ /dev/null @@ -1,84 +0,0 @@ -const crawlab = require('crawlab-sdk'); -const PCR = require('puppeteer-chromium-resolver'); - -const crawlDetail = async (page, url) => { - await page.goto(url); - await page.waitForSelector('#choose-btns'); - await page.waitFor(500); - - const hasStock = await page.evaluate(() => { - return !document.querySelector('.J-notify-stock'); - }); - return hasStock; -}; - -const crawlPage = async (page) => { - const items = await page.evaluate(() => { - const items = []; - document.querySelectorAll('.gl-item').forEach(el => { - items.push({ - title: el.querySelector('.p-name > a').getAttribute('title'), - url: 'https:' + el.querySelector('.p-name > a').getAttribute('href'), - }); - }); - return items; - }); - - for (let i = 0; i < items.length; i++) { - const item = items[i]; - item['has_stock'] = await crawlDetail(page, item.url); - await crawlab.saveItem(item); - } - - await page.waitFor(1000); -}; - -const main = async () => { - const pcr = await PCR({ - folderName: '.chromium-browser-snapshots', - hosts: ["https://storage.googleapis.com", "https://npm.taobao.org/mirrors"], - retry: 3 - }); - - const browser = await pcr.puppeteer.launch({ - headless: true, - args: ['--no-sandbox'], - executablePath: pcr.executablePath - }).catch(function (error) { - console.log(error); - }); - - const page = await browser.newPage(); - - await page.goto('https://www.jd.com/chanpin/270170.html'); - await page.waitForSelector('#J_goodsList'); - await page.waitFor(1000); - - await crawlPage(page); - - while (true) { - const hasNext = await page.evaluate(() => { - if (!document.querySelector('.pn-next')) return false - return !document.querySelector('.pn-next.disabled') - }); - - if (!hasNext) break; - - await page.click('.pn-next'); - await page.waitFor(1000); - await crawlPage(page); - } - - await browser.close(); -}; - -(async () => { - try { - await main() - } catch (e) { - console.error(e) - } - - await crawlab.close(); - // process.exit(); -})(); \ No newline at end of file diff --git a/backend/template/spiders/juejin_node/juejin_spider.js b/backend/template/spiders/juejin_node/juejin_spider.js deleted file mode 100644 index afb0cea8..00000000 --- a/backend/template/spiders/juejin_node/juejin_spider.js +++ /dev/null @@ -1,84 +0,0 @@ -const puppeteer = require('puppeteer'); -const MongoClient = require('mongodb').MongoClient; - -(async () => { - // browser - const browser = await (puppeteer.launch({ - headless: true - })); - - // define start url - const url = 'https://juejin.im'; - - // start a new page - const page = await browser.newPage(); - - // navigate to url - try { - await page.goto(url, {waitUntil: 'domcontentloaded'}); - await page.waitFor(2000); - } catch (e) { - console.error(e); - - // close browser - browser.close(); - - // exit code 1 indicating an error happened - code = 1; - process.emit("exit "); - process.reallyExit(code); - - return - } - - // scroll down to fetch more data - for (let i = 0; i < 100; i++) { - console.log('Pressing PageDown...'); - await page.keyboard.press('PageDown', 200); - await page.waitFor(100); - } - - // scrape data - const results = await page.evaluate(() => { - let results = []; - document.querySelectorAll('.entry-list > .item').forEach(el => { - if (!el.querySelector('.title')) return; - results.push({ - url: 'https://juejin.com' + el.querySelector('.title').getAttribute('href'), - title: el.querySelector('.title').innerText - }); - }); - return results; - }); - - // open database connection - console.log(process.env.MONGO_HOST); - console.log(process.env.MONGO_PORT); - const client = await MongoClient.connect(`mongodb://${process.env.MONGO_HOST}:${process.env.MONGO_PORT}`); - let db = await client.db(process.env.MONGO_DB); - const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin'; - const taskId = process.env.CRAWLAB_TASK_ID; - const col = db.collection(colName); - - // save to database - for (let i = 0; i < results.length; i++) { - // de-duplication - const r = await col.findOne({url: results[i]}); - if (r) continue; - - // assign taskID - results[i].task_id = taskId; - results[i].source = 'juejin'; - - // insert row - await col.insertOne(results[i]); - } - - console.log(`results.length: ${results.length}`); - - // close database connection - client.close(); - - // shutdown browser - browser.close(); -})(); \ No newline at end of file diff --git a/backend/template/spiders/realestate/Spiderfile b/backend/template/spiders/realestate/Spiderfile deleted file mode 100644 index 772e8312..00000000 --- a/backend/template/spiders/realestate/Spiderfile +++ /dev/null @@ -1,4 +0,0 @@ -name: "realestate" -display_name: "链家网 (Scrapy)" -col: "results_realestate" -cmd: "scrapy crawl lianjia" \ No newline at end of file diff --git a/backend/template/spiders/realestate/realestate/__init__.py b/backend/template/spiders/realestate/realestate/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/template/spiders/realestate/realestate/items.py b/backend/template/spiders/realestate/realestate/items.py deleted file mode 100644 index 189b92ed..00000000 --- a/backend/template/spiders/realestate/realestate/items.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your scraped items -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/items.html - -import scrapy - - -class RealEstateItem(scrapy.Item): - # _id - _id = scrapy.Field() - - # task_id - task_id = scrapy.Field() - - # 房产名 - name = scrapy.Field() - - # url - url = scrapy.Field() - - # 类别 - type = scrapy.Field() - - # 价格(万) - price = scrapy.Field() - - # 大小 - size = scrapy.Field() - - # 小区 - region = scrapy.Field() - - # 城市 - city = scrapy.Field() diff --git a/backend/template/spiders/realestate/realestate/middlewares.py b/backend/template/spiders/realestate/realestate/middlewares.py deleted file mode 100644 index ed845f57..00000000 --- a/backend/template/spiders/realestate/realestate/middlewares.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your spider middleware -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -from scrapy import signals - - -class RealestateSpiderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the spider middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_spider_input(self, response, spider): - # Called for each response that goes through the spider - # middleware and into the spider. - - # Should return None or raise an exception. - return None - - def process_spider_output(self, response, result, spider): - # Called with the results returned from the Spider, after - # it has processed the response. - - # Must return an iterable of Request, dict or Item objects. - for i in result: - yield i - - def process_spider_exception(self, response, exception, spider): - # Called when a spider or process_spider_input() method - # (from other spider middleware) raises an exception. - - # Should return either None or an iterable of Response, dict - # or Item objects. - pass - - def process_start_requests(self, start_requests, spider): - # Called with the start requests of the spider, and works - # similarly to the process_spider_output() method, except - # that it doesn’t have a response associated. - - # Must return only requests (not items). - for r in start_requests: - yield r - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) - - -class RealestateDownloaderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the downloader middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_request(self, request, spider): - # Called for each request that goes through the downloader - # middleware. - - # Must either: - # - return None: continue processing this request - # - or return a Response object - # - or return a Request object - # - or raise IgnoreRequest: process_exception() methods of - # installed downloader middleware will be called - return None - - def process_response(self, request, response, spider): - # Called with the response returned from the downloader. - - # Must either; - # - return a Response object - # - return a Request object - # - or raise IgnoreRequest - return response - - def process_exception(self, request, exception, spider): - # Called when a download handler or a process_request() - # (from other downloader middleware) raises an exception. - - # Must either: - # - return None: continue processing this exception - # - return a Response object: stops process_exception() chain - # - return a Request object: stops process_exception() chain - pass - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) diff --git a/backend/template/spiders/realestate/realestate/pipelines.py b/backend/template/spiders/realestate/realestate/pipelines.py deleted file mode 100644 index 3371792b..00000000 --- a/backend/template/spiders/realestate/realestate/pipelines.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define your item pipelines here -# -# Don't forget to add your pipeline to the ITEM_PIPELINES setting -# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html diff --git a/backend/template/spiders/realestate/realestate/settings.py b/backend/template/spiders/realestate/realestate/settings.py deleted file mode 100644 index 758f8ed0..00000000 --- a/backend/template/spiders/realestate/realestate/settings.py +++ /dev/null @@ -1,89 +0,0 @@ -# -*- coding: utf-8 -*- - -# Scrapy settings for realestate project -# -# For simplicity, this file contains only settings considered important or -# commonly used. You can find more settings consulting the documentation: -# -# https://doc.scrapy.org/en/latest/topics/settings.html -# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -BOT_NAME = 'realestate' - -SPIDER_MODULES = ['realestate.spiders'] -NEWSPIDER_MODULE = 'realestate.spiders' - -# Crawl responsibly by identifying yourself (and your website) on the user-agent -# USER_AGENT = 'realestate (+http://www.yourdomain.com)' - -# Obey robots.txt rules -ROBOTSTXT_OBEY = True - -# Configure maximum concurrent requests performed by Scrapy (default: 16) -# CONCURRENT_REQUESTS = 32 - -# Configure a delay for requests for the same website (default: 0) -# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay -# See also autothrottle settings and docs -# DOWNLOAD_DELAY = 3 -# The download delay setting will honor only one of: -# CONCURRENT_REQUESTS_PER_DOMAIN = 16 -# CONCURRENT_REQUESTS_PER_IP = 16 - -# Disable cookies (enabled by default) -# COOKIES_ENABLED = False - -# Disable Telnet Console (enabled by default) -# TELNETCONSOLE_ENABLED = False - -# Override the default request headers: -# DEFAULT_REQUEST_HEADERS = { -# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', -# 'Accept-Language': 'en', -# } - -# Enable or disable spider middlewares -# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html -# SPIDER_MIDDLEWARES = { -# 'realestate.middlewares.RealestateSpiderMiddleware': 543, -# } - -# Enable or disable downloader middlewares -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -# DOWNLOADER_MIDDLEWARES = { -# 'realestate.middlewares.RealestateDownloaderMiddleware': 543, -# } - -# Enable or disable extensions -# See https://doc.scrapy.org/en/latest/topics/extensions.html -# EXTENSIONS = { -# 'scrapy.extensions.telnet.TelnetConsole': None, -# } - -# Configure item pipelines -# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html -ITEM_PIPELINES = { - 'crawlab.pipelines.CrawlabMongoPipeline': 300, -} - -# Enable and configure the AutoThrottle extension (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/autothrottle.html -# AUTOTHROTTLE_ENABLED = True -# The initial download delay -# AUTOTHROTTLE_START_DELAY = 5 -# The maximum download delay to be set in case of high latencies -# AUTOTHROTTLE_MAX_DELAY = 60 -# The average number of requests Scrapy should be sending in parallel to -# each remote server -# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 -# Enable showing throttling stats for every response received: -# AUTOTHROTTLE_DEBUG = False - -# Enable and configure HTTP caching (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings -# HTTPCACHE_ENABLED = True -# HTTPCACHE_EXPIRATION_SECS = 0 -# HTTPCACHE_DIR = 'httpcache' -# HTTPCACHE_IGNORE_HTTP_CODES = [] -# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' diff --git a/backend/template/spiders/realestate/realestate/spiders/__init__.py b/backend/template/spiders/realestate/realestate/spiders/__init__.py deleted file mode 100644 index ebd689ac..00000000 --- a/backend/template/spiders/realestate/realestate/spiders/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# This package will contain the spiders of your Scrapy project -# -# Please refer to the documentation for information on how to create and manage -# your spiders. diff --git a/backend/template/spiders/realestate/realestate/spiders/lianjia.py b/backend/template/spiders/realestate/realestate/spiders/lianjia.py deleted file mode 100644 index cad054f3..00000000 --- a/backend/template/spiders/realestate/realestate/spiders/lianjia.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding: utf-8 -*- -import scrapy - -from realestate.items import RealEstateItem - - -class LianjiaSpider(scrapy.Spider): - name = 'lianjia' - allowed_domains = ['lianjia.com'] - start_urls = ['https://cq.lianjia.com/ershoufang/'] - - def start_requests(self): - for i in range(100): - url = 'https://cq.lianjia.com/ershoufang/pg%s' % i - yield scrapy.Request(url=url) - - def parse(self, response): - for item in response.css('.sellListContent > li'): - yield RealEstateItem( - name=item.css('.title > a::text').extract_first(), - url=item.css('.title > a::attr("href")').extract_first(), - type='secondhand', - price=item.css('.totalPrice > span::text').extract_first(), - region=item.css('.houseInfo > a::text').extract_first(), - size=item.css('.houseInfo::text').extract_first().split(' | ')[2] - ) - - # 分页 - # a_next = response.css('.house-lst-page-box > a')[-1] - # href = a_next.css('a::attr("href")') - # yield scrapy.Response(url='https://cq.lianjia.com' + href) diff --git a/backend/template/spiders/realestate/scrapy.cfg b/backend/template/spiders/realestate/scrapy.cfg deleted file mode 100644 index d630e123..00000000 --- a/backend/template/spiders/realestate/scrapy.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Automatically created by: scrapy startproject -# -# For more information about the [deploy] section see: -# https://scrapyd.readthedocs.io/en/latest/deploy.html - -[settings] -default = realestate.settings - -[deploy] -#url = http://localhost:6800/ -project = realestate diff --git a/backend/template/spiders/segmentfault/segmentfault_spider.js b/backend/template/spiders/segmentfault/segmentfault_spider.js deleted file mode 100644 index 834b61cc..00000000 --- a/backend/template/spiders/segmentfault/segmentfault_spider.js +++ /dev/null @@ -1,81 +0,0 @@ -const puppeteer = require('puppeteer'); -const MongoClient = require('mongodb').MongoClient; - -(async () => { - // browser - const browser = await (puppeteer.launch({ - headless: true - })); - - // define start url - const url = 'https://segmentfault.com/newest'; - - // start a new page - const page = await browser.newPage(); - - // navigate to url - try { - await page.goto(url, {waitUntil: 'domcontentloaded'}); - await page.waitFor(2000); - } catch (e) { - console.error(e); - - // close browser - browser.close(); - - // exit code 1 indicating an error happened - code = 1; - process.emit("exit "); - process.reallyExit(code); - - return - } - - // scroll down to fetch more data - for (let i = 0; i < 10; i++) { - console.log('Pressing PageDown...'); - await page.keyboard.press('PageDown', 200); - await page.waitFor(500); - } - - // scrape data - const results = await page.evaluate(() => { - let results = []; - document.querySelectorAll('.news-list .news-item').forEach(el => { - results.push({ - url: 'https://segmentfault.com' + el.querySelector('.news__item-info > a').getAttribute('href'), - title: el.querySelector('.news__item-title').innerText - }) - }); - return results; - }); - - // open database connection - const client = await MongoClient.connect('mongodb://127.0.0.1:27017'); - let db = await client.db('crawlab_test'); - const colName = process.env.CRAWLAB_COLLECTION || 'results_segmentfault'; - const taskId = process.env.CRAWLAB_TASK_ID; - const col = db.collection(colName); - - // save to database - for (let i = 0; i < results.length; i++) { - // de-duplication - const r = await col.findOne({url: results[i]}); - if (r) continue; - - // assign taskID - results[i].task_id = taskId; - results[i].source = 'segmentfault'; - - // insert row - await col.insertOne(results[i]); - } - - console.log(`results.length: ${results.length}`); - - // close database connection - client.close(); - - // shutdown browser - browser.close(); -})(); \ No newline at end of file diff --git a/backend/template/spiders/segmentfault_colly/.crawlabignore b/backend/template/spiders/segmentfault_colly/.crawlabignore deleted file mode 100644 index ae09aba9..00000000 --- a/backend/template/spiders/segmentfault_colly/.crawlabignore +++ /dev/null @@ -1,2 +0,0 @@ -vendor/ -.idea \ No newline at end of file diff --git a/backend/template/spiders/segmentfault_colly/go.mod b/backend/template/spiders/segmentfault_colly/go.mod deleted file mode 100644 index 4a6b6a1d..00000000 --- a/backend/template/spiders/segmentfault_colly/go.mod +++ /dev/null @@ -1,9 +0,0 @@ -module baidu_colly - -go 1.13 - -require ( - github.com/apex/log v1.8.0 - github.com/crawlab-team/crawlab-go-sdk v0.0.6 - github.com/gocolly/colly/v2 v2.1.0 -) diff --git a/backend/template/spiders/segmentfault_colly/go.sum b/backend/template/spiders/segmentfault_colly/go.sum deleted file mode 100644 index 32abde28..00000000 --- a/backend/template/spiders/segmentfault_colly/go.sum +++ /dev/null @@ -1,454 +0,0 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= -cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= -cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= -cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= -cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= -cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= -cloud.google.com/go/firestore v1.1.0/go.mod h1:ulACoGHTpvq5r8rxGJ4ddJZBZqakUQqClKRT5SZwBmk= -cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= -cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= -github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= -github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= -github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= -github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M= -github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= -github.com/antchfx/xmlquery v1.2.4 h1:T/SH1bYdzdjTMoz2RgsfVKbM5uWh3gjDYYepFqQmFv4= -github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM= -github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= -github.com/antchfx/xpath v1.1.8 h1:PcL6bIX42Px5usSx6xRYw/wjB3wYGkj0MJ9MBzEKVgk= -github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= -github.com/apex/log v1.1.4 h1:3Zk+boorIQAAGBrHn0JUtAau4ihMamT4WdnfdnXM1zQ= -github.com/apex/log v1.1.4/go.mod h1:AlpoD9aScyQfJDVHmLMEcx4oU6LqzkWp4Mg9GdAcEvQ= -github.com/apex/log v1.8.0 h1:+W4j+dttibFvynPLlctdnYFUn1eLKT37BZWWW2iMfEM= -github.com/apex/log v1.8.0/go.mod h1:m82fZlWIuiWzWP04XCTXmnX0xRkYYbCdYn8jbJeLBEA= -github.com/apex/logs v0.0.4/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo= -github.com/apex/logs v1.0.0/go.mod h1:XzxuLZ5myVHDy9SAmYpamKKRNApGj54PfYLcFrXqDwo= -github.com/aphistic/golf v0.0.0-20180712155816-02c07f170c5a/go.mod h1:3NqKYiepwy8kCu4PNA+aP7WUV72eXWJeP9/r3/K9aLE= -github.com/aphistic/sweet v0.2.0/go.mod h1:fWDlIh/isSE9n6EPsRmC0det+whmX6dJid3stzu0Xys= -github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= -github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= -github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/aws/aws-sdk-go v1.20.6/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= -github.com/aws/aws-sdk-go v1.33.5/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0= -github.com/aybabtme/rgbterm v0.0.0-20170906152045-cc83f3b3ce59/go.mod h1:q/89r3U2H7sSsE2t6Kca0lfwTK8JdoNGS/yzM/4iH5I= -github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= -github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= -github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= -github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= -github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= -github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/crawlab-team/crawlab-go-sdk v0.0.0-20200811085947-8f8e907c6721 h1:K4WiBL6ygGdH/cw4XH68kylZ40hvW7QvGfW4r3DI75s= -github.com/crawlab-team/crawlab-go-sdk v0.0.1 h1:sHnjUEo5rf+WpyIuinabkHX716GFIjIzlgMlAUS3yZ8= -github.com/crawlab-team/crawlab-go-sdk v0.0.1/go.mod h1:o8G5GycvFLpN2JAFKARnc1sPP9cVl4UL/henjBuU/m0= -github.com/crawlab-team/crawlab-go-sdk v0.0.2 h1:5vC+EXSw6ugNp7KUFyakvXLkMrhuc+iwi6Wg54FzlIM= -github.com/crawlab-team/crawlab-go-sdk v0.0.2/go.mod h1:o8G5GycvFLpN2JAFKARnc1sPP9cVl4UL/henjBuU/m0= -github.com/crawlab-team/crawlab-go-sdk v0.0.3 h1:xtXPvAfrFInqHMQTc9z/4I4x3UD3MlKab3kxuJUlwIw= -github.com/crawlab-team/crawlab-go-sdk v0.0.3/go.mod h1:o8G5GycvFLpN2JAFKARnc1sPP9cVl4UL/henjBuU/m0= -github.com/crawlab-team/crawlab-go-sdk v0.0.5 h1:dpKEIMIwuAUTV+0ieow8QAGixrQKnslSfHrejBm/hOk= -github.com/crawlab-team/crawlab-go-sdk v0.0.5/go.mod h1:T462oNoHharqpV+d6mHORzxCTXx85nZUAnjRl4y2X9Y= -github.com/crawlab-team/crawlab-go-sdk v0.3.3 h1:AfnpbX8284bju/EDQlncnIlj6OAeeLz2zSKL+XSoxCA= -github.com/crawlab-team/crawlab-go-sdk v0.3.3/go.mod h1:o8G5GycvFLpN2JAFKARnc1sPP9cVl4UL/henjBuU/m0= -github.com/crawlab-team/crawlab-sdk v0.3.3 h1:wIZULN0tthEYN5rm4udC4lvNmYWUCVcIpjo8eOEAiUY= -github.com/crawlab-team/crawlab-sdk v0.3.3/go.mod h1:y/eYHfi/RRp1LZnjo8FGJxiCggksP0L48LSO3lHD0Mg= -github.com/crawlab-team/crawlab-sdk/go v0.0.0-20200811075433-4eb89aae4128 h1:pwPIhttJXHHLiz1ycsOCZvvYjq7DRPpwsCEJVv4BW0Y= -github.com/crawlab-team/crawlab-sdk/go v0.0.0-20200811081137-f170fe114b4f h1:UApT42/LRc2uukQPMNLXvdcSqcY9FofZmSeMPkf1UXg= -github.com/crawlab-team/crawlab-sdk/go v0.0.0-20200811081137-f170fe114b4f/go.mod h1:KdyXPWGLgPCkCBnz5SvZS+7u0V7thpdRo81x3iyNAqo= -github.com/crawlab-team/crawlab-sdk/go v0.0.0-20200811083544-41a4b61941df h1:x5CV/x5QoagYlkfGgyCNISjlu87sev47gf+J0zEqofk= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8 h1:DujepqpGd1hyOd7aW59XpK7Qymp8iy83xq74fLr21is= -github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= -github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= -github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= -github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= -github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= -github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= -github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= -github.com/gocolly/colly/v2 v2.1.0 h1:k0DuZkDoCsx51bKpRJNEmcxcp+W5N8ziuwGaSDuFoGs= -github.com/gocolly/colly/v2 v2.1.0/go.mod h1:I2MuhsLjQ+Ex+IzK3afNS8/1qP3AedHOusRPcRdC5o0= -github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= -github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w= -github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= -github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= -github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= -github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= -github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= -github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= -github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= -github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= -github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= -github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= -github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= -github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= -github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= -github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik= -github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA= -github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= -github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= -github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= -github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.9.8 h1:VMAMUUOh+gaxKTMk+zqbjsSjsIcUcL/LF4o63i82QyA= -github.com/klauspost/compress v1.9.8/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= -github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= -github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= -github.com/mailru/easyjson v0.7.1 h1:mdxE1MF9o53iCb2Ghj1VfWvh7ZOwHpnVG/xwXrV90U8= -github.com/mailru/easyjson v0.7.1/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= -github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= -github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ= -github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= -github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.5/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= -github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= -github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= -github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= -github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= -github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= -github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/olivere/elastic/v7 v7.0.19 h1:w4F6JpqOISadhYf/n0NR1cNj73xHqh4pzPwD1Gkidts= -github.com/olivere/elastic/v7 v7.0.19/go.mod h1:4Jqt5xvjqpjCqgnTcHwl3j8TLs8mvoOK8NYgo/qEOu4= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= -github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= -github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= -github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= -github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= -github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= -github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= -github.com/rogpeppe/fastuuid v1.1.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= -github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= -github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= -github.com/segmentio/kafka-go v0.4.1 h1:jyGn8DlpqI5iPArVxQj6o1IqPk76A+VN3JkhTkDr2Mo= -github.com/segmentio/kafka-go v0.4.1/go.mod h1:Inh7PqOsxmfgasV8InZYKVXWsdjcCq2d9tFV75GLbuM= -github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= -github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/assertions v1.0.0/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM= -github.com/smartystreets/assertions v1.1.1/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= -github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/smartystreets/gunit v1.0.0/go.mod h1:qwPWnhz6pn0NnRBP++URONOVyNkPyr4SauJk4cUOwJs= -github.com/smartystreets/gunit v1.3.4/go.mod h1:ZjM1ozSIMJlAz/ay4SG8PeKF00ckUp+zMHZXV9/bvak= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= -github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/viper v1.7.1/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= -github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA= -github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= -github.com/tj/assert v0.0.0-20171129193455-018094318fb0/go.mod h1:mZ9/Rh9oLWpLLDRpvE+3b7gP/C2YyLFYxNmcLnPTMe0= -github.com/tj/assert v0.0.3 h1:Df/BlaZ20mq6kuai7f5z2TvPFiwC3xaWJSDQNiIS3Rk= -github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk= -github.com/tj/go-buffer v1.1.0/go.mod h1:iyiJpfFcR2B9sXu7KvjbT9fpM4mOelRSDTbntVj52Uc= -github.com/tj/go-elastic v0.0.0-20171221160941-36157cbbebc2/go.mod h1:WjeM0Oo1eNAjXGDx2yma7uG2XoyRZTq1uv3M/o7imD0= -github.com/tj/go-kinesis v0.0.0-20171128231115-08b17f58cb1b/go.mod h1:/yhzCV0xPfx6jb1bBgRFjl5lytqVqZXEaeqWP8lTEao= -github.com/tj/go-spin v1.1.0/go.mod h1:Mg1mzmePZm4dva8Qz60H2lHwmJ2loum4VIrLgVnKwh4= -github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= -github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= -go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190506204251-e1dfcc566284/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= -golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= -golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= -golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= -golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200602114024-627f9648deb9 h1:pNX+40auqi2JqRfOP1akLGtYcn15TUbkhwuCO3foqqM= -golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191112195655-aa38f8e97acc/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= -google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= -google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.6 h1:lMO5rYAqUxkmaj76jAkRUvt5JZgFymx/+Q5Mzfivuhc= -google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= -google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.24.0 h1:UhZDfRO8JRQru4/+LlLE0BRKGF8L+PICnvYZmx/fEGA= -google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/ini.v1 v1.51.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= -gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c h1:grhR+C34yXImVGp7EzNk+DTIk+323eIUWOmEevy6bDo= -gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= diff --git a/backend/template/spiders/segmentfault_colly/main.go b/backend/template/spiders/segmentfault_colly/main.go deleted file mode 100644 index c6eff3ad..00000000 --- a/backend/template/spiders/segmentfault_colly/main.go +++ /dev/null @@ -1,43 +0,0 @@ -package main - -import ( - "fmt" - "github.com/apex/log" - "github.com/crawlab-team/crawlab-go-sdk" - "github.com/crawlab-team/crawlab-go-sdk/entity" - "github.com/gocolly/colly/v2" - "runtime/debug" -) - -func main() { - startUrl := "https://segmentfault.com/search?q=crawlab" - - c := colly.NewCollector( - colly.Async(true), - colly.UserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"), - ) - - c.OnHTML(".search-result > .widget-blog", func(e *colly.HTMLElement) { - item := entity.Item{} - item["title"] = e.ChildText("h2.h4 > a") - item["url"] = e.ChildAttr("h2.h4 > a", "href") - fmt.Println(item) - if err := crawlab.SaveItem(item); err != nil { - log.Errorf("save item error: " + err.Error()) - debug.PrintStack() - return - } - }) - - c.OnRequest(func(r *colly.Request) { - fmt.Println(fmt.Sprintf("Visiting %s", r.URL.String())) - }) - - if err := c.Visit(startUrl); err != nil { - log.Errorf("visit error: " + err.Error()) - debug.PrintStack() - panic(fmt.Sprintf("Unable to visit %s", startUrl)) - } - - c.Wait() -} diff --git a/backend/template/spiders/sinastock/Spiderfile b/backend/template/spiders/sinastock/Spiderfile deleted file mode 100644 index b110cb48..00000000 --- a/backend/template/spiders/sinastock/Spiderfile +++ /dev/null @@ -1,5 +0,0 @@ -name: "sinastock" -display_name: "新浪股票 (Scrapy)" -type: "customized" -col: "results_sinastock" -cmd: "scrapy crawl sinastock_spider" \ No newline at end of file diff --git a/backend/template/spiders/sinastock/scrapy.cfg b/backend/template/spiders/sinastock/scrapy.cfg deleted file mode 100644 index 4969ad96..00000000 --- a/backend/template/spiders/sinastock/scrapy.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Automatically created by: scrapy startproject -# -# For more information about the [deploy] section see: -# https://scrapyd.readthedocs.io/en/latest/deploy.html - -[settings] -default = sinastock.settings - -[deploy] -#url = http://localhost:6800/ -project = sinastock diff --git a/backend/template/spiders/sinastock/sinastock/__init__.py b/backend/template/spiders/sinastock/sinastock/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/template/spiders/sinastock/sinastock/items.py b/backend/template/spiders/sinastock/sinastock/items.py deleted file mode 100644 index 6e3e5d8e..00000000 --- a/backend/template/spiders/sinastock/sinastock/items.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your scraped items -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/items.html - -import scrapy - - -class NewsItem(scrapy.Item): - # define the fields for your item here like: - _id = scrapy.Field() - title = scrapy.Field() - ts_str = scrapy.Field() - ts = scrapy.Field() - url = scrapy.Field() - text = scrapy.Field() - task_id = scrapy.Field() - source = scrapy.Field() - stocks = scrapy.Field() diff --git a/backend/template/spiders/sinastock/sinastock/middlewares.py b/backend/template/spiders/sinastock/sinastock/middlewares.py deleted file mode 100644 index 912b5e57..00000000 --- a/backend/template/spiders/sinastock/sinastock/middlewares.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your spider middleware -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -from scrapy import signals - - -class SinastockSpiderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the spider middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_spider_input(self, response, spider): - # Called for each response that goes through the spider - # middleware and into the spider. - - # Should return None or raise an exception. - return None - - def process_spider_output(self, response, result, spider): - # Called with the results returned from the Spider, after - # it has processed the response. - - # Must return an iterable of Request, dict or Item objects. - for i in result: - yield i - - def process_spider_exception(self, response, exception, spider): - # Called when a spider or process_spider_input() method - # (from other spider middleware) raises an exception. - - # Should return either None or an iterable of Response, dict - # or Item objects. - pass - - def process_start_requests(self, start_requests, spider): - # Called with the start requests of the spider, and works - # similarly to the process_spider_output() method, except - # that it doesn’t have a response associated. - - # Must return only requests (not items). - for r in start_requests: - yield r - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) - - -class SinastockDownloaderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the downloader middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_request(self, request, spider): - # Called for each request that goes through the downloader - # middleware. - - # Must either: - # - return None: continue processing this request - # - or return a Response object - # - or return a Request object - # - or raise IgnoreRequest: process_exception() methods of - # installed downloader middleware will be called - return None - - def process_response(self, request, response, spider): - # Called with the response returned from the downloader. - - # Must either; - # - return a Response object - # - return a Request object - # - or raise IgnoreRequest - return response - - def process_exception(self, request, exception, spider): - # Called when a download handler or a process_request() - # (from other downloader middleware) raises an exception. - - # Must either: - # - return None: continue processing this exception - # - return a Response object: stops process_exception() chain - # - return a Request object: stops process_exception() chain - pass - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) diff --git a/backend/template/spiders/sinastock/sinastock/pipelines.py b/backend/template/spiders/sinastock/sinastock/pipelines.py deleted file mode 100644 index 5a7d7cbf..00000000 --- a/backend/template/spiders/sinastock/sinastock/pipelines.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define your item pipelines here -# -# Don't forget to add your pipeline to the ITEM_PIPELINES setting -# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html diff --git a/backend/template/spiders/sinastock/sinastock/settings.py b/backend/template/spiders/sinastock/sinastock/settings.py deleted file mode 100644 index 3e01d3ca..00000000 --- a/backend/template/spiders/sinastock/sinastock/settings.py +++ /dev/null @@ -1,89 +0,0 @@ -# -*- coding: utf-8 -*- - -# Scrapy settings for sinastock project -# -# For simplicity, this file contains only settings considered important or -# commonly used. You can find more settings consulting the documentation: -# -# https://doc.scrapy.org/en/latest/topics/settings.html -# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -BOT_NAME = 'sinastock' - -SPIDER_MODULES = ['sinastock.spiders'] -NEWSPIDER_MODULE = 'sinastock.spiders' - -# Crawl responsibly by identifying yourself (and your website) on the user-agent -# USER_AGENT = 'sinastock (+http://www.yourdomain.com)' - -# Obey robots.txt rules -ROBOTSTXT_OBEY = True - -# Configure maximum concurrent requests performed by Scrapy (default: 16) -# CONCURRENT_REQUESTS = 32 - -# Configure a delay for requests for the same website (default: 0) -# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay -# See also autothrottle settings and docs -# DOWNLOAD_DELAY = 3 -# The download delay setting will honor only one of: -# CONCURRENT_REQUESTS_PER_DOMAIN = 16 -# CONCURRENT_REQUESTS_PER_IP = 16 - -# Disable cookies (enabled by default) -# COOKIES_ENABLED = False - -# Disable Telnet Console (enabled by default) -# TELNETCONSOLE_ENABLED = False - -# Override the default request headers: -# DEFAULT_REQUEST_HEADERS = { -# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', -# 'Accept-Language': 'en', -# } - -# Enable or disable spider middlewares -# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html -# SPIDER_MIDDLEWARES = { -# 'sinastock.middlewares.SinastockSpiderMiddleware': 543, -# } - -# Enable or disable downloader middlewares -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -# DOWNLOADER_MIDDLEWARES = { -# 'sinastock.middlewares.SinastockDownloaderMiddleware': 543, -# } - -# Enable or disable extensions -# See https://doc.scrapy.org/en/latest/topics/extensions.html -# EXTENSIONS = { -# 'scrapy.extensions.telnet.TelnetConsole': None, -# } - -# Configure item pipelines -# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html -ITEM_PIPELINES = { - 'crawlab.pipelines.CrawlabMongoPipeline': 300, -} - -# Enable and configure the AutoThrottle extension (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/autothrottle.html -# AUTOTHROTTLE_ENABLED = True -# The initial download delay -# AUTOTHROTTLE_START_DELAY = 5 -# The maximum download delay to be set in case of high latencies -# AUTOTHROTTLE_MAX_DELAY = 60 -# The average number of requests Scrapy should be sending in parallel to -# each remote server -# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 -# Enable showing throttling stats for every response received: -# AUTOTHROTTLE_DEBUG = False - -# Enable and configure HTTP caching (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings -# HTTPCACHE_ENABLED = True -# HTTPCACHE_EXPIRATION_SECS = 0 -# HTTPCACHE_DIR = 'httpcache' -# HTTPCACHE_IGNORE_HTTP_CODES = [] -# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' diff --git a/backend/template/spiders/sinastock/sinastock/spiders/__init__.py b/backend/template/spiders/sinastock/sinastock/spiders/__init__.py deleted file mode 100644 index ebd689ac..00000000 --- a/backend/template/spiders/sinastock/sinastock/spiders/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# This package will contain the spiders of your Scrapy project -# -# Please refer to the documentation for information on how to create and manage -# your spiders. diff --git a/backend/template/spiders/sinastock/sinastock/spiders/sinastock_spider.py b/backend/template/spiders/sinastock/sinastock/spiders/sinastock_spider.py deleted file mode 100644 index 95bed149..00000000 --- a/backend/template/spiders/sinastock/sinastock/spiders/sinastock_spider.py +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import re -from datetime import datetime - -import scrapy -from pymongo import MongoClient - -from sinastock.items import NewsItem - -class SinastockSpiderSpider(scrapy.Spider): - name = 'sinastock_spider' - allowed_domains = ['finance.sina.com.cn'] - - def start_requests(self): - col = self.db['stocks'] - for s in col.find({}): - code, ex = s['ts_code'].split('.') - for i in range(10): - url = f'http://vip.stock.finance.sina.com.cn/corp/view/vCB_AllNewsStock.php?symbol={ex.lower()}{code}&Page={i + 1}' - yield scrapy.Request( - url=url, - callback=self.parse, - meta={'ts_code': s['ts_code']} - ) - - def parse(self, response): - for a in response.css('.datelist > ul > a'): - url = a.css('a::attr("href")').extract_first() - item = NewsItem( - title=a.css('a::text').extract_first(), - url=url, - source='sina', - stocks=[response.meta['ts_code']] - ) - yield scrapy.Request( - url=url, - callback=self.parse_detail, - meta={'item': item} - ) - - def parse_detail(self, response): - item = response.meta['item'] - text = response.css('#artibody').extract_first() - pre = re.compile('>(.*?)<') - text = ''.join(pre.findall(text)) - item['text'] = text.replace('\u3000', '') - item['ts_str'] = response.css('.date::text').extract_first() - if item['text'] is None or item['ts_str'] is None: - pass - else: - item['ts'] = datetime.strptime(item['ts_str'], '%Y年%m月%d日 %H:%M') - yield item diff --git a/backend/template/spiders/sites_inspector/sites_inspector.py b/backend/template/spiders/sites_inspector/sites_inspector.py deleted file mode 100644 index b6e264c7..00000000 --- a/backend/template/spiders/sites_inspector/sites_inspector.py +++ /dev/null @@ -1,77 +0,0 @@ -import asyncio -import os -from datetime import datetime - -import aiohttp -import requests - -from pymongo import MongoClient - -# MONGO_HOST = os.environ['MONGO_HOST'] -# MONGO_PORT = int(os.environ['MONGO_PORT']) -# MONGO_DB = os.environ['MONGO_DB'] -MONGO_HOST = 'localhost' -MONGO_PORT = 27017 -MONGO_DB = 'crawlab_test' - -mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT) -db = mongo[MONGO_DB] -col = db['sites'] - - -async def process_response(resp, **kwargs): - url = kwargs.get('url') - status = resp.status # 读取状态 - if status == 200 and ('robots.txt' in str(resp.url)): - col.update({'_id': url}, {'$set': {'has_robots': True}}) - else: - # 错误状态 - col.update({'_id': url}, {'$set': {'has_robots': False}}) - - -async def process_home_page_response(resp, **kwargs): - url = kwargs.get('url') - duration = kwargs.get('duration') - status = resp.status # 读取状态 - col.update({'_id': url}, {'$set': {'home_http_status': status, 'home_response_time': duration}}) - - -async def request_site(url: str, semaphore): - _url = 'http://' + url + '/robots.txt' - # print('crawling ' + _url) - async with semaphore: - async with aiohttp.ClientSession() as session: # <1> 开启一个会话 - async with session.get(_url) as resp: # 发送请求 - await process_response(resp=resp, url=url) - print('crawled ' + _url) - # resp = requests.get(_url) - return resp - - -async def request_site_home_page(url: str, semophore): - _url = 'http://' + url - # print('crawling ' + _url) - async with semophore: - tic = datetime.now() - async with aiohttp.ClientSession() as session: # <1> 开启一个会话 - async with session.get(_url) as resp: # 发送请求 - toc = datetime.now() - duration = (toc - tic).total_seconds() - await process_home_page_response(resp=resp, url=url, duration=duration) - print('crawled ' + _url) - - -async def run(): - semaphore = asyncio.Semaphore(50) # 限制并发量为50 - # sites = [site for site in col.find({'rank': {'$lte': 5000}})] - sites = [site for site in col.find({'rank': {'$lte': 100}})] - urls = [site['_id'] for site in sites] - to_get = [request_site(url, semaphore) for url in urls] - to_get += [request_site_home_page(url, semaphore) for url in urls] - await asyncio.wait(to_get) - - -if __name__ == '__main__': - loop = asyncio.get_event_loop() - loop.run_until_complete(run()) - loop.close() diff --git a/backend/template/spiders/v2ex_config/Spiderfile b/backend/template/spiders/v2ex_config/Spiderfile deleted file mode 100644 index bb18d40a..00000000 --- a/backend/template/spiders/v2ex_config/Spiderfile +++ /dev/null @@ -1,54 +0,0 @@ -name: "v2ex_config" -display_name: "V2ex(可配置)" -remark: "V2ex,列表+详情" -type: "configurable" -col: "results_v2ex_config" -engine: scrapy -start_url: https://v2ex.com/ -start_stage: list -stages: -- name: list - is_list: true - list_css: .cell.item - list_xpath: "" - page_css: "" - page_xpath: "" - page_attr: href - fields: - - name: title - css: a.topic-link - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: url - css: a.topic-link - xpath: "" - attr: href - next_stage: detail - remark: "" - - name: replies - css: .count_livid - xpath: "" - attr: "" - next_stage: "" - remark: "" -- name: detail - is_list: false - list_css: "" - list_xpath: "" - page_css: "" - page_xpath: "" - page_attr: "" - fields: - - name: content - css: "" - xpath: .//*[@class="markdown_body"] - attr: "" - next_stage: "" - remark: "" -settings: - AUTOTHROTTLE_ENABLED: "true" - ROBOTSTXT_OBEY: "false" - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, - like Gecko) Chrome/79.0.3945.117 Safari/537.36 diff --git a/backend/template/spiders/xueqiu/Spiderfile b/backend/template/spiders/xueqiu/Spiderfile deleted file mode 100644 index 38aa5dbe..00000000 --- a/backend/template/spiders/xueqiu/Spiderfile +++ /dev/null @@ -1,5 +0,0 @@ -name: "xueqiu" -display_name: "雪球网 (Scrapy)" -type: "customized" -col: "results_xueqiu" -cmd: "scrapy crawl xueqiu_spider" \ No newline at end of file diff --git a/backend/template/spiders/xueqiu/scrapy.cfg b/backend/template/spiders/xueqiu/scrapy.cfg deleted file mode 100644 index 2c5ce3b3..00000000 --- a/backend/template/spiders/xueqiu/scrapy.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Automatically created by: scrapy startproject -# -# For more information about the [deploy] section see: -# https://scrapyd.readthedocs.io/en/latest/deploy.html - -[settings] -default = xueqiu.settings - -[deploy] -#url = http://localhost:6800/ -project = xueqiu diff --git a/backend/template/spiders/xueqiu/xueqiu/__init__.py b/backend/template/spiders/xueqiu/xueqiu/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/backend/template/spiders/xueqiu/xueqiu/items.py b/backend/template/spiders/xueqiu/xueqiu/items.py deleted file mode 100644 index 5471594d..00000000 --- a/backend/template/spiders/xueqiu/xueqiu/items.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your scraped items -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/items.html - -import scrapy - - -class XueqiuItem(scrapy.Item): - # define the fields for your item here like: - _id = scrapy.Field() - task_id = scrapy.Field() - id = scrapy.Field() - text = scrapy.Field() - url = scrapy.Field() - target = scrapy.Field() - view_count = scrapy.Field() - mark = scrapy.Field() - created_at = scrapy.Field() - ts = scrapy.Field() - source = scrapy.Field() diff --git a/backend/template/spiders/xueqiu/xueqiu/middlewares.py b/backend/template/spiders/xueqiu/xueqiu/middlewares.py deleted file mode 100644 index f60102ce..00000000 --- a/backend/template/spiders/xueqiu/xueqiu/middlewares.py +++ /dev/null @@ -1,103 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define here the models for your spider middleware -# -# See documentation in: -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -from scrapy import signals - - -class XueqiuSpiderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the spider middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_spider_input(self, response, spider): - # Called for each response that goes through the spider - # middleware and into the spider. - - # Should return None or raise an exception. - return None - - def process_spider_output(self, response, result, spider): - # Called with the results returned from the Spider, after - # it has processed the response. - - # Must return an iterable of Request, dict or Item objects. - for i in result: - yield i - - def process_spider_exception(self, response, exception, spider): - # Called when a spider or process_spider_input() method - # (from other spider middleware) raises an exception. - - # Should return either None or an iterable of Response, dict - # or Item objects. - pass - - def process_start_requests(self, start_requests, spider): - # Called with the start requests of the spider, and works - # similarly to the process_spider_output() method, except - # that it doesn’t have a response associated. - - # Must return only requests (not items). - for r in start_requests: - yield r - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) - - -class XueqiuDownloaderMiddleware(object): - # Not all methods need to be defined. If a method is not defined, - # scrapy acts as if the downloader middleware does not modify the - # passed objects. - - @classmethod - def from_crawler(cls, crawler): - # This method is used by Scrapy to create your spiders. - s = cls() - crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) - return s - - def process_request(self, request, spider): - # Called for each request that goes through the downloader - # middleware. - - # Must either: - # - return None: continue processing this request - # - or return a Response object - # - or return a Request object - # - or raise IgnoreRequest: process_exception() methods of - # installed downloader middleware will be called - return None - - def process_response(self, request, response, spider): - # Called with the response returned from the downloader. - - # Must either; - # - return a Response object - # - return a Request object - # - or raise IgnoreRequest - return response - - def process_exception(self, request, exception, spider): - # Called when a download handler or a process_request() - # (from other downloader middleware) raises an exception. - - # Must either: - # - return None: continue processing this exception - # - return a Response object: stops process_exception() chain - # - return a Request object: stops process_exception() chain - pass - - def spider_opened(self, spider): - spider.logger.info('Spider opened: %s' % spider.name) diff --git a/backend/template/spiders/xueqiu/xueqiu/pipelines.py b/backend/template/spiders/xueqiu/xueqiu/pipelines.py deleted file mode 100644 index 5a7d7cbf..00000000 --- a/backend/template/spiders/xueqiu/xueqiu/pipelines.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -# Define your item pipelines here -# -# Don't forget to add your pipeline to the ITEM_PIPELINES setting -# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html diff --git a/backend/template/spiders/xueqiu/xueqiu/settings.py b/backend/template/spiders/xueqiu/xueqiu/settings.py deleted file mode 100644 index 1d898e2f..00000000 --- a/backend/template/spiders/xueqiu/xueqiu/settings.py +++ /dev/null @@ -1,89 +0,0 @@ -# -*- coding: utf-8 -*- - -# Scrapy settings for xueqiu project -# -# For simplicity, this file contains only settings considered important or -# commonly used. You can find more settings consulting the documentation: -# -# https://doc.scrapy.org/en/latest/topics/settings.html -# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -# https://doc.scrapy.org/en/latest/topics/spider-middleware.html - -BOT_NAME = 'xueqiu' - -SPIDER_MODULES = ['xueqiu.spiders'] -NEWSPIDER_MODULE = 'xueqiu.spiders' - -# Crawl responsibly by identifying yourself (and your website) on the user-agent -USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36' - -# Obey robots.txt rules -ROBOTSTXT_OBEY = False - -# Configure maximum concurrent requests performed by Scrapy (default: 16) -# CONCURRENT_REQUESTS = 32 - -# Configure a delay for requests for the same website (default: 0) -# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay -# See also autothrottle settings and docs -# DOWNLOAD_DELAY = 3 -# The download delay setting will honor only one of: -# CONCURRENT_REQUESTS_PER_DOMAIN = 16 -# CONCURRENT_REQUESTS_PER_IP = 16 - -# Disable cookies (enabled by default) -# COOKIES_ENABLED = False - -# Disable Telnet Console (enabled by default) -# TELNETCONSOLE_ENABLED = False - -# Override the default request headers: -# DEFAULT_REQUEST_HEADERS = { -# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', -# 'Accept-Language': 'en', -# } - -# Enable or disable spider middlewares -# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html -# SPIDER_MIDDLEWARES = { -# 'xueqiu.middlewares.XueqiuSpiderMiddleware': 543, -# } - -# Enable or disable downloader middlewares -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html -# DOWNLOADER_MIDDLEWARES = { -# 'xueqiu.middlewares.XueqiuDownloaderMiddleware': 543, -# } - -# Enable or disable extensions -# See https://doc.scrapy.org/en/latest/topics/extensions.html -# EXTENSIONS = { -# 'scrapy.extensions.telnet.TelnetConsole': None, -# } - -# Configure item pipelines -# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html -ITEM_PIPELINES = { - 'crawlab.pipelines.CrawlabMongoPipeline': 300, -} - -# Enable and configure the AutoThrottle extension (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/autothrottle.html -# AUTOTHROTTLE_ENABLED = True -# The initial download delay -# AUTOTHROTTLE_START_DELAY = 5 -# The maximum download delay to be set in case of high latencies -# AUTOTHROTTLE_MAX_DELAY = 60 -# The average number of requests Scrapy should be sending in parallel to -# each remote server -# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 -# Enable showing throttling stats for every response received: -# AUTOTHROTTLE_DEBUG = False - -# Enable and configure HTTP caching (disabled by default) -# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings -# HTTPCACHE_ENABLED = True -# HTTPCACHE_EXPIRATION_SECS = 0 -# HTTPCACHE_DIR = 'httpcache' -# HTTPCACHE_IGNORE_HTTP_CODES = [] -# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' diff --git a/backend/template/spiders/xueqiu/xueqiu/spiders/__init__.py b/backend/template/spiders/xueqiu/xueqiu/spiders/__init__.py deleted file mode 100644 index ebd689ac..00000000 --- a/backend/template/spiders/xueqiu/xueqiu/spiders/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# This package will contain the spiders of your Scrapy project -# -# Please refer to the documentation for information on how to create and manage -# your spiders. diff --git a/backend/template/spiders/xueqiu/xueqiu/spiders/xueqiu_spider.py b/backend/template/spiders/xueqiu/xueqiu/spiders/xueqiu_spider.py deleted file mode 100644 index a746e156..00000000 --- a/backend/template/spiders/xueqiu/xueqiu/spiders/xueqiu_spider.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- -import json -from datetime import datetime -from time import sleep - -import scrapy - -from xueqiu.items import XueqiuItem - - -class XueqiuSpiderSpider(scrapy.Spider): - name = 'xueqiu_spider' - allowed_domains = ['xueqiu.com'] - - def start_requests(self): - return [scrapy.Request( - url='https://xueqiu.com', - callback=self.parse_home - )] - - def parse_home(self, response): - yield scrapy.Request( - url='https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=20&category=6' - ) - - def parse(self, response): - data = json.loads(response.body) - next_max_id = data.get('next_max_id') - sleep(1) - for row in data.get('list'): - d = json.loads(row.get('data')) - item = XueqiuItem( - id=d['id'], - text=d['text'], - mark=d['mark'], - url=d['target'], - created_at=d['created_at'], - ts=datetime.fromtimestamp(d['created_at'] / 1e3), - view_count=d['view_count'], - source='xueqiu' - ) - yield item - - yield scrapy.Request( - url=f'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={next_max_id}&count=20&category=6' - ) diff --git a/backend/template/spiders/xueqiu_config/Spiderfile b/backend/template/spiders/xueqiu_config/Spiderfile deleted file mode 100644 index 0de50e9e..00000000 --- a/backend/template/spiders/xueqiu_config/Spiderfile +++ /dev/null @@ -1,39 +0,0 @@ -name: "xueqiu_config" -display_name: "雪球网(可配置)" -remark: "雪球网新闻,列表" -type: "configurable" -col: "results_xueqiu_config" -engine: scrapy -start_url: https://xueqiu.com/ -start_stage: list -stages: -- name: list - is_list: true - list_css: "" - list_xpath: .//*[contains(@class, "AnonymousHome_home__timeline__item")] - page_css: "" - page_xpath: "" - page_attr: "" - fields: - - name: title - css: h3 > a - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: url - css: h3 > a - xpath: "" - attr: href - next_stage: "" - remark: "" - - name: abstract - css: p - xpath: "" - attr: "" - next_stage: "" - remark: "" -settings: - ROBOTSTXT_OBEY: "false" - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, - like Gecko) Chrome/78.0.3904.108 Safari/537.36 diff --git a/backend/template/spiders/zongheng_config/Spiderfile b/backend/template/spiders/zongheng_config/Spiderfile deleted file mode 100644 index 0163fac7..00000000 --- a/backend/template/spiders/zongheng_config/Spiderfile +++ /dev/null @@ -1,45 +0,0 @@ -name: "zongheng_config" -display_name: "纵横(可配置)" -remark: "纵横小说网,列表" -type: "configurable" -col: "results_zongheng_config" -engine: scrapy -start_url: http://www.zongheng.com/rank/details.html?rt=1&d=1 -start_stage: list -stages: -- name: list - is_list: true - list_css: .rank_d_list - list_xpath: "" - page_css: "" - page_xpath: "" - page_attr: href - fields: - - name: title - css: .rank_d_b_name > a - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: url - css: .rank_d_b_name > a - xpath: "" - attr: href - next_stage: "" - remark: "" - - name: abstract - css: body - xpath: "" - attr: "" - next_stage: "" - remark: "" - - name: votes - css: .rank_d_b_ticket - xpath: "" - attr: "" - next_stage: "" - remark: "" -settings: - ROBOTSTXT_OBEY: "false" - USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, - like Gecko) Chrome/78.0.3904.108 Safari/537.36 diff --git a/backend/test/test.http b/backend/test/test.http deleted file mode 100644 index bde72001..00000000 --- a/backend/test/test.http +++ /dev/null @@ -1,49 +0,0 @@ -# For a quick start check out our HTTP Requests collection (Tools|HTTP Client|Open HTTP Requests Collection). -# -# Following HTTP Request Live Templates are available: -# * 'gtrp' and 'gtr' create a GET request with or without query parameters; -# * 'ptr' and 'ptrp' create a POST request with a simple or parameter-like body; -# * 'mptr' and 'fptr' create a POST request to submit a form with a text or file field (multipart/form-data); -PUT http://localhost:8000/schedules -Content-Type: application/json -#Content-Type: application/x-www-form-urlencoded - -{ - "cron": "*/10 * * * * *", - "spider_id": "5d2ead494bdee04810bb7654" -} - -### cron=*/10 * * * * *&spider_id=5d2ead494bdee04810bb7654 - -DELETE http://localhost:8000/schedules/5d31b5334bdee082f6e69e7a - -### - -DELETE http://localhost:8000/spiders/5d31a6ed4bdee07b25d70c7b - -### - - -### - -PUT http://localhost:8000/tasks -Content-Type: application/json - -{ - "spider_id": "5d32f20f4bdee0f4aae526de", - "node_id": "5d3343cc4bdee01cb772883e" -} - -### - -POST http://localhost:8000/spiders/5d32ad224bdee0a60ee5639c/publish - -### - -POST http://localhost:8000/spiders - -### - -GET http://localhost:8000/tasks/bb79ec82-1e8f-41c4-858a-5cb682396409/log - -### diff --git a/backend/utils/array.go b/backend/utils/array.go deleted file mode 100644 index 889430ed..00000000 --- a/backend/utils/array.go +++ /dev/null @@ -1,10 +0,0 @@ -package utils - -func StringArrayContains(arr []string, str string) bool { - for _, s := range arr { - if s == str { - return true - } - } - return false -} diff --git a/backend/utils/chan.go b/backend/utils/chan.go deleted file mode 100644 index c0144340..00000000 --- a/backend/utils/chan.go +++ /dev/null @@ -1,40 +0,0 @@ -package utils - -import ( - "sync" -) - -var TaskExecChanMap = NewChanMap() - -type ChanMap struct { - m sync.Map -} - -func NewChanMap() *ChanMap { - return &ChanMap{m: sync.Map{}} -} - -func (cm *ChanMap) Chan(key string) chan string { - if ch, ok := cm.m.Load(key); ok { - return ch.(interface{}).(chan string) - } - ch := make(chan string, 10) - cm.m.Store(key, ch) - return ch -} - -func (cm *ChanMap) ChanBlocked(key string) chan string { - if ch, ok := cm.m.Load(key); ok { - return ch.(interface{}).(chan string) - } - ch := make(chan string) - cm.m.Store(key, ch) - return ch -} - -func (cm *ChanMap) HasChanKey(key string) bool { - if _, ok := cm.m.Load(key); ok { - return true - } - return false -} diff --git a/backend/utils/chan_test.go b/backend/utils/chan_test.go deleted file mode 100644 index 4bc75917..00000000 --- a/backend/utils/chan_test.go +++ /dev/null @@ -1,78 +0,0 @@ -package utils - -import ( - . "github.com/smartystreets/goconvey/convey" - "sync" - "testing" -) - -func TestNewChanMap(t *testing.T) { - mapTest := sync.Map{} - chanTest := make(chan string) - test := "test" - - Convey("Call NewChanMap to generate ChanMap", t, func() { - mapTest.Store("test", chanTest) - chanMapTest := ChanMap{mapTest} - chanMap := NewChanMap() - chanMap.m.Store("test", chanTest) - - Convey(test, func() { - v1, ok := chanMap.m.Load("test") - So(ok, ShouldBeTrue) - v2, ok := chanMapTest.m.Load("test") - So(ok, ShouldBeTrue) - So(v1, ShouldResemble, v2) - }) - }) -} - -func TestChan(t *testing.T) { - mapTest := sync.Map{} - chanTest := make(chan string) - mapTest.Store("test", chanTest) - chanMapTest := ChanMap{mapTest} - - Convey("Test Chan use exist key", t, func() { - ch1 := chanMapTest.Chan("test") - Convey("ch1 should equal chanTest", func() { - So(ch1, ShouldEqual, chanTest) - }) - }) - Convey("Test Chan use no-exist key", t, func() { - ch2 := chanMapTest.Chan("test2") - Convey("ch2 should equal chanMapTest.m[test2]", func() { - v, ok := chanMapTest.m.Load("test2") - So(ok, ShouldBeTrue) - So(v, ShouldEqual, ch2) - }) - Convey("Cap of chanMapTest.m[test2] should equal 10", func() { - So(10, ShouldEqual, cap(ch2)) - }) - }) -} - -func TestChanBlocked(t *testing.T) { - mapTest := sync.Map{} - chanTest := make(chan string) - mapTest.Store("test", chanTest) - chanMapTest := ChanMap{mapTest} - - Convey("Test Chan use exist key", t, func() { - ch1 := chanMapTest.ChanBlocked("test") - Convey("ch1 should equal chanTest", func() { - So(ch1, ShouldEqual, chanTest) - }) - }) - Convey("Test Chan use no-exist key", t, func() { - ch2 := chanMapTest.ChanBlocked("test2") - Convey("ch2 should equal chanMapTest.m[test2]", func() { - v, ok := chanMapTest.m.Load("test2") - So(ok, ShouldBeTrue) - So(v, ShouldEqual, ch2) - }) - Convey("Cap of chanMapTest.m[test2] should equal 10", func() { - So(0, ShouldEqual, cap(ch2)) - }) - }) -} diff --git a/backend/utils/encrypt.go b/backend/utils/encrypt.go deleted file mode 100644 index 52013b9c..00000000 --- a/backend/utils/encrypt.go +++ /dev/null @@ -1,16 +0,0 @@ -package utils - -import ( - "crypto/hmac" - "crypto/sha256" - "encoding/base64" - "encoding/hex" -) - -func ComputeHmacSha256(message string, secret string) string { - key := []byte(secret) - h := hmac.New(sha256.New, key) - h.Write([]byte(message)) - sha := hex.EncodeToString(h.Sum(nil)) - return base64.StdEncoding.EncodeToString([]byte(sha)) -} diff --git a/backend/utils/file.go b/backend/utils/file.go deleted file mode 100644 index 040b78de..00000000 --- a/backend/utils/file.go +++ /dev/null @@ -1,385 +0,0 @@ -package utils - -import ( - "archive/zip" - "bufio" - "fmt" - "github.com/apex/log" - "io" - "io/ioutil" - "os" - "path" - "path/filepath" - "runtime/debug" - "strings" -) - -// 删除文件 -func RemoveFiles(path string) { - if err := os.RemoveAll(path); err != nil { - log.Errorf("remove files error: %s, path: %s", err.Error(), path) - debug.PrintStack() - } -} - -// 读取文件一行 -func ReadFileOneLine(fileName string) string { - file := OpenFile(fileName) - defer Close(file) - buf := bufio.NewReader(file) - line, err := buf.ReadString('\n') - if err != nil { - log.Errorf("read file error: %s", err.Error()) - return "" - } - return line -} - -func GetSpiderMd5Str(file string) string { - md5Str := ReadFileOneLine(file) - // 去掉空格以及换行符 - md5Str = strings.Replace(md5Str, " ", "", -1) - md5Str = strings.Replace(md5Str, "\n", "", -1) - return md5Str -} - -// 创建文件 -func OpenFile(fileName string) *os.File { - file, err := os.OpenFile(fileName, os.O_CREATE|os.O_RDWR, os.ModePerm) - if err != nil { - log.Errorf("create file error: %s, file_name: %s", err.Error(), fileName) - debug.PrintStack() - return nil - } - return file -} - -// 创建文件夹 -func CreateDirPath(filePath string) { - if !Exists(filePath) { - if err := os.MkdirAll(filePath, os.ModePerm); err != nil { - log.Errorf("create file error: %s, file_path: %s", err.Error(), filePath) - debug.PrintStack() - } - } -} - -// 判断所给路径文件/文件夹是否存在 -func Exists(path string) bool { - _, err := os.Stat(path) //os.Stat获取文件信息 - if err != nil { - return os.IsExist(err) - } - return true -} - -// 判断所给路径是否为文件夹 -func IsDir(path string) bool { - s, err := os.Stat(path) - if err != nil { - return false - } - return s.IsDir() -} - -func ListDir(path string) []os.FileInfo { - list, err := ioutil.ReadDir(path) - if err != nil { - log.Errorf(err.Error()) - debug.PrintStack() - return nil - } - return list -} - -// 判断所给路径是否为文件 -func IsFile(path string) bool { - return !IsDir(path) -} - -/** -@tarFile:压缩文件路径 -@dest:解压文件夹 -*/ -func DeCompressByPath(tarFile, dest string) error { - srcFile, err := os.Open(tarFile) - if err != nil { - return err - } - defer Close(srcFile) - return DeCompress(srcFile, dest) -} - -/** -@zipFile:压缩文件 -@dstPath:解压之后文件保存路径 -*/ -func DeCompress(srcFile *os.File, dstPath string) error { - // 如果保存路径不存在,创建一个 - if !Exists(dstPath) { - if err := os.MkdirAll(dstPath, os.ModePerm); err != nil { - debug.PrintStack() - return err - } - } - - // 读取zip文件 - zipFile, err := zip.OpenReader(srcFile.Name()) - if err != nil { - log.Errorf("Unzip File Error:" + err.Error()) - debug.PrintStack() - return err - } - defer Close(zipFile) - - // 遍历zip内所有文件和目录 - for _, innerFile := range zipFile.File { - // 获取该文件数据 - info := innerFile.FileInfo() - - // 如果是目录,则创建一个 - if info.IsDir() { - err = os.MkdirAll(filepath.Join(dstPath, innerFile.Name), os.ModeDir|os.ModePerm) - if err != nil { - log.Errorf("Unzip File Error : " + err.Error()) - debug.PrintStack() - return err - } - continue - } - - // 如果文件目录不存在,则创建一个 - dirPath := filepath.Join(dstPath, filepath.Dir(innerFile.Name)) - if !Exists(dirPath) { - if err = os.MkdirAll(dirPath, os.ModeDir|os.ModePerm); err != nil { - log.Errorf("Unzip File Error : " + err.Error()) - debug.PrintStack() - return err - } - } - - // 打开该文件 - srcFile, err := innerFile.Open() - if err != nil { - log.Errorf("Unzip File Error : " + err.Error()) - debug.PrintStack() - continue - } - - // 创建新文件 - newFilePath := filepath.Join(dstPath, innerFile.Name) - newFile, err := os.OpenFile(newFilePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, info.Mode()) - if err != nil { - log.Errorf("Unzip File Error : " + err.Error()) - debug.PrintStack() - continue - } - - // 拷贝该文件到新文件中 - if _, err := io.Copy(newFile, srcFile); err != nil { - debug.PrintStack() - return err - } - - // 关闭该文件 - if err := srcFile.Close(); err != nil { - debug.PrintStack() - return err - } - - // 关闭新文件 - if err := newFile.Close(); err != nil { - debug.PrintStack() - return err - } - } - return nil -} - -//压缩文件 -//files 文件数组,可以是不同dir下的文件或者文件夹 -//dest 压缩文件存放地址 -func Compress(files []*os.File, dest string) error { - d, _ := os.Create(dest) - defer Close(d) - w := zip.NewWriter(d) - defer Close(w) - for _, file := range files { - if err := _Compress(file, "", w); err != nil { - return err - } - } - return nil -} - -func _Compress(file *os.File, prefix string, zw *zip.Writer) error { - info, err := file.Stat() - if err != nil { - debug.PrintStack() - return err - } - if info.IsDir() { - prefix = prefix + "/" + info.Name() - fileInfos, err := file.Readdir(-1) - if err != nil { - debug.PrintStack() - return err - } - for _, fi := range fileInfos { - f, err := os.Open(file.Name() + "/" + fi.Name()) - if err != nil { - debug.PrintStack() - return err - } - err = _Compress(f, prefix, zw) - if err != nil { - debug.PrintStack() - return err - } - } - } else { - header, err := zip.FileInfoHeader(info) - if err != nil { - debug.PrintStack() - return err - } - header.Name = prefix + "/" + header.Name - writer, err := zw.CreateHeader(header) - if err != nil { - debug.PrintStack() - return err - } - _, err = io.Copy(writer, file) - Close(file) - if err != nil { - debug.PrintStack() - return err - } - } - return nil -} - -func GetFilesFromDir(dirPath string) ([]*os.File, error) { - var res []*os.File - for _, fInfo := range ListDir(dirPath) { - f, err := os.Open(filepath.Join(dirPath, fInfo.Name())) - if err != nil { - return res, err - } - res = append(res, f) - } - return res, nil -} - -func GetAllFilesFromDir(dirPath string) ([]*os.File, error) { - var res []*os.File - if err := filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error { - if !IsDir(path) { - f, err2 := os.Open(path) - if err2 != nil { - return err - } - res = append(res, f) - } - return nil - }); err != nil { - log.Error(err.Error()) - debug.PrintStack() - return res, err - } - return res, nil -} - -// File copies a single file from src to dst -func CopyFile(src, dst string) error { - var err error - var srcFd *os.File - var dstFd *os.File - var srcInfo os.FileInfo - - if srcFd, err = os.Open(src); err != nil { - return err - } - defer srcFd.Close() - - if dstFd, err = os.Create(dst); err != nil { - return err - } - defer dstFd.Close() - - if _, err = io.Copy(dstFd, srcFd); err != nil { - return err - } - if srcInfo, err = os.Stat(src); err != nil { - return err - } - return os.Chmod(dst, srcInfo.Mode()) -} - -// Dir copies a whole directory recursively -func CopyDir(src string, dst string) error { - var err error - var fds []os.FileInfo - var srcInfo os.FileInfo - - if srcInfo, err = os.Stat(src); err != nil { - return err - } - - if err = os.MkdirAll(dst, srcInfo.Mode()); err != nil { - return err - } - - if fds, err = ioutil.ReadDir(src); err != nil { - return err - } - for _, fd := range fds { - srcfp := path.Join(src, fd.Name()) - dstfp := path.Join(dst, fd.Name()) - - if fd.IsDir() { - if err = CopyDir(srcfp, dstfp); err != nil { - fmt.Println(err) - } - } else { - if err = CopyFile(srcfp, dstfp); err != nil { - fmt.Println(err) - } - } - } - return nil -} - -// 设置文件变量值 -// 可以理解为将文件中的变量占位符替换为想要设置的值 -func SetFileVariable(filePath string, key string, value string) error { - // 占位符标识 - sep := "###" - - // 读取文件到字节 - contentBytes, err := ioutil.ReadFile(filePath) - if err != nil { - return err - } - - // 将字节转化为文本 - content := string(contentBytes) - - // 替换文本 - content = strings.Replace(content, fmt.Sprintf("%s%s%s", sep, key, sep), value, -1) - - // 打开文件 - f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_TRUNC, 0777) - if err != nil { - return err - } - - // 将替换后的内容写入文件 - if _, err := f.Write([]byte(content)); err != nil { - return err - } - - f.Close() - - return nil -} diff --git a/backend/utils/file_test.go b/backend/utils/file_test.go deleted file mode 100644 index 4af32d0d..00000000 --- a/backend/utils/file_test.go +++ /dev/null @@ -1,129 +0,0 @@ -package utils - -import ( - "archive/zip" - . "github.com/smartystreets/goconvey/convey" - "io" - "log" - "os" - "runtime/debug" - "testing" -) - -func TestExists(t *testing.T) { - var pathString = "../config" - var wrongPathString = "test" - - Convey("Test path or file is Exists or not", t, func() { - res := Exists(pathString) - Convey("The result should be true", func() { - So(res, ShouldEqual, true) - }) - wrongRes := Exists(wrongPathString) - Convey("The result should be false", func() { - So(wrongRes, ShouldEqual, false) - }) - }) -} - -func TestIsDir(t *testing.T) { - var pathString = "../config" - var fileString = "../config/config.go" - var wrongString = "test" - - Convey("Test path is folder or not", t, func() { - res := IsDir(pathString) - So(res, ShouldEqual, true) - fileRes := IsDir(fileString) - So(fileRes, ShouldEqual, false) - wrongRes := IsDir(wrongString) - So(wrongRes, ShouldEqual, false) - }) -} - -func TestCompress(t *testing.T) { - err := os.Mkdir("testCompress", os.ModePerm) - if err != nil { - t.Error("create testCompress failed") - } - var pathString = "testCompress" - var files []*os.File - var disPath = "testCompress" - file, err := os.Open(pathString) - if err != nil { - t.Error("open source path failed") - } - files = append(files, file) - Convey("Verify dispath is valid path", t, func() { - er := Compress(files, disPath) - Convey("err should be nil", func() { - So(er, ShouldEqual, nil) - }) - }) - _ = os.RemoveAll("testCompress") - -} -func Zip(zipFile string, fileList []string) error { - // 创建 zip 包文件 - fw, err := os.Create(zipFile) - if err != nil { - log.Fatal() - } - defer Close(fw) - - // 实例化新的 zip.Writer - zw := zip.NewWriter(fw) - defer Close(zw) - - for _, fileName := range fileList { - fr, err := os.Open(fileName) - if err != nil { - return err - } - fi, err := fr.Stat() - if err != nil { - return err - } - // 写入文件的头信息 - fh, err := zip.FileInfoHeader(fi) - if err != nil { - return err - } - w, err := zw.CreateHeader(fh) - if err != nil { - return err - } - // 写入文件内容 - _, err = io.Copy(w, fr) - if err != nil { - return err - } - } - return nil -} - -func TestDeCompress(t *testing.T) { - err := os.Mkdir("testDeCompress", os.ModePerm) - if err != nil { - t.Error(err) - - } - err = Zip("demo.zip", []string{}) - if err != nil { - t.Error("create zip file failed") - } - tmpFile, err := os.OpenFile("demo.zip", os.O_RDONLY, 0777) - if err != nil { - debug.PrintStack() - t.Error("open demo.zip failed") - } - var dstPath = "./testDeCompress" - Convey("Test DeCopmress func", t, func() { - - err := DeCompress(tmpFile, dstPath) - So(err, ShouldEqual, nil) - }) - _ = os.RemoveAll("testDeCompress") - _ = os.Remove("demo.zip") - -} diff --git a/backend/utils/helpers.go b/backend/utils/helpers.go deleted file mode 100644 index e181c66c..00000000 --- a/backend/utils/helpers.go +++ /dev/null @@ -1,60 +0,0 @@ -package utils - -import ( - "crawlab/entity" - "encoding/json" - "github.com/apex/log" - "github.com/gomodule/redigo/redis" - "io" - "reflect" - "runtime/debug" - "unsafe" -) - -func BytesToString(b []byte) string { - return *(*string)(unsafe.Pointer(&b)) -} - -func GetJson(message entity.NodeMessage) string { - msgBytes, err := json.Marshal(&message) - if err != nil { - log.Errorf("node message to json error: %s", err.Error()) - debug.PrintStack() - return "" - } - return BytesToString(msgBytes) -} - -func GetMessage(message redis.Message) *entity.NodeMessage { - msg := entity.NodeMessage{} - if err := json.Unmarshal(message.Data, &msg); err != nil { - log.Errorf("message byte to object error: %s", err.Error()) - debug.PrintStack() - return nil - } - return &msg -} - -func Close(c io.Closer) { - err := c.Close() - if err != nil { - //log.WithError(err).Error("关闭资源文件失败。") - } -} - -func Contains(array interface{}, val interface{}) (fla bool) { - fla = false - switch reflect.TypeOf(array).Kind() { - case reflect.Slice: - { - s := reflect.ValueOf(array) - for i := 0; i < s.Len(); i++ { - if reflect.DeepEqual(val, s.Index(i).Interface()) { - fla = true - return - } - } - } - } - return -} diff --git a/backend/utils/model.go b/backend/utils/model.go deleted file mode 100644 index 048b0001..00000000 --- a/backend/utils/model.go +++ /dev/null @@ -1,24 +0,0 @@ -package utils - -import ( - "crawlab/constants" - "encoding/json" - "github.com/globalsign/mgo/bson" - "strings" -) - -func IsObjectIdNull(id bson.ObjectId) bool { - return id.Hex() == constants.ObjectIdNull -} - -func InterfaceToString(value interface{}) string { - bytes, err := json.Marshal(value) - if err != nil { - return "" - } - str := string(bytes) - if strings.HasPrefix(str, "\"") && strings.HasSuffix(str, "\"") { - str = str[1 : len(str)-1] - } - return str -} diff --git a/backend/utils/model_test.go b/backend/utils/model_test.go deleted file mode 100644 index d641865c..00000000 --- a/backend/utils/model_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package utils - -import ( - "github.com/globalsign/mgo/bson" - . "github.com/smartystreets/goconvey/convey" - "strconv" - "testing" - "time" -) - -func TestIsObjectIdNull(t *testing.T) { - var id bson.ObjectId = "123455" - Convey("Test Object ID is null or not", t, func() { - res := IsObjectIdNull(id) - So(res, ShouldEqual, false) - }) -} - -func TestInterfaceToString(t *testing.T) { - var valueBson bson.ObjectId = "12345" - var valueString = "12345" - var valueInt = 12345 - var valueTime = time.Now().Add(60 * time.Second) - var valueOther = []string{"a", "b"} - - Convey("Test InterfaceToString", t, func() { - resBson := InterfaceToString(valueBson) - Convey("resBson should be string value", func() { - So(resBson, ShouldEqual, valueBson.Hex()) - }) - resString := InterfaceToString(valueString) - Convey("resString should be string value", func() { - So(resString, ShouldEqual, valueString) - }) - resInt := InterfaceToString(valueInt) - Convey("resInt should be string value", func() { - So(resInt, ShouldEqual, strconv.Itoa(valueInt)) - }) - resTime := InterfaceToString(valueTime) - Convey("resTime should be string value", func() { - So(resTime, ShouldEqual, valueTime.String()) - }) - resOther := InterfaceToString(valueOther) - Convey("resOther should be empty string", func() { - So(resOther, ShouldEqual, "") - }) - }) - -} diff --git a/backend/utils/rpc.go b/backend/utils/rpc.go deleted file mode 100644 index 03414199..00000000 --- a/backend/utils/rpc.go +++ /dev/null @@ -1,14 +0,0 @@ -package utils - -import "encoding/json" - -// Object 转化为 String -func ObjectToString(params interface{}) string { - bytes, _ := json.Marshal(params) - return BytesToString(bytes) -} - -// 获取 RPC 参数 -func GetRpcParam(key string, params map[string]string) string { - return params[key] -} diff --git a/backend/utils/spider.go b/backend/utils/spider.go deleted file mode 100644 index 4484ccf0..00000000 --- a/backend/utils/spider.go +++ /dev/null @@ -1,8 +0,0 @@ -package utils - -func GetSpiderCol(col string, name string) string { - if col == "" { - return "results_" + name - } - return col -} diff --git a/backend/utils/system.go b/backend/utils/system.go deleted file mode 100644 index e6d2f591..00000000 --- a/backend/utils/system.go +++ /dev/null @@ -1,149 +0,0 @@ -package utils - -import ( - "crawlab/constants" - "crawlab/entity" - "encoding/json" - "github.com/apex/log" - "github.com/spf13/viper" - "io/ioutil" - "path" - "runtime/debug" - "strings" -) - -func GetLangList() []entity.Lang { - list := []entity.Lang{ - // 语言 - { - Name: "Python", - ExecutableName: "python", - ExecutablePaths: []string{"/usr/bin/python", "/usr/local/bin/python"}, - DepExecutablePath: "/usr/local/bin/pip", - LockPath: "/tmp/install-python.lock", - DepFileName: "requirements.txt", - InstallDepArgs: "install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt", - Type: constants.LangTypeLang, - }, - { - Name: "Node.js", - ExecutableName: "node", - ExecutablePaths: []string{"/usr/bin/node", "/usr/local/bin/node"}, - DepExecutablePath: "/usr/local/bin/npm", - LockPath: "/tmp/install-nodejs.lock", - InstallScript: "install-nodejs.sh", - DepFileName: "package.json", - InstallDepArgs: "install -g --registry=https://registry.npm.taobao.org", - Type: constants.LangTypeLang, - }, - { - Name: "Java", - ExecutableName: "java", - ExecutablePaths: []string{"/usr/bin/java", "/usr/local/bin/java"}, - LockPath: "/tmp/install-java.lock", - InstallScript: "install-java.sh", - Type: constants.LangTypeLang, - }, - { - Name: ".Net Core", - ExecutableName: "dotnet", - ExecutablePaths: []string{"/usr/bin/dotnet", "/usr/local/bin/dotnet"}, - LockPath: "/tmp/install-dotnet.lock", - InstallScript: "install-dotnet.sh", - Type: constants.LangTypeLang, - }, - { - Name: "PHP", - ExecutableName: "php", - ExecutablePaths: []string{"/usr/bin/php", "/usr/local/bin/php"}, - LockPath: "/tmp/install-php.lock", - InstallScript: "install-php.sh", - Type: constants.LangTypeLang, - }, - { - Name: "Golang", - ExecutableName: "go", - ExecutablePaths: []string{"/usr/bin/go", "/usr/local/bin/go"}, - LockPath: "/tmp/install-go.lock", - InstallScript: "install-go.sh", - Type: constants.LangTypeLang, - }, - // WebDriver - { - Name: "Chrome Driver", - ExecutableName: "chromedriver", - ExecutablePaths: []string{"/usr/bin/chromedriver", "/usr/local/bin/chromedriver"}, - LockPath: "/tmp/install-chromedriver.lock", - InstallScript: "install-chromedriver.sh", - Type: constants.LangTypeWebDriver, - }, - { - Name: "Firefox", - ExecutableName: "firefox", - ExecutablePaths: []string{"/usr/bin/firefox", "/usr/local/bin/firefox"}, - LockPath: "/tmp/install-firefox.lock", - InstallScript: "install-firefox.sh", - Type: constants.LangTypeWebDriver, - }, - } - return list -} - -// 获取语言列表 -func GetLangListPlain() []entity.Lang { - list := GetLangList() - return list -} - -// 根据语言名获取语言实例,不包含状态 -func GetLangFromLangNamePlain(name string) entity.Lang { - langList := GetLangListPlain() - for _, lang := range langList { - if lang.ExecutableName == name { - return lang - } - } - return entity.Lang{} -} - -func GetPackageJsonDeps(filepath string) (deps []string, err error) { - data, err := ioutil.ReadFile(filepath) - if err != nil { - log.Errorf("get package.json deps error: " + err.Error()) - debug.PrintStack() - return deps, err - } - var packageJson entity.PackageJson - if err := json.Unmarshal(data, &packageJson); err != nil { - log.Errorf("get package.json deps error: " + err.Error()) - debug.PrintStack() - return deps, err - } - - for d, v := range packageJson.Dependencies { - deps = append(deps, d+"@"+v) - } - - return deps, nil -} - -// 获取系统脚本列表 -func GetSystemScripts() (res []string) { - scriptsPath := viper.GetString("server.scripts") - for _, fInfo := range ListDir(scriptsPath) { - if !fInfo.IsDir() && strings.HasSuffix(fInfo.Name(), ".sh") { - res = append(res, fInfo.Name()) - } - } - return res -} - -func GetSystemScriptPath(scriptName string) string { - scriptsPath := viper.GetString("server.scripts") - for _, name := range GetSystemScripts() { - if name == scriptName { - return path.Join(scriptsPath, name) - } - } - return "" -} diff --git a/backend/utils/time.go b/backend/utils/time.go deleted file mode 100644 index 84b40f4e..00000000 --- a/backend/utils/time.go +++ /dev/null @@ -1,16 +0,0 @@ -package utils - -import "time" - -func GetLocalTime(t time.Time) time.Time { - return t.In(time.Local) -} - -func GetTimeString(t time.Time) string { - return t.Format("2006-01-02 15:04:05") -} - -func GetLocalTimeString(t time.Time) string { - t = GetLocalTime(t) - return GetTimeString(t) -} diff --git a/backend/utils/user.go b/backend/utils/user.go deleted file mode 100644 index 46933f9e..00000000 --- a/backend/utils/user.go +++ /dev/null @@ -1,14 +0,0 @@ -package utils - -import ( - "crypto/md5" - "fmt" - "io" -) - -func EncryptPassword(str string) string { - w := md5.New() - _, _ = io.WriteString(w, str) - md5str := fmt.Sprintf("%x", w.Sum(nil)) - return md5str -} diff --git a/backend/utils/user_test.go b/backend/utils/user_test.go deleted file mode 100644 index 68cf4d65..00000000 --- a/backend/utils/user_test.go +++ /dev/null @@ -1,14 +0,0 @@ -package utils - -import ( - . "github.com/smartystreets/goconvey/convey" - "testing" -) - -func TestEncryptPassword(t *testing.T) { - var passwd = "test" - Convey("Test EncryptPassword", t, func() { - res := EncryptPassword(passwd) - t.Log(res) - }) -}