From c95d5cbe3629583fb03f622b62cf31d10fce25c5 Mon Sep 17 00:00:00 2001
From: marvzhang
Date: Wed, 11 Mar 2020 08:16:52 +0800
Subject: [PATCH] =?UTF-8?q?=E9=85=8D=E7=BD=AE=E5=8A=A0=E8=BD=BDdemo?=
 =?UTF-8?q?=E7=88=AC=E8=99=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/conf/config.yml    |   1 +
 backend/routes/setting.go  |  14 +--
 backend/services/spider.go | 213 +++++++++++++++++++------------------
 docker-compose.yml         |   1 +
 4 files changed, 120 insertions(+), 109 deletions(-)

diff --git a/backend/conf/config.yml b/backend/conf/config.yml
index fa925b62..bbd9d16b 100644
--- a/backend/conf/config.yml
+++ b/backend/conf/config.yml
@@ -43,6 +43,7 @@ setting:
   allowRegister: "N"
   enableTutorial: "N"
   runOnMaster: "Y"
+  demoSpiders: "N"
 notification:
   mail:
     server: ''
diff --git a/backend/routes/setting.go b/backend/routes/setting.go
index 66ee7128..36bc46ca 100644
--- a/backend/routes/setting.go
+++ b/backend/routes/setting.go
@@ -7,9 +7,10 @@ import (
 )
 
 type SettingBody struct {
-	AllowRegister  string `json:"allow_register"`
-	EnableTutorial string `json:"enable_tutorial"`
-	RunOnMaster    string `json:"run_on_master"`
+	AllowRegister     string `json:"allow_register"`
+	EnableTutorial    string `json:"enable_tutorial"`
+	RunOnMaster       string `json:"run_on_master"`
+	EnableDemoSpiders string `json:"enable_demo_spiders"`
 }
 
 func GetVersion(c *gin.Context) {
@@ -24,9 +25,10 @@ func GetVersion(c *gin.Context) {
 
 func GetSetting(c *gin.Context) {
 	body := SettingBody{
-		AllowRegister:  viper.GetString("setting.allowRegister"),
-		EnableTutorial: viper.GetString("setting.enableTutorial"),
-		RunOnMaster:    viper.GetString("setting.runOnMaster"),
+		AllowRegister:     viper.GetString("setting.allowRegister"),
+		EnableTutorial:    viper.GetString("setting.enableTutorial"),
+		RunOnMaster:       viper.GetString("setting.runOnMaster"),
+		EnableDemoSpiders: viper.GetString("setting.demoSpiders"), // key as declared in conf/config.yml
 	}
 
 	c.JSON(http.StatusOK, Response{
diff --git
a/backend/services/spider.go b/backend/services/spider.go
index 392b0488..77bb19ab 100644
--- a/backend/services/spider.go
+++ b/backend/services/spider.go
@@ -412,6 +412,111 @@ func CopySpider(spider model.Spider, newName string) error {
 	return nil
 }
 
+// InitDemoSpiders registers the spiders shipped under ./template/spiders.
+// Each template directory whose name is not yet in the database is copied to
+// the configured spider path and added according to its Spiderfile; failures
+// are logged and only skip that template. All spiders are published at the end.
+func InitDemoSpiders() {
+	templateSpidersDir := "./template/spiders"
+	for _, info := range utils.ListDir(templateSpidersDir) {
+		if !info.IsDir() {
+			continue
+		}
+		spiderName := info.Name()
+
+		// Skip templates whose name is already registered in the database.
+		spider := model.GetSpiderByName(spiderName)
+		if spider.Name != "" {
+			continue
+		}
+
+		// Copy the template into the spider path, removing any existing copy first.
+		templateSpiderPath := path.Join(templateSpidersDir, spiderName)
+		spiderPath := path.Join(viper.GetString("spider.path"), spiderName)
+		if utils.Exists(spiderPath) {
+			utils.RemoveFiles(spiderPath)
+		}
+		if err := utils.CopyDir(templateSpiderPath, spiderPath); err != nil {
+			log.Errorf("copy error: %s", err.Error())
+			debug.PrintStack()
+			continue
+		}
+
+		// Read and parse the template's Spiderfile (YAML).
+		configData := entity.ConfigSpiderData{}
+		yamlFile, err := ioutil.ReadFile(path.Join(spiderPath, "Spiderfile"))
+		if err != nil {
+			// Read failures are logged without a stack trace.
+			log.Errorf("read yaml error: %s", err.Error())
+			continue
+		}
+		if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
+			log.Errorf("unmarshal error: %s", err.Error())
+			debug.PrintStack()
+			continue
+		}
+
+		if configData.Type == constants.Customized {
+			// Customized spider: add it to the database under the directory name.
+			spider = model.Spider{
+				Id:          bson.NewObjectId(),
+				Name:        spiderName,
+				DisplayName: configData.DisplayName,
+				Type:        constants.Customized,
+				Col:         configData.Col,
+				Src:         spiderPath,
+				Remark:      configData.Remark,
+				ProjectId:   bson.ObjectIdHex(constants.ObjectIdNull),
+				FileId:      bson.ObjectIdHex(constants.ObjectIdNull),
+				Cmd:         configData.Cmd,
+			}
+			if err := spider.Add(); err != nil {
+				log.Errorf("add spider error: %s", err.Error())
+				debug.PrintStack()
+				continue
+			}
+
+			// Upload the spider files to GridFS.
+			if err := UploadSpiderToGridFsFromMaster(spider); err != nil {
+				log.Errorf("upload spider error: %s", err.Error())
+				debug.PrintStack()
+				continue
+			}
+		} else if configData.Type == constants.Configurable || configData.Type == "config" {
+			// Configurable spider: add it under the name given in the Spiderfile.
+			// NOTE(review): the existence check above uses the directory name —
+			// confirm both names always match, otherwise this re-adds on restart.
+			spider = model.Spider{
+				Id:          bson.NewObjectId(),
+				Name:        configData.Name,
+				DisplayName: configData.DisplayName,
+				Type:        constants.Configurable,
+				Col:         configData.Col,
+				Src:         spiderPath,
+				Remark:      configData.Remark,
+				ProjectId:   bson.ObjectIdHex(constants.ObjectIdNull),
+				FileId:      bson.ObjectIdHex(constants.ObjectIdNull),
+				Config:      configData,
+			}
+			if err := spider.Add(); err != nil {
+				log.Errorf("add spider error: %s", err.Error())
+				debug.PrintStack()
+				continue
+			}
+
+			// Process the spider files according to the parsed config data.
+			if err := ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
+				log.Errorf("process spider files error: %s", err.Error())
+				debug.PrintStack()
+				continue
+			}
+		}
+	}
+
+	// Publish all spiders.
+	PublishAllSpiders()
+}
+
 // 启动爬虫服务
 func InitSpiderService() error {
 	// 构造定时任务执行器
@@ -423,110 +528,12 @@ func InitSpiderService() error {
 	// 启动定时任务
 	cPub.Start()
 
+	if model.IsMaster() && viper.GetString("setting.demoSpiders") == "Y" {
+		// Initialize the demo spiders.
+		InitDemoSpiders()
+	}
+
 	if model.IsMaster() {
-		// 添加Demo爬虫
-		templateSpidersDir := "./template/spiders"
-		for _, info := range utils.ListDir(templateSpidersDir) {
-			if !info.IsDir() {
-				continue
-			}
-			spiderName := info.Name()
-
-			// 如果爬虫在数据库中不存在,则添加
-			spider := model.GetSpiderByName(spiderName)
-			if spider.Name != "" {
-				// 存在同名爬虫,跳过
-				continue
-			}
-
-			// 拷贝爬虫
-			templateSpiderPath := path.Join(templateSpidersDir, spiderName)
-			spiderPath := path.Join(viper.GetString("spider.path"), spiderName)
-			if utils.Exists(spiderPath) {
-				utils.RemoveFiles(spiderPath)
-			}
-			if err := utils.CopyDir(templateSpiderPath, spiderPath); err != nil {
-				log.Errorf("copy error: " + err.Error())
-				debug.PrintStack()
-				continue
-			}
-
-			// 构造配置数据
-			configData := entity.ConfigSpiderData{}
-
-			// 读取YAML文件
-			yamlFile, err := ioutil.ReadFile(path.Join(spiderPath, "Spiderfile"))
-			if err != nil {
-
log.Errorf("read yaml error: " + err.Error()) - //debug.PrintStack() - continue - } - - // 反序列化 - if err := yaml.Unmarshal(yamlFile, &configData); err != nil { - log.Errorf("unmarshal error: " + err.Error()) - debug.PrintStack() - continue - } - - if configData.Type == constants.Customized { - // 添加该爬虫到数据库 - spider = model.Spider{ - Id: bson.NewObjectId(), - Name: spiderName, - DisplayName: configData.DisplayName, - Type: constants.Customized, - Col: configData.Col, - Src: spiderPath, - Remark: configData.Remark, - ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), - FileId: bson.ObjectIdHex(constants.ObjectIdNull), - Cmd: configData.Cmd, - } - if err := spider.Add(); err != nil { - log.Errorf("add spider error: " + err.Error()) - debug.PrintStack() - continue - } - - // 上传爬虫到GridFS - if err := UploadSpiderToGridFsFromMaster(spider); err != nil { - log.Errorf("upload spider error: " + err.Error()) - debug.PrintStack() - continue - } - } else if configData.Type == constants.Configurable || configData.Type == "config" { - // 添加该爬虫到数据库 - spider = model.Spider{ - Id: bson.NewObjectId(), - Name: configData.Name, - DisplayName: configData.DisplayName, - Type: constants.Configurable, - Col: configData.Col, - Src: spiderPath, - Remark: configData.Remark, - ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), - FileId: bson.ObjectIdHex(constants.ObjectIdNull), - Config: configData, - } - if err := spider.Add(); err != nil { - log.Errorf("add spider error: " + err.Error()) - debug.PrintStack() - continue - } - - // 根据序列化后的数据处理爬虫文件 - if err := ProcessSpiderFilesFromConfigData(spider, configData); err != nil { - log.Errorf("add spider error: " + err.Error()) - debug.PrintStack() - continue - } - } - } - - // 发布所有爬虫 - PublishAllSpiders() - // 构造 Git 定时任务 GitCron = &GitCronScheduler{ cron: cron.New(cron.WithSeconds()), diff --git a/docker-compose.yml b/docker-compose.yml index e937c9f5..e9cb08cf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,7 @@ 
services: # CRAWLAB_SETTING_ALLOWREGISTER: "N" # whether to allow user registration 是否允许用户注册 # CRAWLAB_SETTING_ENABLETUTORIAL: "N" # whether to enable tutorial 是否启用教程 # CRAWLAB_SETTING_RUNONMASTER: "N" # whether to run on master node 是否在主节点上运行任务 + # CRAWLAB_SETTING_DEMOSPIDERS: "Y" # whether to init demo spiders 是否使用Demo爬虫 # CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.exmaple.com # STMP server address STMP 服务器地址 # CRAWLAB_NOTIFICATION_MAIL_PORT: 465 # STMP server port STMP 服务器端口 # CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@exmaple.com # sender email 发送者邮箱