配置加载demo爬虫

This commit is contained in:
marvzhang
2020-03-11 08:16:52 +08:00
parent 491c0f3b07
commit c95d5cbe36
4 changed files with 120 additions and 109 deletions

View File

@@ -43,6 +43,7 @@ setting:
allowRegister: "N"
enableTutorial: "N"
runOnMaster: "Y"
demoSpiders: "N"
notification:
mail:
server: ''

View File

@@ -7,9 +7,10 @@ import (
)
// SettingBody is the JSON payload of application settings that GetSetting
// sends back to the client. Every field is a string read from viper; the
// sample configuration uses "Y"/"N" flags for these values.
//
// NOTE(review): GetSetting fills EnableDemoSpiders from the key
// "setting.enableDemoSpiders", whereas the config file and InitSpiderService
// use "setting.demoSpiders" — confirm which key is intended, otherwise this
// field is presumably always empty.
type SettingBody struct {
AllowRegister string `json:"allow_register"`
EnableTutorial string `json:"enable_tutorial"`
RunOnMaster string `json:"run_on_master"`
AllowRegister string `json:"allow_register"`
EnableTutorial string `json:"enable_tutorial"`
RunOnMaster string `json:"run_on_master"`
EnableDemoSpiders string `json:"enable_demo_spiders"`
}
func GetVersion(c *gin.Context) {
@@ -24,9 +25,10 @@ func GetVersion(c *gin.Context) {
func GetSetting(c *gin.Context) {
body := SettingBody{
AllowRegister: viper.GetString("setting.allowRegister"),
EnableTutorial: viper.GetString("setting.enableTutorial"),
RunOnMaster: viper.GetString("setting.runOnMaster"),
AllowRegister: viper.GetString("setting.allowRegister"),
EnableTutorial: viper.GetString("setting.enableTutorial"),
RunOnMaster: viper.GetString("setting.runOnMaster"),
EnableDemoSpiders: viper.GetString("setting.enableDemoSpiders"),
}
c.JSON(http.StatusOK, Response{

View File

@@ -412,6 +412,111 @@ func CopySpider(spider model.Spider, newName string) error {
return nil
}
// InitDemoSpiders registers the demo spiders shipped under ./template/spiders.
//
// For every sub-directory of the template directory whose name does not match
// an existing spider (looked up with model.GetSpiderByName) it:
//  1. replaces the directory of the same name under the configured
//     "spider.path" with a fresh copy of the template,
//  2. parses the copied "Spiderfile" (YAML) into entity.ConfigSpiderData,
//  3. adds a model.Spider record and then either uploads the spider to GridFS
//     (customized spiders) or processes its files from the config data
//     (configurable spiders).
//
// A failure in any step is logged and only skips that one spider. Templates
// whose Spiderfile declares any other type are copied but not registered.
// PublishAllSpiders is called once after all templates have been handled.
func InitDemoSpiders() {
	templateSpidersDir := "./template/spiders"
	for _, info := range utils.ListDir(templateSpidersDir) {
		// Only sub-directories are treated as spider templates.
		if !info.IsDir() {
			continue
		}
		spiderName := info.Name()

		// Skip templates whose directory name already matches a stored spider.
		spider := model.GetSpiderByName(spiderName)
		if spider.Name != "" {
			continue
		}

		// Replace any leftover directory with a fresh copy of the template.
		templateSpiderPath := path.Join(templateSpidersDir, spiderName)
		spiderPath := path.Join(viper.GetString("spider.path"), spiderName)
		if utils.Exists(spiderPath) {
			utils.RemoveFiles(spiderPath)
		}
		if err := utils.CopyDir(templateSpiderPath, spiderPath); err != nil {
			// The error text is passed as an argument, never as the format
			// string, so a '%' inside it cannot corrupt the log line.
			log.Errorf("copy error: %s", err.Error())
			debug.PrintStack()
			continue
		}

		// Read and decode the Spiderfile that came with the template.
		configData := entity.ConfigSpiderData{}
		yamlFile, err := ioutil.ReadFile(path.Join(spiderPath, "Spiderfile"))
		if err != nil {
			log.Errorf("read yaml error: %s", err.Error())
			continue
		}
		if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
			log.Errorf("unmarshal error: %s", err.Error())
			debug.PrintStack()
			continue
		}

		if configData.Type == constants.Customized {
			// Customized spider: store the record, then upload it to GridFS.
			spider = model.Spider{
				Id:          bson.NewObjectId(),
				Name:        spiderName,
				DisplayName: configData.DisplayName,
				Type:        constants.Customized,
				Col:         configData.Col,
				Src:         spiderPath,
				Remark:      configData.Remark,
				ProjectId:   bson.ObjectIdHex(constants.ObjectIdNull),
				FileId:      bson.ObjectIdHex(constants.ObjectIdNull),
				Cmd:         configData.Cmd,
			}
			if err := spider.Add(); err != nil {
				log.Errorf("add spider error: %s", err.Error())
				debug.PrintStack()
				continue
			}
			if err := UploadSpiderToGridFsFromMaster(spider); err != nil {
				log.Errorf("upload spider error: %s", err.Error())
				debug.PrintStack()
				continue
			}
		} else if configData.Type == constants.Configurable || configData.Type == "config" {
			// Configurable spider ("config" is accepted as an alternative
			// type string): store the record, then process its files.
			//
			// NOTE(review): the record is named after configData.Name while
			// the existence check above uses the directory name — confirm
			// the two always agree, otherwise the template is presumably
			// re-imported on every call.
			spider = model.Spider{
				Id:          bson.NewObjectId(),
				Name:        configData.Name,
				DisplayName: configData.DisplayName,
				Type:        constants.Configurable,
				Col:         configData.Col,
				Src:         spiderPath,
				Remark:      configData.Remark,
				ProjectId:   bson.ObjectIdHex(constants.ObjectIdNull),
				FileId:      bson.ObjectIdHex(constants.ObjectIdNull),
				Config:      configData,
			}
			if err := spider.Add(); err != nil {
				log.Errorf("add spider error: %s", err.Error())
				debug.PrintStack()
				continue
			}
			if err := ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
				// Distinct message so this failure is not mistaken for a
				// failed spider.Add().
				log.Errorf("process spider files error: %s", err.Error())
				debug.PrintStack()
				continue
			}
		}
	}

	// Publish all spiders once every template has been handled.
	PublishAllSpiders()
}
// 启动爬虫服务
func InitSpiderService() error {
// 构造定时任务执行器
@@ -423,110 +528,12 @@ func InitSpiderService() error {
// 启动定时任务
cPub.Start()
if model.IsMaster() && viper.GetString("setting.demoSpiders") == "Y" {
// 初始化Demo爬虫
InitDemoSpiders()
}
if model.IsMaster() {
// 添加Demo爬虫
templateSpidersDir := "./template/spiders"
for _, info := range utils.ListDir(templateSpidersDir) {
if !info.IsDir() {
continue
}
spiderName := info.Name()
// 如果爬虫在数据库中不存在,则添加
spider := model.GetSpiderByName(spiderName)
if spider.Name != "" {
// 存在同名爬虫,跳过
continue
}
// 拷贝爬虫
templateSpiderPath := path.Join(templateSpidersDir, spiderName)
spiderPath := path.Join(viper.GetString("spider.path"), spiderName)
if utils.Exists(spiderPath) {
utils.RemoveFiles(spiderPath)
}
if err := utils.CopyDir(templateSpiderPath, spiderPath); err != nil {
log.Errorf("copy error: " + err.Error())
debug.PrintStack()
continue
}
// 构造配置数据
configData := entity.ConfigSpiderData{}
// 读取YAML文件
yamlFile, err := ioutil.ReadFile(path.Join(spiderPath, "Spiderfile"))
if err != nil {
log.Errorf("read yaml error: " + err.Error())
//debug.PrintStack()
continue
}
// 反序列化
if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
log.Errorf("unmarshal error: " + err.Error())
debug.PrintStack()
continue
}
if configData.Type == constants.Customized {
// 添加该爬虫到数据库
spider = model.Spider{
Id: bson.NewObjectId(),
Name: spiderName,
DisplayName: configData.DisplayName,
Type: constants.Customized,
Col: configData.Col,
Src: spiderPath,
Remark: configData.Remark,
ProjectId: bson.ObjectIdHex(constants.ObjectIdNull),
FileId: bson.ObjectIdHex(constants.ObjectIdNull),
Cmd: configData.Cmd,
}
if err := spider.Add(); err != nil {
log.Errorf("add spider error: " + err.Error())
debug.PrintStack()
continue
}
// 上传爬虫到GridFS
if err := UploadSpiderToGridFsFromMaster(spider); err != nil {
log.Errorf("upload spider error: " + err.Error())
debug.PrintStack()
continue
}
} else if configData.Type == constants.Configurable || configData.Type == "config" {
// 添加该爬虫到数据库
spider = model.Spider{
Id: bson.NewObjectId(),
Name: configData.Name,
DisplayName: configData.DisplayName,
Type: constants.Configurable,
Col: configData.Col,
Src: spiderPath,
Remark: configData.Remark,
ProjectId: bson.ObjectIdHex(constants.ObjectIdNull),
FileId: bson.ObjectIdHex(constants.ObjectIdNull),
Config: configData,
}
if err := spider.Add(); err != nil {
log.Errorf("add spider error: " + err.Error())
debug.PrintStack()
continue
}
// 根据序列化后的数据处理爬虫文件
if err := ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
log.Errorf("add spider error: " + err.Error())
debug.PrintStack()
continue
}
}
}
// 发布所有爬虫
PublishAllSpiders()
// 构造 Git 定时任务
GitCron = &GitCronScheduler{
cron: cron.New(cron.WithSeconds()),

View File

@@ -29,6 +29,7 @@ services:
# CRAWLAB_SETTING_ALLOWREGISTER: "N" # whether to allow user registration 是否允许用户注册
# CRAWLAB_SETTING_ENABLETUTORIAL: "N" # whether to enable tutorial 是否启用教程
# CRAWLAB_SETTING_RUNONMASTER: "N" # whether to run on master node 是否在主节点上运行任务
# CRAWLAB_SETTING_DEMOSPIDERS: "Y" # whether to init demo spiders 是否使用Demo爬虫
# CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.example.com # SMTP server address SMTP 服务器地址
# CRAWLAB_NOTIFICATION_MAIL_PORT: 465 # SMTP server port SMTP 服务器端口
# CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@example.com # sender email 发送者邮箱