添加demo爬虫

This commit is contained in:
marvzhang
2020-02-02 22:56:11 +08:00
parent 19b2dc1086
commit 98e5ead285
10 changed files with 130 additions and 61 deletions

View File

@@ -14,7 +14,10 @@ import (
"github.com/globalsign/mgo/bson"
"github.com/satori/go.uuid"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
"io/ioutil"
"os"
"path"
"path/filepath"
"runtime/debug"
)
@@ -264,5 +267,80 @@ func InitSpiderService() error {
// 启动定时任务
c.Start()
if model.IsMaster() {
// 添加Demo爬虫
templateSpidersDir := "../spiders"
for _, info := range utils.ListDir(templateSpidersDir) {
if !info.IsDir() {
continue
}
spiderName := info.Name()
// 如果爬虫在数据库中不存在,则添加
spider := model.GetSpiderByName(spiderName)
if spider.Name != "" {
// 存在同名爬虫,跳过
continue
}
// 拷贝爬虫
templateSpiderPath := path.Join(templateSpidersDir, spiderName)
spiderPath := path.Join(viper.GetString("spider.path"), spiderName)
if utils.Exists(spiderPath) {
utils.RemoveFiles(spiderPath)
}
if err := utils.CopyDir(templateSpiderPath, spiderPath); err != nil {
log.Errorf("copy error: " + err.Error())
debug.PrintStack()
continue
}
// 构造配置数据
configData := entity.ConfigSpiderData{}
// 读取YAML文件
yamlFile, err := ioutil.ReadFile(path.Join(spiderPath, "Spiderfile"))
if err != nil {
log.Errorf("read yaml error: " + err.Error())
//debug.PrintStack()
continue
}
// 反序列化
if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
log.Errorf("unmarshal error: " + err.Error())
debug.PrintStack()
continue
}
// 添加该爬虫到数据库
spider = model.Spider{
Id: bson.NewObjectId(),
Name: configData.Name,
DisplayName: configData.DisplayName,
Type: constants.Customized,
Col: configData.Col,
Cmd: configData.Cmd,
Src: spiderPath,
Remark: configData.Remark,
ProjectId: bson.ObjectIdHex(constants.ObjectIdNull),
FileId: bson.ObjectIdHex(constants.ObjectIdNull),
}
if err := spider.Add(); err != nil {
log.Errorf("add spider error: " + err.Error())
debug.PrintStack()
continue
}
// 上传爬虫到GridFS
if err := UploadSpiderToGridFsFromMaster(spider); err != nil {
log.Errorf("upload spider error: " + err.Error())
debug.PrintStack()
continue
}
}
}
return nil
}