mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-27 17:50:53 +01:00
添加demo爬虫
This commit is contained in:
@@ -1,12 +1,22 @@
|
||||
package entity
|
||||
|
||||
type ConfigSpiderData struct {
|
||||
// 通用
|
||||
Name string `yaml:"name" json:"name"`
|
||||
DisplayName string `yaml:"display_name" json:"display_name"`
|
||||
Col string `yaml:"col" json:"col"`
|
||||
Remark string `yaml:"remark" json:"remark"`
|
||||
|
||||
// 可配置爬虫
|
||||
Version string `yaml:"version" json:"version"`
|
||||
Engine string `yaml:"engine" json:"engine"`
|
||||
StartUrl string `yaml:"start_url" json:"start_url"`
|
||||
StartStage string `yaml:"start_stage" json:"start_stage"`
|
||||
Stages []Stage `yaml:"stages" json:"stages"`
|
||||
Settings map[string]string `yaml:"settings" json:"settings"`
|
||||
|
||||
// 自定义爬虫
|
||||
Cmd string `yaml:"cmd" json:"cmd"`
|
||||
}
|
||||
|
||||
type Stage struct {
|
||||
|
||||
@@ -39,7 +39,6 @@ func main() {
|
||||
log.SetLevelFromString(logLevel)
|
||||
}
|
||||
log.Info("initialized log config successfully")
|
||||
|
||||
if viper.GetString("log.isDeletePeriodically") == "Y" {
|
||||
err := services.InitDeleteLogPeriodically()
|
||||
if err != nil {
|
||||
@@ -74,8 +73,24 @@ func main() {
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized schedule successfully")
|
||||
|
||||
// 初始化用户服务
|
||||
if err := services.InitUserService(); err != nil {
|
||||
log.Error("init user service error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized user service successfully")
|
||||
|
||||
// 初始化依赖服务
|
||||
if err := services.InitDepsFetcher(); err != nil {
|
||||
log.Error("init dependency fetcher error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized dependency fetcher successfully")
|
||||
}
|
||||
log.Info("initialized schedule successfully")
|
||||
|
||||
// 初始化任务执行器
|
||||
if err := services.InitTaskExecutor(); err != nil {
|
||||
@@ -100,22 +115,6 @@ func main() {
|
||||
}
|
||||
log.Info("initialized spider service successfully")
|
||||
|
||||
// 初始化用户服务
|
||||
if err := services.InitUserService(); err != nil {
|
||||
log.Error("init user service error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized user service successfully")
|
||||
|
||||
// 初始化依赖服务
|
||||
if err := services.InitDepsFetcher(); err != nil {
|
||||
log.Error("init dependency fetcher error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized dependency fetcher successfully")
|
||||
|
||||
// 初始化RPC服务
|
||||
if err := services.InitRpcService(); err != nil {
|
||||
log.Error("init rpc service error:" + err.Error())
|
||||
|
||||
@@ -168,7 +168,7 @@ func GetSpiderByName(name string) Spider {
|
||||
defer s.Close()
|
||||
|
||||
var result Spider
|
||||
if err := c.Find(bson.M{"name": name}).One(&result); err != nil {
|
||||
if err := c.Find(bson.M{"name": name}).One(&result); err != nil && err != mgo.ErrNotFound {
|
||||
log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name)
|
||||
//debug.PrintStack()
|
||||
return result
|
||||
|
||||
@@ -123,6 +123,24 @@ func DeleteProject(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// 获取相关的爬虫
|
||||
var spiders []model.Spider
|
||||
s, col := database.GetCol("spiders")
|
||||
defer s.Close()
|
||||
if err := col.Find(bson.M{"project_id": bson.ObjectIdHex(id)}).All(&spiders); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 将爬虫的项目ID置空
|
||||
for _, spider := range spiders {
|
||||
spider.ProjectId = bson.ObjectIdHex(constants.ObjectIdNull)
|
||||
if err := spider.Save(); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
|
||||
@@ -14,7 +14,10 @@ import (
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"github.com/satori/go.uuid"
|
||||
"github.com/spf13/viper"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
)
|
||||
@@ -264,5 +267,80 @@ func InitSpiderService() error {
|
||||
// 启动定时任务
|
||||
c.Start()
|
||||
|
||||
if model.IsMaster() {
|
||||
// 添加Demo爬虫
|
||||
templateSpidersDir := "../spiders"
|
||||
for _, info := range utils.ListDir(templateSpidersDir) {
|
||||
if !info.IsDir() {
|
||||
continue
|
||||
}
|
||||
spiderName := info.Name()
|
||||
|
||||
// 如果爬虫在数据库中不存在,则添加
|
||||
spider := model.GetSpiderByName(spiderName)
|
||||
if spider.Name != "" {
|
||||
// 存在同名爬虫,跳过
|
||||
continue
|
||||
}
|
||||
|
||||
// 拷贝爬虫
|
||||
templateSpiderPath := path.Join(templateSpidersDir, spiderName)
|
||||
spiderPath := path.Join(viper.GetString("spider.path"), spiderName)
|
||||
if utils.Exists(spiderPath) {
|
||||
utils.RemoveFiles(spiderPath)
|
||||
}
|
||||
if err := utils.CopyDir(templateSpiderPath, spiderPath); err != nil {
|
||||
log.Errorf("copy error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
// 构造配置数据
|
||||
configData := entity.ConfigSpiderData{}
|
||||
|
||||
// 读取YAML文件
|
||||
yamlFile, err := ioutil.ReadFile(path.Join(spiderPath, "Spiderfile"))
|
||||
if err != nil {
|
||||
log.Errorf("read yaml error: " + err.Error())
|
||||
//debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
// 反序列化
|
||||
if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
|
||||
log.Errorf("unmarshal error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
// 添加该爬虫到数据库
|
||||
spider = model.Spider{
|
||||
Id: bson.NewObjectId(),
|
||||
Name: configData.Name,
|
||||
DisplayName: configData.DisplayName,
|
||||
Type: constants.Customized,
|
||||
Col: configData.Col,
|
||||
Cmd: configData.Cmd,
|
||||
Src: spiderPath,
|
||||
Remark: configData.Remark,
|
||||
ProjectId: bson.ObjectIdHex(constants.ObjectIdNull),
|
||||
FileId: bson.ObjectIdHex(constants.ObjectIdNull),
|
||||
}
|
||||
if err := spider.Add(); err != nil {
|
||||
log.Errorf("add spider error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
// 上传爬虫到GridFS
|
||||
if err := UploadSpiderToGridFsFromMaster(spider); err != nil {
|
||||
log.Errorf("upload spider error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user