mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
Merge remote-tracking branch 'upstream/develop' into upstream-develop
This commit is contained in:
@@ -15,7 +15,7 @@ redis:
|
||||
log:
|
||||
level: info
|
||||
path: "/var/logs/crawlab"
|
||||
isDeletePeriodically: "Y"
|
||||
isDeletePeriodically: "N"
|
||||
deleteFrequency: "@hourly"
|
||||
server:
|
||||
host: 0.0.0.0
|
||||
|
||||
@@ -3,15 +3,15 @@ package entity
|
||||
import "strconv"
|
||||
|
||||
type Page struct {
|
||||
Skip int
|
||||
Limit int
|
||||
PageNum int
|
||||
Skip int
|
||||
Limit int
|
||||
PageNum int
|
||||
PageSize int
|
||||
}
|
||||
|
||||
func (p *Page)GetPage(pageNum string, pageSize string) {
|
||||
func (p *Page) GetPage(pageNum string, pageSize string) {
|
||||
p.PageNum, _ = strconv.Atoi(pageNum)
|
||||
p.PageSize, _ = strconv.Atoi(pageSize)
|
||||
p.Skip = p.PageSize * (p.PageNum - 1)
|
||||
p.Limit = p.PageSize
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,25 +1,30 @@
|
||||
package entity
|
||||
|
||||
type ConfigSpiderData struct {
|
||||
Version string `yaml:"version" json:"version"`
|
||||
Engine string `yaml:"engine" json:"engine"`
|
||||
StartUrl string `yaml:"start_url" json:"start_url"`
|
||||
StartStage string `yaml:"start_stage" json:"start_stage"`
|
||||
Stages map[string]Stage `yaml:"stages" json:"stages"`
|
||||
Settings map[string]string `yaml:"settings" json:"settings"`
|
||||
}
|
||||
|
||||
type Stage struct {
|
||||
Name string `yaml:"name" json:"name"`
|
||||
IsList bool `yaml:"is_list" json:"is_list"`
|
||||
ListCss string `yaml:"list_css" json:"list_css"`
|
||||
ListXpath string `yaml:"list_xpath" json:"list_xpath"`
|
||||
PageCss string `yaml:"page_css" json:"page_css"`
|
||||
PageXpath string `yaml:"page_xpath" json:"page_xpath"`
|
||||
PageAttr string `yaml:"page_attr" json:"page_attr"`
|
||||
Fields []Field `yaml:"fields" json:"fields"`
|
||||
}
|
||||
|
||||
type Field struct {
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Css string `yaml:"css" json:"css"`
|
||||
Xpath string `yaml:"xpath" json:"xpath"`
|
||||
Attr string `yaml:"attr" json:"attr"`
|
||||
NextStage string `yaml:"next_stage" json:"next_stage"`
|
||||
}
|
||||
|
||||
type Stage struct {
|
||||
IsList bool `yaml:"is_list" json:"is_list"`
|
||||
ListCss string `yaml:"list_css" json:"list_css"`
|
||||
PageCss string `yaml:"page_css" json:"page_css"`
|
||||
PageAttr string `yaml:"page_attr" json:"page_attr"`
|
||||
Fields []Field `yaml:"fields" json:"fields"`
|
||||
}
|
||||
|
||||
type ConfigSpiderData struct {
|
||||
Version string `yaml:"version" json:"version"`
|
||||
Engine string `yaml:"engine" json:"engine"`
|
||||
StartUrl string `yaml:"start_url" json:"start_url"`
|
||||
StartStage string `yaml:"start_stage" json:"start_stage"`
|
||||
Stages map[string]Stage `yaml:"stages" json:"stages"`
|
||||
Remark string `yaml:"remark" json:"remark"`
|
||||
}
|
||||
|
||||
@@ -47,6 +47,8 @@ func main() {
|
||||
panic(err)
|
||||
}
|
||||
log.Info("初始化定期清理日志配置成功")
|
||||
}else {
|
||||
log.Info("默认未开启定期清理日志配置")
|
||||
}
|
||||
|
||||
// 初始化Mongodb数据库
|
||||
@@ -140,9 +142,13 @@ func main() {
|
||||
authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据
|
||||
authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型
|
||||
// 可配置爬虫
|
||||
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
|
||||
authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
|
||||
authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
|
||||
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id/spiderfile", routes.PostConfigSpiderSpiderfile) // 上传可配置爬虫
|
||||
authGroup.GET("/config_spiders_templates", routes.GetConfigSpiderTemplateList) // 获取可配置爬虫模版列表
|
||||
// 任务
|
||||
authGroup.GET("/tasks", routes.GetTaskList) // 任务列表
|
||||
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情
|
||||
|
||||
@@ -42,12 +42,12 @@ func init() {
|
||||
app.DELETE("/tasks/:id", DeleteTask) // 删除任务
|
||||
app.GET("/tasks/:id/results", GetTaskResults) // 任务结果
|
||||
app.GET("/tasks/:id/results/download", DownloadTaskResultsCsv) // 下载任务结果
|
||||
app.GET("/spiders", GetSpiderList) // 爬虫列表
|
||||
app.GET("/spiders/:id", GetSpider) // 爬虫详情
|
||||
app.POST("/spiders/:id", PostSpider) // 修改爬虫
|
||||
app.DELETE("/spiders/:id",DeleteSpider) // 删除爬虫
|
||||
app.GET("/spiders/:id/tasks",GetSpiderTasks) // 爬虫任务列表
|
||||
app.GET("/spiders/:id/dir",GetSpiderDir) // 爬虫目录
|
||||
app.GET("/spiders", GetSpiderList) // 爬虫列表
|
||||
app.GET("/spiders/:id", GetSpider) // 爬虫详情
|
||||
app.POST("/spiders/:id", PostSpider) // 修改爬虫
|
||||
app.DELETE("/spiders/:id", DeleteSpider) // 删除爬虫
|
||||
app.GET("/spiders/:id/tasks", GetSpiderTasks) // 爬虫任务列表
|
||||
app.GET("/spiders/:id/dir", GetSpiderDir) // 爬虫目录
|
||||
}
|
||||
|
||||
//mock test, test data in ./mock
|
||||
|
||||
@@ -6,8 +6,6 @@ import (
|
||||
"net/http"
|
||||
)
|
||||
|
||||
|
||||
|
||||
var taskDailyItems = []model.TaskDailyItem{
|
||||
{
|
||||
Date: "2019/08/19",
|
||||
|
||||
@@ -1 +1 @@
|
||||
package mock
|
||||
package mock
|
||||
|
||||
@@ -1 +1 @@
|
||||
package mock
|
||||
package mock
|
||||
|
||||
@@ -131,12 +131,7 @@ func (g ScrapyGenerator) GetNonListParserString(stageName string, stage entity.S
|
||||
|
||||
// 遍历字段列表
|
||||
for _, f := range stage.Fields {
|
||||
line := ""
|
||||
if f.Attr == "" {
|
||||
line += fmt.Sprintf(`item['%s'] = response.css('%s::text').extract_first()`, f.Name, f.Css)
|
||||
} else {
|
||||
line += fmt.Sprintf(`item['%s'] = response.css('%s::attr("%s")').extract_first()`, f.Name, f.Css, f.Attr)
|
||||
}
|
||||
line := fmt.Sprintf(`item['%s'] = response.%s.extract_first()`, f.Name, g.GetExtractStringFromField(f))
|
||||
line = g.PadCode(line, 2)
|
||||
str += line
|
||||
}
|
||||
@@ -163,19 +158,14 @@ func (g ScrapyGenerator) GetListParserString(stageName string, stage entity.Stag
|
||||
str += g.PadCode(`prev_item = response.meta.get('item')`, 2)
|
||||
|
||||
// for 循环遍历列表
|
||||
str += g.PadCode(fmt.Sprintf(`for elem in response.css('%s'):`, stage.ListCss), 2)
|
||||
str += g.PadCode(fmt.Sprintf(`for elem in response.%s:`, g.GetListString(stage)), 2)
|
||||
|
||||
// 构造item
|
||||
str += g.PadCode(`item = Item()`, 3)
|
||||
|
||||
// 遍历字段列表
|
||||
for _, f := range stage.Fields {
|
||||
line := ""
|
||||
if f.Attr == "" {
|
||||
line += fmt.Sprintf(`item['%s'] = elem.css('%s::text').extract_first()`, f.Name, f.Css)
|
||||
} else {
|
||||
line += fmt.Sprintf(`item['%s'] = elem.css('%s::attr("%s")').extract_first()`, f.Name, f.Css, f.Attr)
|
||||
}
|
||||
line := fmt.Sprintf(`item['%s'] = elem.%s.extract_first()`, f.Name, g.GetExtractStringFromField(f))
|
||||
line = g.PadCode(line, 3)
|
||||
str += line
|
||||
}
|
||||
@@ -195,15 +185,9 @@ func (g ScrapyGenerator) GetListParserString(stageName string, stage entity.Stag
|
||||
}
|
||||
|
||||
// 分页
|
||||
if stage.PageCss != "" {
|
||||
// 分页元素属性,默认为 href
|
||||
pageAttr := "href"
|
||||
if stage.PageAttr != "" {
|
||||
pageAttr = stage.PageAttr
|
||||
}
|
||||
|
||||
str += g.PadCode(fmt.Sprintf(`next_url = response.css('%s::attr("%s")').extract_first()`, stage.PageCss, pageAttr), 2)
|
||||
str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_%s, meta={'item': item})`, stageName), 2)
|
||||
if stage.PageCss != "" || stage.PageXpath != "" {
|
||||
str += g.PadCode(fmt.Sprintf(`next_url = response.%s.extract_first()`, g.GetExtractStringFromStage(stage)), 2)
|
||||
str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_%s, meta={'item': prev_item})`, stageName), 2)
|
||||
}
|
||||
|
||||
// 加入末尾换行
|
||||
@@ -226,3 +210,49 @@ func (g ScrapyGenerator) GetNextStageField(stage entity.Stage) (entity.Field, er
|
||||
}
|
||||
return entity.Field{}, errors.New("cannot find next stage field")
|
||||
}
|
||||
|
||||
func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string {
|
||||
if f.Css != "" {
|
||||
// 如果为CSS
|
||||
if f.Attr == "" {
|
||||
// 文本
|
||||
return fmt.Sprintf(`css('%s::text')`, f.Css)
|
||||
} else {
|
||||
// 属性
|
||||
return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr)
|
||||
}
|
||||
} else {
|
||||
// 如果为XPath
|
||||
if f.Attr == "" {
|
||||
// 文本
|
||||
return fmt.Sprintf(`xpath('string(%s)')`, f.Xpath)
|
||||
} else {
|
||||
// 属性
|
||||
return fmt.Sprintf(`xpath('%s/@%s')`, f.Xpath, f.Attr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string {
|
||||
// 分页元素属性,默认为 href
|
||||
pageAttr := "href"
|
||||
if stage.PageAttr != "" {
|
||||
pageAttr = stage.PageAttr
|
||||
}
|
||||
|
||||
if stage.PageCss != "" {
|
||||
// 如果为CSS
|
||||
return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr)
|
||||
} else {
|
||||
// 如果为XPath
|
||||
return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr)
|
||||
}
|
||||
}
|
||||
|
||||
func (g ScrapyGenerator) GetListString(stage entity.Stage) string {
|
||||
if stage.ListCss != "" {
|
||||
return fmt.Sprintf(`css('%s')`, stage.ListCss)
|
||||
} else {
|
||||
return fmt.Sprintf(`xpath('%s')`, stage.ListXpath)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"crawlab/constants"
|
||||
"crawlab/database"
|
||||
"crawlab/entity"
|
||||
"crawlab/utils"
|
||||
"errors"
|
||||
"github.com/apex/log"
|
||||
"github.com/globalsign/mgo"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"time"
|
||||
)
|
||||
@@ -25,14 +31,18 @@ type Spider struct {
|
||||
Site string `json:"site" bson:"site"` // 爬虫网站
|
||||
Envs []Env `json:"envs" bson:"envs"` // 环境变量
|
||||
Remark string `json:"remark" bson:"remark"` // 备注
|
||||
Src string `json:"src" bson:"src"` // 源码位置
|
||||
|
||||
// 自定义爬虫
|
||||
Src string `json:"src" bson:"src"` // 源码位置
|
||||
Cmd string `json:"cmd" bson:"cmd"` // 执行命令
|
||||
|
||||
// 可配置爬虫
|
||||
Template string `json:"template" bson:"template"` // Spiderfile模版
|
||||
|
||||
// 前端展示
|
||||
LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间
|
||||
LastStatus string `json:"last_status"` // 最后执行状态
|
||||
LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间
|
||||
LastStatus string `json:"last_status"` // 最后执行状态
|
||||
Config entity.ConfigSpiderData `json:"config"` // 可配置爬虫配置
|
||||
|
||||
// 时间
|
||||
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
|
||||
@@ -108,6 +118,10 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, erro
|
||||
return spiders, 0, err
|
||||
}
|
||||
|
||||
if spiders == nil {
|
||||
spiders = []Spider{}
|
||||
}
|
||||
|
||||
// 遍历爬虫列表
|
||||
for i, spider := range spiders {
|
||||
// 获取最后一次任务
|
||||
@@ -161,15 +175,25 @@ func GetSpider(id bson.ObjectId) (Spider, error) {
|
||||
s, c := database.GetCol("spiders")
|
||||
defer s.Close()
|
||||
|
||||
var result Spider
|
||||
if err := c.FindId(id).One(&result); err != nil {
|
||||
// 获取爬虫
|
||||
var spider Spider
|
||||
if err := c.FindId(id).One(&spider); err != nil {
|
||||
if err != mgo.ErrNotFound {
|
||||
log.Errorf("get spider error: %s, id: %id", err.Error(), id.Hex())
|
||||
debug.PrintStack()
|
||||
}
|
||||
return result, err
|
||||
return spider, err
|
||||
}
|
||||
return result, nil
|
||||
|
||||
// 如果为可配置爬虫,获取爬虫配置
|
||||
if spider.Type == constants.Configurable && utils.Exists(filepath.Join(spider.Src, "Spiderfile")) {
|
||||
config, err := GetConfigSpiderData(spider)
|
||||
if err != nil {
|
||||
return spider, err
|
||||
}
|
||||
spider.Config = config
|
||||
}
|
||||
return spider, nil
|
||||
}
|
||||
|
||||
// 更新爬虫
|
||||
@@ -209,10 +233,12 @@ func RemoveSpider(id bson.ObjectId) error {
|
||||
s, gf := database.GetGridFs("files")
|
||||
defer s.Close()
|
||||
|
||||
if err := gf.RemoveId(result.FileId); err != nil {
|
||||
log.Error("remove file error, id:" + result.FileId.Hex())
|
||||
debug.PrintStack()
|
||||
return err
|
||||
if result.FileId.Hex() != constants.ObjectIdNull {
|
||||
if err := gf.RemoveId(result.FileId); err != nil {
|
||||
log.Error("remove file error, id:" + result.FileId.Hex())
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -269,3 +295,35 @@ func GetSpiderTypes() ([]*entity.SpiderType, error) {
|
||||
|
||||
return types, nil
|
||||
}
|
||||
|
||||
func GetConfigSpiderData(spider Spider) (entity.ConfigSpiderData, error) {
|
||||
// 构造配置数据
|
||||
configData := entity.ConfigSpiderData{}
|
||||
|
||||
// 校验爬虫类别
|
||||
if spider.Type != constants.Configurable {
|
||||
return configData, errors.New("not a configurable spider")
|
||||
}
|
||||
|
||||
// Spiderfile 目录
|
||||
sfPath := filepath.Join(spider.Src, "Spiderfile")
|
||||
|
||||
// 读取YAML文件
|
||||
yamlFile, err := ioutil.ReadFile(sfPath)
|
||||
if err != nil {
|
||||
return configData, err
|
||||
}
|
||||
|
||||
// 反序列化
|
||||
if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
|
||||
return configData, err
|
||||
}
|
||||
|
||||
// 赋值 stage_name
|
||||
for stageName, stage := range configData.Stages {
|
||||
stage.Name = stageName
|
||||
configData.Stages[stageName] = stage
|
||||
}
|
||||
|
||||
return configData, nil
|
||||
}
|
||||
|
||||
@@ -2,16 +2,13 @@ package routes
|
||||
|
||||
import (
|
||||
"crawlab/constants"
|
||||
"crawlab/database"
|
||||
"crawlab/entity"
|
||||
"crawlab/model"
|
||||
"crawlab/services"
|
||||
"crawlab/utils"
|
||||
"fmt"
|
||||
"github.com/apex/log"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
uuid "github.com/satori/go.uuid"
|
||||
"github.com/spf13/viper"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io"
|
||||
@@ -19,7 +16,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// 添加可配置爬虫
|
||||
@@ -36,6 +33,12 @@ func PutConfigSpider(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// 模版名不能为空
|
||||
if spider.Template == "" {
|
||||
HandleErrorF(http.StatusBadRequest, c, "spider template should not be empty")
|
||||
return
|
||||
}
|
||||
|
||||
// 判断爬虫是否存在
|
||||
if spider := model.GetSpiderByName(spider.Name); spider != nil {
|
||||
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("spider for '%s' already exists", spider.Name))
|
||||
@@ -62,6 +65,23 @@ func PutConfigSpider(c *gin.Context) {
|
||||
}
|
||||
spider.Src = spiderDir
|
||||
|
||||
// 复制Spiderfile模版
|
||||
contentByte, err := ioutil.ReadFile("./template/spiderfile/Spiderfile." + spider.Template)
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
f, err := os.Create(filepath.Join(spider.Src, "Spiderfile"))
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
if _, err := f.Write(contentByte); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 添加爬虫到数据库
|
||||
if err := spider.Add(); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
@@ -100,8 +120,8 @@ func UploadConfigSpider(c *gin.Context) {
|
||||
|
||||
// 文件名称必须为Spiderfile
|
||||
filename := header.Filename
|
||||
if filename != "Spiderfile" {
|
||||
HandleErrorF(http.StatusBadRequest, c, "filename must be 'Spiderfile'")
|
||||
if filename != "Spiderfile" && filename != "Spiderfile.yaml" && filename != "Spiderfile.yml" {
|
||||
HandleErrorF(http.StatusBadRequest, c, "filename must be 'Spiderfile(.yaml|.yml)'")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -151,88 +171,146 @@ func UploadConfigSpider(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// 删除已有的爬虫文件
|
||||
for _, fInfo := range utils.ListDir(spiderDir) {
|
||||
// 不删除Spiderfile
|
||||
if fInfo.Name() == filename {
|
||||
continue
|
||||
}
|
||||
|
||||
// 删除其他文件
|
||||
if err := os.RemoveAll(filepath.Join(spiderDir, fInfo.Name())); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 拷贝爬虫文件
|
||||
tplDir := "./template/scrapy"
|
||||
for _, fInfo := range utils.ListDir(tplDir) {
|
||||
// 跳过Spiderfile
|
||||
if fInfo.Name() == "Spiderfile" {
|
||||
continue
|
||||
}
|
||||
|
||||
srcPath := filepath.Join(tplDir, fInfo.Name())
|
||||
if fInfo.IsDir() {
|
||||
dirPath := filepath.Join(spiderDir, fInfo.Name())
|
||||
if err := utils.CopyDir(srcPath, dirPath); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
if err := utils.CopyFile(srcPath, filepath.Join(spiderDir, fInfo.Name())); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 更改爬虫文件
|
||||
if err := services.GenerateConfigSpiderFiles(spider, configData); err != nil {
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 打包为 zip 文件
|
||||
files, err := utils.GetFilesFromDir(spiderDir)
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
randomId := uuid.NewV4()
|
||||
tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), spider.Name+"."+randomId.String()+".zip")
|
||||
spiderZipFileName := spider.Name + ".zip"
|
||||
if err := utils.Compress(files, tmpFilePath); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 获取 GridFS 实例
|
||||
s, gf := database.GetGridFs("files")
|
||||
defer s.Close()
|
||||
|
||||
// 判断文件是否已经存在
|
||||
var gfFile model.GridFs
|
||||
if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil {
|
||||
// 已经存在文件,则删除
|
||||
_ = gf.RemoveId(gfFile.Id)
|
||||
}
|
||||
|
||||
// 上传到GridFs
|
||||
fid, err := services.UploadToGridFs(spiderZipFileName, tmpFilePath)
|
||||
if err != nil {
|
||||
log.Errorf("upload to grid fs error: %s", err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
|
||||
// 保存爬虫 FileId
|
||||
spider.FileId = fid
|
||||
_ = spider.Save()
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func PostConfigSpiderSpiderfile(c *gin.Context) {
|
||||
type Body struct {
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
id := c.Param("id")
|
||||
|
||||
// 文件内容
|
||||
var reqBody Body
|
||||
if err := c.ShouldBindJSON(&reqBody); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
content := reqBody.Content
|
||||
|
||||
// 获取爬虫
|
||||
var spider model.Spider
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
|
||||
return
|
||||
}
|
||||
|
||||
// 反序列化
|
||||
var configData entity.ConfigSpiderData
|
||||
if err := yaml.Unmarshal([]byte(content), &configData); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 校验configData
|
||||
if err := services.ValidateSpiderfile(configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 写文件
|
||||
if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), []byte(content), os.ModePerm); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func PostConfigSpiderConfig(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
// 获取爬虫
|
||||
var spider model.Spider
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
|
||||
return
|
||||
}
|
||||
|
||||
// 反序列化配置数据
|
||||
var configData entity.ConfigSpiderData
|
||||
if err := c.ShouldBindJSON(&configData); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 校验configData
|
||||
if err := services.ValidateSpiderfile(configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 替换Spiderfile文件
|
||||
if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func GetConfigSpiderConfig(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
// 校验ID
|
||||
if !bson.IsObjectIdHex(id) {
|
||||
HandleErrorF(http.StatusBadRequest, c, "invalid id")
|
||||
}
|
||||
|
||||
// 获取爬虫
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: spider.Config,
|
||||
})
|
||||
}
|
||||
|
||||
// 获取模版名称列表
|
||||
func GetConfigSpiderTemplateList(c *gin.Context) {
|
||||
var data []string
|
||||
for _, fInfo := range utils.ListDir("./template/spiderfile") {
|
||||
templateName := strings.Replace(fInfo.Name(), "Spiderfile.", "", -1)
|
||||
data = append(data, templateName)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: data,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ func GetSpiderList(c *gin.Context) {
|
||||
"name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}},
|
||||
}
|
||||
|
||||
if t != "" {
|
||||
if t != "" && t != "all" {
|
||||
filter["type"] = t
|
||||
}
|
||||
|
||||
|
||||
@@ -2,11 +2,20 @@ package services
|
||||
|
||||
import (
|
||||
"crawlab/constants"
|
||||
"crawlab/database"
|
||||
"crawlab/entity"
|
||||
"crawlab/model"
|
||||
"crawlab/model/config_spider"
|
||||
"crawlab/utils"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/apex/log"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
uuid "github.com/satori/go.uuid"
|
||||
"github.com/spf13/viper"
|
||||
"gopkg.in/yaml.v2"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -37,12 +46,17 @@ func ValidateSpiderfile(configData entity.ConfigSpiderData) error {
|
||||
|
||||
// 校验是否存在 start_url
|
||||
if configData.StartUrl == "" {
|
||||
return errors.New("spiderfile start_url is empty")
|
||||
return errors.New("spiderfile invalid: start_url is empty")
|
||||
}
|
||||
|
||||
// 校验是否存在 start_stage
|
||||
if configData.StartStage == "" {
|
||||
return errors.New("spiderfile invalid: start_stage is empty")
|
||||
}
|
||||
|
||||
// 校验是否存在 stages
|
||||
if len(configData.Stages) == 0 {
|
||||
return errors.New("spiderfile stages is empty")
|
||||
return errors.New("spiderfile invalid: stages is empty")
|
||||
}
|
||||
|
||||
// 校验stages
|
||||
@@ -50,56 +64,74 @@ func ValidateSpiderfile(configData entity.ConfigSpiderData) error {
|
||||
for stageName, stage := range configData.Stages {
|
||||
// stage 名称不能为空
|
||||
if stageName == "" {
|
||||
return errors.New("spiderfile stage name is empty")
|
||||
return errors.New("spiderfile invalid: stage name is empty")
|
||||
}
|
||||
|
||||
// stage 名称不能为保留字符串
|
||||
// NOTE: 如果有其他Engine,可以扩展,默认为Scrapy
|
||||
if configData.Engine == "" || configData.Engine == constants.EngineScrapy {
|
||||
if strings.Contains(constants.ScrapyProtectedStageNames, stageName) {
|
||||
return errors.New(fmt.Sprintf("spiderfile stage name '%s' is protected", stageName))
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: stage name '%s' is protected", stageName))
|
||||
}
|
||||
} else if configData.Engine == constants.EngineColly {
|
||||
return errors.New(fmt.Sprintf("engine '%s' is not implemented", stageName))
|
||||
} else {
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: engine '%s' is not implemented", configData.Engine))
|
||||
}
|
||||
|
||||
// stage 名称不能重复
|
||||
if dict[stageName] == 1 {
|
||||
return errors.New("spiderfile stage name should be unique")
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: stage name '%s' is duplicated", stageName))
|
||||
}
|
||||
dict[stageName] = 1
|
||||
|
||||
// stage 字段不能为空
|
||||
if len(stage.Fields) == 0 {
|
||||
return errors.New(fmt.Sprintf("spiderfile stage '%s' has no fields", stageName))
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has no fields", stageName))
|
||||
}
|
||||
|
||||
// stage 的下一个 stage 只能有一个
|
||||
// 是否包含 next_stage
|
||||
hasNextStage := false
|
||||
|
||||
// 遍历字段列表
|
||||
for _, field := range stage.Fields {
|
||||
// stage 的 next stage 只能有一个
|
||||
if field.NextStage != "" {
|
||||
if hasNextStage {
|
||||
return errors.New("spiderfile stage fields should have only 1 next_stage")
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has more than 1 next_stage", stageName))
|
||||
}
|
||||
hasNextStage = true
|
||||
}
|
||||
|
||||
// 字段里 css 和 xpath 只能包含一个
|
||||
if field.Css != "" && field.Xpath != "" {
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: field '%s' in stage '%s' has both css and xpath set which is prohibited", field.Name, stageName))
|
||||
}
|
||||
}
|
||||
|
||||
// stage 里 page_css 和 page_xpath 只能包含一个
|
||||
if stage.PageCss != "" && stage.PageXpath != "" {
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has both page_css and page_xpath set which is prohibited", stageName))
|
||||
}
|
||||
|
||||
// stage 里 list_css 和 list_xpath 只能包含一个
|
||||
if stage.ListCss != "" && stage.ListXpath != "" {
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has both list_css and list_xpath set which is prohibited", stageName))
|
||||
}
|
||||
|
||||
// 如果 stage 的 is_list 为 true 但 list_css 为空,报错
|
||||
if stage.IsList && stage.ListCss == "" {
|
||||
return errors.New("spiderfile stage with is_list = true should have list_css being set")
|
||||
if stage.IsList && (stage.ListCss == "" && stage.ListXpath == "") {
|
||||
return errors.New("spiderfile invalid: stage with is_list = true should have either list_css or list_xpath being set")
|
||||
}
|
||||
}
|
||||
|
||||
// 校验字段唯一性
|
||||
if !IsUniqueConfigSpiderFields(fields) {
|
||||
return errors.New("spiderfile fields not unique")
|
||||
return errors.New("spiderfile invalid: fields not unique")
|
||||
}
|
||||
|
||||
// 字段名称不能为保留字符串
|
||||
for _, field := range fields {
|
||||
if strings.Contains(constants.ScrapyProtectedFieldNames, field.Name) {
|
||||
return errors.New(fmt.Sprintf("spiderfile field name '%s' is protected", field.Name))
|
||||
return errors.New(fmt.Sprintf("spiderfile invalid: field name '%s' is protected", field.Name))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,3 +148,118 @@ func IsUniqueConfigSpiderFields(fields []entity.Field) bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
|
||||
spiderDir := spider.Src
|
||||
|
||||
// 赋值 stage_name
|
||||
for stageName, stage := range configData.Stages {
|
||||
stage.Name = stageName
|
||||
configData.Stages[stageName] = stage
|
||||
}
|
||||
|
||||
// 删除已有的爬虫文件
|
||||
for _, fInfo := range utils.ListDir(spiderDir) {
|
||||
// 不删除Spiderfile
|
||||
if fInfo.Name() == "Spiderfile" {
|
||||
continue
|
||||
}
|
||||
|
||||
// 删除其他文件
|
||||
if err := os.RemoveAll(filepath.Join(spiderDir, fInfo.Name())); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// 拷贝爬虫文件
|
||||
tplDir := "./template/scrapy"
|
||||
for _, fInfo := range utils.ListDir(tplDir) {
|
||||
// 跳过Spiderfile
|
||||
if fInfo.Name() == "Spiderfile" {
|
||||
continue
|
||||
}
|
||||
|
||||
srcPath := filepath.Join(tplDir, fInfo.Name())
|
||||
if fInfo.IsDir() {
|
||||
dirPath := filepath.Join(spiderDir, fInfo.Name())
|
||||
if err := utils.CopyDir(srcPath, dirPath); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := utils.CopyFile(srcPath, filepath.Join(spiderDir, fInfo.Name())); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 更改爬虫文件
|
||||
if err := GenerateConfigSpiderFiles(spider, configData); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 打包为 zip 文件
|
||||
files, err := utils.GetFilesFromDir(spiderDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
randomId := uuid.NewV4()
|
||||
tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), spider.Name+"."+randomId.String()+".zip")
|
||||
spiderZipFileName := spider.Name + ".zip"
|
||||
if err := utils.Compress(files, tmpFilePath); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 获取 GridFS 实例
|
||||
s, gf := database.GetGridFs("files")
|
||||
defer s.Close()
|
||||
|
||||
// 判断文件是否已经存在
|
||||
var gfFile model.GridFs
|
||||
if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil {
|
||||
// 已经存在文件,则删除
|
||||
_ = gf.RemoveId(gfFile.Id)
|
||||
}
|
||||
|
||||
// 上传到GridFs
|
||||
fid, err := UploadToGridFs(spiderZipFileName, tmpFilePath)
|
||||
if err != nil {
|
||||
log.Errorf("upload to grid fs error: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
// 保存爬虫 FileId
|
||||
spider.FileId = fid
|
||||
_ = spider.Save()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
|
||||
// Spiderfile 路径
|
||||
sfPath := filepath.Join(spider.Src, "Spiderfile")
|
||||
|
||||
// 生成Yaml内容
|
||||
sfContentByte, err := yaml.Marshal(configData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 打开文件
|
||||
var f *os.File
|
||||
if utils.Exists(sfPath) {
|
||||
f, err = os.OpenFile(sfPath, os.O_WRONLY|os.O_TRUNC, 0777)
|
||||
} else {
|
||||
f, err = os.OpenFile(sfPath, os.O_CREATE, 0777)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// 写入内容
|
||||
if _, err := f.Write(sfContentByte); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -116,12 +116,15 @@ func PublishAllSpiders() {
|
||||
|
||||
// 发布爬虫
|
||||
func PublishSpider(spider model.Spider) {
|
||||
// 查询gf file,不存在则标记为爬虫文件不存在
|
||||
gfFile := model.GetGridFs(spider.FileId)
|
||||
if gfFile == nil {
|
||||
spider.FileId = constants.ObjectIdNull
|
||||
_ = spider.Save()
|
||||
return
|
||||
var gfFile *model.GridFs
|
||||
if spider.FileId.Hex() != constants.ObjectIdNull {
|
||||
// 查询gf file,不存在则标记为爬虫文件不存在
|
||||
gfFile = model.GetGridFs(spider.FileId)
|
||||
if gfFile == nil {
|
||||
spider.FileId = constants.ObjectIdNull
|
||||
_ = spider.Save()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// 如果FileId为空,表示还没有上传爬虫到GridFS,则跳过
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"github.com/spf13/viper"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
)
|
||||
@@ -99,7 +100,6 @@ func (s *SpiderSync) Download() {
|
||||
// 创建临时文件
|
||||
tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip")
|
||||
tmpFile := utils.OpenFile(tmpFilePath)
|
||||
defer utils.Close(tmpFile)
|
||||
|
||||
// 将该文件写入临时文件
|
||||
if _, err := io.Copy(tmpFile, f); err != nil {
|
||||
@@ -119,6 +119,15 @@ func (s *SpiderSync) Download() {
|
||||
return
|
||||
}
|
||||
|
||||
//递归修改目标文件夹权限
|
||||
// 解决scrapy.setting中开启LOG_ENABLED 和 LOG_FILE时不能创建log文件的问题
|
||||
cmd := exec.Command("chmod", "-R", "777", dstPath)
|
||||
if err := cmd.Run(); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
|
||||
// 关闭临时文件
|
||||
if err := tmpFile.Close(); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
|
||||
@@ -226,12 +226,18 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
|
||||
// 环境变量配置
|
||||
envs := s.Envs
|
||||
if s.Type == constants.Configurable {
|
||||
// 数据库配置
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_HOST", Value: viper.GetString("mongo.host")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PORT", Value: viper.GetString("mongo.port")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_DB", Value: viper.GetString("mongo.db")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_USERNAME", Value: viper.GetString("mongo.username")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PASSWORD", Value: viper.GetString("mongo.password")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_AUTHSOURCE", Value: viper.GetString("mongo.authSource")})
|
||||
|
||||
// 设置配置
|
||||
for envName, envValue := range s.Config.Settings {
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_SETTING_" + envName, Value: envValue})
|
||||
}
|
||||
}
|
||||
cmd = SetEnv(cmd, envs, t.Id, s.Col)
|
||||
|
||||
@@ -311,9 +317,12 @@ func SaveTaskResultCount(id string) func() {
|
||||
|
||||
// 执行任务
|
||||
func ExecuteTask(id int) {
|
||||
if flag, _ := LockList.Load(id); flag.(bool) {
|
||||
log.Debugf(GetWorkerPrefix(id) + "正在执行任务...")
|
||||
return
|
||||
if flag, ok := LockList.Load(id); ok {
|
||||
if flag.(bool) {
|
||||
log.Debugf(GetWorkerPrefix(id) + "正在执行任务...")
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// 上锁
|
||||
@@ -485,6 +494,29 @@ func GetTaskLog(id string) (logStr string, err error) {
|
||||
}
|
||||
|
||||
if IsMasterNode(task.NodeId.Hex()) {
|
||||
if !utils.Exists(task.LogPath) {
|
||||
fileDir, err := MakeLogDir(task)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(err.Error())
|
||||
}
|
||||
|
||||
fileP := GetLogFilePaths(fileDir)
|
||||
|
||||
// 获取日志文件路径
|
||||
fLog, err := os.Create(fileP)
|
||||
defer fLog.Close()
|
||||
if err != nil {
|
||||
log.Errorf("create task log file error: %s", fileP)
|
||||
debug.PrintStack()
|
||||
}
|
||||
task.LogPath = fileP
|
||||
if err := task.Save(); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
}
|
||||
|
||||
}
|
||||
// 若为主节点,获取本机日志
|
||||
logBytes, err := model.GetLocalLog(task.LogPath)
|
||||
if err != nil {
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
|
||||
# Scrapy settings for config_spider project
|
||||
#
|
||||
@@ -9,14 +12,14 @@
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
BOT_NAME = 'config_spider'
|
||||
BOT_NAME = 'Crawlab Configurable Spider'
|
||||
|
||||
SPIDER_MODULES = ['config_spider.spiders']
|
||||
NEWSPIDER_MODULE = 'config_spider.spiders'
|
||||
|
||||
|
||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||
#USER_AGENT = 'config_spider (+http://www.yourdomain.com)'
|
||||
USER_AGENT = 'Crawlab Spider'
|
||||
|
||||
# Obey robots.txt rules
|
||||
ROBOTSTXT_OBEY = True
|
||||
@@ -88,3 +91,21 @@ ITEM_PIPELINES = {
|
||||
#HTTPCACHE_DIR = 'httpcache'
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
||||
|
||||
def parse_crawlab_setting(raw_value):
    """Convert the string value of a CRAWLAB_SETTING_* variable to a Python value.

    'true' / 'false' (any letter case) become booleans, a value made only of
    digits becomes an int, and a value wrapped in {...} or [...] is decoded as
    JSON. Everything else is returned unchanged as a string, including
    bracketed text that turns out not to be valid JSON.
    """
    lowered = raw_value.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    if re.search(r'^\d+$', raw_value) is not None:
        return int(raw_value)
    stripped = raw_value.strip()
    if re.search(r'^\{.*\}$', stripped) is not None or re.search(r'^\[.*\]$', stripped) is not None:
        try:
            return json.loads(raw_value)
        except ValueError:
            # Looks like JSON but is not: keep the raw text instead of
            # aborting the import of this settings module.
            return raw_value
    return raw_value


# Override module-level settings with every CRAWLAB_SETTING_<NAME> environment variable.
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]:
    # Remove only the leading prefix; str.replace would also drop later occurrences.
    setting_name = setting_env_name[len('CRAWLAB_SETTING_'):]
    setting_value = parse_crawlab_setting(os.environ.get(setting_env_name))
    # At module level this is the same namespace the previous locals() call wrote to.
    globals()[setting_name] = setting_value
|
||||
|
||||
|
||||
20
backend/template/spiderfile/Spiderfile.163_news
Normal file
20
backend/template/spiderfile/Spiderfile.163_news
Normal file
@@ -0,0 +1,20 @@
|
||||
version: "0.4.0"
|
||||
name: "toscrapy_books"
|
||||
start_url: "http://news.163.com/special/0001386F/rank_news.html"
|
||||
start_stage: "list"
|
||||
engine: "scrapy"
|
||||
stages:
|
||||
list:
|
||||
is_list: true
|
||||
list_css: "table tr:not(:first-child)"
|
||||
fields:
|
||||
- name: "title"
|
||||
css: "td:nth-child(1) > a"
|
||||
- name: "url"
|
||||
css: "td:nth-child(1) > a"
|
||||
attr: "href"
|
||||
- name: "clicks"
|
||||
css: "td.cBlue"
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: false
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36
|
||||
22
backend/template/spiderfile/Spiderfile.baidu
Normal file
22
backend/template/spiderfile/Spiderfile.baidu
Normal file
@@ -0,0 +1,22 @@
|
||||
version: 0.4.0
|
||||
name: toscrapy_books
|
||||
start_url: http://www.baidu.com/s?wd=crawlab
|
||||
start_stage: list
|
||||
engine: scrapy
|
||||
stages:
|
||||
list:
|
||||
is_list: true
|
||||
list_xpath: //*[contains(@class, "c-container")]
|
||||
page_xpath: //*[@id="page"]//a[@class="n"][last()]
|
||||
page_attr: href
|
||||
fields:
|
||||
- name: title
|
||||
xpath: .//h3/a
|
||||
- name: url
|
||||
xpath: .//h3/a
|
||||
attr: href
|
||||
- name: abstract
|
||||
xpath: .//*[@class="c-abstract"]
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: false
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36
|
||||
@@ -5,10 +5,10 @@ start_stage: "list"
|
||||
engine: "scrapy"
|
||||
stages:
|
||||
list:
|
||||
is_list: true # default: false
|
||||
is_list: true
|
||||
list_css: "section article.product_pod"
|
||||
page_css: "ul.pager li.next a"
|
||||
page_attr: "href" # default: href
|
||||
page_attr: "href"
|
||||
fields:
|
||||
- name: "title"
|
||||
css: "h3 > a"
|
||||
@@ -23,3 +23,6 @@ stages:
|
||||
fields:
|
||||
- name: "description"
|
||||
css: "#product_description + p"
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: true
|
||||
AUTOTHROTTLE_ENABLED: true
|
||||
@@ -167,7 +167,6 @@ func DeCompress(srcFile *os.File, dstPath string) error {
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
defer Close(newFile)
|
||||
|
||||
// 拷贝该文件到新文件中
|
||||
if _, err := io.Copy(newFile, srcFile); err != nil {
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
"cross-env": "^5.2.0",
|
||||
"dayjs": "^1.8.6",
|
||||
"echarts": "^4.1.0",
|
||||
"element-ui": "2.4.6",
|
||||
"element-ui": "2.13.0",
|
||||
"font-awesome": "^4.7.0",
|
||||
"js-cookie": "2.2.0",
|
||||
"normalize.css": "7.0.0",
|
||||
|
||||
@@ -2,13 +2,21 @@
|
||||
<el-dialog
|
||||
:title="$t('Notification')"
|
||||
:visible="visible"
|
||||
class="crawl-confirm-dialog"
|
||||
width="480px"
|
||||
:before-close="beforeClose"
|
||||
>
|
||||
<div style="margin-bottom: 20px;">{{$t('Are you sure to run this spider?')}}</div>
|
||||
<el-form label-width="80px">
|
||||
<el-form-item :label="$t('Node')">
|
||||
<el-select v-model="nodeId">
|
||||
<el-form label-width="80px" :model="form" ref="form">
|
||||
<el-form-item :label="$t('Run Type')" prop="runType" required inline-message>
|
||||
<el-select v-model="form.runType" :placeholder="$t('Run Type')">
|
||||
<el-option value="all-nodes" :label="$t('All Nodes')"/>
|
||||
<el-option value="selected-nodes" :label="$t('Selected Nodes')"/>
|
||||
<el-option value="random" :label="$t('Random')"/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item v-if="form.runType === 'selected-nodes'" prop="nodeIds" :label="$t('Node')" required inline-message>
|
||||
<el-select v-model="form.nodeIds" :placeholder="$t('Node')" multiple clearable>
|
||||
<el-option
|
||||
v-for="op in nodeList"
|
||||
:key="op._id"
|
||||
@@ -18,8 +26,8 @@
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Parameters')">
|
||||
<el-input v-model="param" :placeholder="$t('Parameters')"></el-input>
|
||||
<el-form-item :label="$t('Parameters')" prop="param" inline-message>
|
||||
<el-input v-model="form.param" :placeholder="$t('Parameters')"></el-input>
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
<template slot="footer">
|
||||
@@ -31,6 +39,7 @@
|
||||
|
||||
<script>
|
||||
import request from '../../api/request'
|
||||
|
||||
export default {
|
||||
name: 'CrawlConfirmDialog',
|
||||
props: {
|
||||
@@ -45,9 +54,12 @@ export default {
|
||||
},
|
||||
data () {
|
||||
return {
|
||||
nodeId: '',
|
||||
param: '',
|
||||
nodeList: []
|
||||
form: {
|
||||
runType: 'random',
|
||||
nodeIds: undefined,
|
||||
param: '',
|
||||
nodeList: []
|
||||
}
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
@@ -55,12 +67,21 @@ export default {
|
||||
this.$emit('close')
|
||||
},
|
||||
onConfirm () {
|
||||
this.$store.dispatch('spider/crawlSpider', { id: this.spiderId, nodeId: this.nodeId, param: this.param })
|
||||
.then(() => {
|
||||
this.$message.success(this.$t('A task has been scheduled successfully'))
|
||||
this.$refs['form'].validate(res => {
|
||||
if (!res) return
|
||||
|
||||
this.$store.dispatch('spider/crawlSpider', {
|
||||
spiderId: this.spiderId,
|
||||
nodeIds: this.form.nodeIds,
|
||||
param: this.form.param,
|
||||
runType: this.form.runType
|
||||
})
|
||||
this.$emit('close')
|
||||
this.$st.sendEv('爬虫', '运行')
|
||||
.then(() => {
|
||||
this.$message.success(this.$t('A task has been scheduled successfully'))
|
||||
})
|
||||
this.$emit('close')
|
||||
this.$st.sendEv('爬虫', '运行')
|
||||
})
|
||||
}
|
||||
},
|
||||
created () {
|
||||
@@ -81,5 +102,7 @@ export default {
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
|
||||
.crawl-confirm-dialog >>> .el-form .el-form-item {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
</style>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -18,6 +18,7 @@ import 'codemirror/mode/go/go.js'
|
||||
import 'codemirror/mode/shell/shell.js'
|
||||
import 'codemirror/mode/markdown/markdown.js'
|
||||
import 'codemirror/mode/php/php.js'
|
||||
import 'codemirror/mode/yaml/yaml.js'
|
||||
|
||||
export default {
|
||||
name: 'FileDetail',
|
||||
@@ -38,7 +39,7 @@ export default {
|
||||
},
|
||||
options () {
|
||||
return {
|
||||
mode: this.lanaguage,
|
||||
mode: this.language,
|
||||
theme: 'darcula',
|
||||
styleActiveLine: true,
|
||||
lineNumbers: true,
|
||||
@@ -46,8 +47,9 @@ export default {
|
||||
matchBrackets: true
|
||||
}
|
||||
},
|
||||
lanaguage () {
|
||||
language () {
|
||||
const fileName = this.$store.state.file.currentPath
|
||||
if (!fileName) return ''
|
||||
if (fileName.match(/\.js$/)) {
|
||||
return 'text/javascript'
|
||||
} else if (fileName.match(/\.py$/)) {
|
||||
@@ -60,6 +62,8 @@ export default {
|
||||
return 'text/x-php'
|
||||
} else if (fileName.match(/\.md$/)) {
|
||||
return 'text/x-markdown'
|
||||
} else if (fileName === 'Spiderfile') {
|
||||
return 'text/x-yaml'
|
||||
} else {
|
||||
return 'text'
|
||||
}
|
||||
@@ -74,7 +78,7 @@ export default {
|
||||
<style scoped>
|
||||
.file-content {
|
||||
border: 1px solid #eaecef;
|
||||
height: 480px;
|
||||
height: calc(100vh - 256px);
|
||||
}
|
||||
|
||||
.file-content >>> .CodeMirror {
|
||||
|
||||
@@ -120,6 +120,8 @@ export default {
|
||||
this.showFile = false
|
||||
this.onBack()
|
||||
}
|
||||
},
|
||||
created () {
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -21,11 +21,11 @@
|
||||
<el-form-item :label="$t('Source Folder')">
|
||||
<el-input v-model="spiderForm.src" :placeholder="$t('Source Folder')" disabled></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Execute Command')" prop="cmd" required :inline-message="true">
|
||||
<el-form-item v-if="spiderForm.type === 'customized'" :label="$t('Execute Command')" prop="cmd" required :inline-message="true">
|
||||
<el-input v-model="spiderForm.cmd" :placeholder="$t('Execute Command')"
|
||||
:disabled="isView"></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Results Collection')">
|
||||
<el-form-item :label="$t('Results Collection')" prop="col" required :inline-message="true">
|
||||
<el-input v-model="spiderForm.col" :placeholder="$t('Results Collection')"
|
||||
:disabled="isView"></el-input>
|
||||
</el-form-item>
|
||||
@@ -39,11 +39,10 @@
|
||||
</el-autocomplete>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Spider Type')">
|
||||
<!--<el-select v-model="spiderForm.type" :placeholder="$t('Spider Type')" :disabled="true" clearable>-->
|
||||
<!--<el-option value="configurable" :label="$t('Configurable')"></el-option>-->
|
||||
<!--<el-option value="customized" :label="$t('Customized')"></el-option>-->
|
||||
<!--</el-select>-->
|
||||
<el-input v-model="spiderForm.type" placeholder="爬虫类型" clearable/>
|
||||
<el-select v-model="spiderForm.type" :placeholder="$t('Spider Type')" :disabled="true" clearable>
|
||||
<el-option value="configurable" :label="$t('Configurable')"></el-option>
|
||||
<el-option value="customized" :label="$t('Customized')"></el-option>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Remark')">
|
||||
<el-input v-model="spiderForm.remark"/>
|
||||
@@ -103,7 +102,11 @@ export default {
|
||||
'spiderForm'
|
||||
]),
|
||||
isShowRun () {
|
||||
return !!this.spiderForm.cmd
|
||||
if (this.spiderForm.type === 'customized') {
|
||||
return !!this.spiderForm.cmd
|
||||
} else {
|
||||
return true
|
||||
}
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
|
||||
@@ -25,7 +25,6 @@ export default {
|
||||
}
|
||||
</script>
|
||||
|
||||
|
||||
<style scoped>
|
||||
.log-item {
|
||||
display: table;
|
||||
|
||||
@@ -15,7 +15,7 @@ import LogItem from './LogItem'
|
||||
import VirtualList from 'vue-virtual-scroll-list'
|
||||
import Convert from 'ansi-to-html'
|
||||
import hasAnsi from 'has-ansi'
|
||||
const convert = new Convert();
|
||||
const convert = new Convert()
|
||||
export default {
|
||||
name: 'LogView',
|
||||
components: {
|
||||
@@ -53,7 +53,7 @@ export default {
|
||||
props: {
|
||||
index: logItem.index,
|
||||
data: isAnsi ? convert.toHtml(logItem.data) : logItem.data,
|
||||
isAnsi,
|
||||
isAnsi
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,73 +1,125 @@
|
||||
<template>
|
||||
<div class="fields-table-view">
|
||||
<el-row class="button-group-container">
|
||||
<label class="title">{{$t(this.title)}}</label>
|
||||
<div class="button-group">
|
||||
<el-button type="primary" size="small" @click="addField" icon="el-icon-plus">{{$t('Add Field')}}</el-button>
|
||||
</div>
|
||||
</el-row>
|
||||
<!-- <el-row class="button-group-container">-->
|
||||
<!-- <label class="title">{{$t(this.title)}}</label>-->
|
||||
<!-- <div class="button-group">-->
|
||||
<!-- <el-button type="primary" size="small" @click="addField" icon="el-icon-plus">{{$t('Add Field')}}</el-button>-->
|
||||
<!-- </div>-->
|
||||
<!-- </el-row>-->
|
||||
<el-row>
|
||||
<el-table :data="fields"
|
||||
class="table edit"
|
||||
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
|
||||
border>
|
||||
<el-table-column v-if="type === 'list' && spiderForm.crawl_type === 'list-detail'"
|
||||
:label="$t('Detail Page URL')"
|
||||
align="center">
|
||||
:cell-style="getCellClassStyle"
|
||||
>
|
||||
<el-table-column class-name="action" width="80px" align="right">
|
||||
<template slot-scope="scope">
|
||||
<el-checkbox v-model="scope.row.is_detail"
|
||||
@change="onCheck(scope.row)">
|
||||
</el-checkbox>
|
||||
<i class="action-item el-icon-copy-document" @click="onCopyField(scope.row)"></i>
|
||||
<i class="action-item el-icon-remove-outline" @click="onRemoveField(scope.row)"></i>
|
||||
<i class="action-item el-icon-circle-plus-outline" @click="onAddField(scope.row)"></i>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Field Name')" width="200px">
|
||||
<el-table-column :label="$t('Field Name')" width="150px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.name" :placeholder="$t('Field Name')"
|
||||
@change="onNameChange(scope.row)"></el-input>
|
||||
<el-input v-model="scope.row.name"
|
||||
:placeholder="$t('Field Name')"
|
||||
suffix-icon="el-icon-edit"
|
||||
@change="onNameChange(scope.row)"
|
||||
/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Query Type')" width="200px">
|
||||
<el-table-column :label="$t('Selector Type')" width="150px" align="center" class-name="selector-type">
|
||||
<template slot-scope="scope">
|
||||
<el-select v-model="scope.row.type" :placeholder="$t('Query Type')">
|
||||
<el-option value="css" :label="$t('CSS Selector')"></el-option>
|
||||
<el-option value="xpath" :label="$t('XPath')"></el-option>
|
||||
</el-select>
|
||||
<span class="button-selector-item" @click="onClickSelectorType(scope.row, 'css')">
|
||||
<el-tag
|
||||
:class="scope.row.css ? 'active' : 'inactive'"
|
||||
type="success"
|
||||
>
|
||||
CSS
|
||||
</el-tag>
|
||||
</span>
|
||||
<span class="button-selector-item" @click="onClickSelectorType(scope.row, 'xpath')">
|
||||
<el-tag
|
||||
:class="scope.row.xpath ? 'active' : 'inactive'"
|
||||
type="primary"
|
||||
>
|
||||
XPath
|
||||
</el-tag>
|
||||
</span>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Query')" width="250px">
|
||||
<el-table-column :label="$t('Selector')" width="200px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.query" :placeholder="$t('Query')"></el-input>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Extract Type')" width="120px">
|
||||
<template slot-scope="scope">
|
||||
<el-select v-model="scope.row.extract_type" :placeholder="$t('Extract Type')">
|
||||
<el-option value="text" :label="$t('Text')"></el-option>
|
||||
<el-option value="attribute" :label="$t('Attribute')"></el-option>
|
||||
</el-select>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Attribute')" width="250px">
|
||||
<template slot-scope="scope">
|
||||
<template v-if="scope.row.extract_type === 'attribute'">
|
||||
<el-input v-model="scope.row.attribute"
|
||||
:placeholder="$t('Attribute')">
|
||||
<template v-if="scope.row.css">
|
||||
<el-input
|
||||
v-model="scope.row.css"
|
||||
:placeholder="$t('CSS / XPath')"
|
||||
suffix-icon="el-icon-edit"
|
||||
>
|
||||
</el-input>
|
||||
</template>
|
||||
<template v-else>
|
||||
<el-input
|
||||
v-model="scope.row.xpath"
|
||||
:placeholder="$t('CSS / XPath')"
|
||||
suffix-icon="el-icon-edit"
|
||||
>
|
||||
</el-input>
|
||||
</template>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Action')" fixed="right" min-width="100px">
|
||||
<el-table-column :label="$t('Is Attribute')" width="150px" align="center">
|
||||
<template slot-scope="scope">
|
||||
<div class="action-button-group">
|
||||
<el-button size="mini"
|
||||
style="margin-left:10px"
|
||||
icon="el-icon-delete"
|
||||
type="danger"
|
||||
@click="deleteField(scope.$index)">
|
||||
</el-button>
|
||||
</div>
|
||||
<span class="button-selector-item" @click="onClickIsAttribute(scope.row, false)">
|
||||
<el-tag
|
||||
:class="!isShowAttr(scope.row) ? 'active' : 'inactive'"
|
||||
type="success"
|
||||
>
|
||||
{{$t('Text')}}
|
||||
</el-tag>
|
||||
</span>
|
||||
<span class="button-selector-item" @click="onClickIsAttribute(scope.row, true)">
|
||||
<el-tag
|
||||
:class="isShowAttr(scope.row) ? 'active' : 'inactive'"
|
||||
type="primary"
|
||||
>
|
||||
{{$t('Attribute')}}
|
||||
</el-tag>
|
||||
</span>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Attribute')" width="200px">
|
||||
<template slot-scope="scope">
|
||||
<template v-if="isShowAttr(scope.row)">
|
||||
<el-input
|
||||
v-model="scope.row.attr"
|
||||
:placeholder="$t('Attribute')"
|
||||
suffix-icon="el-icon-edit"
|
||||
@change="onAttrChange(scope.row)"
|
||||
/>
|
||||
</template>
|
||||
<template v-else>
|
||||
<span style="margin-left: 15px; color: lightgrey">
|
||||
N/A
|
||||
</span>
|
||||
</template>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Next Stage')" width="250px">
|
||||
<template slot-scope="scope">
|
||||
<el-select
|
||||
v-model="scope.row.next_stage"
|
||||
:class="!scope.row.next_stage ? 'disabled' : ''"
|
||||
@change="onChangeNextStage(scope.row)"
|
||||
>
|
||||
<el-option :label="$t('No Next Stage')" value=""/>
|
||||
<el-option v-for="n in filteredStageNames" :key="n" :label="n" :value="n"/>
|
||||
</el-select>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Remark')" width="auto" min-width="120px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.remark" :placeholder="$t('Remark')" suffix-icon="el-icon-edit"/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
@@ -91,6 +143,18 @@ export default {
|
||||
type: String,
|
||||
default: ''
|
||||
},
|
||||
stage: {
|
||||
type: Object,
|
||||
default () {
|
||||
return {}
|
||||
}
|
||||
},
|
||||
stageNames: {
|
||||
type: Array,
|
||||
default () {
|
||||
return []
|
||||
}
|
||||
},
|
||||
fields: {
|
||||
type: Array,
|
||||
default () {
|
||||
@@ -101,7 +165,10 @@ export default {
|
||||
computed: {
|
||||
...mapState('spider', [
|
||||
'spiderForm'
|
||||
])
|
||||
]),
|
||||
filteredStageNames () {
|
||||
return this.stageNames.filter(n => n !== this.stage.name)
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
addField () {
|
||||
@@ -128,6 +195,89 @@ export default {
|
||||
}
|
||||
})
|
||||
this.$st.sendEv('爬虫详情-配置', '设置详情页URL')
|
||||
},
|
||||
onClickSelectorType (row, selectorType) {
|
||||
if (selectorType === 'css') {
|
||||
if (row.xpath) this.$set(row, 'xpath', '')
|
||||
if (!row.css) this.$set(row, 'css', 'body')
|
||||
} else {
|
||||
if (row.css) this.$set(row, 'css', '')
|
||||
if (!row.xpath) this.$set(row, 'xpath', '//body')
|
||||
}
|
||||
},
|
||||
onClickIsAttribute (row, isAttribute) {
|
||||
if (!isAttribute) {
|
||||
// 文本
|
||||
if (row.attr) this.$set(row, 'attr', '')
|
||||
} else {
|
||||
// 属性
|
||||
if (!row.attr) this.$set(row, 'attr', 'href')
|
||||
}
|
||||
this.$set(row, 'isAttrChange', false)
|
||||
},
|
||||
onCopyField (row) {
|
||||
for (let i = 0; i < this.fields.length; i++) {
|
||||
if (row.name === this.fields[i].name) {
|
||||
this.fields.splice(i, 0, JSON.parse(JSON.stringify(row)))
|
||||
break
|
||||
}
|
||||
}
|
||||
},
|
||||
onRemoveField (row) {
|
||||
for (let i = 0; i < this.fields.length; i++) {
|
||||
if (row.name === this.fields[i].name) {
|
||||
this.fields.splice(i, 1)
|
||||
break
|
||||
}
|
||||
}
|
||||
if (this.fields.length === 0) {
|
||||
this.fields.push({
|
||||
xpath: '//body',
|
||||
next_stage: ''
|
||||
})
|
||||
}
|
||||
},
|
||||
onAddField (row) {
|
||||
for (let i = 0; i < this.fields.length; i++) {
|
||||
if (row.name === this.fields[i].name) {
|
||||
this.fields.splice(i + 1, 0, {
|
||||
name: `field_${Math.floor(new Date().getTime()).toString()}`,
|
||||
xpath: '//body',
|
||||
next_stage: ''
|
||||
})
|
||||
break
|
||||
}
|
||||
}
|
||||
},
|
||||
getCellClassStyle ({ row, columnIndex }) {
|
||||
if (columnIndex === 1) {
|
||||
// 字段名称
|
||||
if (!row.name) {
|
||||
return {
|
||||
'border': '1px solid red'
|
||||
}
|
||||
}
|
||||
} else if (columnIndex === 3) {
|
||||
// 选择器
|
||||
if (!row.css && !row.xpath) {
|
||||
return {
|
||||
'border': '1px solid red'
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
onChangeNextStage (row) {
|
||||
this.fields.forEach(f => {
|
||||
if (f.name !== row.name) {
|
||||
this.$set(f, 'next_stage', '')
|
||||
}
|
||||
})
|
||||
},
|
||||
onAttrChange (row) {
|
||||
this.$set(row, 'isAttrChange', !row.attr)
|
||||
},
|
||||
isShowAttr (row) {
|
||||
return (row.attr || row.isAttrChange)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -158,6 +308,50 @@ export default {
|
||||
line-height: 36px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .button-selector-item {
|
||||
cursor: pointer;
|
||||
margin: 0 5px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-tag.inactive {
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action {
|
||||
background: none !important;
|
||||
border: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr {
|
||||
border: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr th {
|
||||
border-right: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr td:nth-child(2) {
|
||||
border-left: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr td {
|
||||
border-right: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit::before {
|
||||
background: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action-item {
|
||||
font-size: 14px;
|
||||
margin-right: 5px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action-item:last-child {
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
.button-group-container {
|
||||
/*display: inline-block;*/
|
||||
/*width: 100%;*/
|
||||
@@ -180,4 +374,8 @@ export default {
|
||||
.action-button-group >>> .el-checkbox__label {
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-select.disabled .el-input__inner {
|
||||
color: lightgrey;
|
||||
}
|
||||
</style>
|
||||
|
||||
283
frontend/src/components/TableView/SettingFieldsTableView.vue
Normal file
283
frontend/src/components/TableView/SettingFieldsTableView.vue
Normal file
@@ -0,0 +1,283 @@
|
||||
<template>
|
||||
<div class="setting-list-table-view">
|
||||
<el-row>
|
||||
<el-table :data="list"
|
||||
class="table edit"
|
||||
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
|
||||
:cell-style="getCellClassStyle"
|
||||
>
|
||||
<el-table-column class-name="action" width="80px" align="right">
|
||||
<template slot-scope="scope">
|
||||
<!-- <i class="action-item el-icon-copy-document" @click="onCopyField(scope.row)"></i>-->
|
||||
<i class="action-item el-icon-remove-outline" @click="onRemoveField(scope.row)"></i>
|
||||
<i class="action-item el-icon-circle-plus-outline" @click="onAddField(scope.row)"></i>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Name')" width="240px">
|
||||
<template slot-scope="scope">
|
||||
<el-input
|
||||
v-model="scope.row.name"
|
||||
:placeholder="$t('Name')"
|
||||
suffix-icon="el-icon-edit"
|
||||
@change="onChange(scope.row)"
|
||||
/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Value')" width="auto" min-width="120px">
|
||||
<template slot-scope="scope">
|
||||
<el-input
|
||||
v-model="scope.row.value"
|
||||
:placeholder="$t('Value')"
|
||||
suffix-icon="el-icon-edit"
|
||||
@change="onChange(scope.row)"
|
||||
/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</el-row>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import {
|
||||
mapState
|
||||
} from 'vuex'
|
||||
|
||||
export default {
|
||||
name: 'SettingFieldsTableView',
|
||||
props: {
|
||||
type: {
|
||||
type: String,
|
||||
default: 'list'
|
||||
},
|
||||
title: {
|
||||
type: String,
|
||||
default: ''
|
||||
},
|
||||
stageNames: {
|
||||
type: Array,
|
||||
default () {
|
||||
return []
|
||||
}
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
...mapState('spider', [
|
||||
'spiderForm'
|
||||
]),
|
||||
list () {
|
||||
const list = []
|
||||
for (let name in this.spiderForm.config.settings) {
|
||||
if (this.spiderForm.config.settings.hasOwnProperty(name)) {
|
||||
const value = this.spiderForm.config.settings[name]
|
||||
list.push({ name, value })
|
||||
}
|
||||
}
|
||||
return list
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
addField () {
|
||||
this.list.push({
|
||||
type: 'css',
|
||||
extract_type: 'text'
|
||||
})
|
||||
this.$st.sendEv('爬虫详情-配置', '添加字段')
|
||||
},
|
||||
deleteField (index) {
|
||||
this.list.splice(index, 1)
|
||||
this.$st.sendEv('爬虫详情-配置', '删除字段')
|
||||
},
|
||||
onChange (row) {
|
||||
if (this.list.filter(d => d.name === row.name).length > 1) {
|
||||
this.$message.error(this.$t(`Duplicated field names for ${row.name}`))
|
||||
}
|
||||
this.$store.commit('spider/SET_SPIDER_FORM_CONFIG_SETTINGS', this.list)
|
||||
this.$st.sendEv('爬虫详情-配置', '更改字段')
|
||||
},
|
||||
onCheck (row) {
|
||||
this.list.forEach(d => {
|
||||
if (row.name !== d.name) {
|
||||
this.$set(d, 'is_detail', false)
|
||||
}
|
||||
})
|
||||
this.$st.sendEv('爬虫详情-配置', '设置详情页URL')
|
||||
},
|
||||
onClickSelectorType (row, selectorType) {
|
||||
if (selectorType === 'css') {
|
||||
if (row.xpath) this.$set(row, 'xpath', '')
|
||||
if (!row.css) this.$set(row, 'css', 'body')
|
||||
} else {
|
||||
if (row.css) this.$set(row, 'css', '')
|
||||
if (!row.xpath) this.$set(row, 'xpath', '//body')
|
||||
}
|
||||
},
|
||||
onClickIsAttribute (row, isAttribute) {
|
||||
if (!isAttribute) {
|
||||
// 文本
|
||||
if (row.attr) this.$set(row, 'attr', '')
|
||||
} else {
|
||||
// 属性
|
||||
if (!row.attr) this.$set(row, 'attr', 'href')
|
||||
}
|
||||
},
|
||||
onRemoveField (row) {
|
||||
const list = JSON.parse(JSON.stringify(this.list))
|
||||
for (let i = 0; i < list.length; i++) {
|
||||
if (row.name === list[i].name) {
|
||||
list.splice(i, 1)
|
||||
}
|
||||
}
|
||||
if (list.length === 0) {
|
||||
list.push({
|
||||
name: `VARIABLE_NAME_${Math.floor(new Date().getTime())}`,
|
||||
value: `VARIABLE_VALUE_${Math.floor(new Date().getTime())}`
|
||||
})
|
||||
}
|
||||
this.$store.commit('spider/SET_SPIDER_FORM_CONFIG_SETTINGS', list)
|
||||
},
|
||||
onAddField (row) {
|
||||
const list = JSON.parse(JSON.stringify(this.list))
|
||||
for (let i = 0; i < list.length; i++) {
|
||||
if (row.name === list[i].name) {
|
||||
const name = 'VARIABLE_NAME_' + Math.floor(new Date().getTime())
|
||||
const value = 'VARIABLE_VALUE_' + Math.floor(new Date().getTime())
|
||||
list.push({ name, value })
|
||||
break
|
||||
}
|
||||
}
|
||||
this.$store.commit('spider/SET_SPIDER_FORM_CONFIG_SETTINGS', list)
|
||||
},
|
||||
getCellClassStyle ({ row, columnIndex }) {
|
||||
if (columnIndex === 1) {
|
||||
// 字段名称
|
||||
if (!row.name) {
|
||||
return {
|
||||
'border': '1px solid red'
|
||||
}
|
||||
}
|
||||
} else if (columnIndex === 3) {
|
||||
// 选择器
|
||||
if (!row.css && !row.xpath) {
|
||||
return {
|
||||
'border': '1px solid red'
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
onChangeNextStage (row) {
|
||||
this.list.forEach(f => {
|
||||
if (f.name !== row.name) {
|
||||
this.$set(f, 'next_stage', '')
|
||||
}
|
||||
})
|
||||
}
|
||||
},
|
||||
created () {
|
||||
if (this.list.length === 0) {
|
||||
this.$store.commit(
|
||||
'spider/SET_SPIDER_FORM_CONFIG_SETTING_ITEM',
|
||||
'VARIABLE_NAME_' + Math.floor(new Date().getTime()),
|
||||
'VARIABLE_VALUE_' + Math.floor(new Date().getTime())
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.el-table.edit >>> .el-table__body td {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-table__body td .cell {
|
||||
padding: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-input__inner:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-input__inner {
|
||||
height: 36px;
|
||||
border: none;
|
||||
border-radius: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-select .el-input .el-select__caret {
|
||||
line-height: 36px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .button-selector-item {
|
||||
cursor: pointer;
|
||||
margin: 0 5px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-tag.inactive {
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action {
|
||||
background: none !important;
|
||||
border: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr {
|
||||
border: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr th {
|
||||
border-right: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr td:nth-child(2) {
|
||||
border-left: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr td {
|
||||
border-right: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit::before {
|
||||
background: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action-item {
|
||||
font-size: 14px;
|
||||
margin-right: 5px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action-item:last-child {
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
.button-group-container {
|
||||
/*display: inline-block;*/
|
||||
/*width: 100%;*/
|
||||
}
|
||||
|
||||
.button-group-container .title {
|
||||
float: left;
|
||||
line-height: 32px;
|
||||
}
|
||||
|
||||
.button-group-container .button-group {
|
||||
float: right;
|
||||
}
|
||||
|
||||
.action-button-group {
|
||||
display: flex;
|
||||
margin-left: 10px;
|
||||
}
|
||||
|
||||
.action-button-group >>> .el-checkbox__label {
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-select.disabled .el-input__inner {
|
||||
color: lightgrey;
|
||||
}
|
||||
</style>
|
||||
@@ -125,6 +125,8 @@ export default {
|
||||
'Customized Spider': '自定义爬虫',
|
||||
'Configurable': '可配置',
|
||||
'Customized': '自定义',
|
||||
'configurable': '可配置',
|
||||
'customized': '自定义',
|
||||
'Text': '文本',
|
||||
'Attribute': '属性',
|
||||
'Field Name': '字段名称',
|
||||
@@ -148,6 +150,26 @@ export default {
|
||||
'List Page Fields': '列表页字段',
|
||||
'Detail Page Fields': '详情页字段',
|
||||
'Detail Page URL': '详情页URL',
|
||||
'All': '全部',
|
||||
'Stages': '阶段',
|
||||
'Process': '流程',
|
||||
'Stage Process': '流程图',
|
||||
'Stage Name': '阶段名称',
|
||||
'Start Stage': '开始阶段',
|
||||
'Engine': '引擎',
|
||||
'Selector Type': '选择器类别',
|
||||
'Selector': '选择器',
|
||||
'Is Attribute': '是否为属性',
|
||||
'Next Stage': '下一阶段',
|
||||
'No Next Stage': '没有下一阶段',
|
||||
'Fields': '字段',
|
||||
'Stage': '阶段',
|
||||
'Is List': '是否为列表',
|
||||
'List': '列表',
|
||||
'Pagination': '分页',
|
||||
'Settings': '设置',
|
||||
'Display Name': '显示名称',
|
||||
'Template': '模版',
|
||||
|
||||
// 爬虫列表
|
||||
'Name': '名称',
|
||||
@@ -171,6 +193,9 @@ export default {
|
||||
'Wait Duration (sec)': '等待时长(秒)',
|
||||
'Runtime Duration (sec)': '运行时长(秒)',
|
||||
'Total Duration (sec)': '总时长(秒)',
|
||||
'Run Type': '运行类型',
|
||||
'Random': '随机',
|
||||
'Selected Nodes': '指定节点',
|
||||
|
||||
// 任务列表
|
||||
'Node': '节点',
|
||||
|
||||
@@ -42,12 +42,6 @@ const actions = {
|
||||
.then(response => {
|
||||
commit('SET_FILE_CONTENT', response.data.data)
|
||||
})
|
||||
},
|
||||
saveFileContent ({ state, rootState }, payload) {
|
||||
const { path } = payload
|
||||
const spiderId = rootState.spider.spiderForm._id
|
||||
const content = state.fileContent
|
||||
return request.post(`/spiders/${spiderId}/file`, { content, path })
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
const state = {
|
||||
lang: window.localStorage.getItem('lang') || 'en'
|
||||
lang: window.localStorage.getItem('lang') || 'zh'
|
||||
}
|
||||
|
||||
const getters = {
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import Vue from 'vue'
|
||||
import request from '../../api/request'
|
||||
import axisModelCommonMixin from 'echarts/src/coord/axisModelCommonMixin'
|
||||
|
||||
const state = {
|
||||
// list of spiders
|
||||
@@ -34,7 +36,10 @@ const state = {
|
||||
filterSite: '',
|
||||
|
||||
// preview crawl data
|
||||
previewCrawlData: []
|
||||
previewCrawlData: [],
|
||||
|
||||
// template list
|
||||
templateList: []
|
||||
}
|
||||
|
||||
const getters = {}
|
||||
@@ -72,6 +77,16 @@ const mutations = {
|
||||
},
|
||||
SET_PREVIEW_CRAWL_DATA (state, value) {
|
||||
state.previewCrawlData = value
|
||||
},
|
||||
SET_SPIDER_FORM_CONFIG_SETTINGS (state, payload) {
|
||||
const settings = {}
|
||||
payload.forEach(row => {
|
||||
settings[row.name] = row.value
|
||||
})
|
||||
Vue.set(state.spiderForm.config, 'settings', settings)
|
||||
},
|
||||
SET_TEMPLATE_LIST (state, value) {
|
||||
state.templateList = value
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,10 +118,11 @@ const actions = {
|
||||
})
|
||||
},
|
||||
crawlSpider ({ state, dispatch }, payload) {
|
||||
const { id, nodeId, param } = payload
|
||||
const { spiderId, runType, nodeIds, param } = payload
|
||||
return request.put(`/tasks`, {
|
||||
spider_id: id,
|
||||
node_id: nodeId,
|
||||
spider_id: spiderId,
|
||||
run_type: runType,
|
||||
node_ids: nodeIds,
|
||||
param: param
|
||||
})
|
||||
},
|
||||
@@ -148,6 +164,20 @@ const actions = {
|
||||
},
|
||||
extractFields ({ state, commit }) {
|
||||
return request.post(`/spiders/${state.spiderForm._id}/extract_fields`)
|
||||
},
|
||||
postConfigSpiderConfig ({ state }) {
|
||||
return request.post(`/config_spiders/${state.spiderForm._id}/config`, state.spiderForm.config)
|
||||
},
|
||||
saveConfigSpiderSpiderfile ({ state, rootState }) {
|
||||
const content = rootState.file.fileContent
|
||||
return request.post(`/config_spiders/${state.spiderForm._id}/spiderfile`, { content })
|
||||
},
|
||||
addConfigSpider ({ state }) {
|
||||
return request.put(`/config_spiders`, state.spiderForm)
|
||||
},
|
||||
async getTemplateList ({ state, commit }) {
|
||||
const res = await request.get(`/config_spiders_templates`)
|
||||
commit('SET_TEMPLATE_LIST', res.data.data)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -274,7 +274,7 @@ export default {
|
||||
// 爬虫列表
|
||||
request.get('/spiders', {})
|
||||
.then(response => {
|
||||
this.spiderList = response.data.data.list
|
||||
this.spiderList = response.data.data.list || []
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
<el-tab-pane :label="$t('Overview')" name="overview">
|
||||
<spider-overview/>
|
||||
</el-tab-pane>
|
||||
<el-tab-pane v-if="isConfigurable" :label="$t('Config')" name="配置">
|
||||
<config-list/>
|
||||
<el-tab-pane v-if="isConfigurable" :label="$t('Config')" name="config">
|
||||
<config-list ref="config"/>
|
||||
</el-tab-pane>
|
||||
<el-tab-pane :label="$t('Files')" name="files">
|
||||
<file-list/>
|
||||
@@ -48,6 +48,13 @@ export default {
|
||||
FileList,
|
||||
SpiderOverview
|
||||
},
|
||||
watch: {
|
||||
activeTabName () {
|
||||
// 初始化文件
|
||||
this.$store.commit('file/SET_FILE_CONTENT', '')
|
||||
this.$store.commit('file/SET_CURRENT_PATH', '')
|
||||
}
|
||||
},
|
||||
data () {
|
||||
return {
|
||||
activeTabName: 'overview'
|
||||
@@ -77,6 +84,10 @@ export default {
|
||||
setTimeout(() => {
|
||||
this.$refs['spider-stats'].update()
|
||||
}, 0)
|
||||
} else if (this.activeTabName === 'config') {
|
||||
setTimeout(() => {
|
||||
this.$refs['config'].update()
|
||||
}, 0)
|
||||
}
|
||||
this.$st.sendEv('爬虫详情', '切换标签', tab.name)
|
||||
},
|
||||
@@ -85,19 +96,26 @@ export default {
|
||||
this.$st.sendEv('爬虫详情', '切换爬虫')
|
||||
}
|
||||
},
|
||||
created () {
|
||||
async created () {
|
||||
// get the list of the spiders
|
||||
// this.$store.dispatch('spider/getSpiderList')
|
||||
|
||||
// get spider basic info
|
||||
this.$store.dispatch('spider/getSpiderData', this.$route.params.id)
|
||||
.then(() => {
|
||||
// get spider file info
|
||||
this.$store.dispatch('file/getFileList', this.spiderForm.src)
|
||||
})
|
||||
await this.$store.dispatch('spider/getSpiderData', this.$route.params.id)
|
||||
|
||||
// get spider file info
|
||||
await this.$store.dispatch('file/getFileList', this.spiderForm.src)
|
||||
|
||||
// get spider tasks
|
||||
this.$store.dispatch('spider/getTaskList', this.$route.params.id)
|
||||
await this.$store.dispatch('spider/getTaskList', this.$route.params.id)
|
||||
|
||||
// get spider list
|
||||
await this.$store.dispatch('spider/getSpiderList')
|
||||
|
||||
// if spider is configurable spider, set to config tab by default
|
||||
if (this.spiderForm.type === 'configurable') {
|
||||
this.activeTabName = 'config'
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -33,18 +33,50 @@
|
||||
width="40%"
|
||||
:visible.sync="addDialogVisible"
|
||||
:before-close="onAddDialogClose">
|
||||
<div class="add-spider-wrapper">
|
||||
<div @click="onAddConfigurable">
|
||||
<el-card shadow="hover" class="add-spider-item success">
|
||||
{{$t('Configurable Spider')}}
|
||||
</el-card>
|
||||
</div>
|
||||
<div @click="onAddCustomized">
|
||||
<el-card shadow="hover" class="add-spider-item primary">
|
||||
{{$t('Customized Spider')}}
|
||||
</el-card>
|
||||
</div>
|
||||
</div>
|
||||
<el-tabs :active-name="spiderType">
|
||||
<el-tab-pane name="configurable" :label="$t('Configurable')">
|
||||
<el-form :model="spiderForm" ref="addConfigurableForm" inline-message label-width="120px">
|
||||
<el-form-item :label="$t('Spider Name')" prop="name" required>
|
||||
<el-input v-model="spiderForm.name" :placeholder="$t('Spider Name')"/>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Display Name')" prop="display_name" required>
|
||||
<el-input v-model="spiderForm.display_name" :placeholder="$t('Display Name')"/>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Template')" prop="template" required>
|
||||
<el-select v-model="spiderForm.template" :value="spiderForm.template" :placeholder="$t('Template')">
|
||||
<el-option
|
||||
v-for="template in templateList"
|
||||
:key="template"
|
||||
:label="template"
|
||||
:value="template"
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Results')" prop="col" required>
|
||||
<el-input v-model="spiderForm.col" :placeholder="$t('Results')"/>
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
<div class="actions">
|
||||
<el-button type="primary" @click="onAddConfigurable">{{$t('Add')}}</el-button>
|
||||
</div>
|
||||
</el-tab-pane>
|
||||
<el-tab-pane name="customized" :label="$t('Customized')">
|
||||
<el-form :model="spiderForm" ref="addCustomizedForm" inline-message>
|
||||
<el-form-item :label="$t('Upload Zip File')" label-width="120px" name="site">
|
||||
<el-upload
|
||||
:action="$request.baseUrl + '/spiders'"
|
||||
:headers="{Authorization:token}"
|
||||
:on-change="onUploadChange"
|
||||
:on-success="onUploadSuccess"
|
||||
:file-list="fileList">
|
||||
<el-button size="small" type="primary" icon="el-icon-upload">{{$t('Upload')}}</el-button>
|
||||
</el-upload>
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
<el-alert type="error" :title="$t('Please zip your spider files from the root directory')"
|
||||
:closable="false"></el-alert>
|
||||
</el-tab-pane>
|
||||
</el-tabs>
|
||||
</el-dialog>
|
||||
<!--./add dialog-->
|
||||
|
||||
@@ -81,19 +113,7 @@
|
||||
width="40%"
|
||||
:visible.sync="addCustomizedDialogVisible"
|
||||
:before-close="onAddCustomizedDialogClose">
|
||||
<el-form :model="spiderForm" ref="addConfigurableForm" inline-message>
|
||||
<el-form-item :label="$t('Upload Zip File')" label-width="120px" name="site">
|
||||
<el-upload
|
||||
:action="$request.baseUrl + '/spiders'"
|
||||
:headers="{Authorization:token}"
|
||||
:on-change="onUploadChange"
|
||||
:on-success="onUploadSuccess"
|
||||
:file-list="fileList">
|
||||
<el-button size="small" type="primary" icon="el-icon-upload">{{$t('Upload')}}</el-button>
|
||||
</el-upload>
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
<el-alert type="error" :title="$t('Please zip your spider files from the root directory')" :closable="false"></el-alert>
|
||||
|
||||
</el-dialog>
|
||||
<!--./customized spider dialog-->
|
||||
|
||||
@@ -110,17 +130,24 @@
|
||||
<div class="filter">
|
||||
<div class="left">
|
||||
<el-form :inline="true">
|
||||
<el-form-item>
|
||||
<el-select clearable @change="onSpiderTypeChange" placeholder="爬虫类型" size="small" v-model="filter.type">
|
||||
<el-option v-for="item in types" :value="item.type" :key="item.type"
|
||||
:label="item.type === 'customized'? '自定义':item.type "/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<!-- <el-form-item>-->
|
||||
<!-- <el-select clearable @change="onSpiderTypeChange" placeholder="爬虫类型" size="small" v-model="filter.type">-->
|
||||
<!-- <el-option v-for="item in types" :value="item.type" :key="item.type"-->
|
||||
<!-- :label="item.type === 'customized'? '自定义':item.type "/>-->
|
||||
<!-- </el-select>-->
|
||||
<!-- </el-form-item>-->
|
||||
<el-form-item>
|
||||
<el-input clearable @keyup.enter.native="onSearch" size="small" placeholder="名称" v-model="filter.keyword">
|
||||
<i slot="suffix" class="el-input__icon el-icon-search"></i>
|
||||
</el-input>
|
||||
</el-form-item>
|
||||
<el-form-item>
|
||||
<el-button size="small" type="success"
|
||||
class="btn refresh"
|
||||
@click="onRefresh">
|
||||
{{$t('Search')}}
|
||||
</el-button>
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
</div>
|
||||
<div class="right">
|
||||
@@ -133,16 +160,19 @@
|
||||
@click="onAdd">
|
||||
{{$t('Add Spider')}}
|
||||
</el-button>
|
||||
<el-button size="small" type="success"
|
||||
icon="el-icon-refresh"
|
||||
class="btn refresh"
|
||||
@click="onRefresh">
|
||||
{{$t('Refresh')}}
|
||||
</el-button>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<!--./filter-->
|
||||
|
||||
<!--tabs-->
|
||||
<el-tabs v-model="filter.type" @tab-click="onClickTab">
|
||||
<el-tab-pane :label="$t('All')" name="all"></el-tab-pane>
|
||||
<el-tab-pane :label="$t('Configurable')" name="configurable"></el-tab-pane>
|
||||
<el-tab-pane :label="$t('Customized')" name="customized"></el-tab-pane>
|
||||
</el-tabs>
|
||||
<!--./tabs-->
|
||||
|
||||
<!--table list-->
|
||||
<el-table :data="spiderList"
|
||||
class="table"
|
||||
@@ -157,7 +187,7 @@
|
||||
align="left"
|
||||
:width="col.width">
|
||||
<template slot-scope="scope">
|
||||
{{scope.row.type === 'customized' ? '自定义' : scope.row.type}}
|
||||
{{$t(scope.row.type)}}
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column v-else-if="col.name === 'last_5_errors'"
|
||||
@@ -210,16 +240,19 @@
|
||||
<el-table-column :label="$t('Action')" align="left" fixed="right">
|
||||
<template slot-scope="scope">
|
||||
<el-tooltip :content="$t('View')" placement="top">
|
||||
<el-button type="primary" icon="el-icon-search" size="mini" @click="onView(scope.row)"></el-button>
|
||||
<el-button type="primary" icon="el-icon-search" size="mini"
|
||||
@click="onView(scope.row, $event)"></el-button>
|
||||
</el-tooltip>
|
||||
<el-tooltip :content="$t('Remove')" placement="top">
|
||||
<el-button type="danger" icon="el-icon-delete" size="mini" @click="onRemove(scope.row)"></el-button>
|
||||
<el-button type="danger" icon="el-icon-delete" size="mini"
|
||||
@click="onRemove(scope.row, $event)"></el-button>
|
||||
</el-tooltip>
|
||||
<el-tooltip v-if="!isShowRun(scope.row)" :content="$t('No command line')" placement="top">
|
||||
<el-button disabled type="success" icon="fa fa-bug" size="mini" @click="onCrawl(scope.row)"></el-button>
|
||||
<el-button disabled type="success" icon="fa fa-bug" size="mini"
|
||||
@click="onCrawl(scope.row, $event)"></el-button>
|
||||
</el-tooltip>
|
||||
<el-tooltip v-else :content="$t('Run')" placement="top">
|
||||
<el-button type="success" icon="fa fa-bug" size="mini" @click="onCrawl(scope.row)"></el-button>
|
||||
<el-button type="success" icon="fa fa-bug" size="mini" @click="onCrawl(scope.row, $event)"></el-button>
|
||||
</el-tooltip>
|
||||
</template>
|
||||
</el-table-column>
|
||||
@@ -248,7 +281,7 @@ import {
|
||||
import dayjs from 'dayjs'
|
||||
import CrawlConfirmDialog from '../../components/Common/CrawlConfirmDialog'
|
||||
import StatusTag from '../../components/Status/StatusTag'
|
||||
import request from '../../api/request'
|
||||
|
||||
export default {
|
||||
name: 'SpiderList',
|
||||
components: {
|
||||
@@ -272,10 +305,9 @@ export default {
|
||||
activeSpiderId: undefined,
|
||||
filter: {
|
||||
keyword: '',
|
||||
type: ''
|
||||
type: 'all'
|
||||
},
|
||||
types: [],
|
||||
// tableData,
|
||||
columns: [
|
||||
{ name: 'display_name', label: 'Name', width: '160', align: 'left' },
|
||||
{ name: 'type', label: 'Spider Type', width: '120' },
|
||||
@@ -287,7 +319,8 @@ export default {
|
||||
spiderFormRules: {
|
||||
name: [{ required: true, message: 'Required Field', trigger: 'change' }]
|
||||
},
|
||||
fileList: []
|
||||
fileList: [],
|
||||
spiderType: 'configurable'
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
@@ -295,7 +328,8 @@ export default {
|
||||
'importForm',
|
||||
'spiderList',
|
||||
'spiderForm',
|
||||
'spiderTotal'
|
||||
'spiderTotal',
|
||||
'templateList'
|
||||
]),
|
||||
...mapGetters('user', [
|
||||
'token'
|
||||
@@ -318,14 +352,26 @@ export default {
|
||||
this.getList()
|
||||
},
|
||||
onAdd () {
|
||||
// this.addDialogVisible = true
|
||||
this.onAddCustomized()
|
||||
this.$store.commit('spider/SET_SPIDER_FORM', {
|
||||
template: this.templateList[0]
|
||||
})
|
||||
this.addDialogVisible = true
|
||||
},
|
||||
onAddConfigurable () {
|
||||
this.$store.commit('spider/SET_SPIDER_FORM', {})
|
||||
this.addDialogVisible = false
|
||||
this.addConfigurableDialogVisible = true
|
||||
this.$st.sendEv('爬虫', '添加爬虫-可配置爬虫')
|
||||
this.$refs['addConfigurableForm'].validate(async res => {
|
||||
if (!res) return
|
||||
|
||||
let res2
|
||||
try {
|
||||
res2 = await this.$store.dispatch('spider/addConfigSpider')
|
||||
} catch (e) {
|
||||
this.$message.error(this.$t('Something wrong happened'))
|
||||
return
|
||||
}
|
||||
await this.$store.dispatch('spider/getSpiderList')
|
||||
this.$router.push(`/spiders/${res2.data.data._id}`)
|
||||
this.$st.sendEv('爬虫', '添加爬虫-可配置爬虫')
|
||||
})
|
||||
},
|
||||
onAddCustomized () {
|
||||
this.addDialogVisible = false
|
||||
@@ -374,7 +420,8 @@ export default {
|
||||
this.$store.commit('spider/SET_SPIDER_FORM', row)
|
||||
this.dialogVisible = true
|
||||
},
|
||||
onRemove (row) {
|
||||
onRemove (row, ev) {
|
||||
ev.stopPropagation()
|
||||
this.$confirm(this.$t('Are you sure to delete this spider?'), this.$t('Notification'), {
|
||||
confirmButtonText: this.$t('Confirm'),
|
||||
cancelButtonText: this.$t('Cancel'),
|
||||
@@ -390,12 +437,14 @@ export default {
|
||||
this.$st.sendEv('爬虫', '删除')
|
||||
})
|
||||
},
|
||||
onCrawl (row) {
|
||||
onCrawl (row, ev) {
|
||||
ev.stopPropagation()
|
||||
this.crawlConfirmDialogVisible = true
|
||||
this.activeSpiderId = row._id
|
||||
this.$st.sendEv('爬虫', '点击运行')
|
||||
},
|
||||
onView (row) {
|
||||
onView (row, ev) {
|
||||
ev.stopPropagation()
|
||||
this.$router.push('/spiders/' + row._id)
|
||||
this.$st.sendEv('爬虫', '查看')
|
||||
},
|
||||
@@ -483,10 +532,12 @@ export default {
|
||||
if (!str || str.match('^0001')) return 'NA'
|
||||
return dayjs(str).format('YYYY-MM-DD HH:mm:ss')
|
||||
},
|
||||
onRowClick (row, event, column) {
|
||||
if (column.label !== this.$t('Action')) {
|
||||
this.onView(row)
|
||||
}
|
||||
onRowClick (row, column, event) {
|
||||
this.onView(row, event)
|
||||
},
|
||||
onClickTab (tab) {
|
||||
this.filter.type = tab.name
|
||||
this.getList()
|
||||
},
|
||||
getList () {
|
||||
let params = {
|
||||
@@ -496,19 +547,29 @@ export default {
|
||||
type: this.filter.type
|
||||
}
|
||||
this.$store.dispatch('spider/getSpiderList', params)
|
||||
},
|
||||
getTypes () {
|
||||
request.get(`/spider/types`).then(resp => {
|
||||
this.types = resp.data.data
|
||||
})
|
||||
}
|
||||
// getTypes () {
|
||||
// request.get(`/spider/types`).then(resp => {
|
||||
// this.types = resp.data.data
|
||||
// })
|
||||
// }
|
||||
},
|
||||
created () {
|
||||
this.getTypes()
|
||||
async created () {
|
||||
// fetch spider types
|
||||
// await this.getTypes()
|
||||
|
||||
// fetch spider list
|
||||
this.getList()
|
||||
await this.getList()
|
||||
|
||||
// fetch template list
|
||||
await this.$store.dispatch('spider/getTemplateList')
|
||||
},
|
||||
mounted () {
|
||||
console.log(this.spiderForm)
|
||||
const vm = this
|
||||
this.$nextTick(() => {
|
||||
vm.$store.commit('spider/SET_SPIDER_FORM', this.spiderForm)
|
||||
})
|
||||
}
|
||||
}
|
||||
</script>
|
||||
@@ -594,4 +655,8 @@ export default {
|
||||
.el-table >>> tr {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.actions {
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -125,7 +125,7 @@
|
||||
<el-button type="primary" icon="el-icon-search" size="mini" @click="onView(scope.row)"></el-button>
|
||||
</el-tooltip>
|
||||
<el-tooltip :content="$t('Remove')" placement="top">
|
||||
<el-button type="danger" icon="el-icon-delete" size="mini" @click="onRemove(scope.row)"></el-button>
|
||||
<el-button type="danger" icon="el-icon-delete" size="mini" @click="onRemove(scope.row, $event)"></el-button>
|
||||
</el-tooltip>
|
||||
</template>
|
||||
</el-table-column>
|
||||
@@ -250,7 +250,8 @@ export default {
|
||||
onSelectSpider () {
|
||||
this.$st.sendEv('任务', '选择爬虫')
|
||||
},
|
||||
onRemove (row) {
|
||||
onRemove (row, ev) {
|
||||
ev.stopPropagation()
|
||||
this.$confirm(this.$t('Are you sure to delete this task?'), this.$t('Notification'), {
|
||||
confirmButtonText: this.$t('Confirm'),
|
||||
cancelButtonText: this.$t('Cancel'),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
module.exports = {
|
||||
publicPath: process.env.BASE_URL || '/'
|
||||
// TODO: need to configure output static files with hash
|
||||
}
|
||||
|
||||
@@ -2954,9 +2954,10 @@ electron-to-chromium@^1.3.103:
|
||||
version "1.3.113"
|
||||
resolved "http://registry.npm.taobao.org/electron-to-chromium/download/electron-to-chromium-1.3.113.tgz#b1ccf619df7295aea17bc6951dc689632629e4a9"
|
||||
|
||||
element-ui@2.4.6:
|
||||
version "2.4.6"
|
||||
resolved "https://registry.yarnpkg.com/element-ui/-/element-ui-2.4.6.tgz#524d3d4cac0b68745dda87311ef0d8fe541b5fc4"
|
||||
element-ui@2.13.0:
|
||||
version "2.13.0"
|
||||
resolved "https://registry.npm.taobao.org/element-ui/download/element-ui-2.13.0.tgz?cache=0&other_urls=https%3A%2F%2Fregistry.npm.taobao.org%2Felement-ui%2Fdownload%2Felement-ui-2.13.0.tgz#f6bb04e5b0a76ea5f62466044b774407ba4ebd2d"
|
||||
integrity sha1-9rsE5bCnbqX2JGYES3dEB7pOvS0=
|
||||
dependencies:
|
||||
async-validator "~1.8.1"
|
||||
babel-helper-vue-jsx-merge-props "^2.0.0"
|
||||
|
||||
Reference in New Issue
Block a user