Merge remote-tracking branch 'upstream/develop' into develop

This commit is contained in:
陈景阳
2019-12-05 07:15:07 +08:00
41 changed files with 2260 additions and 519 deletions

View File

@@ -15,7 +15,7 @@ redis:
log:
level: info
path: "/var/logs/crawlab"
isDeletePeriodically: "Y"
isDeletePeriodically: "N"
deleteFrequency: "@hourly"
server:
host: 0.0.0.0

View File

@@ -3,15 +3,15 @@ package entity
import "strconv"
type Page struct {
Skip int
Limit int
PageNum int
Skip int
Limit int
PageNum int
PageSize int
}
func (p *Page)GetPage(pageNum string, pageSize string) {
func (p *Page) GetPage(pageNum string, pageSize string) {
p.PageNum, _ = strconv.Atoi(pageNum)
p.PageSize, _ = strconv.Atoi(pageSize)
p.Skip = p.PageSize * (p.PageNum - 1)
p.Limit = p.PageSize
}
}

View File

@@ -1,25 +1,30 @@
package entity
type ConfigSpiderData struct {
Version string `yaml:"version" json:"version"`
Engine string `yaml:"engine" json:"engine"`
StartUrl string `yaml:"start_url" json:"start_url"`
StartStage string `yaml:"start_stage" json:"start_stage"`
Stages map[string]Stage `yaml:"stages" json:"stages"`
Settings map[string]string `yaml:"settings" json:"settings"`
}
type Stage struct {
Name string `yaml:"name" json:"name"`
IsList bool `yaml:"is_list" json:"is_list"`
ListCss string `yaml:"list_css" json:"list_css"`
ListXpath string `yaml:"list_xpath" json:"list_xpath"`
PageCss string `yaml:"page_css" json:"page_css"`
PageXpath string `yaml:"page_xpath" json:"page_xpath"`
PageAttr string `yaml:"page_attr" json:"page_attr"`
Fields []Field `yaml:"fields" json:"fields"`
}
type Field struct {
Name string `yaml:"name" json:"name"`
Css string `yaml:"css" json:"css"`
Xpath string `yaml:"xpath" json:"xpath"`
Attr string `yaml:"attr" json:"attr"`
NextStage string `yaml:"next_stage" json:"next_stage"`
}
type Stage struct {
IsList bool `yaml:"is_list" json:"is_list"`
ListCss string `yaml:"list_css" json:"list_css"`
PageCss string `yaml:"page_css" json:"page_css"`
PageAttr string `yaml:"page_attr" json:"page_attr"`
Fields []Field `yaml:"fields" json:"fields"`
}
type ConfigSpiderData struct {
Version string `yaml:"version" json:"version"`
Engine string `yaml:"engine" json:"engine"`
StartUrl string `yaml:"start_url" json:"start_url"`
StartStage string `yaml:"start_stage" json:"start_stage"`
Stages map[string]Stage `yaml:"stages" json:"stages"`
Remark string `yaml:"remark" json:"remark"`
}

View File

@@ -47,6 +47,8 @@ func main() {
panic(err)
}
log.Info("初始化定期清理日志配置成功")
}else {
log.Info("默认未开启定期清理日志配置")
}
// 初始化Mongodb数据库
@@ -140,9 +142,13 @@ func main() {
authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据
authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型
// 可配置爬虫
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 改可配置爬虫
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 改可配置爬虫配置
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
authGroup.POST("/config_spiders/:id/spiderfile", routes.PostConfigSpiderSpiderfile) // 上传可配置爬虫
authGroup.GET("/config_spiders_templates", routes.GetConfigSpiderTemplateList) // 获取可配置爬虫模版列表
// 任务
authGroup.GET("/tasks", routes.GetTaskList) // 任务列表
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情

View File

@@ -42,12 +42,12 @@ func init() {
app.DELETE("/tasks/:id", DeleteTask) // 删除任务
app.GET("/tasks/:id/results", GetTaskResults) // 任务结果
app.GET("/tasks/:id/results/download", DownloadTaskResultsCsv) // 下载任务结果
app.GET("/spiders", GetSpiderList) // 爬虫列表
app.GET("/spiders/:id", GetSpider) // 爬虫详情
app.POST("/spiders/:id", PostSpider) // 修改爬虫
app.DELETE("/spiders/:id",DeleteSpider) // 删除爬虫
app.GET("/spiders/:id/tasks",GetSpiderTasks) // 爬虫任务列表
app.GET("/spiders/:id/dir",GetSpiderDir) // 爬虫目录
app.GET("/spiders", GetSpiderList) // 爬虫列表
app.GET("/spiders/:id", GetSpider) // 爬虫详情
app.POST("/spiders/:id", PostSpider) // 修改爬虫
app.DELETE("/spiders/:id", DeleteSpider) // 删除爬虫
app.GET("/spiders/:id/tasks", GetSpiderTasks) // 爬虫任务列表
app.GET("/spiders/:id/dir", GetSpiderDir) // 爬虫目录
}
//mock test, test data in ./mock

View File

@@ -6,8 +6,6 @@ import (
"net/http"
)
var taskDailyItems = []model.TaskDailyItem{
{
Date: "2019/08/19",

View File

@@ -1 +1 @@
package mock
package mock

View File

@@ -1 +1 @@
package mock
package mock

View File

@@ -131,12 +131,7 @@ func (g ScrapyGenerator) GetNonListParserString(stageName string, stage entity.S
// 遍历字段列表
for _, f := range stage.Fields {
line := ""
if f.Attr == "" {
line += fmt.Sprintf(`item['%s'] = response.css('%s::text').extract_first()`, f.Name, f.Css)
} else {
line += fmt.Sprintf(`item['%s'] = response.css('%s::attr("%s")').extract_first()`, f.Name, f.Css, f.Attr)
}
line := fmt.Sprintf(`item['%s'] = response.%s.extract_first()`, f.Name, g.GetExtractStringFromField(f))
line = g.PadCode(line, 2)
str += line
}
@@ -163,19 +158,14 @@ func (g ScrapyGenerator) GetListParserString(stageName string, stage entity.Stag
str += g.PadCode(`prev_item = response.meta.get('item')`, 2)
// for 循环遍历列表
str += g.PadCode(fmt.Sprintf(`for elem in response.css('%s'):`, stage.ListCss), 2)
str += g.PadCode(fmt.Sprintf(`for elem in response.%s:`, g.GetListString(stage)), 2)
// 构造item
str += g.PadCode(`item = Item()`, 3)
// 遍历字段列表
for _, f := range stage.Fields {
line := ""
if f.Attr == "" {
line += fmt.Sprintf(`item['%s'] = elem.css('%s::text').extract_first()`, f.Name, f.Css)
} else {
line += fmt.Sprintf(`item['%s'] = elem.css('%s::attr("%s")').extract_first()`, f.Name, f.Css, f.Attr)
}
line := fmt.Sprintf(`item['%s'] = elem.%s.extract_first()`, f.Name, g.GetExtractStringFromField(f))
line = g.PadCode(line, 3)
str += line
}
@@ -195,15 +185,9 @@ func (g ScrapyGenerator) GetListParserString(stageName string, stage entity.Stag
}
// 分页
if stage.PageCss != "" {
// 分页元素属性,默认为 href
pageAttr := "href"
if stage.PageAttr != "" {
pageAttr = stage.PageAttr
}
str += g.PadCode(fmt.Sprintf(`next_url = response.css('%s::attr("%s")').extract_first()`, stage.PageCss, pageAttr), 2)
str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_%s, meta={'item': item})`, stageName), 2)
if stage.PageCss != "" || stage.PageXpath != "" {
str += g.PadCode(fmt.Sprintf(`next_url = response.%s.extract_first()`, g.GetExtractStringFromStage(stage)), 2)
str += g.PadCode(fmt.Sprintf(`yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_%s, meta={'item': prev_item})`, stageName), 2)
}
// 加入末尾换行
@@ -226,3 +210,49 @@ func (g ScrapyGenerator) GetNextStageField(stage entity.Stage) (entity.Field, er
}
return entity.Field{}, errors.New("cannot find next stage field")
}
// GetExtractStringFromField builds the Scrapy selector call used to extract a
// single field. A non-empty Css takes precedence over Xpath. Without an Attr
// the element text is selected; with an Attr that attribute is selected.
func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string {
	useCss := f.Css != ""
	wantsAttr := f.Attr != ""

	switch {
	case useCss && wantsAttr:
		// CSS selector, attribute value
		return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr)
	case useCss:
		// CSS selector, text content
		return fmt.Sprintf(`css('%s::text')`, f.Css)
	case wantsAttr:
		// XPath selector, attribute value
		return fmt.Sprintf(`xpath('%s/@%s')`, f.Xpath, f.Attr)
	default:
		// XPath selector, text content
		return fmt.Sprintf(`xpath('string(%s)')`, f.Xpath)
	}
}
// GetExtractStringFromStage builds the Scrapy selector call that extracts the
// pagination link of a stage. The attribute read from the pagination element
// is stage.PageAttr, falling back to "href" when it is empty. A non-empty
// PageCss takes precedence over PageXpath.
func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string {
	attr := stage.PageAttr
	if attr == "" {
		// Default attribute of the pagination element
		attr = "href"
	}

	if stage.PageCss == "" {
		// XPath selector
		return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, attr)
	}

	// CSS selector
	return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, attr)
}
// GetListString builds the Scrapy selector call that yields the list elements
// of a stage. A non-empty ListCss takes precedence over ListXpath.
func (g ScrapyGenerator) GetListString(stage entity.Stage) string {
	if stage.ListCss == "" {
		return fmt.Sprintf(`xpath('%s')`, stage.ListXpath)
	}
	return fmt.Sprintf(`css('%s')`, stage.ListCss)
}

View File

@@ -1,11 +1,17 @@
package model
import (
"crawlab/constants"
"crawlab/database"
"crawlab/entity"
"crawlab/utils"
"errors"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"gopkg.in/yaml.v2"
"io/ioutil"
"path/filepath"
"runtime/debug"
"time"
)
@@ -25,14 +31,18 @@ type Spider struct {
Site string `json:"site" bson:"site"` // 爬虫网站
Envs []Env `json:"envs" bson:"envs"` // 环境变量
Remark string `json:"remark" bson:"remark"` // 备注
Src string `json:"src" bson:"src"` // 源码位置
// 自定义爬虫
Src string `json:"src" bson:"src"` // 源码位置
Cmd string `json:"cmd" bson:"cmd"` // 执行命令
// 可配置爬虫
Template string `json:"template" bson:"template"` // Spiderfile模版
// 前端展示
LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间
LastStatus string `json:"last_status"` // 最后执行状态
LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间
LastStatus string `json:"last_status"` // 最后执行状态
Config entity.ConfigSpiderData `json:"config"` // 可配置爬虫配置
// 时间
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
@@ -108,6 +118,10 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, erro
return spiders, 0, err
}
if spiders == nil {
spiders = []Spider{}
}
// 遍历爬虫列表
for i, spider := range spiders {
// 获取最后一次任务
@@ -161,15 +175,25 @@ func GetSpider(id bson.ObjectId) (Spider, error) {
s, c := database.GetCol("spiders")
defer s.Close()
var result Spider
if err := c.FindId(id).One(&result); err != nil {
// 获取爬虫
var spider Spider
if err := c.FindId(id).One(&spider); err != nil {
if err != mgo.ErrNotFound {
log.Errorf("get spider error: %s, id: %id", err.Error(), id.Hex())
debug.PrintStack()
}
return result, err
return spider, err
}
return result, nil
// 如果为可配置爬虫,获取爬虫配置
if spider.Type == constants.Configurable && utils.Exists(filepath.Join(spider.Src, "Spiderfile")) {
config, err := GetConfigSpiderData(spider)
if err != nil {
return spider, err
}
spider.Config = config
}
return spider, nil
}
// 更新爬虫
@@ -209,10 +233,12 @@ func RemoveSpider(id bson.ObjectId) error {
s, gf := database.GetGridFs("files")
defer s.Close()
if err := gf.RemoveId(result.FileId); err != nil {
log.Error("remove file error, id:" + result.FileId.Hex())
debug.PrintStack()
return err
if result.FileId.Hex() != constants.ObjectIdNull {
if err := gf.RemoveId(result.FileId); err != nil {
log.Error("remove file error, id:" + result.FileId.Hex())
debug.PrintStack()
return err
}
}
return nil
@@ -269,3 +295,35 @@ func GetSpiderTypes() ([]*entity.SpiderType, error) {
return types, nil
}
// GetConfigSpiderData reads the "Spiderfile" located in spider.Src and decodes
// its YAML content into a ConfigSpiderData value.
//
// It returns an error when the spider is not of the configurable type, when
// the file cannot be read, or when the YAML cannot be decoded. On success each
// stage's Name is set to the key it is stored under in the Stages map.
func GetConfigSpiderData(spider Spider) (entity.ConfigSpiderData, error) {
	var data entity.ConfigSpiderData

	// Only configurable spiders carry a Spiderfile
	if spider.Type != constants.Configurable {
		return data, errors.New("not a configurable spider")
	}

	// Read the raw YAML
	raw, err := ioutil.ReadFile(filepath.Join(spider.Src, "Spiderfile"))
	if err != nil {
		return data, err
	}

	// Decode it
	if err = yaml.Unmarshal(raw, &data); err != nil {
		return data, err
	}

	// Map values are copies, so each updated stage has to be stored back
	for name := range data.Stages {
		st := data.Stages[name]
		st.Name = name
		data.Stages[name] = st
	}

	return data, nil
}

View File

@@ -2,16 +2,13 @@ package routes
import (
"crawlab/constants"
"crawlab/database"
"crawlab/entity"
"crawlab/model"
"crawlab/services"
"crawlab/utils"
"fmt"
"github.com/apex/log"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo/bson"
uuid "github.com/satori/go.uuid"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
"io"
@@ -19,7 +16,7 @@ import (
"net/http"
"os"
"path/filepath"
"runtime/debug"
"strings"
)
// 添加可配置爬虫
@@ -36,6 +33,12 @@ func PutConfigSpider(c *gin.Context) {
return
}
// 模版名不能为空
if spider.Template == "" {
HandleErrorF(http.StatusBadRequest, c, "spider template should not be empty")
return
}
// 判断爬虫是否存在
if spider := model.GetSpiderByName(spider.Name); spider != nil {
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("spider for '%s' already exists", spider.Name))
@@ -62,6 +65,23 @@ func PutConfigSpider(c *gin.Context) {
}
spider.Src = spiderDir
// 复制Spiderfile模版
contentByte, err := ioutil.ReadFile("./template/spiderfile/Spiderfile." + spider.Template)
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
f, err := os.Create(filepath.Join(spider.Src, "Spiderfile"))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
defer f.Close()
if _, err := f.Write(contentByte); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 添加爬虫到数据库
if err := spider.Add(); err != nil {
HandleError(http.StatusInternalServerError, c, err)
@@ -100,8 +120,8 @@ func UploadConfigSpider(c *gin.Context) {
// 文件名称必须为Spiderfile
filename := header.Filename
if filename != "Spiderfile" {
HandleErrorF(http.StatusBadRequest, c, "filename must be 'Spiderfile'")
if filename != "Spiderfile" && filename != "Spiderfile.yaml" && filename != "Spiderfile.yml" {
HandleErrorF(http.StatusBadRequest, c, "filename must be 'Spiderfile(.yaml|.yml)'")
return
}
@@ -151,88 +171,146 @@ func UploadConfigSpider(c *gin.Context) {
return
}
// 删除已有的爬虫文件
for _, fInfo := range utils.ListDir(spiderDir) {
// 不删除Spiderfile
if fInfo.Name() == filename {
continue
}
// 删除其他文件
if err := os.RemoveAll(filepath.Join(spiderDir, fInfo.Name())); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
}
// 拷贝爬虫文件
tplDir := "./template/scrapy"
for _, fInfo := range utils.ListDir(tplDir) {
// 跳过Spiderfile
if fInfo.Name() == "Spiderfile" {
continue
}
srcPath := filepath.Join(tplDir, fInfo.Name())
if fInfo.IsDir() {
dirPath := filepath.Join(spiderDir, fInfo.Name())
if err := utils.CopyDir(srcPath, dirPath); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
} else {
if err := utils.CopyFile(srcPath, filepath.Join(spiderDir, fInfo.Name())); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
}
}
// 更改爬虫文件
if err := services.GenerateConfigSpiderFiles(spider, configData); err != nil {
// 根据序列化后的数据处理爬虫文件
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 打包为 zip 文件
files, err := utils.GetFilesFromDir(spiderDir)
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
randomId := uuid.NewV4()
tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), spider.Name+"."+randomId.String()+".zip")
spiderZipFileName := spider.Name + ".zip"
if err := utils.Compress(files, tmpFilePath); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// 获取 GridFS 实例
s, gf := database.GetGridFs("files")
defer s.Close()
// 判断文件是否已经存在
var gfFile model.GridFs
if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil {
// 已经存在文件,则删除
_ = gf.RemoveId(gfFile.Id)
}
// 上传到GridFs
fid, err := services.UploadToGridFs(spiderZipFileName, tmpFilePath)
if err != nil {
log.Errorf("upload to grid fs error: %s", err.Error())
debug.PrintStack()
return
}
// 保存爬虫 FileId
spider.FileId = fid
_ = spider.Save()
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
})
}
// PostConfigSpiderSpiderfile overwrites the Spiderfile of the configurable
// spider identified by the ":id" path parameter with the raw YAML passed in
// the JSON field "content", then regenerates the spider files from it.
//
// The content is decoded and validated before anything is written to disk.
func PostConfigSpiderSpiderfile(c *gin.Context) {
	type Body struct {
		Content string `json:"content"`
	}

	id := c.Param("id")

	// bson.ObjectIdHex panics on a malformed id, so reject it up front
	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	// File content
	var reqBody Body
	if err := c.ShouldBindJSON(&reqBody); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}
	content := reqBody.Content

	// Fetch the spider
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
		return
	}

	// Decode the YAML
	var configData entity.ConfigSpiderData
	if err := yaml.Unmarshal([]byte(content), &configData); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}

	// Validate the decoded config
	if err := services.ValidateSpiderfile(configData); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// Write the Spiderfile exactly as submitted
	if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), []byte(content), os.ModePerm); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// Regenerate the spider files from the decoded config
	if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}
// PostConfigSpiderConfig updates the configuration of the configurable spider
// identified by the ":id" path parameter from a JSON-encoded ConfigSpiderData
// request body. After validation the Spiderfile is rewritten from the config
// and the spider files are regenerated.
func PostConfigSpiderConfig(c *gin.Context) {
	id := c.Param("id")

	// bson.ObjectIdHex panics on a malformed id, so reject it up front
	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	// Fetch the spider
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
		return
	}

	// Decode the config data
	var configData entity.ConfigSpiderData
	if err := c.ShouldBindJSON(&configData); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}

	// Validate the config data
	if err := services.ValidateSpiderfile(configData); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// Replace the Spiderfile
	if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// Regenerate the spider files from the config data
	if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}
// GetConfigSpiderConfig responds with the Config of the spider identified by
// the ":id" path parameter.
func GetConfigSpiderConfig(c *gin.Context) {
	id := c.Param("id")

	// Validate the id. The return is required: without it the handler would
	// go on to call bson.ObjectIdHex, which panics on a malformed id, after
	// the error response has already been written.
	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, "invalid id")
		return
	}

	// Fetch the spider
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data:    spider.Config,
	})
}
// GetConfigSpiderTemplateList responds with the names of the available
// Spiderfile templates. A template name is the file name of an entry in
// ./template/spiderfile with its leading "Spiderfile." removed.
func GetConfigSpiderTemplateList(c *gin.Context) {
	// Start from an empty (non-nil) slice so that "no templates" is encoded
	// as a JSON array rather than null.
	data := []string{}
	for _, fInfo := range utils.ListDir("./template/spiderfile") {
		// Strip only the prefix: PutConfigSpider rebuilds the file name as
		// "Spiderfile." + template, so the rest of the name must stay intact.
		data = append(data, strings.TrimPrefix(fInfo.Name(), "Spiderfile."))
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data:    data,
	})
}

View File

@@ -34,7 +34,7 @@ func GetSpiderList(c *gin.Context) {
"name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}},
}
if t != "" {
if t != "" && t != "all" {
filter["type"] = t
}

View File

@@ -2,11 +2,20 @@ package services
import (
"crawlab/constants"
"crawlab/database"
"crawlab/entity"
"crawlab/model"
"crawlab/model/config_spider"
"crawlab/utils"
"errors"
"fmt"
"github.com/apex/log"
"github.com/globalsign/mgo/bson"
uuid "github.com/satori/go.uuid"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
"os"
"path/filepath"
"strings"
)
@@ -37,12 +46,17 @@ func ValidateSpiderfile(configData entity.ConfigSpiderData) error {
// 校验是否存在 start_url
if configData.StartUrl == "" {
return errors.New("spiderfile start_url is empty")
return errors.New("spiderfile invalid: start_url is empty")
}
// 校验是否存在 start_stage
if configData.StartStage == "" {
return errors.New("spiderfile invalid: start_stage is empty")
}
// 校验是否存在 stages
if len(configData.Stages) == 0 {
return errors.New("spiderfile stages is empty")
return errors.New("spiderfile invalid: stages is empty")
}
// 校验stages
@@ -50,56 +64,74 @@ func ValidateSpiderfile(configData entity.ConfigSpiderData) error {
for stageName, stage := range configData.Stages {
// stage 名称不能为空
if stageName == "" {
return errors.New("spiderfile stage name is empty")
return errors.New("spiderfile invalid: stage name is empty")
}
// stage 名称不能为保留字符串
// NOTE: 如果有其他Engine可以扩展默认为Scrapy
if configData.Engine == "" || configData.Engine == constants.EngineScrapy {
if strings.Contains(constants.ScrapyProtectedStageNames, stageName) {
return errors.New(fmt.Sprintf("spiderfile stage name '%s' is protected", stageName))
return errors.New(fmt.Sprintf("spiderfile invalid: stage name '%s' is protected", stageName))
}
} else if configData.Engine == constants.EngineColly {
return errors.New(fmt.Sprintf("engine '%s' is not implemented", stageName))
} else {
return errors.New(fmt.Sprintf("spiderfile invalid: engine '%s' is not implemented", configData.Engine))
}
// stage 名称不能重复
if dict[stageName] == 1 {
return errors.New("spiderfile stage name should be unique")
return errors.New(fmt.Sprintf("spiderfile invalid: stage name '%s' is duplicated", stageName))
}
dict[stageName] = 1
// stage 字段不能为空
if len(stage.Fields) == 0 {
return errors.New(fmt.Sprintf("spiderfile stage '%s' has no fields", stageName))
return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has no fields", stageName))
}
// stage 的下一个 stage 只能有一个
// 是否包含 next_stage
hasNextStage := false
// 遍历字段列表
for _, field := range stage.Fields {
// stage 的 next stage 只能有一个
if field.NextStage != "" {
if hasNextStage {
return errors.New("spiderfile stage fields should have only 1 next_stage")
return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has more than 1 next_stage", stageName))
}
hasNextStage = true
}
// 字段里 css 和 xpath 只能包含一个
if field.Css != "" && field.Xpath != "" {
return errors.New(fmt.Sprintf("spiderfile invalid: field '%s' in stage '%s' has both css and xpath set which is prohibited", field.Name, stageName))
}
}
// stage 里 page_css 和 page_xpath 只能包含一个
if stage.PageCss != "" && stage.PageXpath != "" {
return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has both page_css and page_xpath set which is prohibited", stageName))
}
// stage 里 list_css 和 list_xpath 只能包含一个
if stage.ListCss != "" && stage.ListXpath != "" {
return errors.New(fmt.Sprintf("spiderfile invalid: stage '%s' has both list_css and list_xpath set which is prohibited", stageName))
}
// 如果 stage 的 is_list 为 true 但 list_css 为空,报错
if stage.IsList && stage.ListCss == "" {
return errors.New("spiderfile stage with is_list = true should have list_css being set")
if stage.IsList && (stage.ListCss == "" && stage.ListXpath == "") {
return errors.New("spiderfile invalid: stage with is_list = true should have either list_css or list_xpath being set")
}
}
// 校验字段唯一性
if !IsUniqueConfigSpiderFields(fields) {
return errors.New("spiderfile fields not unique")
return errors.New("spiderfile invalid: fields not unique")
}
// 字段名称不能为保留字符串
for _, field := range fields {
if strings.Contains(constants.ScrapyProtectedFieldNames, field.Name) {
return errors.New(fmt.Sprintf("spiderfile field name '%s' is protected", field.Name))
return errors.New(fmt.Sprintf("spiderfile invalid: field name '%s' is protected", field.Name))
}
}
@@ -116,3 +148,118 @@ func IsUniqueConfigSpiderFields(fields []entity.Field) bool {
}
return true
}
// ProcessSpiderFilesFromConfigData rebuilds the files of a configurable spider
// from configData and publishes them to GridFS.
//
// Steps, in order:
//  1. set each stage's Name to its key in configData.Stages;
//  2. remove everything in spider.Src except the Spiderfile;
//  3. copy the contents of ./template/scrapy (except any Spiderfile) into
//     spider.Src;
//  4. generate the spider code via GenerateConfigSpiderFiles;
//  5. zip spider.Src into a uniquely named file under "other.tmppath";
//  6. replace any GridFS file named "<spider.Name>.zip" with the new archive;
//  7. store the new file id on the spider and save it.
//
// The first error encountered is returned, including a failure to save the
// spider in step 7.
func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
	spiderDir := spider.Src

	// Assign stage names. Map values are copies, so store each one back.
	for stageName, stage := range configData.Stages {
		stage.Name = stageName
		configData.Stages[stageName] = stage
	}

	// Remove the existing spider files
	for _, fInfo := range utils.ListDir(spiderDir) {
		// Keep the Spiderfile
		if fInfo.Name() == "Spiderfile" {
			continue
		}

		// Remove everything else
		if err := os.RemoveAll(filepath.Join(spiderDir, fInfo.Name())); err != nil {
			return err
		}
	}

	// Copy the template files
	tplDir := "./template/scrapy"
	for _, fInfo := range utils.ListDir(tplDir) {
		// Skip the Spiderfile
		if fInfo.Name() == "Spiderfile" {
			continue
		}

		srcPath := filepath.Join(tplDir, fInfo.Name())
		dstPath := filepath.Join(spiderDir, fInfo.Name())
		if fInfo.IsDir() {
			if err := utils.CopyDir(srcPath, dstPath); err != nil {
				return err
			}
		} else {
			if err := utils.CopyFile(srcPath, dstPath); err != nil {
				return err
			}
		}
	}

	// Generate the spider code
	if err := GenerateConfigSpiderFiles(spider, configData); err != nil {
		return err
	}

	// Pack the spider directory into a zip file
	files, err := utils.GetFilesFromDir(spiderDir)
	if err != nil {
		return err
	}
	randomId := uuid.NewV4()
	tmpFilePath := filepath.Join(viper.GetString("other.tmppath"), spider.Name+"."+randomId.String()+".zip")
	spiderZipFileName := spider.Name + ".zip"
	if err := utils.Compress(files, tmpFilePath); err != nil {
		return err
	}

	// Get a GridFS handle
	s, gf := database.GetGridFs("files")
	defer s.Close()

	// If an archive with the same name already exists, remove it.
	// Best effort: a failed removal does not block the new upload.
	var gfFile model.GridFs
	if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil {
		_ = gf.RemoveId(gfFile.Id)
	}

	// Upload to GridFS
	fid, err := UploadToGridFs(spiderZipFileName, tmpFilePath)
	if err != nil {
		log.Errorf("upload to grid fs error: %s", err.Error())
		return err
	}

	// Persist the new file id. The error is returned because a spider whose
	// FileId was not saved still points at the archive removed above.
	spider.FileId = fid
	if err := spider.Save(); err != nil {
		return err
	}

	return nil
}
// GenerateSpiderfileFromConfigData serializes configData to YAML and writes it
// to the "Spiderfile" in spider.Src, creating the file if it does not exist
// and replacing its content if it does.
//
// It returns any error from marshalling, opening, writing or closing the file.
func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
	// Spiderfile path
	sfPath := filepath.Join(spider.Src, "Spiderfile")

	// Build the YAML content
	sfContentByte, err := yaml.Marshal(configData)
	if err != nil {
		return err
	}

	// Open for writing in every case. O_CREATE alone implies O_RDONLY, which
	// would make the Write below fail for a newly created file.
	f, err := os.OpenFile(sfPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)
	if err != nil {
		return err
	}

	// Write the content
	if _, err := f.Write(sfContentByte); err != nil {
		_ = f.Close() // the write error is the one worth reporting
		return err
	}

	// A failed Close can mean the data never reached the disk
	return f.Close()
}

View File

@@ -116,12 +116,15 @@ func PublishAllSpiders() {
// 发布爬虫
func PublishSpider(spider model.Spider) {
// 查询gf file不存在则标记为爬虫文件不存在
gfFile := model.GetGridFs(spider.FileId)
if gfFile == nil {
spider.FileId = constants.ObjectIdNull
_ = spider.Save()
return
var gfFile *model.GridFs
if spider.FileId.Hex() != constants.ObjectIdNull {
// 查询gf file不存在则标记为爬虫文件不存在
gfFile = model.GetGridFs(spider.FileId)
if gfFile == nil {
spider.FileId = constants.ObjectIdNull
_ = spider.Save()
return
}
}
// 如果FileId为空表示还没有上传爬虫到GridFS则跳过

View File

@@ -10,6 +10,7 @@ import (
"github.com/spf13/viper"
"io"
"os"
"os/exec"
"path/filepath"
"runtime/debug"
)
@@ -99,7 +100,6 @@ func (s *SpiderSync) Download() {
// 创建临时文件
tmpFilePath := filepath.Join(tmpPath, randomId.String()+".zip")
tmpFile := utils.OpenFile(tmpFilePath)
defer utils.Close(tmpFile)
// 将该文件写入临时文件
if _, err := io.Copy(tmpFile, f); err != nil {
@@ -119,6 +119,15 @@ func (s *SpiderSync) Download() {
return
}
//递归修改目标文件夹权限
// 解决scrapy.setting中开启LOG_ENABLED 和 LOG_FILE时不能创建log文件的问题
cmd := exec.Command("chmod", "-R", "777", dstPath)
if err := cmd.Run(); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return
}
// 关闭临时文件
if err := tmpFile.Close(); err != nil {
log.Errorf(err.Error())

View File

@@ -226,12 +226,18 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
// 环境变量配置
envs := s.Envs
if s.Type == constants.Configurable {
// 数据库配置
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_HOST", Value: viper.GetString("mongo.host")})
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PORT", Value: viper.GetString("mongo.port")})
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_DB", Value: viper.GetString("mongo.db")})
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_USERNAME", Value: viper.GetString("mongo.username")})
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PASSWORD", Value: viper.GetString("mongo.password")})
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_AUTHSOURCE", Value: viper.GetString("mongo.authSource")})
// 设置配置
for envName, envValue := range s.Config.Settings {
envs = append(envs, model.Env{Name: "CRAWLAB_SETTING_" + envName, Value: envValue})
}
}
cmd = SetEnv(cmd, envs, t.Id, s.Col)
@@ -311,9 +317,12 @@ func SaveTaskResultCount(id string) func() {
// 执行任务
func ExecuteTask(id int) {
if flag, _ := LockList.Load(id); flag.(bool) {
log.Debugf(GetWorkerPrefix(id) + "正在执行任务...")
return
if flag, ok := LockList.Load(id); ok {
if flag.(bool) {
log.Debugf(GetWorkerPrefix(id) + "正在执行任务...")
return
}
}
// 上锁
@@ -477,6 +486,29 @@ func GetTaskLog(id string) (logStr string, err error) {
}
if IsMasterNode(task.NodeId.Hex()) {
if !utils.Exists(task.LogPath) {
fileDir, err := MakeLogDir(task)
if err != nil {
log.Errorf(err.Error())
}
fileP := GetLogFilePaths(fileDir)
// 获取日志文件路径
fLog, err := os.Create(fileP)
defer fLog.Close()
if err != nil {
log.Errorf("create task log file error: %s", fileP)
debug.PrintStack()
}
task.LogPath = fileP
if err := task.Save(); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
}
}
// 若为主节点,获取本机日志
logBytes, err := model.GetLocalLog(task.LogPath)
if err != nil {

View File

@@ -1,4 +1,7 @@
# -*- coding: utf-8 -*-
import os
import re
import json
# Scrapy settings for config_spider project
#
@@ -9,14 +12,14 @@
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'config_spider'
BOT_NAME = 'Crawlab Configurable Spider'
SPIDER_MODULES = ['config_spider.spiders']
NEWSPIDER_MODULE = 'config_spider.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'config_spider (+http://www.yourdomain.com)'
USER_AGENT = 'Crawlab Spider'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
@@ -88,3 +91,21 @@ ITEM_PIPELINES = {
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
def parse_crawlab_setting(raw_value):
    """Convert the string value of a CRAWLAB_SETTING_* variable to a Python value.

    'true' / 'false' (any case) become booleans, an optionally signed run of
    digits becomes an int, and a value wrapped in {...} or [...] is decoded as
    JSON. Anything else, including a bracketed value that is not valid JSON,
    is returned unchanged as a string.
    """
    lowered = raw_value.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    if re.search(r'^-?\d+$', raw_value) is not None:
        return int(raw_value)
    stripped = raw_value.strip()
    if re.search(r'^\{.*\}$', stripped) is not None or re.search(r'^\[.*\]$', stripped) is not None:
        try:
            return json.loads(raw_value)
        except ValueError:
            # Looks like JSON but is not (e.g. "[bot]"): keep the raw string
            # rather than aborting the import of the settings module.
            return raw_value
    return raw_value


# Override settings from environment variables named CRAWLAB_SETTING_<NAME>.
# Values are assigned into the module namespace under <NAME>.
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]:
    setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '')
    globals()[setting_name] = parse_crawlab_setting(os.environ.get(setting_env_name))

View File

@@ -0,0 +1,20 @@
version: "0.4.0"
name: "toscrapy_books"
start_url: "http://news.163.com/special/0001386F/rank_news.html"
start_stage: "list"
engine: "scrapy"
stages:
list:
is_list: true
list_css: "table tr:not(:first-child)"
fields:
- name: "title"
css: "td:nth-child(1) > a"
- name: "url"
css: "td:nth-child(1) > a"
attr: "href"
- name: "clicks"
css: "td.cBlue"
settings:
ROBOTSTXT_OBEY: false
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36

View File

@@ -0,0 +1,22 @@
version: 0.4.0
name: toscrapy_books
start_url: http://www.baidu.com/s?wd=crawlab
start_stage: list
engine: scrapy
stages:
list:
is_list: true
list_xpath: //*[contains(@class, "c-container")]
page_xpath: //*[@id="page"]//a[@class="n"][last()]
page_attr: href
fields:
- name: title
xpath: .//h3/a
- name: url
xpath: .//h3/a
attr: href
- name: abstract
xpath: .//*[@class="c-abstract"]
settings:
ROBOTSTXT_OBEY: false
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36

View File

@@ -5,10 +5,10 @@ start_stage: "list"
engine: "scrapy"
stages:
list:
is_list: true # default: false
is_list: true
list_css: "section article.product_pod"
page_css: "ul.pager li.next a"
page_attr: "href" # default: href
page_attr: "href"
fields:
- name: "title"
css: "h3 > a"
@@ -23,3 +23,6 @@ stages:
fields:
- name: "description"
css: "#product_description + p"
settings:
ROBOTSTXT_OBEY: true
AUTOTHROTTLE_ENABLED: true

View File

@@ -167,7 +167,6 @@ func DeCompress(srcFile *os.File, dstPath string) error {
debug.PrintStack()
continue
}
defer Close(newFile)
// 拷贝该文件到新文件中
if _, err := io.Copy(newFile, srcFile); err != nil {

View File

@@ -23,7 +23,7 @@
"cross-env": "^5.2.0",
"dayjs": "^1.8.6",
"echarts": "^4.1.0",
"element-ui": "2.4.6",
"element-ui": "2.13.0",
"font-awesome": "^4.7.0",
"js-cookie": "2.2.0",
"normalize.css": "7.0.0",

View File

@@ -2,13 +2,21 @@
<el-dialog
:title="$t('Notification')"
:visible="visible"
class="crawl-confirm-dialog"
width="480px"
:before-close="beforeClose"
>
<div style="margin-bottom: 20px;">{{$t('Are you sure to run this spider?')}}</div>
<el-form label-width="80px">
<el-form-item :label="$t('Node')">
<el-select v-model="nodeId">
<el-form label-width="80px" :model="form" ref="form">
<el-form-item :label="$t('Run Type')" prop="runType" required inline-message>
<el-select v-model="form.runType" :placeholder="$t('Run Type')">
<el-option value="all-nodes" :label="$t('All Nodes')"/>
<el-option value="selected-nodes" :label="$t('Selected Nodes')"/>
<el-option value="random" :label="$t('Random')"/>
</el-select>
</el-form-item>
<el-form-item v-if="form.runType === 'selected-nodes'" prop="nodeIds" :label="$t('Node')" required inline-message>
<el-select v-model="form.nodeIds" :placeholder="$t('Node')" multiple clearable>
<el-option
v-for="op in nodeList"
:key="op._id"
@@ -18,8 +26,8 @@
/>
</el-select>
</el-form-item>
<el-form-item :label="$t('Parameters')">
<el-input v-model="param" :placeholder="$t('Parameters')"></el-input>
<el-form-item :label="$t('Parameters')" prop="param" inline-message>
<el-input v-model="form.param" :placeholder="$t('Parameters')"></el-input>
</el-form-item>
</el-form>
<template slot="footer">
@@ -31,6 +39,7 @@
<script>
import request from '../../api/request'
export default {
name: 'CrawlConfirmDialog',
props: {
@@ -45,9 +54,12 @@ export default {
},
data () {
return {
nodeId: '',
param: '',
nodeList: []
form: {
runType: 'random',
nodeIds: undefined,
param: '',
nodeList: []
}
}
},
methods: {
@@ -55,12 +67,21 @@ export default {
this.$emit('close')
},
onConfirm () {
this.$store.dispatch('spider/crawlSpider', { id: this.spiderId, nodeId: this.nodeId, param: this.param })
.then(() => {
this.$message.success(this.$t('A task has been scheduled successfully'))
this.$refs['form'].validate(res => {
if (!res) return
this.$store.dispatch('spider/crawlSpider', {
spiderId: this.spiderId,
nodeIds: this.form.nodeIds,
param: this.form.param,
runType: this.form.runType
})
this.$emit('close')
this.$st.sendEv('爬虫', '运行')
.then(() => {
this.$message.success(this.$t('A task has been scheduled successfully'))
})
this.$emit('close')
this.$st.sendEv('爬虫', '运行')
})
}
},
created () {
@@ -81,5 +102,7 @@ export default {
</script>
<style scoped>
.crawl-confirm-dialog >>> .el-form .el-form-item {
margin-bottom: 20px;
}
</style>

File diff suppressed because it is too large Load Diff

View File

@@ -18,6 +18,7 @@ import 'codemirror/mode/go/go.js'
import 'codemirror/mode/shell/shell.js'
import 'codemirror/mode/markdown/markdown.js'
import 'codemirror/mode/php/php.js'
import 'codemirror/mode/yaml/yaml.js'
export default {
name: 'FileDetail',
@@ -38,7 +39,7 @@ export default {
},
options () {
return {
mode: this.lanaguage,
mode: this.language,
theme: 'darcula',
styleActiveLine: true,
lineNumbers: true,
@@ -46,8 +47,9 @@ export default {
matchBrackets: true
}
},
lanaguage () {
language () {
const fileName = this.$store.state.file.currentPath
if (!fileName) return ''
if (fileName.match(/\.js$/)) {
return 'text/javascript'
} else if (fileName.match(/\.py$/)) {
@@ -60,6 +62,8 @@ export default {
return 'text/x-php'
} else if (fileName.match(/\.md$/)) {
return 'text/x-markdown'
} else if (fileName === 'Spiderfile') {
return 'text/x-yaml'
} else {
return 'text'
}
@@ -74,7 +78,7 @@ export default {
<style scoped>
.file-content {
border: 1px solid #eaecef;
height: 480px;
height: calc(100vh - 256px);
}
.file-content >>> .CodeMirror {

View File

@@ -120,6 +120,8 @@ export default {
this.showFile = false
this.onBack()
}
},
created () {
}
}
</script>

View File

@@ -21,11 +21,11 @@
<el-form-item :label="$t('Source Folder')">
<el-input v-model="spiderForm.src" :placeholder="$t('Source Folder')" disabled></el-input>
</el-form-item>
<el-form-item :label="$t('Execute Command')" prop="cmd" required :inline-message="true">
<el-form-item v-if="spiderForm.type === 'customized'" :label="$t('Execute Command')" prop="cmd" required :inline-message="true">
<el-input v-model="spiderForm.cmd" :placeholder="$t('Execute Command')"
:disabled="isView"></el-input>
</el-form-item>
<el-form-item :label="$t('Results Collection')">
<el-form-item :label="$t('Results Collection')" prop="col" required :inline-message="true">
<el-input v-model="spiderForm.col" :placeholder="$t('Results Collection')"
:disabled="isView"></el-input>
</el-form-item>
@@ -39,11 +39,10 @@
</el-autocomplete>
</el-form-item>
<el-form-item :label="$t('Spider Type')">
<!--<el-select v-model="spiderForm.type" :placeholder="$t('Spider Type')" :disabled="true" clearable>-->
<!--<el-option value="configurable" :label="$t('Configurable')"></el-option>-->
<!--<el-option value="customized" :label="$t('Customized')"></el-option>-->
<!--</el-select>-->
<el-input v-model="spiderForm.type" placeholder="爬虫类型" clearable/>
<el-select v-model="spiderForm.type" :placeholder="$t('Spider Type')" :disabled="true" clearable>
<el-option value="configurable" :label="$t('Configurable')"></el-option>
<el-option value="customized" :label="$t('Customized')"></el-option>
</el-select>
</el-form-item>
<el-form-item :label="$t('Remark')">
<el-input v-model="spiderForm.remark"/>
@@ -103,7 +102,11 @@ export default {
'spiderForm'
]),
isShowRun () {
return !!this.spiderForm.cmd
if (this.spiderForm.type === 'customized') {
return !!this.spiderForm.cmd
} else {
return true
}
}
},
methods: {

View File

@@ -25,7 +25,6 @@ export default {
}
</script>
<style scoped>
.log-item {
display: table;

View File

@@ -15,7 +15,7 @@ import LogItem from './LogItem'
import VirtualList from 'vue-virtual-scroll-list'
import Convert from 'ansi-to-html'
import hasAnsi from 'has-ansi'
const convert = new Convert();
const convert = new Convert()
export default {
name: 'LogView',
components: {
@@ -53,7 +53,7 @@ export default {
props: {
index: logItem.index,
data: isAnsi ? convert.toHtml(logItem.data) : logItem.data,
isAnsi,
isAnsi
}
}
}

View File

@@ -1,73 +1,125 @@
<template>
<div class="fields-table-view">
<el-row class="button-group-container">
<label class="title">{{$t(this.title)}}</label>
<div class="button-group">
<el-button type="primary" size="small" @click="addField" icon="el-icon-plus">{{$t('Add Field')}}</el-button>
</div>
</el-row>
<!-- <el-row class="button-group-container">-->
<!-- <label class="title">{{$t(this.title)}}</label>-->
<!-- <div class="button-group">-->
<!-- <el-button type="primary" size="small" @click="addField" icon="el-icon-plus">{{$t('Add Field')}}</el-button>-->
<!-- </div>-->
<!-- </el-row>-->
<el-row>
<el-table :data="fields"
class="table edit"
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
border>
<el-table-column v-if="type === 'list' && spiderForm.crawl_type === 'list-detail'"
:label="$t('Detail Page URL')"
align="center">
:cell-style="getCellClassStyle"
>
<el-table-column class-name="action" width="80px" align="right">
<template slot-scope="scope">
<el-checkbox v-model="scope.row.is_detail"
@change="onCheck(scope.row)">
</el-checkbox>
<i class="action-item el-icon-copy-document" @click="onCopyField(scope.row)"></i>
<i class="action-item el-icon-remove-outline" @click="onRemoveField(scope.row)"></i>
<i class="action-item el-icon-circle-plus-outline" @click="onAddField(scope.row)"></i>
</template>
</el-table-column>
<el-table-column :label="$t('Field Name')" width="200px">
<el-table-column :label="$t('Field Name')" width="150px">
<template slot-scope="scope">
<el-input v-model="scope.row.name" :placeholder="$t('Field Name')"
@change="onNameChange(scope.row)"></el-input>
<el-input v-model="scope.row.name"
:placeholder="$t('Field Name')"
suffix-icon="el-icon-edit"
@change="onNameChange(scope.row)"
/>
</template>
</el-table-column>
<el-table-column :label="$t('Query Type')" width="200px">
<el-table-column :label="$t('Selector Type')" width="150px" align="center" class-name="selector-type">
<template slot-scope="scope">
<el-select v-model="scope.row.type" :placeholder="$t('Query Type')">
<el-option value="css" :label="$t('CSS Selector')"></el-option>
<el-option value="xpath" :label="$t('XPath')"></el-option>
</el-select>
<span class="button-selector-item" @click="onClickSelectorType(scope.row, 'css')">
<el-tag
:class="scope.row.css ? 'active' : 'inactive'"
type="success"
>
CSS
</el-tag>
</span>
<span class="button-selector-item" @click="onClickSelectorType(scope.row, 'xpath')">
<el-tag
:class="scope.row.xpath ? 'active' : 'inactive'"
type="primary"
>
XPath
</el-tag>
</span>
</template>
</el-table-column>
<el-table-column :label="$t('Query')" width="250px">
<el-table-column :label="$t('Selector')" width="200px">
<template slot-scope="scope">
<el-input v-model="scope.row.query" :placeholder="$t('Query')"></el-input>
</template>
</el-table-column>
<el-table-column :label="$t('Extract Type')" width="120px">
<template slot-scope="scope">
<el-select v-model="scope.row.extract_type" :placeholder="$t('Extract Type')">
<el-option value="text" :label="$t('Text')"></el-option>
<el-option value="attribute" :label="$t('Attribute')"></el-option>
</el-select>
</template>
</el-table-column>
<el-table-column :label="$t('Attribute')" width="250px">
<template slot-scope="scope">
<template v-if="scope.row.extract_type === 'attribute'">
<el-input v-model="scope.row.attribute"
:placeholder="$t('Attribute')">
<template v-if="scope.row.css">
<el-input
v-model="scope.row.css"
:placeholder="$t('CSS / XPath')"
suffix-icon="el-icon-edit"
>
</el-input>
</template>
<template v-else>
<el-input
v-model="scope.row.xpath"
:placeholder="$t('CSS / XPath')"
suffix-icon="el-icon-edit"
>
</el-input>
</template>
</template>
</el-table-column>
<el-table-column :label="$t('Action')" fixed="right" min-width="100px">
<el-table-column :label="$t('Is Attribute')" width="150px" align="center">
<template slot-scope="scope">
<div class="action-button-group">
<el-button size="mini"
style="margin-left:10px"
icon="el-icon-delete"
type="danger"
@click="deleteField(scope.$index)">
</el-button>
</div>
<span class="button-selector-item" @click="onClickIsAttribute(scope.row, false)">
<el-tag
:class="!isShowAttr(scope.row) ? 'active' : 'inactive'"
type="success"
>
{{$t('Text')}}
</el-tag>
</span>
<span class="button-selector-item" @click="onClickIsAttribute(scope.row, true)">
<el-tag
:class="isShowAttr(scope.row) ? 'active' : 'inactive'"
type="primary"
>
{{$t('Attribute')}}
</el-tag>
</span>
</template>
</el-table-column>
<el-table-column :label="$t('Attribute')" width="200px">
<template slot-scope="scope">
<template v-if="isShowAttr(scope.row)">
<el-input
v-model="scope.row.attr"
:placeholder="$t('Attribute')"
suffix-icon="el-icon-edit"
@change="onAttrChange(scope.row)"
/>
</template>
<template v-else>
<span style="margin-left: 15px; color: lightgrey">
N/A
</span>
</template>
</template>
</el-table-column>
<el-table-column :label="$t('Next Stage')" width="250px">
<template slot-scope="scope">
<el-select
v-model="scope.row.next_stage"
:class="!scope.row.next_stage ? 'disabled' : ''"
@change="onChangeNextStage(scope.row)"
>
<el-option :label="$t('No Next Stage')" value=""/>
<el-option v-for="n in filteredStageNames" :key="n" :label="n" :value="n"/>
</el-select>
</template>
</el-table-column>
<el-table-column :label="$t('Remark')" width="auto" min-width="120px">
<template slot-scope="scope">
<el-input v-model="scope.row.remark" :placeholder="$t('Remark')" suffix-icon="el-icon-edit"/>
</template>
</el-table-column>
</el-table>
@@ -91,6 +143,18 @@ export default {
type: String,
default: ''
},
stage: {
type: Object,
default () {
return {}
}
},
stageNames: {
type: Array,
default () {
return []
}
},
fields: {
type: Array,
default () {
@@ -101,7 +165,10 @@ export default {
computed: {
...mapState('spider', [
'spiderForm'
])
]),
filteredStageNames () {
return this.stageNames.filter(n => n !== this.stage.name)
}
},
methods: {
addField () {
@@ -128,6 +195,89 @@ export default {
}
})
this.$st.sendEv('爬虫详情-配置', '设置详情页URL')
},
onClickSelectorType (row, selectorType) {
if (selectorType === 'css') {
if (row.xpath) this.$set(row, 'xpath', '')
if (!row.css) this.$set(row, 'css', 'body')
} else {
if (row.css) this.$set(row, 'css', '')
if (!row.xpath) this.$set(row, 'xpath', '//body')
}
},
onClickIsAttribute (row, isAttribute) {
if (!isAttribute) {
// 文本
if (row.attr) this.$set(row, 'attr', '')
} else {
// 属性
if (!row.attr) this.$set(row, 'attr', 'href')
}
this.$set(row, 'isAttrChange', false)
},
onCopyField (row) {
for (let i = 0; i < this.fields.length; i++) {
if (row.name === this.fields[i].name) {
this.fields.splice(i, 0, JSON.parse(JSON.stringify(row)))
break
}
}
},
onRemoveField (row) {
for (let i = 0; i < this.fields.length; i++) {
if (row.name === this.fields[i].name) {
this.fields.splice(i, 1)
break
}
}
if (this.fields.length === 0) {
this.fields.push({
xpath: '//body',
next_stage: ''
})
}
},
onAddField (row) {
for (let i = 0; i < this.fields.length; i++) {
if (row.name === this.fields[i].name) {
this.fields.splice(i + 1, 0, {
name: `field_${Math.floor(new Date().getTime()).toString()}`,
xpath: '//body',
next_stage: ''
})
break
}
}
},
getCellClassStyle ({ row, columnIndex }) {
if (columnIndex === 1) {
// 字段名称
if (!row.name) {
return {
'border': '1px solid red'
}
}
} else if (columnIndex === 3) {
// 选择器
if (!row.css && !row.xpath) {
return {
'border': '1px solid red'
}
}
}
},
onChangeNextStage (row) {
this.fields.forEach(f => {
if (f.name !== row.name) {
this.$set(f, 'next_stage', '')
}
})
},
onAttrChange (row) {
this.$set(row, 'isAttrChange', !row.attr)
},
isShowAttr (row) {
return (row.attr || row.isAttrChange)
}
}
}
@@ -158,6 +308,50 @@ export default {
line-height: 36px;
}
.el-table.edit >>> .button-selector-item {
cursor: pointer;
margin: 0 5px;
}
.el-table.edit >>> .el-tag.inactive {
opacity: 0.5;
}
.el-table.edit >>> .action {
background: none !important;
border: none;
}
.el-table.edit >>> tr {
border: none;
}
.el-table.edit >>> tr th {
border-right: 1px solid rgb(220, 223, 230);
}
.el-table.edit >>> tr td:nth-child(2) {
border-left: 1px solid rgb(220, 223, 230);
}
.el-table.edit >>> tr td {
border-right: 1px solid rgb(220, 223, 230);
}
.el-table.edit::before {
background: none;
}
.el-table.edit >>> .action-item {
font-size: 14px;
margin-right: 5px;
cursor: pointer;
}
.el-table.edit >>> .action-item:last-child {
margin-right: 10px;
}
.button-group-container {
/*display: inline-block;*/
/*width: 100%;*/
@@ -180,4 +374,8 @@ export default {
.action-button-group >>> .el-checkbox__label {
font-size: 12px;
}
.el-table.edit >>> .el-select.disabled .el-input__inner {
color: lightgrey;
}
</style>

View File

@@ -0,0 +1,283 @@
<template>
<div class="setting-list-table-view">
<el-row>
<el-table :data="list"
class="table edit"
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
:cell-style="getCellClassStyle"
>
<el-table-column class-name="action" width="80px" align="right">
<template slot-scope="scope">
<!-- <i class="action-item el-icon-copy-document" @click="onCopyField(scope.row)"></i>-->
<i class="action-item el-icon-remove-outline" @click="onRemoveField(scope.row)"></i>
<i class="action-item el-icon-circle-plus-outline" @click="onAddField(scope.row)"></i>
</template>
</el-table-column>
<el-table-column :label="$t('Name')" width="240px">
<template slot-scope="scope">
<el-input
v-model="scope.row.name"
:placeholder="$t('Name')"
suffix-icon="el-icon-edit"
@change="onChange(scope.row)"
/>
</template>
</el-table-column>
<el-table-column :label="$t('Value')" width="auto" min-width="120px">
<template slot-scope="scope">
<el-input
v-model="scope.row.value"
:placeholder="$t('Value')"
suffix-icon="el-icon-edit"
@change="onChange(scope.row)"
/>
</template>
</el-table-column>
</el-table>
</el-row>
</div>
</template>
<script>
import {
mapState
} from 'vuex'
export default {
name: 'SettingFieldsTableView',
props: {
type: {
type: String,
default: 'list'
},
title: {
type: String,
default: ''
},
stageNames: {
type: Array,
default () {
return []
}
}
},
computed: {
...mapState('spider', [
'spiderForm'
]),
list () {
const list = []
for (let name in this.spiderForm.config.settings) {
if (this.spiderForm.config.settings.hasOwnProperty(name)) {
const value = this.spiderForm.config.settings[name]
list.push({ name, value })
}
}
return list
}
},
methods: {
addField () {
this.list.push({
type: 'css',
extract_type: 'text'
})
this.$st.sendEv('爬虫详情-配置', '添加字段')
},
deleteField (index) {
this.list.splice(index, 1)
this.$st.sendEv('爬虫详情-配置', '删除字段')
},
onChange (row) {
if (this.list.filter(d => d.name === row.name).length > 1) {
this.$message.error(this.$t(`Duplicated field names for ${row.name}`))
}
this.$store.commit('spider/SET_SPIDER_FORM_CONFIG_SETTINGS', this.list)
this.$st.sendEv('爬虫详情-配置', '更改字段')
},
onCheck (row) {
this.list.forEach(d => {
if (row.name !== d.name) {
this.$set(d, 'is_detail', false)
}
})
this.$st.sendEv('爬虫详情-配置', '设置详情页URL')
},
onClickSelectorType (row, selectorType) {
if (selectorType === 'css') {
if (row.xpath) this.$set(row, 'xpath', '')
if (!row.css) this.$set(row, 'css', 'body')
} else {
if (row.css) this.$set(row, 'css', '')
if (!row.xpath) this.$set(row, 'xpath', '//body')
}
},
onClickIsAttribute (row, isAttribute) {
if (!isAttribute) {
// 文本
if (row.attr) this.$set(row, 'attr', '')
} else {
// 属性
if (!row.attr) this.$set(row, 'attr', 'href')
}
},
onRemoveField (row) {
const list = JSON.parse(JSON.stringify(this.list))
for (let i = 0; i < list.length; i++) {
if (row.name === list[i].name) {
list.splice(i, 1)
}
}
if (list.length === 0) {
list.push({
name: `VARIABLE_NAME_${Math.floor(new Date().getTime())}`,
value: `VARIABLE_VALUE_${Math.floor(new Date().getTime())}`
})
}
this.$store.commit('spider/SET_SPIDER_FORM_CONFIG_SETTINGS', list)
},
onAddField (row) {
const list = JSON.parse(JSON.stringify(this.list))
for (let i = 0; i < list.length; i++) {
if (row.name === list[i].name) {
const name = 'VARIABLE_NAME_' + Math.floor(new Date().getTime())
const value = 'VARIABLE_VALUE_' + Math.floor(new Date().getTime())
list.push({ name, value })
break
}
}
this.$store.commit('spider/SET_SPIDER_FORM_CONFIG_SETTINGS', list)
},
getCellClassStyle ({ row, columnIndex }) {
if (columnIndex === 1) {
// 字段名称
if (!row.name) {
return {
'border': '1px solid red'
}
}
} else if (columnIndex === 3) {
// 选择器
if (!row.css && !row.xpath) {
return {
'border': '1px solid red'
}
}
}
},
onChangeNextStage (row) {
this.list.forEach(f => {
if (f.name !== row.name) {
this.$set(f, 'next_stage', '')
}
})
}
},
created () {
if (this.list.length === 0) {
this.$store.commit(
'spider/SET_SPIDER_FORM_CONFIG_SETTING_ITEM',
'VARIABLE_NAME_' + Math.floor(new Date().getTime()),
'VARIABLE_VALUE_' + Math.floor(new Date().getTime())
)
}
}
}
</script>
<style scoped>
.el-table.edit >>> .el-table__body td {
padding: 0;
}
.el-table.edit >>> .el-table__body td .cell {
padding: 0;
font-size: 12px;
}
.el-table.edit >>> .el-input__inner:hover {
text-decoration: underline;
}
.el-table.edit >>> .el-input__inner {
height: 36px;
border: none;
border-radius: 0;
font-size: 12px;
}
.el-table.edit >>> .el-select .el-input .el-select__caret {
line-height: 36px;
}
.el-table.edit >>> .button-selector-item {
cursor: pointer;
margin: 0 5px;
}
.el-table.edit >>> .el-tag.inactive {
opacity: 0.5;
}
.el-table.edit >>> .action {
background: none !important;
border: none;
}
.el-table.edit >>> tr {
border: none;
}
.el-table.edit >>> tr th {
border-right: 1px solid rgb(220, 223, 230);
}
.el-table.edit >>> tr td:nth-child(2) {
border-left: 1px solid rgb(220, 223, 230);
}
.el-table.edit >>> tr td {
border-right: 1px solid rgb(220, 223, 230);
}
.el-table.edit::before {
background: none;
}
.el-table.edit >>> .action-item {
font-size: 14px;
margin-right: 5px;
cursor: pointer;
}
.el-table.edit >>> .action-item:last-child {
margin-right: 10px;
}
.button-group-container {
/*display: inline-block;*/
/*width: 100%;*/
}
.button-group-container .title {
float: left;
line-height: 32px;
}
.button-group-container .button-group {
float: right;
}
.action-button-group {
display: flex;
margin-left: 10px;
}
.action-button-group >>> .el-checkbox__label {
font-size: 12px;
}
.el-table.edit >>> .el-select.disabled .el-input__inner {
color: lightgrey;
}
</style>

View File

@@ -125,6 +125,8 @@ export default {
'Customized Spider': '自定义爬虫',
'Configurable': '可配置',
'Customized': '自定义',
'configurable': '可配置',
'customized': '自定义',
'Text': '文本',
'Attribute': '属性',
'Field Name': '字段名称',
@@ -148,6 +150,26 @@ export default {
'List Page Fields': '列表页字段',
'Detail Page Fields': '详情页字段',
'Detail Page URL': '详情页URL',
'All': '全部',
'Stages': '阶段',
'Process': '流程',
'Stage Process': '流程图',
'Stage Name': '阶段名称',
'Start Stage': '开始阶段',
'Engine': '引擎',
'Selector Type': '选择器类别',
'Selector': '选择器',
'Is Attribute': '是否为属性',
'Next Stage': '下一阶段',
'No Next Stage': '没有下一阶段',
'Fields': '字段',
'Stage': '阶段',
'Is List': '是否为列表',
'List': '列表',
'Pagination': '分页',
'Settings': '设置',
'Display Name': '显示名称',
'Template': '模版',
// 爬虫列表
'Name': '名称',
@@ -171,6 +193,9 @@ export default {
'Wait Duration (sec)': '等待时长(秒)',
'Runtime Duration (sec)': '运行时长(秒)',
'Total Duration (sec)': '总时长(秒)',
'Run Type': '运行类型',
'Random': '随机',
'Selected Nodes': '指定节点',
// 任务列表
'Node': '节点',

View File

@@ -42,12 +42,6 @@ const actions = {
.then(response => {
commit('SET_FILE_CONTENT', response.data.data)
})
},
saveFileContent ({ state, rootState }, payload) {
const { path } = payload
const spiderId = rootState.spider.spiderForm._id
const content = state.fileContent
return request.post(`/spiders/${spiderId}/file`, { content, path })
}
}

View File

@@ -1,5 +1,5 @@
const state = {
lang: window.localStorage.getItem('lang') || 'en'
lang: window.localStorage.getItem('lang') || 'zh'
}
const getters = {

View File

@@ -1,4 +1,6 @@
import Vue from 'vue'
import request from '../../api/request'
import axisModelCommonMixin from 'echarts/src/coord/axisModelCommonMixin'
const state = {
// list of spiders
@@ -34,7 +36,10 @@ const state = {
filterSite: '',
// preview crawl data
previewCrawlData: []
previewCrawlData: [],
// template list
templateList: []
}
const getters = {}
@@ -72,6 +77,16 @@ const mutations = {
},
SET_PREVIEW_CRAWL_DATA (state, value) {
state.previewCrawlData = value
},
SET_SPIDER_FORM_CONFIG_SETTINGS (state, payload) {
const settings = {}
payload.forEach(row => {
settings[row.name] = row.value
})
Vue.set(state.spiderForm.config, 'settings', settings)
},
// Store the list of configurable-spider templates (committed by the
// `getTemplateList` action).
SET_TEMPLATE_LIST (state, value) {
state.templateList = value
}
}
@@ -103,10 +118,11 @@ const actions = {
})
},
crawlSpider ({ state, dispatch }, payload) {
const { id, nodeId, param } = payload
const { spiderId, runType, nodeIds, param } = payload
return request.put(`/tasks`, {
spider_id: id,
node_id: nodeId,
spider_id: spiderId,
run_type: runType,
node_ids: nodeIds,
param: param
})
},
@@ -148,6 +164,20 @@ const actions = {
},
extractFields ({ state, commit }) {
return request.post(`/spiders/${state.spiderForm._id}/extract_fields`)
},
postConfigSpiderConfig ({ state }) {
return request.post(`/config_spiders/${state.spiderForm._id}/config`, state.spiderForm.config)
},
saveConfigSpiderSpiderfile ({ state, rootState }) {
const content = rootState.file.fileContent
return request.post(`/config_spiders/${state.spiderForm._id}/spiderfile`, { content })
},
addConfigSpider ({ state }) {
return request.put(`/config_spiders`, state.spiderForm)
},
async getTemplateList ({ state, commit }) {
const res = await request.get(`/config_spiders_templates`)
commit('SET_TEMPLATE_LIST', res.data.data)
}
}

View File

@@ -274,7 +274,7 @@ export default {
// 爬虫列表
request.get('/spiders', {})
.then(response => {
this.spiderList = response.data.data.list
this.spiderList = response.data.data.list || []
})
}
}

View File

@@ -13,8 +13,8 @@
<el-tab-pane :label="$t('Overview')" name="overview">
<spider-overview/>
</el-tab-pane>
<el-tab-pane v-if="isConfigurable" :label="$t('Config')" name="配置">
<config-list/>
<el-tab-pane v-if="isConfigurable" :label="$t('Config')" name="config">
<config-list ref="config"/>
</el-tab-pane>
<el-tab-pane :label="$t('Files')" name="files">
<file-list/>
@@ -48,6 +48,13 @@ export default {
FileList,
SpiderOverview
},
watch: {
activeTabName () {
// 初始化文件
this.$store.commit('file/SET_FILE_CONTENT', '')
this.$store.commit('file/SET_CURRENT_PATH', '')
}
},
data () {
return {
activeTabName: 'overview'
@@ -77,6 +84,10 @@ export default {
setTimeout(() => {
this.$refs['spider-stats'].update()
}, 0)
} else if (this.activeTabName === 'config') {
setTimeout(() => {
this.$refs['config'].update()
}, 0)
}
this.$st.sendEv('爬虫详情', '切换标签', tab.name)
},
@@ -85,19 +96,26 @@ export default {
this.$st.sendEv('爬虫详情', '切换爬虫')
}
},
created () {
async created () {
// get the list of the spiders
// this.$store.dispatch('spider/getSpiderList')
// get spider basic info
this.$store.dispatch('spider/getSpiderData', this.$route.params.id)
.then(() => {
// get spider file info
this.$store.dispatch('file/getFileList', this.spiderForm.src)
})
await this.$store.dispatch('spider/getSpiderData', this.$route.params.id)
// get spider file info
await this.$store.dispatch('file/getFileList', this.spiderForm.src)
// get spider tasks
this.$store.dispatch('spider/getTaskList', this.$route.params.id)
await this.$store.dispatch('spider/getTaskList', this.$route.params.id)
// get spider list
await this.$store.dispatch('spider/getSpiderList')
// if spider is configurable spider, set to config tab by default
if (this.spiderForm.type === 'configurable') {
this.activeTabName = 'config'
}
}
}
</script>

View File

@@ -33,18 +33,50 @@
width="40%"
:visible.sync="addDialogVisible"
:before-close="onAddDialogClose">
<div class="add-spider-wrapper">
<div @click="onAddConfigurable">
<el-card shadow="hover" class="add-spider-item success">
{{$t('Configurable Spider')}}
</el-card>
</div>
<div @click="onAddCustomized">
<el-card shadow="hover" class="add-spider-item primary">
{{$t('Customized Spider')}}
</el-card>
</div>
</div>
<el-tabs :active-name="spiderType">
<el-tab-pane name="configurable" :label="$t('Configurable')">
<el-form :model="spiderForm" ref="addConfigurableForm" inline-message label-width="120px">
<el-form-item :label="$t('Spider Name')" prop="name" required>
<el-input v-model="spiderForm.name" :placeholder="$t('Spider Name')"/>
</el-form-item>
<el-form-item :label="$t('Display Name')" prop="display_name" required>
<el-input v-model="spiderForm.display_name" :placeholder="$t('Display Name')"/>
</el-form-item>
<el-form-item :label="$t('Template')" prop="template" required>
<el-select v-model="spiderForm.template" :value="spiderForm.template" :placeholder="$t('Template')">
<el-option
v-for="template in templateList"
:key="template"
:label="template"
:value="template"
/>
</el-select>
</el-form-item>
<el-form-item :label="$t('Results')" prop="col" required>
<el-input v-model="spiderForm.col" :placeholder="$t('Results')"/>
</el-form-item>
</el-form>
<div class="actions">
<el-button type="primary" @click="onAddConfigurable">{{$t('Add')}}</el-button>
</div>
</el-tab-pane>
<el-tab-pane name="customized" :label="$t('Customized')">
<el-form :model="spiderForm" ref="addCustomizedForm" inline-message>
<el-form-item :label="$t('Upload Zip File')" label-width="120px" name="site">
<el-upload
:action="$request.baseUrl + '/spiders'"
:headers="{Authorization:token}"
:on-change="onUploadChange"
:on-success="onUploadSuccess"
:file-list="fileList">
<el-button size="small" type="primary" icon="el-icon-upload">{{$t('Upload')}}</el-button>
</el-upload>
</el-form-item>
</el-form>
<el-alert type="error" :title="$t('Please zip your spider files from the root directory')"
:closable="false"></el-alert>
</el-tab-pane>
</el-tabs>
</el-dialog>
<!--./add dialog-->
@@ -81,19 +113,7 @@
width="40%"
:visible.sync="addCustomizedDialogVisible"
:before-close="onAddCustomizedDialogClose">
<el-form :model="spiderForm" ref="addConfigurableForm" inline-message>
<el-form-item :label="$t('Upload Zip File')" label-width="120px" name="site">
<el-upload
:action="$request.baseUrl + '/spiders'"
:headers="{Authorization:token}"
:on-change="onUploadChange"
:on-success="onUploadSuccess"
:file-list="fileList">
<el-button size="small" type="primary" icon="el-icon-upload">{{$t('Upload')}}</el-button>
</el-upload>
</el-form-item>
</el-form>
<el-alert type="error" :title="$t('Please zip your spider files from the root directory')" :closable="false"></el-alert>
</el-dialog>
<!--./customized spider dialog-->
@@ -110,17 +130,24 @@
<div class="filter">
<div class="left">
<el-form :inline="true">
<el-form-item>
<el-select clearable @change="onSpiderTypeChange" placeholder="爬虫类型" size="small" v-model="filter.type">
<el-option v-for="item in types" :value="item.type" :key="item.type"
:label="item.type === 'customized'? '自定义':item.type "/>
</el-select>
</el-form-item>
<!-- <el-form-item>-->
<!-- <el-select clearable @change="onSpiderTypeChange" placeholder="爬虫类型" size="small" v-model="filter.type">-->
<!-- <el-option v-for="item in types" :value="item.type" :key="item.type"-->
<!-- :label="item.type === 'customized'? '自定义':item.type "/>-->
<!-- </el-select>-->
<!-- </el-form-item>-->
<el-form-item>
<el-input clearable @keyup.enter.native="onSearch" size="small" placeholder="名称" v-model="filter.keyword">
<i slot="suffix" class="el-input__icon el-icon-search"></i>
</el-input>
</el-form-item>
<el-form-item>
<el-button size="small" type="success"
class="btn refresh"
@click="onRefresh">
{{$t('Search')}}
</el-button>
</el-form-item>
</el-form>
</div>
<div class="right">
@@ -133,16 +160,19 @@
@click="onAdd">
{{$t('Add Spider')}}
</el-button>
<el-button size="small" type="success"
icon="el-icon-refresh"
class="btn refresh"
@click="onRefresh">
{{$t('Refresh')}}
</el-button>
</div>
</div>
<!--./filter-->
<!--tabs-->
<el-tabs v-model="filter.type" @tab-click="onClickTab">
<el-tab-pane :label="$t('All')" name="all"></el-tab-pane>
<el-tab-pane :label="$t('Configurable')" name="configurable"></el-tab-pane>
<el-tab-pane :label="$t('Customized')" name="customized"></el-tab-pane>
</el-tabs>
<!--./tabs-->
<!--table list-->
<el-table :data="spiderList"
class="table"
@@ -157,7 +187,7 @@
align="left"
:width="col.width">
<template slot-scope="scope">
{{scope.row.type === 'customized' ? '自定义' : scope.row.type}}
{{$t(scope.row.type)}}
</template>
</el-table-column>
<el-table-column v-else-if="col.name === 'last_5_errors'"
@@ -210,16 +240,19 @@
<el-table-column :label="$t('Action')" align="left" fixed="right">
<template slot-scope="scope">
<el-tooltip :content="$t('View')" placement="top">
<el-button type="primary" icon="el-icon-search" size="mini" @click="onView(scope.row)"></el-button>
<el-button type="primary" icon="el-icon-search" size="mini"
@click="onView(scope.row, $event)"></el-button>
</el-tooltip>
<el-tooltip :content="$t('Remove')" placement="top">
<el-button type="danger" icon="el-icon-delete" size="mini" @click="onRemove(scope.row)"></el-button>
<el-button type="danger" icon="el-icon-delete" size="mini"
@click="onRemove(scope.row, $event)"></el-button>
</el-tooltip>
<el-tooltip v-if="!isShowRun(scope.row)" :content="$t('No command line')" placement="top">
<el-button disabled type="success" icon="fa fa-bug" size="mini" @click="onCrawl(scope.row)"></el-button>
<el-button disabled type="success" icon="fa fa-bug" size="mini"
@click="onCrawl(scope.row, $event)"></el-button>
</el-tooltip>
<el-tooltip v-else :content="$t('Run')" placement="top">
<el-button type="success" icon="fa fa-bug" size="mini" @click="onCrawl(scope.row)"></el-button>
<el-button type="success" icon="fa fa-bug" size="mini" @click="onCrawl(scope.row, $event)"></el-button>
</el-tooltip>
</template>
</el-table-column>
@@ -248,7 +281,7 @@ import {
import dayjs from 'dayjs'
import CrawlConfirmDialog from '../../components/Common/CrawlConfirmDialog'
import StatusTag from '../../components/Status/StatusTag'
import request from '../../api/request'
export default {
name: 'SpiderList',
components: {
@@ -272,10 +305,9 @@ export default {
activeSpiderId: undefined,
filter: {
keyword: '',
type: ''
type: 'all'
},
types: [],
// tableData,
columns: [
{ name: 'display_name', label: 'Name', width: '160', align: 'left' },
{ name: 'type', label: 'Spider Type', width: '120' },
@@ -287,7 +319,8 @@ export default {
spiderFormRules: {
name: [{ required: true, message: 'Required Field', trigger: 'change' }]
},
fileList: []
fileList: [],
spiderType: 'configurable'
}
},
computed: {
@@ -295,7 +328,8 @@ export default {
'importForm',
'spiderList',
'spiderForm',
'spiderTotal'
'spiderTotal',
'templateList'
]),
...mapGetters('user', [
'token'
@@ -318,14 +352,26 @@ export default {
this.getList()
},
onAdd () {
// this.addDialogVisible = true
this.onAddCustomized()
this.$store.commit('spider/SET_SPIDER_FORM', {
template: this.templateList[0]
})
this.addDialogVisible = true
},
onAddConfigurable () {
this.$store.commit('spider/SET_SPIDER_FORM', {})
this.addDialogVisible = false
this.addConfigurableDialogVisible = true
this.$st.sendEv('爬虫', '添加爬虫-可配置爬虫')
this.$refs['addConfigurableForm'].validate(async res => {
if (!res) return
let res2
try {
res2 = await this.$store.dispatch('spider/addConfigSpider')
} catch (e) {
this.$message.error(this.$t('Something wrong happened'))
return
}
await this.$store.dispatch('spider/getSpiderList')
this.$router.push(`/spiders/${res2.data.data._id}`)
this.$st.sendEv('爬虫', '添加爬虫-可配置爬虫')
})
},
onAddCustomized () {
this.addDialogVisible = false
@@ -374,7 +420,8 @@ export default {
this.$store.commit('spider/SET_SPIDER_FORM', row)
this.dialogVisible = true
},
onRemove (row) {
onRemove (row, ev) {
ev.stopPropagation()
this.$confirm(this.$t('Are you sure to delete this spider?'), this.$t('Notification'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),
@@ -390,12 +437,14 @@ export default {
this.$st.sendEv('爬虫', '删除')
})
},
onCrawl (row) {
onCrawl (row, ev) {
ev.stopPropagation()
this.crawlConfirmDialogVisible = true
this.activeSpiderId = row._id
this.$st.sendEv('爬虫', '点击运行')
},
onView (row) {
onView (row, ev) {
ev.stopPropagation()
this.$router.push('/spiders/' + row._id)
this.$st.sendEv('爬虫', '查看')
},
@@ -483,10 +532,12 @@ export default {
if (!str || str.match('^0001')) return 'NA'
return dayjs(str).format('YYYY-MM-DD HH:mm:ss')
},
onRowClick (row, event, column) {
if (column.label !== this.$t('Action')) {
this.onView(row)
}
onRowClick (row, column, event) {
this.onView(row, event)
},
onClickTab (tab) {
this.filter.type = tab.name
this.getList()
},
getList () {
let params = {
@@ -496,19 +547,29 @@ export default {
type: this.filter.type
}
this.$store.dispatch('spider/getSpiderList', params)
},
getTypes () {
request.get(`/spider/types`).then(resp => {
this.types = resp.data.data
})
}
// getTypes () {
// request.get(`/spider/types`).then(resp => {
// this.types = resp.data.data
// })
// }
},
created () {
this.getTypes()
async created () {
// fetch spider types
// await this.getTypes()
// fetch spider list
this.getList()
await this.getList()
// fetch template list
await this.$store.dispatch('spider/getTemplateList')
},
mounted () {
console.log(this.spiderForm)
const vm = this
this.$nextTick(() => {
vm.$store.commit('spider/SET_SPIDER_FORM', this.spiderForm)
})
}
}
</script>
@@ -594,4 +655,8 @@ export default {
/* Pointer cursor on every table row inside .el-table. The `>>>` combinator
   targets rows rendered by the child table component (assumes this style
   block is scoped; confirm against the <style> tag). */
.el-table >>> tr {
cursor: pointer;
}
/* Right-align the inline content of elements carrying the .actions class. */
.actions {
text-align: right;
}
</style>

View File

@@ -125,7 +125,7 @@
<el-button type="primary" icon="el-icon-search" size="mini" @click="onView(scope.row)"></el-button>
</el-tooltip>
<el-tooltip :content="$t('Remove')" placement="top">
<el-button type="danger" icon="el-icon-delete" size="mini" @click="onRemove(scope.row)"></el-button>
<el-button type="danger" icon="el-icon-delete" size="mini" @click="onRemove(scope.row, $event)"></el-button>
</el-tooltip>
</template>
</el-table-column>
@@ -250,7 +250,8 @@ export default {
onSelectSpider () {
this.$st.sendEv('任务', '选择爬虫')
},
onRemove (row) {
onRemove (row, ev) {
ev.stopPropagation()
this.$confirm(this.$t('Are you sure to delete this task?'), this.$t('Notification'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),

View File

@@ -1,3 +1,4 @@
// Exported configuration object (CommonJS).
module.exports = {
// Public path comes from the BASE_URL environment variable and falls back
// to '/' when that variable is unset or empty.
publicPath: process.env.BASE_URL || '/'
// TODO: need to configure output static files with hash
}

View File

@@ -2954,9 +2954,10 @@ electron-to-chromium@^1.3.103:
version "1.3.113"
resolved "http://registry.npm.taobao.org/electron-to-chromium/download/electron-to-chromium-1.3.113.tgz#b1ccf619df7295aea17bc6951dc689632629e4a9"
element-ui@2.4.6:
version "2.4.6"
resolved "https://registry.yarnpkg.com/element-ui/-/element-ui-2.4.6.tgz#524d3d4cac0b68745dda87311ef0d8fe541b5fc4"
element-ui@2.13.0:
version "2.13.0"
resolved "https://registry.npm.taobao.org/element-ui/download/element-ui-2.13.0.tgz?cache=0&other_urls=https%3A%2F%2Fregistry.npm.taobao.org%2Felement-ui%2Fdownload%2Felement-ui-2.13.0.tgz#f6bb04e5b0a76ea5f62466044b774407ba4ebd2d"
integrity sha1-9rsE5bCnbqX2JGYES3dEB7pOvS0=
dependencies:
async-validator "~1.8.1"
babel-helper-vue-jsx-merge-props "^2.0.0"