mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
@@ -1,7 +1,10 @@
|
||||
# 0.4.5 (unknown)
|
||||
### 功能 / 优化
|
||||
- **交互式教程**. 引导用户了解 Crawlab 的主要功能.
|
||||
- **加入全局环境变量**. 可以设置全局环境变量,然后传入到所有爬虫程序中.
|
||||
- **加入全局环境变量**. 可以设置全局环境变量,然后传入到所有爬虫程序中. [#177](https://github.com/crawlab-team/crawlab/issues/177)
|
||||
- **项目**. 允许用户将爬虫关联到项目上. [#316](https://github.com/crawlab-team/crawlab/issues/316)
|
||||
- **用户管理优化**. 限制管理用户的权限. [#456](https://github.com/crawlab-team/crawlab/issues/456)
|
||||
- **设置页面优化**.
|
||||
|
||||
### Bug 修复
|
||||
- **无法找到爬虫文件错误**. [#485](https://github.com/crawlab-team/crawlab/issues/485)
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
# 0.4.5 (unknown)
|
||||
### Features / Enhancement
|
||||
- **Interactive Tutorial**. Guide users through the main functionalities of Crawlab.
|
||||
- **Global Environment Variables**. Allow users to set global environment variables, which will be passed into all spider programs.
|
||||
- **Global Environment Variables**. Allow users to set global environment variables, which will be passed into all spider programs. [#177](https://github.com/crawlab-team/crawlab/issues/177)
|
||||
- **Project**. Allow users to link spiders to projects. [#316](https://github.com/crawlab-team/crawlab/issues/316)
|
||||
- **User Admin Optimization**. Restrict privileges of admin users. [#456](https://github.com/crawlab-team/crawlab/issues/456)
|
||||
- **Setting Page Optimization**.
|
||||
|
||||
### Bug Fixes
|
||||
- **Unable to find spider file error**. [#485](https://github.com/crawlab-team/crawlab/issues/485)
|
||||
|
||||
@@ -1,12 +1,22 @@
|
||||
package entity
|
||||
|
||||
type ConfigSpiderData struct {
|
||||
Version string `yaml:"version" json:"version"`
|
||||
// 通用
|
||||
Name string `yaml:"name" json:"name"`
|
||||
DisplayName string `yaml:"display_name" json:"display_name"`
|
||||
Col string `yaml:"col" json:"col"`
|
||||
Remark string `yaml:"remark" json:"remark"`
|
||||
Type string `yaml:"type" bson:"type"`
|
||||
|
||||
// 可配置爬虫
|
||||
Engine string `yaml:"engine" json:"engine"`
|
||||
StartUrl string `yaml:"start_url" json:"start_url"`
|
||||
StartStage string `yaml:"start_stage" json:"start_stage"`
|
||||
Stages []Stage `yaml:"stages" json:"stages"`
|
||||
Settings map[string]string `yaml:"settings" json:"settings"`
|
||||
|
||||
// 自定义爬虫
|
||||
Cmd string `yaml:"cmd" json:"cmd"`
|
||||
}
|
||||
|
||||
type Stage struct {
|
||||
|
||||
@@ -39,7 +39,6 @@ func main() {
|
||||
log.SetLevelFromString(logLevel)
|
||||
}
|
||||
log.Info("initialized log config successfully")
|
||||
|
||||
if viper.GetString("log.isDeletePeriodically") == "Y" {
|
||||
err := services.InitDeleteLogPeriodically()
|
||||
if err != nil {
|
||||
@@ -74,8 +73,24 @@ func main() {
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized schedule successfully")
|
||||
|
||||
// 初始化用户服务
|
||||
if err := services.InitUserService(); err != nil {
|
||||
log.Error("init user service error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized user service successfully")
|
||||
|
||||
// 初始化依赖服务
|
||||
if err := services.InitDepsFetcher(); err != nil {
|
||||
log.Error("init dependency fetcher error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized dependency fetcher successfully")
|
||||
}
|
||||
log.Info("initialized schedule successfully")
|
||||
|
||||
// 初始化任务执行器
|
||||
if err := services.InitTaskExecutor(); err != nil {
|
||||
@@ -100,22 +115,6 @@ func main() {
|
||||
}
|
||||
log.Info("initialized spider service successfully")
|
||||
|
||||
// 初始化用户服务
|
||||
if err := services.InitUserService(); err != nil {
|
||||
log.Error("init user service error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized user service successfully")
|
||||
|
||||
// 初始化依赖服务
|
||||
if err := services.InitDepsFetcher(); err != nil {
|
||||
log.Error("init dependency fetcher error:" + err.Error())
|
||||
debug.PrintStack()
|
||||
panic(err)
|
||||
}
|
||||
log.Info("initialized dependency fetcher successfully")
|
||||
|
||||
// 初始化RPC服务
|
||||
if err := services.InitRpcService(); err != nil {
|
||||
log.Error("init rpc service error:" + err.Error())
|
||||
@@ -224,10 +223,18 @@ func main() {
|
||||
}
|
||||
// 全局变量
|
||||
{
|
||||
authGroup.POST("/variable", routes.PostVariable) // 新增
|
||||
authGroup.PUT("/variable/:id", routes.PutVariable) //修改
|
||||
authGroup.DELETE("/variable/:id", routes.DeleteVariable) //删除
|
||||
authGroup.GET("/variables", routes.GetVariableList) // 列表
|
||||
authGroup.PUT("/variable", routes.PutVariable) // 新增
|
||||
authGroup.POST("/variable/:id", routes.PostVariable) //修改
|
||||
authGroup.DELETE("/variable/:id", routes.DeleteVariable) //删除
|
||||
}
|
||||
// 项目
|
||||
{
|
||||
authGroup.GET("/projects", routes.GetProjectList) // 列表
|
||||
authGroup.GET("/projects/tags", routes.GetProjectTags) // 项目标签
|
||||
authGroup.PUT("/projects", routes.PutProject) //修改
|
||||
authGroup.POST("/projects/:id", routes.PostProject) // 新增
|
||||
authGroup.DELETE("/projects/:id", routes.DeleteProject) //删除
|
||||
}
|
||||
// 统计数据
|
||||
authGroup.GET("/stats/home", routes.GetHomeStats) // 首页统计数据
|
||||
|
||||
146
backend/model/project.go
Normal file
146
backend/model/project.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package model
|
||||
|
||||
import (
|
||||
"crawlab/constants"
|
||||
"crawlab/database"
|
||||
"github.com/apex/log"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"runtime/debug"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Project struct {
|
||||
Id bson.ObjectId `json:"_id" bson:"_id"`
|
||||
Name string `json:"name" bson:"name"`
|
||||
Description string `json:"description" bson:"description"`
|
||||
Tags []string `json:"tags" bson:"tags"`
|
||||
|
||||
CreateTs time.Time `json:"create_ts" bson:"create_ts"`
|
||||
UpdateTs time.Time `json:"update_ts" bson:"update_ts"`
|
||||
|
||||
// 前端展示
|
||||
Spiders []Spider `json:"spiders" bson:"spiders"`
|
||||
}
|
||||
|
||||
func (p *Project) Save() error {
|
||||
s, c := database.GetCol("projects")
|
||||
defer s.Close()
|
||||
|
||||
p.UpdateTs = time.Now()
|
||||
|
||||
if err := c.UpdateId(p.Id, p); err != nil {
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Project) Add() error {
|
||||
s, c := database.GetCol("projects")
|
||||
defer s.Close()
|
||||
|
||||
p.Id = bson.NewObjectId()
|
||||
p.UpdateTs = time.Now()
|
||||
p.CreateTs = time.Now()
|
||||
if err := c.Insert(p); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Project) GetSpiders() ([]Spider, error) {
|
||||
s, c := database.GetCol("spiders")
|
||||
defer s.Close()
|
||||
|
||||
var query interface{}
|
||||
if p.Id.Hex() == constants.ObjectIdNull {
|
||||
query = bson.M{
|
||||
"$or": []bson.M{
|
||||
{"project_id": p.Id},
|
||||
{"project_id": bson.M{"$exists": false}},
|
||||
},
|
||||
}
|
||||
} else {
|
||||
query = bson.M{"project_id": p.Id}
|
||||
}
|
||||
|
||||
var spiders []Spider
|
||||
if err := c.Find(query).All(&spiders); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return spiders, err
|
||||
}
|
||||
|
||||
return spiders, nil
|
||||
}
|
||||
|
||||
func GetProject(id bson.ObjectId) (Project, error) {
|
||||
s, c := database.GetCol("projects")
|
||||
defer s.Close()
|
||||
var p Project
|
||||
if err := c.Find(bson.M{"_id": id}).One(&p); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
debug.PrintStack()
|
||||
return p, err
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
||||
func GetProjectList(filter interface{}, skip int, sortKey string) ([]Project, error) {
|
||||
s, c := database.GetCol("projects")
|
||||
defer s.Close()
|
||||
|
||||
var projects []Project
|
||||
if err := c.Find(filter).Skip(skip).Limit(constants.Infinite).Sort(sortKey).All(&projects); err != nil {
|
||||
debug.PrintStack()
|
||||
return projects, err
|
||||
}
|
||||
return projects, nil
|
||||
}
|
||||
|
||||
func GetProjectListTotal(filter interface{}) (int, error) {
|
||||
s, c := database.GetCol("projects")
|
||||
defer s.Close()
|
||||
|
||||
var result int
|
||||
result, err := c.Find(filter).Count()
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func UpdateProject(id bson.ObjectId, item Project) error {
|
||||
s, c := database.GetCol("projects")
|
||||
defer s.Close()
|
||||
|
||||
var result Project
|
||||
if err := c.FindId(id).One(&result); err != nil {
|
||||
debug.PrintStack()
|
||||
return err
|
||||
}
|
||||
|
||||
if err := item.Save(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func RemoveProject(id bson.ObjectId) error {
|
||||
s, c := database.GetCol("projects")
|
||||
defer s.Close()
|
||||
|
||||
var result User
|
||||
if err := c.FindId(id).One(&result); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := c.RemoveId(id); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -32,6 +32,7 @@ type Spider struct {
|
||||
Envs []Env `json:"envs" bson:"envs"` // 环境变量
|
||||
Remark string `json:"remark" bson:"remark"` // 备注
|
||||
Src string `json:"src" bson:"src"` // 源码位置
|
||||
ProjectId bson.ObjectId `json:"project_id" bson:"project_id"` // 项目ID
|
||||
|
||||
// 自定义爬虫
|
||||
Cmd string `json:"cmd" bson:"cmd"` // 执行命令
|
||||
@@ -56,6 +57,11 @@ func (spider *Spider) Save() error {
|
||||
|
||||
spider.UpdateTs = time.Now()
|
||||
|
||||
// 兼容没有项目ID的爬虫
|
||||
if spider.ProjectId.Hex() == "" {
|
||||
spider.ProjectId = bson.ObjectIdHex(constants.ObjectIdNull)
|
||||
}
|
||||
|
||||
if err := c.UpdateId(spider.Id, spider); err != nil {
|
||||
debug.PrintStack()
|
||||
return err
|
||||
@@ -162,7 +168,7 @@ func GetSpiderByName(name string) Spider {
|
||||
defer s.Close()
|
||||
|
||||
var result Spider
|
||||
if err := c.Find(bson.M{"name": name}).One(&result); err != nil {
|
||||
if err := c.Find(bson.M{"name": name}).One(&result); err != nil && err != mgo.ErrNotFound {
|
||||
log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name)
|
||||
//debug.PrintStack()
|
||||
return result
|
||||
|
||||
190
backend/routes/projects.go
Normal file
190
backend/routes/projects.go
Normal file
@@ -0,0 +1,190 @@
|
||||
package routes
|
||||
|
||||
import (
|
||||
"crawlab/constants"
|
||||
"crawlab/database"
|
||||
"crawlab/model"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func GetProjectList(c *gin.Context) {
|
||||
tag := c.Query("tag")
|
||||
|
||||
// 筛选条件
|
||||
query := bson.M{}
|
||||
if tag != "" {
|
||||
query["tags"] = tag
|
||||
}
|
||||
|
||||
// 获取列表
|
||||
projects, err := model.GetProjectList(query, 0, "+_id")
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 获取总数
|
||||
total, err := model.GetProjectListTotal(query)
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 获取每个项目的爬虫列表
|
||||
for i, p := range projects {
|
||||
spiders, err := p.GetSpiders()
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
projects[i].Spiders = spiders
|
||||
}
|
||||
|
||||
// 获取未被分配的爬虫数量
|
||||
if tag == "" {
|
||||
noProject := model.Project{
|
||||
Id: bson.ObjectIdHex(constants.ObjectIdNull),
|
||||
Name: "No Project",
|
||||
Description: "Not assigned to any project",
|
||||
}
|
||||
spiders, err := noProject.GetSpiders()
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
noProject.Spiders = spiders
|
||||
projects = append(projects, noProject)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, ListResponse{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: projects,
|
||||
Total: total,
|
||||
})
|
||||
}
|
||||
|
||||
func PutProject(c *gin.Context) {
|
||||
// 绑定请求数据
|
||||
var p model.Project
|
||||
if err := c.ShouldBindJSON(&p); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := p.Add(); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func PostProject(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
if !bson.IsObjectIdHex(id) {
|
||||
HandleErrorF(http.StatusBadRequest, c, "invalid id")
|
||||
}
|
||||
|
||||
var item model.Project
|
||||
if err := c.ShouldBindJSON(&item); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := model.UpdateProject(bson.ObjectIdHex(id), item); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func DeleteProject(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
if !bson.IsObjectIdHex(id) {
|
||||
HandleErrorF(http.StatusBadRequest, c, "invalid id")
|
||||
return
|
||||
}
|
||||
|
||||
// 从数据库中删除该爬虫
|
||||
if err := model.RemoveProject(bson.ObjectIdHex(id)); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 获取相关的爬虫
|
||||
var spiders []model.Spider
|
||||
s, col := database.GetCol("spiders")
|
||||
defer s.Close()
|
||||
if err := col.Find(bson.M{"project_id": bson.ObjectIdHex(id)}).All(&spiders); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 将爬虫的项目ID置空
|
||||
for _, spider := range spiders {
|
||||
spider.ProjectId = bson.ObjectIdHex(constants.ObjectIdNull)
|
||||
if err := spider.Save(); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func GetProjectTags(c *gin.Context) {
|
||||
type Result struct {
|
||||
Tag string `json:"tag" bson:"tag"`
|
||||
}
|
||||
|
||||
s, col := database.GetCol("projects")
|
||||
defer s.Close()
|
||||
|
||||
pipeline := []bson.M{
|
||||
{
|
||||
"$unwind": "$tags",
|
||||
},
|
||||
{
|
||||
"$group": bson.M{
|
||||
"_id": "$tags",
|
||||
},
|
||||
},
|
||||
{
|
||||
"$sort": bson.M{
|
||||
"_id": 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
"$addFields": bson.M{
|
||||
"tag": "$_id",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var items []Result
|
||||
if err := col.Pipe(pipeline).All(&items); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: items,
|
||||
})
|
||||
}
|
||||
@@ -30,6 +30,7 @@ func GetSpiderList(c *gin.Context) {
|
||||
pageNum, _ := c.GetQuery("page_num")
|
||||
pageSize, _ := c.GetQuery("page_size")
|
||||
keyword, _ := c.GetQuery("keyword")
|
||||
pid, _ := c.GetQuery("project_id")
|
||||
t, _ := c.GetQuery("type")
|
||||
sortKey, _ := c.GetQuery("sort_key")
|
||||
sortDirection, _ := c.GetQuery("sort_direction")
|
||||
@@ -41,6 +42,16 @@ func GetSpiderList(c *gin.Context) {
|
||||
if t != "" && t != "all" {
|
||||
filter["type"] = t
|
||||
}
|
||||
if pid == "" {
|
||||
// do nothing
|
||||
} else if pid == constants.ObjectIdNull {
|
||||
filter["$or"] = []bson.M{
|
||||
{"project_id": bson.ObjectIdHex(pid)},
|
||||
{"project_id": bson.M{"$exists": false}},
|
||||
}
|
||||
} else {
|
||||
filter["project_id"] = bson.ObjectIdHex(pid)
|
||||
}
|
||||
|
||||
// 排序
|
||||
sortStr := "-_id"
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
)
|
||||
|
||||
// 新增
|
||||
func PostVariable(c *gin.Context) {
|
||||
func PutVariable(c *gin.Context) {
|
||||
var variable model.Variable
|
||||
if err := c.ShouldBindJSON(&variable); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
@@ -22,7 +22,7 @@ func PostVariable(c *gin.Context) {
|
||||
}
|
||||
|
||||
// 修改
|
||||
func PutVariable(c *gin.Context) {
|
||||
func PostVariable(c *gin.Context) {
|
||||
var id = c.Param("id")
|
||||
var variable model.Variable
|
||||
if err := c.ShouldBindJSON(&variable); err != nil {
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"crawlab/entity"
|
||||
"crawlab/model"
|
||||
"crawlab/model/config_spider"
|
||||
"crawlab/services/spider_handler"
|
||||
"crawlab/utils"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -227,6 +228,17 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con
|
||||
spider.FileId = fid
|
||||
_ = spider.Save()
|
||||
|
||||
// 获取爬虫同步实例
|
||||
spiderSync := spider_handler.SpiderSync{
|
||||
Spider: spider,
|
||||
}
|
||||
|
||||
// 获取gfFile
|
||||
gfFile2 := model.GetGridFs(spider.FileId)
|
||||
|
||||
// 生成MD5
|
||||
spiderSync.CreateMd5File(gfFile2.Md5)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,10 @@ import (
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"github.com/satori/go.uuid"
|
||||
"github.com/spf13/viper"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
)
|
||||
@@ -264,5 +267,108 @@ func InitSpiderService() error {
|
||||
// 启动定时任务
|
||||
c.Start()
|
||||
|
||||
if model.IsMaster() {
|
||||
// 添加Demo爬虫
|
||||
templateSpidersDir := "../spiders"
|
||||
for _, info := range utils.ListDir(templateSpidersDir) {
|
||||
if !info.IsDir() {
|
||||
continue
|
||||
}
|
||||
spiderName := info.Name()
|
||||
|
||||
// 如果爬虫在数据库中不存在,则添加
|
||||
spider := model.GetSpiderByName(spiderName)
|
||||
if spider.Name != "" {
|
||||
// 存在同名爬虫,跳过
|
||||
continue
|
||||
}
|
||||
|
||||
// 拷贝爬虫
|
||||
templateSpiderPath := path.Join(templateSpidersDir, spiderName)
|
||||
spiderPath := path.Join(viper.GetString("spider.path"), spiderName)
|
||||
if utils.Exists(spiderPath) {
|
||||
utils.RemoveFiles(spiderPath)
|
||||
}
|
||||
if err := utils.CopyDir(templateSpiderPath, spiderPath); err != nil {
|
||||
log.Errorf("copy error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
// 构造配置数据
|
||||
configData := entity.ConfigSpiderData{}
|
||||
|
||||
// 读取YAML文件
|
||||
yamlFile, err := ioutil.ReadFile(path.Join(spiderPath, "Spiderfile"))
|
||||
if err != nil {
|
||||
log.Errorf("read yaml error: " + err.Error())
|
||||
//debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
// 反序列化
|
||||
if err := yaml.Unmarshal(yamlFile, &configData); err != nil {
|
||||
log.Errorf("unmarshal error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
if configData.Type == constants.Customized {
|
||||
// 添加该爬虫到数据库
|
||||
spider = model.Spider{
|
||||
Id: bson.NewObjectId(),
|
||||
Name: configData.Name,
|
||||
DisplayName: configData.DisplayName,
|
||||
Type: constants.Customized,
|
||||
Col: configData.Col,
|
||||
Src: spiderPath,
|
||||
Remark: configData.Remark,
|
||||
ProjectId: bson.ObjectIdHex(constants.ObjectIdNull),
|
||||
FileId: bson.ObjectIdHex(constants.ObjectIdNull),
|
||||
Cmd: configData.Cmd,
|
||||
}
|
||||
if err := spider.Add(); err != nil {
|
||||
log.Errorf("add spider error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
// 上传爬虫到GridFS
|
||||
if err := UploadSpiderToGridFsFromMaster(spider); err != nil {
|
||||
log.Errorf("upload spider error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
} else if configData.Type == constants.Configurable || configData.Type == "config" {
|
||||
// 添加该爬虫到数据库
|
||||
spider = model.Spider{
|
||||
Id: bson.NewObjectId(),
|
||||
Name: configData.Name,
|
||||
DisplayName: configData.DisplayName,
|
||||
Type: constants.Configurable,
|
||||
Col: configData.Col,
|
||||
Src: spiderPath,
|
||||
Remark: configData.Remark,
|
||||
ProjectId: bson.ObjectIdHex(constants.ObjectIdNull),
|
||||
FileId: bson.ObjectIdHex(constants.ObjectIdNull),
|
||||
Config: configData,
|
||||
}
|
||||
if err := spider.Add(); err != nil {
|
||||
log.Errorf("add spider error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
log.Errorf("add spider error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -18,6 +18,20 @@
|
||||
<el-form-item :label="$t('Spider Name')">
|
||||
<el-input v-model="spiderForm.display_name" :placeholder="$t('Spider Name')" :disabled="isView"></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Project')" prop="project_id" required>
|
||||
<el-select
|
||||
v-model="spiderForm.project_id"
|
||||
:placeholder="$t('Project')"
|
||||
filterable
|
||||
>
|
||||
<el-option
|
||||
v-for="p in projectList"
|
||||
:key="p._id"
|
||||
:value="p._id"
|
||||
:label="p.name"
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Source Folder')">
|
||||
<el-input v-model="spiderForm.src" :placeholder="$t('Source Folder')" disabled></el-input>
|
||||
</el-form-item>
|
||||
@@ -127,6 +141,9 @@ export default {
|
||||
...mapGetters('user', [
|
||||
'token'
|
||||
]),
|
||||
...mapState('project', [
|
||||
'projectList'
|
||||
]),
|
||||
isShowRun () {
|
||||
if (this.spiderForm.type === 'customized') {
|
||||
return !!this.spiderForm.cmd
|
||||
@@ -180,6 +197,15 @@ export default {
|
||||
onUploadError () {
|
||||
this.uploadLoading = false
|
||||
}
|
||||
},
|
||||
async created () {
|
||||
// fetch project list
|
||||
await this.$store.dispatch('project/getProjectList')
|
||||
|
||||
// 兼容项目ID
|
||||
if (!this.spiderForm.project_id) {
|
||||
this.$set(this.spiderForm, 'project_id', '000000000000000000000000')
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -12,6 +12,7 @@ export default {
|
||||
'Deploys': '部署',
|
||||
'Sites': '网站',
|
||||
'Setting': '设置',
|
||||
'Project': '项目',
|
||||
|
||||
// 标签
|
||||
'Overview': '概览',
|
||||
@@ -71,6 +72,7 @@ export default {
|
||||
'Create Directory': '新建目录',
|
||||
'Create File': '新建文件',
|
||||
'Add Node': '添加节点',
|
||||
'Add Project': '添加项目',
|
||||
|
||||
// 主页
|
||||
'Total Tasks': '总任务数',
|
||||
@@ -217,6 +219,14 @@ export default {
|
||||
// 部署
|
||||
'Time': '时间',
|
||||
|
||||
// 项目
|
||||
'All Tags': '全部标签',
|
||||
'Project Name': '项目名称',
|
||||
'Project Description': '项目描述',
|
||||
'Tags': '标签',
|
||||
'Enter Tags': '输入标签',
|
||||
'No Project': '无项目',
|
||||
|
||||
// 定时任务
|
||||
'Schedule Name': '定时任务名称',
|
||||
'Schedule Description': '定时任务描述',
|
||||
@@ -245,6 +255,9 @@ export default {
|
||||
'Home Page Response Time (sec)': '首页响应时间(秒)',
|
||||
'Home Page Response Status Code': '首页响应状态码',
|
||||
|
||||
// 用户
|
||||
'Super Admin': '超级管理员',
|
||||
|
||||
// 文件
|
||||
'Choose Folder': '选择文件',
|
||||
'File': '文件',
|
||||
@@ -350,7 +363,7 @@ export default {
|
||||
'Username': '用户名',
|
||||
'Password': '密码',
|
||||
'Confirm Password': '确认密码',
|
||||
'normal': '正常用户',
|
||||
'normal': '普通用户',
|
||||
'admin': '管理用户',
|
||||
'Role': '角色',
|
||||
'Edit User': '更改用户',
|
||||
|
||||
@@ -47,6 +47,25 @@ export const constantRouterMap = [
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
path: '/projects',
|
||||
component: Layout,
|
||||
meta: {
|
||||
title: 'Project',
|
||||
icon: 'fa fa-gear'
|
||||
},
|
||||
children: [
|
||||
{
|
||||
path: '',
|
||||
name: 'Project',
|
||||
component: () => import('../views/project/ProjectList'),
|
||||
meta: {
|
||||
title: 'Project',
|
||||
icon: 'fa fa-code-fork'
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
path: '/spiders',
|
||||
component: Layout,
|
||||
|
||||
@@ -16,6 +16,7 @@ import stats from './modules/stats'
|
||||
import setting from './modules/setting'
|
||||
import version from './modules/version'
|
||||
import tour from './modules/tour'
|
||||
import project from './modules/project'
|
||||
import getters from './getters'
|
||||
|
||||
Vue.use(Vuex)
|
||||
@@ -37,6 +38,7 @@ const store = new Vuex.Store({
|
||||
setting,
|
||||
version,
|
||||
tour,
|
||||
project,
|
||||
// 统计
|
||||
stats
|
||||
},
|
||||
|
||||
60
frontend/src/store/modules/project.js
Normal file
60
frontend/src/store/modules/project.js
Normal file
@@ -0,0 +1,60 @@
|
||||
import request from '../../api/request'
|
||||
|
||||
// Initial state of the project store module.
const state = {
  // project currently bound to the add/edit form
  projectForm: {},
  // projects as returned by GET /projects
  projectList: [],
  // tag names as returned by GET /projects/tags
  projectTags: []
}

// This module defines no getters.
const getters = {}
|
||||
|
||||
// Plain setters: each mutation replaces one field of the module state.
const mutations = {
  SET_PROJECT_FORM (moduleState, form) {
    moduleState.projectForm = form
  },
  SET_PROJECT_LIST (moduleState, list) {
    moduleState.projectList = list
  },
  SET_PROJECT_TAGS (moduleState, tags) {
    moduleState.projectTags = tags
  }
}
|
||||
|
||||
const actions = {
  // Fetch the project list (payload is forwarded to request.get) and store it.
  // Every project is guaranteed a `spiders` array before being committed.
  getProjectList ({ commit }, payload) {
    return request.get('/projects', payload)
      .then(response => {
        const projects = response.data.data
        if (!projects) return
        const normalized = projects.map(project => {
          if (!project.spiders) project.spiders = []
          return project
        })
        commit('SET_PROJECT_LIST', normalized)
      })
  },
  // Fetch the distinct project tags and store them as a list of strings.
  getProjectTags ({ commit }) {
    return request.get('/projects/tags')
      .then(response => {
        const rows = response.data.data
        if (!rows) return
        commit('SET_PROJECT_TAGS', rows.map(row => row.tag))
      })
  },
  // Create a project from the current form.
  addProject ({ state }) {
    return request.put('/projects', state.projectForm)
  },
  // Update the project with the given id from the current form.
  editProject ({ state }, id) {
    return request.post(`/projects/${id}`, state.projectForm)
  },
  // Delete the project with the given id.
  removeProject (context, id) {
    return request.delete(`/projects/${id}`)
  }
}
|
||||
|
||||
// Namespaced Vuex module; consumers address it with the 'project/' prefix.
export default {
  namespaced: true,
  state,
  getters,
  mutations,
  actions
}
|
||||
@@ -156,7 +156,7 @@ const user = {
|
||||
},
|
||||
// 新增全局变量
|
||||
addGlobalVariable ({ commit, state }) {
|
||||
return request.post(`/variable`, state.globalVariableForm)
|
||||
return request.put(`/variable`, state.globalVariableForm)
|
||||
.then(() => {
|
||||
state.globalVariableForm = {}
|
||||
})
|
||||
|
||||
@@ -101,3 +101,10 @@ export default {
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.menu-wrapper >>> .fa {
|
||||
width: 16px;
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
|
||||
330
frontend/src/views/project/ProjectList.vue
Normal file
330
frontend/src/views/project/ProjectList.vue
Normal file
@@ -0,0 +1,330 @@
|
||||
<template>
  <div class="app-container">
    <!--add / edit popup-->
    <el-dialog
      :visible.sync="dialogVisible"
      width="640px"
      :before-close="onDialogClose">
      <el-form label-width="180px"
               class="add-form"
               :model="projectForm"
               :inline-message="true"
               ref="projectForm"
               label-position="right">
        <el-form-item :label="$t('Project Name')" prop="name" required>
          <el-input id="name" v-model="projectForm.name" :placeholder="$t('Project Name')"></el-input>
        </el-form-item>
        <el-form-item :label="$t('Project Description')" prop="description">
          <el-input
            id="description"
            type="textarea"
            v-model="projectForm.description"
            :placeholder="$t('Project Description')"
          />
        </el-form-item>
        <el-form-item :label="$t('Tags')" prop="tags">
          <el-select
            id="tags"
            v-model="projectForm.tags"
            :placeholder="$t('Enter Tags')"
            allow-create
            filterable
            multiple
          >
          </el-select>
        </el-form-item>
      </el-form>
      <!--cancel / submit-->
      <span slot="footer" class="dialog-footer">
        <el-button size="small" @click="onDialogClose">{{$t('Cancel')}}</el-button>
        <el-button id="btn-submit" size="small" type="primary" @click="onAddSubmit">{{$t('Submit')}}</el-button>
      </span>
    </el-dialog>
    <!--./add / edit popup-->

    <!--tag filter and "add project" button-->
    <div class="action-wrapper">
      <div class="left">
        <el-select
          v-model="filter.tag"
          size="small"
          :placeholder="$t('Select Tag')"
          @change="onFilterChange"
        >
          <el-option value="" :label="$t('All Tags')"/>
          <el-option
            v-for="tag in projectTags"
            :key="tag"
            :label="tag"
            :value="tag"
          />
        </el-select>
      </div>
      <div class="right">
        <el-button
          icon="el-icon-plus"
          type="primary"
          size="small"
          @click="onAdd"
        >
          {{$t('Add Project')}}
        </el-button>
      </div>
    </div>

    <!--project cards-->
    <div class="content">
      <div v-if="projectList.length === 0" class="empty-list">
        {{ $t('You have no projects created. You can create a project by clicking the "Add" button.')}}
      </div>
      <ul v-else class="list">
        <li
          class="item"
          v-for="item in projectList"
          :key="item._id"
          @click="onView(item)"
        >
          <el-card
            class="item-card"
          >
            <i v-if="!isNoProject(item)" class="btn-edit fa fa-edit" @click="onEdit(item)"></i>
            <i v-if="!isNoProject(item)" class="btn-close fa fa-trash-o" @click="onRemove(item)"></i>
            <el-row>
              <!--
                Decide by the item itself, not by its position: the synthetic
                "No Project" entry is absent when a tag filter is active, so
                the last list element is not always that entry.
              -->
              <h4 v-if="!isNoProject(item)" class="title">{{ item.name }}</h4>
              <h4 v-else class="title">{{ $t('No Project') }}</h4>
            </el-row>
            <el-row>
              <div class="spider-count">
                {{$t('Spider Count')}}: {{ item.spiders.length }}
              </div>
            </el-row>
            <el-row class="description-wrapper">
              <div class="description">
                {{ item.description }}
              </div>
            </el-row>
            <el-row class="tags-wrapper">
              <div class="tags">
                <el-tag
                  v-for="(tag, index) in item.tags"
                  :key="index"
                  size="mini"
                  class="tag"
                >
                  {{ tag }}
                </el-tag>
              </div>
            </el-row>
          </el-card>
        </li>
      </ul>
    </div>
  </div>
</template>
|
||||
|
||||
<script>
|
||||
import {
|
||||
mapState
|
||||
} from 'vuex'
|
||||
|
||||
/**
 * ProjectList view: renders the project cards, lets the user filter them by
 * tag, and drives the add / edit / remove dialog flow. All project data lives
 * in the `project` Vuex module; this component only dispatches to it.
 */
export default {
  name: 'ProjectList',
  data () {
    return {
      defaultTags: [],
      dialogVisible: false,
      // true for ~100ms after an edit/remove icon click so that the same click
      // bubbling up to the card does not also trigger onView
      isClickAction: false,
      // whether the dialog is editing an existing project (true) or creating
      // a new one (false); declared here so the property is reactive
      isEdit: false,
      filter: {
        tag: ''
      }
    }
  },
  computed: {
    ...mapState('project', [
      'projectForm',
      'projectList',
      'projectTags'
    ])
  },
  methods: {
    /**
     * Reloads the project list using the currently selected filter, so a
     * refresh after save/add/remove shows the same subset the user selected.
     */
    refreshProjectList () {
      return this.$store.dispatch('project/getProjectList', this.filter)
    },
    /**
     * Briefly raises isClickAction so the card-level click handler (onView)
     * ignores the click that was meant for an action icon.
     */
    markClickAction () {
      this.isClickAction = true
      setTimeout(() => {
        this.isClickAction = false
      }, 100)
    },
    onDialogClose () {
      this.dialogVisible = false
    },
    onFilterChange () {
      this.refreshProjectList()
      this.$st.sendEv('项目', '筛选项目')
    },
    /** Opens the dialog with an empty form for creating a project. */
    onAdd () {
      this.isEdit = false
      this.dialogVisible = true
      this.$store.commit('project/SET_PROJECT_FORM', { tags: [] })
      this.$st.sendEv('项目', '添加项目')
    },
    /**
     * Validates the dialog form and then either updates the existing project
     * (POST /projects/:id) or creates a new one (PUT /projects), depending on
     * isEdit. On success the dialog is closed and the list reloaded.
     */
    onAddSubmit () {
      this.$refs.projectForm.validate(valid => {
        if (!valid) return

        // deep copy so the request payload is detached from store state
        const form = JSON.parse(JSON.stringify(this.projectForm))
        const editing = this.isEdit
        const request = editing
          ? this.$request.post(`/projects/${this.projectForm._id}`, form)
          : this.$request.put('/projects', form)
        const successMessage = editing
          ? 'The project has been saved'
          : 'The project has been added'

        request.then(response => {
          if (response.data.error) {
            this.$message.error(response.data.error)
            return
          }
          this.dialogVisible = false
          this.refreshProjectList()
          this.$message.success(this.$t(successMessage))
        })
      })
      this.$st.sendEv('项目', '提交项目')
    },
    /** Opens the dialog pre-filled with the given project. */
    onEdit (row) {
      this.markClickAction()

      this.$store.commit('project/SET_PROJECT_FORM', row)
      this.dialogVisible = true
      this.isEdit = true
      this.$st.sendEv('项目', '修改项目')
    },
    /** Asks for confirmation and then removes the given project. */
    onRemove (row) {
      this.markClickAction()

      this.$confirm(this.$t('Are you sure to delete the project?'), this.$t('Notification'), {
        confirmButtonText: this.$t('Confirm'),
        cancelButtonText: this.$t('Cancel'),
        type: 'warning'
      }).then(() => {
        this.$store.dispatch('project/removeProject', row._id)
          .then(() => {
            // the original code waits 100ms before reloading; kept as-is
            setTimeout(() => {
              this.refreshProjectList()
              this.$message.success(this.$t('The project has been removed'))
            }, 100)
          })
      }).catch(() => {
        // confirmation dialog dismissed: nothing to do
      })
      this.$st.sendEv('项目', '删除项目')
    },
    /** Navigates to the spider list scoped to the clicked project. */
    onView (row) {
      // ignore clicks that were aimed at the edit/remove icons
      if (this.isClickAction) return

      this.$router.push({
        name: 'SpiderList',
        params: {
          project_id: row._id
        }
      })
    },
    /** True for the placeholder entry whose id is the all-zero ObjectId. */
    isNoProject (row) {
      return row._id === '000000000000000000000000'
    }
  },
  async created () {
    await this.refreshProjectList()
    await this.$store.dispatch('project/getProjectTags')
  }
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.action-wrapper {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
padding-bottom: 10px;
|
||||
border-bottom: 1px solid #EBEEF5;
|
||||
}
|
||||
|
||||
.list {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
list-style: none;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.list .item {
|
||||
width: 320px;
|
||||
margin: 10px;
|
||||
}
|
||||
|
||||
.list .item .item-card {
|
||||
position: relative;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.list .item .item-card .title {
|
||||
margin: 10px 0 0 0;
|
||||
}
|
||||
|
||||
.list .item .item-card .spider-count {
|
||||
font-size: 12px;
|
||||
color: grey;
|
||||
font-weight: bolder;
|
||||
}
|
||||
|
||||
.list .item .item-card .description-wrapper {
|
||||
padding-bottom: 5px;
|
||||
margin-bottom: 0;
|
||||
border-bottom: 1px solid #EBEEF5;
|
||||
}
|
||||
|
||||
.list .item .item-card .description {
|
||||
font-size: 12px;
|
||||
color: grey;
|
||||
}
|
||||
|
||||
.list .item .item-card .tags {
|
||||
margin-bottom: -5px;
|
||||
}
|
||||
|
||||
.list .item .item-card .tags .tag {
|
||||
margin: 0 5px 5px 0;
|
||||
}
|
||||
|
||||
.list .item .item-card .el-row {
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
.list .item .item-card .el-row:last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
.list .item .item-card .btn-edit {
|
||||
z-index: 1;
|
||||
color: grey;
|
||||
position: absolute;
|
||||
top: 11px;
|
||||
right: 40px;
|
||||
}
|
||||
|
||||
.list .item .item-card .btn-close {
|
||||
z-index: 1;
|
||||
color: grey;
|
||||
position: absolute;
|
||||
top: 10px;
|
||||
right: 10px;
|
||||
}
|
||||
|
||||
.empty-list {
|
||||
font-size: 24px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
height: calc(100vh - 240px);
|
||||
}
|
||||
|
||||
</style>
|
||||
@@ -58,6 +58,20 @@
|
||||
<el-form-item :label="$t('Display Name')" prop="display_name" required>
|
||||
<el-input id="display-name" v-model="spiderForm.display_name" :placeholder="$t('Display Name')"/>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Project')" prop="project_id" required>
|
||||
<el-select
|
||||
v-model="spiderForm.project_id"
|
||||
:placeholder="$t('Project')"
|
||||
filterable
|
||||
>
|
||||
<el-option
|
||||
v-for="p in projectList"
|
||||
:key="p._id"
|
||||
:value="p._id"
|
||||
:label="p.name"
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Execute Command')" prop="cmd" required>
|
||||
<el-input id="cmd" v-model="spiderForm.cmd" :placeholder="$t('Execute Command')"/>
|
||||
</el-form-item>
|
||||
@@ -104,6 +118,20 @@
|
||||
<el-form-item :label="$t('Display Name')" prop="display_name" required>
|
||||
<el-input v-model="spiderForm.display_name" :placeholder="$t('Display Name')"/>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Project')" prop="project_id" required>
|
||||
<el-select
|
||||
v-model="spiderForm.project_id"
|
||||
:placeholder="$t('Project')"
|
||||
filterable
|
||||
>
|
||||
<el-option
|
||||
v-for="p in projectList"
|
||||
:key="p._id"
|
||||
:value="p._id"
|
||||
:label="p.name"
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Template')" prop="template" required>
|
||||
<el-select id="template" v-model="spiderForm.template" :value="spiderForm.template"
|
||||
:placeholder="$t('Template')">
|
||||
@@ -147,7 +175,29 @@
|
||||
<!-- </el-select>-->
|
||||
<!-- </el-form-item>-->
|
||||
<el-form-item>
|
||||
<el-input clearable @keyup.enter.native="onSearch" size="small" placeholder="名称" v-model="filter.keyword">
|
||||
<el-select
|
||||
v-model="filter.project_id"
|
||||
size="small"
|
||||
:placeholder="$t('Project')"
|
||||
@change="getList"
|
||||
>
|
||||
<el-option value="" :label="$t('All Projects')"/>
|
||||
<el-option
|
||||
v-for="p in projectList"
|
||||
:key="p._id"
|
||||
:value="p._id"
|
||||
:label="p.name"
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item>
|
||||
<el-input
|
||||
v-model="filter.keyword"
|
||||
size="small"
|
||||
:placeholder="$t('Spider Name')"
|
||||
clearable
|
||||
@keyup.enter.native="onSearch"
|
||||
>
|
||||
<i slot="suffix" class="el-input__icon el-icon-search"></i>
|
||||
</el-input>
|
||||
</el-form-item>
|
||||
@@ -335,6 +385,7 @@ export default {
|
||||
crawlConfirmDialogVisible: false,
|
||||
activeSpiderId: undefined,
|
||||
filter: {
|
||||
project_id: '',
|
||||
keyword: '',
|
||||
type: 'all'
|
||||
},
|
||||
@@ -491,6 +542,9 @@ export default {
|
||||
...mapGetters('user', [
|
||||
'token'
|
||||
]),
|
||||
...mapState('project', [
|
||||
'projectList'
|
||||
]),
|
||||
uploadForm () {
|
||||
return {
|
||||
name: this.spiderForm.name,
|
||||
@@ -517,7 +571,12 @@ export default {
|
||||
this.getList()
|
||||
},
|
||||
onAdd () {
|
||||
let projectId = '000000000000000000000000'
|
||||
if (this.filter.project_id) {
|
||||
projectId = this.filter.project_id
|
||||
}
|
||||
this.$store.commit('spider/SET_SPIDER_FORM', {
|
||||
project_id: projectId,
|
||||
template: this.templateList[0]
|
||||
})
|
||||
this.addDialogVisible = true
|
||||
@@ -737,14 +796,20 @@ export default {
|
||||
sort_key: this.sort.sortKey,
|
||||
sort_direction: this.sort.sortDirection,
|
||||
keyword: this.filter.keyword,
|
||||
type: this.filter.type
|
||||
type: this.filter.type,
|
||||
project_id: this.filter.project_id
|
||||
}
|
||||
await this.$store.dispatch('spider/getSpiderList', params)
|
||||
}
|
||||
},
|
||||
async created () {
|
||||
// fetch spider types
|
||||
// await this.getTypes()
|
||||
// fetch project list
|
||||
await this.$store.dispatch('project/getProjectList')
|
||||
|
||||
// project id
|
||||
if (this.$route.params.project_id) {
|
||||
this.filter.project_id = this.$route.params.project_id
|
||||
}
|
||||
|
||||
// fetch spider list
|
||||
await this.getList()
|
||||
|
||||
@@ -137,6 +137,7 @@ export default {
|
||||
},
|
||||
computed: {
|
||||
...mapState('task', [
|
||||
'taskForm',
|
||||
'taskResultsData',
|
||||
'taskResultsTotalCount'
|
||||
]),
|
||||
@@ -164,6 +165,9 @@ export default {
|
||||
set (value) {
|
||||
this.$store.commit('task/SET_RESULTS_PAGE_SIZE', value)
|
||||
}
|
||||
},
|
||||
isRunning () {
|
||||
return ['pending', 'running'].includes(this.taskForm.status)
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
@@ -197,6 +201,9 @@ export default {
|
||||
|
||||
this.getTaskLog()
|
||||
this.handle = setInterval(() => {
|
||||
if (!this.isRunning) return
|
||||
this.$store.dispatch('task/getTaskData', this.$route.params.id)
|
||||
this.$store.dispatch('task/getTaskResults', this.$route.params.id)
|
||||
this.getTaskLog()
|
||||
}, 5000)
|
||||
},
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
<template>
|
||||
<div class="app-container">
|
||||
<!--dialog-->
|
||||
<el-dialog :visible.sync="dialogVisible" :title="$t('Edit User')">
|
||||
<el-dialog :visible.sync="dialogVisible" width="640px" :title="$t('Edit User')">
|
||||
<el-form ref="form" :model="userForm" label-width="80px" :rules="rules" inline-message>
|
||||
<el-form-item prop="username" :label="$t('Username')" required>
|
||||
<el-input v-model="userForm.username" :placeholder="$t('Username')" :disabled="!isAdd"></el-input>
|
||||
@@ -50,7 +50,10 @@
|
||||
:label="$t('Role')"
|
||||
>
|
||||
<template slot-scope="scope">
|
||||
<el-tag v-if="scope.row.role === 'admin'" type="primary">
|
||||
<el-tag v-if="scope.row.username === 'admin'" type="success">
|
||||
{{ $t('Super Admin') }}
|
||||
</el-tag>
|
||||
<el-tag v-else-if="scope.row.role === 'admin'" type="primary">
|
||||
{{ $t(scope.row.role) }}
|
||||
</el-tag>
|
||||
<el-tag v-else type="warning">
|
||||
@@ -71,8 +74,20 @@
|
||||
fixed="right"
|
||||
>
|
||||
<template slot-scope="scope">
|
||||
<el-button icon="el-icon-edit" type="warning" size="mini" @click="onEdit(scope.row)"></el-button>
|
||||
<el-button icon="el-icon-delete" type="danger" size="mini" @click="onRemove(scope.row)"></el-button>
|
||||
<el-button
|
||||
v-if="isShowEdit(scope.row)"
|
||||
icon="el-icon-edit"
|
||||
type="warning"
|
||||
size="mini"
|
||||
@click="onEdit(scope.row)"
|
||||
/>
|
||||
<el-button
|
||||
v-if="isShowRemove(scope.row)"
|
||||
icon="el-icon-delete"
|
||||
type="danger"
|
||||
size="mini"
|
||||
@click="onRemove(scope.row)"
|
||||
/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
@@ -95,7 +110,8 @@
|
||||
|
||||
<script>
|
||||
import {
|
||||
mapState
|
||||
mapState,
|
||||
mapGetters
|
||||
} from 'vuex'
|
||||
import dayjs from 'dayjs'
|
||||
|
||||
@@ -133,6 +149,9 @@ export default {
|
||||
'userForm',
|
||||
'totalCount'
|
||||
]),
|
||||
...mapGetters('user', [
|
||||
'userInfo'
|
||||
]),
|
||||
pageSize: {
|
||||
get () {
|
||||
return this.$store.state.user.pageSize
|
||||
@@ -219,6 +238,15 @@ export default {
|
||||
this.dialogVisible = true
|
||||
},
|
||||
onValidateEmail (value) {
|
||||
},
|
||||
isShowEdit (row) {
|
||||
if (row.username === 'admin') {
|
||||
return this.userInfo.username === 'admin'
|
||||
}
|
||||
return true
|
||||
},
|
||||
isShowRemove (row) {
|
||||
return row.username !== 'admin'
|
||||
}
|
||||
},
|
||||
created () {
|
||||
@@ -227,23 +255,21 @@ export default {
|
||||
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
<style lang="scss" scoped>
|
||||
.filter {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
margin-bottom: 8px;
|
||||
|
||||
.filter-search {
|
||||
width: 240px;
|
||||
}
|
||||
.filter-search {
|
||||
width: 240px;
|
||||
}
|
||||
|
||||
.right {
|
||||
|
||||
.btn {
|
||||
margin-left: 10px;
|
||||
}
|
||||
|
||||
}
|
||||
.right {
|
||||
.btn {
|
||||
margin-left: 10px;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.el-table {
|
||||
|
||||
51
spiders/amazon_config/Spiderfile
Normal file
51
spiders/amazon_config/Spiderfile
Normal file
@@ -0,0 +1,51 @@
|
||||
name: "amazon_config"
|
||||
display_name: "亚马逊中国(可配置)"
|
||||
remark: "亚马逊中国搜索手机,列表+分页"
|
||||
type: "configurable"
|
||||
col: "results_amazon_config"
|
||||
engine: scrapy
|
||||
start_url: https://www.amazon.cn/s?k=%E6%89%8B%E6%9C%BA&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&ref=nb_sb_noss_2
|
||||
start_stage: list
|
||||
stages:
|
||||
- name: list
|
||||
is_list: true
|
||||
list_css: .s-result-item
|
||||
list_xpath: ""
|
||||
page_css: .a-last > a
|
||||
page_xpath: ""
|
||||
page_attr: href
|
||||
fields:
|
||||
- name: title
|
||||
css: span.a-text-normal
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: url
|
||||
css: .a-link-normal
|
||||
xpath: ""
|
||||
attr: href
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: price
|
||||
css: ""
|
||||
xpath: .//*[@class="a-price-whole"]
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: price_fraction
|
||||
css: ""
|
||||
xpath: .//*[@class="a-price-fraction"]
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: img
|
||||
css: .s-image-square-aspect > img
|
||||
xpath: ""
|
||||
attr: src
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: "false"
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
|
||||
like Gecko) Chrome/78.0.3904.108 Safari/537.36
|
||||
57
spiders/autohome_config/Spiderfile
Normal file
57
spiders/autohome_config/Spiderfile
Normal file
@@ -0,0 +1,57 @@
|
||||
name: "autohome_config"
|
||||
display_name: "汽车之家(可配置)"
|
||||
remark: "汽车之家文章,列表+详情+分页"
|
||||
type: "configurable"
|
||||
col: "results_autohome_config"
|
||||
engine: scrapy
|
||||
start_url: https://www.autohome.com.cn/all/
|
||||
start_stage: list
|
||||
stages:
|
||||
- name: list
|
||||
is_list: true
|
||||
list_css: ul.article > li
|
||||
list_xpath: ""
|
||||
page_css: a.page-item-next
|
||||
page_xpath: ""
|
||||
page_attr: href
|
||||
fields:
|
||||
- name: title
|
||||
css: li > a > h3
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: url
|
||||
css: li > a
|
||||
xpath: ""
|
||||
attr: href
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: abstract
|
||||
css: li > a > p
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: time
|
||||
css: li > a .fn-left
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: views
|
||||
css: li > a .fn-right > em:first-child
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: comments
|
||||
css: li > a .fn-right > em:last-child
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: "false"
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
|
||||
like Gecko) Chrome/78.0.3904.108 Safari/537.36
|
||||
39
spiders/baidu_config/Spiderfile
Normal file
39
spiders/baidu_config/Spiderfile
Normal file
@@ -0,0 +1,39 @@
|
||||
name: "baidu_config"
|
||||
display_name: "百度搜索(可配置)"
|
||||
remark: "百度搜索Crawlab,列表+分页"
|
||||
type: "configurable"
|
||||
col: "results_baidu_config"
|
||||
engine: scrapy
|
||||
start_url: http://www.baidu.com/s?wd=crawlab
|
||||
start_stage: list
|
||||
stages:
|
||||
- name: list
|
||||
is_list: true
|
||||
list_css: ""
|
||||
list_xpath: //body
|
||||
page_css: ""
|
||||
page_xpath: //body
|
||||
page_attr: href
|
||||
fields:
|
||||
# title: text of the result heading link (empty attr instead of href, which
# made title a duplicate of url).
- name: title
  css: ""
  xpath: .//h3/a
  attr: ""
  next_stage: ""
  remark: ""
# url: href of the same heading link.
- name: url
  css: ""
  xpath: .//h3/a
  attr: href
  next_stage: ""
  remark: ""
# abstract: text of the summary element (it is not a link, so no href).
- name: abstract
  css: ""
  xpath: .//*[@class="c-abstract"]
  attr: ""
  next_stage: ""
  remark: ""
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: "false"
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
|
||||
like Gecko) Chrome/78.0.3904.108 Safari/537.36
|
||||
5
spiders/chinaz/Spiderfile
Normal file
5
spiders/chinaz/Spiderfile
Normal file
@@ -0,0 +1,5 @@
|
||||
name: "chinaz"
|
||||
display_name: "站长之家 (Scrapy)"
|
||||
col: "results_chinaz"
|
||||
type: "customized"
|
||||
cmd: "scrapy crawl chinaz_spider"
|
||||
@@ -5,24 +5,3 @@
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
import os
|
||||
|
||||
from pymongo import MongoClient
|
||||
|
||||
MONGO_HOST = os.environ.get('MONGO_HOST') or 'localhost'
|
||||
MONGO_PORT = int(os.environ.get('MONGO_PORT') or '27017')
|
||||
MONGO_DB = os.environ.get('MONGO_DB') or 'crawlab_test'
|
||||
|
||||
|
||||
class MongoPipeline(object):
|
||||
mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
|
||||
db = mongo[MONGO_DB]
|
||||
col_name = os.environ.get('CRAWLAB_COLLECTION') or 'sites'
|
||||
col = db[col_name]
|
||||
|
||||
def process_item(self, item, spider):
|
||||
item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
|
||||
item['_id'] = item['domain']
|
||||
if self.col.find_one({'_id': item['_id']}) is None:
|
||||
self.col.save(item)
|
||||
return item
|
||||
|
||||
@@ -65,7 +65,7 @@ ROBOTSTXT_OBEY = True
|
||||
# Configure item pipelines
|
||||
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
ITEM_PIPELINES = {
|
||||
'chinaz.pipelines.MongoPipeline': 300,
|
||||
'crawlab.pipelines.CrawlabMongoPipeline': 300,
|
||||
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
|
||||
60
spiders/csdn_config/Spiderfile
Normal file
60
spiders/csdn_config/Spiderfile
Normal file
@@ -0,0 +1,60 @@
|
||||
name: "csdn_config"
|
||||
display_name: "CSDN(可配置)"
|
||||
remark: "CSDN Crawlab 文章,列表+详情+分页"
|
||||
type: "configurable"
|
||||
col: "results_csdn_config"
|
||||
engine: scrapy
|
||||
start_url: https://so.csdn.net/so/search/s.do?q=crawlab
|
||||
start_stage: list
|
||||
stages:
|
||||
- name: list
|
||||
is_list: true
|
||||
list_css: .search-list-con > .search-list
|
||||
list_xpath: ""
|
||||
page_css: a.btn-next
|
||||
page_xpath: ""
|
||||
page_attr: href
|
||||
fields:
|
||||
- name: url
|
||||
css: ""
|
||||
xpath: .//*[@class="limit_width"]/a
|
||||
attr: href
|
||||
next_stage: detail
|
||||
remark: ""
|
||||
- name: detail
|
||||
is_list: false
|
||||
list_css: ""
|
||||
list_xpath: ""
|
||||
page_css: ""
|
||||
page_xpath: ""
|
||||
page_attr: ""
|
||||
fields:
|
||||
- name: content
|
||||
css: ""
|
||||
xpath: .//div[@id="content_views"]
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: views
|
||||
css: .read-count
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: title
|
||||
css: .title-article
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: author
|
||||
css: .follow-nickName
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
settings:
|
||||
AUTOTHROTTLE_ENABLED: "false"
|
||||
ROBOTSTXT_OBEY: "false"
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
|
||||
like Gecko) Chrome/79.0.3945.117 Safari/537.36
|
||||
57
spiders/douban_config/Spiderfile
Normal file
57
spiders/douban_config/Spiderfile
Normal file
@@ -0,0 +1,57 @@
|
||||
name: "douban_config"
|
||||
display_name: "豆瓣读书(可配置)"
|
||||
remark: "豆瓣读书新书推荐,列表"
|
||||
type: "configurable"
|
||||
col: "results_douban_config"
|
||||
engine: scrapy
|
||||
start_url: https://book.douban.com/latest
|
||||
start_stage: list
|
||||
stages:
|
||||
- name: list
|
||||
is_list: true
|
||||
list_css: ul.cover-col-4 > li
|
||||
list_xpath: ""
|
||||
page_css: ""
|
||||
page_xpath: ""
|
||||
page_attr: ""
|
||||
fields:
|
||||
- name: title
|
||||
css: h2 > a
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: url
|
||||
css: h2 > a
|
||||
xpath: ""
|
||||
attr: href
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: img
|
||||
css: a.cover img
|
||||
xpath: ""
|
||||
attr: src
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: rating
|
||||
css: p.rating > .color-lightgray
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: abstract
|
||||
css: p:last-child
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: info
|
||||
css: .color-gray
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: "false"
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
|
||||
like Gecko) Chrome/78.0.3904.108 Safari/537.36
|
||||
5
spiders/jd/Spiderfile
Normal file
5
spiders/jd/Spiderfile
Normal file
@@ -0,0 +1,5 @@
|
||||
name: "jd"
|
||||
display_name: "京东 (Scrapy)"
|
||||
col: "results_jd"
|
||||
type: "customized"
|
||||
cmd: "scrapy crawl jd_spider"
|
||||
@@ -12,3 +12,4 @@ class JdItem(scrapy.Item):
|
||||
# define the fields for your item here like:
|
||||
name = scrapy.Field()
|
||||
price = scrapy.Field()
|
||||
url = scrapy.Field()
|
||||
|
||||
@@ -4,14 +4,3 @@
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
class JdPipeline(object):
|
||||
mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
|
||||
db = mongo[MONGO_DB]
|
||||
col_name = os.environ.get('CRAWLAB_COLLECTION') or 'jd_products'
|
||||
col = db[col_name]
|
||||
|
||||
def process_item(self, item, spider):
|
||||
return item
|
||||
|
||||
@@ -19,7 +19,7 @@ NEWSPIDER_MODULE = 'jd.spiders'
|
||||
#USER_AGENT = 'jd (+http://www.yourdomain.com)'
|
||||
|
||||
# Obey robots.txt rules
|
||||
ROBOTSTXT_OBEY = True
|
||||
ROBOTSTXT_OBEY = False
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
#CONCURRENT_REQUESTS = 32
|
||||
@@ -65,7 +65,7 @@ ROBOTSTXT_OBEY = True
|
||||
# Configure item pipelines
|
||||
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
ITEM_PIPELINES = {
|
||||
'jd.pipelines.JdPipeline': 300,
|
||||
'crawlab.pipelines.CrawlabMongoPipeline': 300,
|
||||
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
|
||||
@@ -1,11 +1,21 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import scrapy
|
||||
|
||||
from jd.items import JdItem
|
||||
|
||||
|
||||
class JdSpiderSpider(scrapy.Spider):
    """Crawl JD search result pages for the keyword "手机" and yield one
    JdItem (url, name, price) per product tile."""

    name = 'jd_spider'
    allowed_domains = ['jd.com']
    start_urls = ['http://jd.com/']

    def start_requests(self):
        """Yield one request per search result page, for pages 1 through 49."""
        for i in range(1, 50):
            yield scrapy.Request(url=f'https://search.jd.com/Search?keyword=手机&enc=utf-8&page={i}')

    def parse(self, response):
        """Yield a JdItem for every `.gl-item` element on the page.

        `price` is a float when the tile carries a parseable price and None
        otherwise, so one tile without a price no longer aborts the whole page.
        """
        for el in response.css('.gl-item'):
            price_text = el.css('.p-price i::text').extract_first()
            try:
                price = float(price_text)
            except (TypeError, ValueError):
                # TypeError: no price node (extract_first() returned None);
                # ValueError: the text is not a number
                price = None

            yield JdItem(
                url=el.css('.p-name > a::attr("href")').extract_first(),
                name=el.css('.p-name > a::attr("title")').extract_first(),
                price=price,
            )
|
||||
|
||||
4
spiders/realestate/Spiderfile
Normal file
4
spiders/realestate/Spiderfile
Normal file
@@ -0,0 +1,4 @@
|
||||
name: "realestate"
display_name: "链家网 (Scrapy)"
col: "results_realestate"
# declared explicitly, as in the other Scrapy Spiderfiles
type: "customized"
cmd: "scrapy crawl lianjia"
|
||||
@@ -4,22 +4,3 @@
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
import os
|
||||
|
||||
from pymongo import MongoClient
|
||||
|
||||
MONGO_HOST = os.environ.get('MONGO_HOST') or 'localhost'
|
||||
MONGO_PORT = int(os.environ.get('MONGO_PORT') or '27017')
|
||||
MONGO_DB = os.environ.get('MONGO_DB') or 'crawlab_test'
|
||||
|
||||
|
||||
class MongoPipeline(object):
|
||||
mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
|
||||
db = mongo[MONGO_DB]
|
||||
col_name = os.environ.get('CRAWLAB_COLLECTION')
|
||||
col = db[col_name]
|
||||
|
||||
def process_item(self, item, spider):
|
||||
item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
|
||||
self.col.save(item)
|
||||
return item
|
||||
|
||||
@@ -64,7 +64,7 @@ ROBOTSTXT_OBEY = True
|
||||
# Configure item pipelines
|
||||
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
ITEM_PIPELINES = {
|
||||
'realestate.pipelines.MongoPipeline': 300,
|
||||
'crawlab.pipelines.CrawlabMongoPipeline': 300,
|
||||
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
|
||||
5
spiders/sinastock/Spiderfile
Normal file
5
spiders/sinastock/Spiderfile
Normal file
@@ -0,0 +1,5 @@
|
||||
name: "sinastock"
|
||||
display_name: "新浪股票 (Scrapy)"
|
||||
type: "customized"
|
||||
col: "results_sinastock"
|
||||
cmd: "scrapy crawl sinastock_spider"
|
||||
@@ -4,25 +4,3 @@
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
import os
|
||||
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
class SinastockPipeline(object):
|
||||
mongo = MongoClient(
|
||||
host=os.environ.get('MONGO_HOST') or 'localhost',
|
||||
port=int(os.environ.get('MONGO_PORT') or 27017)
|
||||
)
|
||||
db = mongo[os.environ.get('MONGO_DB') or 'crawlab_test']
|
||||
col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'stock_news')
|
||||
|
||||
# create indexes
|
||||
col.create_index('stocks')
|
||||
col.create_index('url')
|
||||
|
||||
def process_item(self, item, spider):
|
||||
item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
|
||||
if self.col.find_one({'url': item['url']}) is None:
|
||||
self.col.save(item)
|
||||
return item
|
||||
|
||||
@@ -64,7 +64,7 @@ ROBOTSTXT_OBEY = True
|
||||
# Configure item pipelines
|
||||
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
ITEM_PIPELINES = {
|
||||
'sinastock.pipelines.SinastockPipeline': 300,
|
||||
'crawlab.pipelines.CrawlabMongoPipeline': 300,
|
||||
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
|
||||
54
spiders/v2ex_config/Spiderfile
Normal file
54
spiders/v2ex_config/Spiderfile
Normal file
@@ -0,0 +1,54 @@
|
||||
name: "v2ex_config"
|
||||
display_name: "V2ex(可配置)"
|
||||
remark: "V2ex,列表+详情"
|
||||
type: "configurable"
|
||||
col: "results_v2ex_config"
|
||||
engine: scrapy
|
||||
start_url: https://v2ex.com/
|
||||
start_stage: list
|
||||
stages:
|
||||
- name: list
|
||||
is_list: true
|
||||
list_css: .cell.item
|
||||
list_xpath: ""
|
||||
page_css: ""
|
||||
page_xpath: ""
|
||||
page_attr: href
|
||||
fields:
|
||||
- name: title
|
||||
css: a.topic-link
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: url
|
||||
css: a.topic-link
|
||||
xpath: ""
|
||||
attr: href
|
||||
next_stage: detail
|
||||
remark: ""
|
||||
- name: replies
|
||||
css: .count_livid
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: detail
|
||||
is_list: false
|
||||
list_css: ""
|
||||
list_xpath: ""
|
||||
page_css: ""
|
||||
page_xpath: ""
|
||||
page_attr: ""
|
||||
fields:
|
||||
- name: content
|
||||
css: ""
|
||||
xpath: .//*[@class="markdown_body"]
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
settings:
|
||||
AUTOTHROTTLE_ENABLED: "true"
|
||||
ROBOTSTXT_OBEY: "false"
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
|
||||
like Gecko) Chrome/79.0.3945.117 Safari/537.36
|
||||
5
spiders/xueqiu/Spiderfile
Normal file
5
spiders/xueqiu/Spiderfile
Normal file
@@ -0,0 +1,5 @@
|
||||
name: "xueqiu"
|
||||
display_name: "雪球网 (Scrapy)"
|
||||
type: "customized"
|
||||
col: "results_xueqiu"
|
||||
cmd: "scrapy crawl xueqiu_spider"
|
||||
@@ -4,26 +4,3 @@
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
import os
|
||||
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
class XueqiuPipeline(object):
|
||||
mongo = MongoClient(
|
||||
host=os.environ.get('MONGO_HOST') or 'localhost',
|
||||
port=int(os.environ.get('MONGO_PORT') or 27017)
|
||||
)
|
||||
db = mongo[os.environ.get('MONGO_DB') or 'crawlab_test']
|
||||
col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'results_xueqiu')
|
||||
|
||||
# create indexes
|
||||
col.create_index('stocks')
|
||||
col.create_index('id')
|
||||
col.create_index('url')
|
||||
|
||||
def process_item(self, item, spider):
|
||||
item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
|
||||
if self.col.find_one({'id': item['id']}) is None:
|
||||
self.col.save(item)
|
||||
return item
|
||||
|
||||
@@ -18,7 +18,7 @@ NEWSPIDER_MODULE = 'xueqiu.spiders'
|
||||
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
|
||||
|
||||
# Obey robots.txt rules
|
||||
ROBOTSTXT_OBEY = True
|
||||
ROBOTSTXT_OBEY = False
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
# CONCURRENT_REQUESTS = 32
|
||||
@@ -64,7 +64,7 @@ ROBOTSTXT_OBEY = True
|
||||
# Configure item pipelines
|
||||
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
ITEM_PIPELINES = {
|
||||
'xueqiu.pipelines.XueqiuPipeline': 300,
|
||||
'crawlab.pipelines.CrawlabMongoPipeline': 300,
|
||||
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
|
||||
39
spiders/xueqiu_config/Spiderfile
Normal file
39
spiders/xueqiu_config/Spiderfile
Normal file
@@ -0,0 +1,39 @@
|
||||
name: "xueqiu_config"
|
||||
display_name: "雪球网(可配置)"
|
||||
remark: "雪球网新闻,列表"
|
||||
type: "configurable"
|
||||
col: "results_xueqiu_config"
|
||||
engine: scrapy
|
||||
start_url: https://xueqiu.com/
|
||||
start_stage: list
|
||||
stages:
|
||||
- name: list
|
||||
is_list: true
|
||||
list_css: ""
|
||||
list_xpath: .//*[contains(@class, "AnonymousHome_home__timeline__item")]
|
||||
page_css: ""
|
||||
page_xpath: ""
|
||||
page_attr: ""
|
||||
fields:
|
||||
- name: title
|
||||
css: h3 > a
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: url
|
||||
css: h3 > a
|
||||
xpath: ""
|
||||
attr: href
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: abstract
|
||||
css: p
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: "false"
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
|
||||
like Gecko) Chrome/78.0.3904.108 Safari/537.36
|
||||
45
spiders/zongheng_config/Spiderfile
Normal file
45
spiders/zongheng_config/Spiderfile
Normal file
@@ -0,0 +1,45 @@
|
||||
name: "zongheng_config"
|
||||
display_name: "纵横(可配置)"
|
||||
remark: "纵横小说网,列表"
|
||||
type: "configurable"
|
||||
col: "results_zongheng_config"
|
||||
engine: scrapy
|
||||
start_url: http://www.zongheng.com/rank/details.html?rt=1&d=1
|
||||
start_stage: list
|
||||
stages:
|
||||
- name: list
|
||||
is_list: true
|
||||
list_css: .rank_d_list
|
||||
list_xpath: ""
|
||||
page_css: ""
|
||||
page_xpath: ""
|
||||
page_attr: href
|
||||
fields:
|
||||
- name: title
|
||||
css: .rank_d_b_name > a
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: url
|
||||
css: .rank_d_b_name > a
|
||||
xpath: ""
|
||||
attr: href
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: abstract
|
||||
css: body
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
- name: votes
|
||||
css: .rank_d_b_ticket
|
||||
xpath: ""
|
||||
attr: ""
|
||||
next_stage: ""
|
||||
remark: ""
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: "false"
|
||||
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
|
||||
like Gecko) Chrome/78.0.3904.108 Safari/537.36
|
||||
Reference in New Issue
Block a user