diff --git a/CHANGELOG-zh.md b/CHANGELOG-zh.md index 4099e43a..b3420b47 100644 --- a/CHANGELOG-zh.md +++ b/CHANGELOG-zh.md @@ -1,3 +1,20 @@ +# 0.4.9 (2020-03-31) +### 功能 / 优化 +- **挑战**. 用户可以完成不同的趣味挑战. +- **更高级的权限控制**. 更细化的权限管理,例如普通用户只能查看或管理自己的爬虫或项目,而管理用户可以查看或管理所有爬虫或项目. +- **反馈**. 允许用户发送反馈和评分给 Crawlab 开发组. +- **更好的主页指标**. 优化主页上的指标展示. +- **可配置爬虫转化为自定义爬虫**. 用户可以将自己的可配置爬虫转化为 Scrapy 自定义爬虫. +- **查看定时任务触发的任务**. 允许用户查看定时任务触发的任务. [#648](https://github.com/crawlab-team/crawlab/issues/648) +- **支持结果去重**. 允许用户配置结果去重. [#579](https://github.com/crawlab-team/crawlab/issues/579) +- **支持任务重试**. 允许用户重新运行历史任务. + +### Bug 修复 +- **CLI 无法在 Windows 上使用**. [#580](https://github.com/crawlab-team/crawlab/issues/580) +- **重新上传错误**. [#643](https://github.com/crawlab-team/crawlab/issues/643) [#640](https://github.com/crawlab-team/crawlab/issues/640) +- **上传丢失文件目录**. [#646](https://github.com/crawlab-team/crawlab/issues/646) +- **无法在爬虫定时任务标签中添加定时任务**. + # 0.4.8 (2020-03-11) ### 功能 / 优化 - **支持更多编程语言安装**. 现在用户可以安装或预装更多的编程语言,包括 Java、.Net Core、PHP. diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b4792f3..ae15b56d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,20 @@ +# 0.4.9 (2020-03-31) +### Features / Enhancement +- **Challenges**. Users can achieve different challenges based on their actions. +- **More Advanced Access Control**. More granular access control, e.g. normal users can only view/manage their own spiders/projects and admin users can view/manage all spiders/projects. +- **Feedback**. Allow users to send feedback and ratings to Crawlab team. +- **Better Home Page Metrics**. Optimized metrics display on home page. +- **Configurable Spiders Converted to Customized Spiders**. Allow users to convert their configurable spiders into customized spiders which are also Scrapy spiders. +- **View Tasks Triggered by Schedule**. Allow users to view tasks triggered by a schedule. [#648](https://github.com/crawlab-team/crawlab/issues/648) +- **Support Results De-Duplication**. 
Allow users to configure de-duplication of results. [#579](https://github.com/crawlab-team/crawlab/issues/579) +- **Support Task Restart**. Allow users to re-run historical tasks. + +### Bug Fixes +- **CLI unable to use on Windows**. [#580](https://github.com/crawlab-team/crawlab/issues/580) +- **Re-upload error**. [#643](https://github.com/crawlab-team/crawlab/issues/643) [#640](https://github.com/crawlab-team/crawlab/issues/640) +- **Upload missing folders**. [#646](https://github.com/crawlab-team/crawlab/issues/646) +- **Unable to add schedules in Spider Page**. + # 0.4.8 (2020-03-11) ### Features / Enhancement - **Support Installations of More Programming Languages**. Now users can install or pre-install more programming languages including Java, .Net Core and PHP. diff --git a/README-zh.md b/README-zh.md index 1adf3dc7..fabab977 100644 --- a/README-zh.md +++ b/README-zh.md @@ -305,6 +305,9 @@ Crawlab使用起来很方便,也很通用,可以适用于几乎任何主流 + + + ## 社区 & 赞助 diff --git a/README.md b/README.md index 25a16c8e..edd3b6d7 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,9 @@ Crawlab is easy to use, general enough to adapt spiders in any language and any + + + ## Community & Sponsorship diff --git a/backend/conf/config.yml b/backend/conf/config.yml index 6affb507..8c07a507 100644 --- a/backend/conf/config.yml +++ b/backend/conf/config.yml @@ -44,6 +44,7 @@ setting: enableTutorial: "N" runOnMaster: "Y" demoSpiders: "N" + checkScrapy: "Y" notification: mail: server: '' diff --git a/backend/constants/action.go b/backend/constants/action.go new file mode 100644 index 00000000..389a11bc --- /dev/null +++ b/backend/constants/action.go @@ -0,0 +1,8 @@ +package constants + +const ( + ActionTypeVisit = "visit" + ActionTypeInstallDep = "install_dep" + ActionTypeInstallLang = "install_lang" + ActionTypeViewDisclaimer = "view_disclaimer" +) diff --git a/backend/constants/auth.go b/backend/constants/auth.go new file mode 100644 index 00000000..136391a0 --- /dev/null +++ 
b/backend/constants/auth.go @@ -0,0 +1,7 @@ +package constants + +const ( + OwnerTypeAll = "all" + OwnerTypeMe = "me" + OwnerTypePublic = "public" +) diff --git a/backend/constants/challenge.go b/backend/constants/challenge.go new file mode 100644 index 00000000..5c056e91 --- /dev/null +++ b/backend/constants/challenge.go @@ -0,0 +1,20 @@ +package constants + +const ( + ChallengeLogin7d = "login_7d" + ChallengeLogin30d = "login_30d" + ChallengeLogin90d = "login_90d" + ChallengeLogin180d = "login_180d" + ChallengeCreateCustomizedSpider = "create_customized_spider" + ChallengeCreateConfigurableSpider = "create_configurable_spider" + ChallengeCreateSchedule = "create_schedule" + ChallengeCreateNodes = "create_nodes" + ChallengeCreateUser = "create_user" + ChallengeRunRandom = "run_random" + ChallengeScrape1k = "scrape_1k" + ChallengeScrape10k = "scrape_10k" + ChallengeScrape100k = "scrape_100k" + ChallengeInstallDep = "install_dep" + ChallengeInstallLang = "install_lang" + ChallengeViewDisclaimer = "view_disclaimer" +) diff --git a/backend/data/challenge_data.json b/backend/data/challenge_data.json new file mode 100644 index 00000000..5a51dc33 --- /dev/null +++ b/backend/data/challenge_data.json @@ -0,0 +1,142 @@ +[ + { + "name": "login_7d", + "title_cn": "连续登录 7 天", + "title_en": "Logged-in for 7 days", + "description_cn": "连续 7 天登录 Crawlab,即可完成挑战!", + "description_en": "Logged-in for consecutive 7 days to complete the challenge", + "difficulty": 1 + }, + { + "name": "login_30d", + "title_cn": "连续登录 30 天", + "title_en": "Logged-in for 30 days", + "description_cn": "连续 30 天登录 Crawlab,即可完成挑战!", + "description_en": "Logged-in for consecutive 30 days to complete the challenge", + "difficulty": 2 + }, + { + "name": "login_90d", + "title_cn": "连续登录 90 天", + "title_en": "Logged-in for 90 days", + "description_cn": "连续 90 天登录 Crawlab,即可完成挑战!", + "description_en": "Logged-in for consecutive 90 days to complete the challenge", + "difficulty": 3 + }, + { + "name": "login_180d", 
+ "title_cn": "连续登录 180 天", + "title_en": "Logged-in for 180 days", + "description_cn": "连续 180 天登录 Crawlab,即可完成挑战!", + "description_en": "Logged-in for consecutive 180 days to complete the challenge", + "difficulty": 4 + }, + { + "name": "create_customized_spider", + "title_cn": "创建 1 个自定义爬虫", + "title_en": "Create a customized spider", + "description_cn": "在爬虫列表中,点击 '添加爬虫',选择 '自定义爬虫',输入相应的参数,点击添加,即可完成挑战!", + "description_en": "In Spider List page, click 'Add Spider', select 'Customized Spider', enter params, click 'Add' to finish the challenge.", + "difficulty": 1, + "path": "/spiders" + }, + { + "name": "create_configurable_spider", + "title_cn": "创建 1 个可配置爬虫", + "title_en": "Create a configurable spider", + "description_cn": "在爬虫列表中,点击 '添加爬虫',选择 '可配置爬虫',输入相应的参数,点击添加,即可完成挑战!", + "description_en": "In Spider List page, click 'Add Spider', select 'Configurable Spider', enter params, click 'Add' to finish the challenge.", + "difficulty": 1, + "path": "/spiders" + }, + { + "name": "run_random", + "title_cn": "用随机模式成功运行爬虫", + "title_en": "Run a spider in random mode successfully", + "description_cn": "在您创建好的爬虫中,导航到其对应的详情页(爬虫列表中点击爬虫),选择随机模式运行一个爬虫,并能运行成功。", + "description_en": "In your created spiders, navigate to corresponding detail page (click spider in Spider List page), run a spider in random mode successfully.", + "difficulty": 1, + "path": "/spiders" + }, + { + "name": "scrape_1k", + "title_cn": "抓取 1 千条数据", + "title_en": "Scrape 1k records", + "description_cn": "运行您创建好的爬虫,抓取 1 千条及以上的结果数据,即可完成挑战!", + "description_en": "Run your created spiders, scrape 1k and more results to finish the challenge.", + "difficulty": 2, + "path": "/spiders" + }, + { + "name": "scrape_10k", + "title_cn": "抓取 1 万条数据", + "title_en": "Scrape 10k records", + "description_cn": "运行您创建好的爬虫,抓取 1 万条及以上的结果数据,即可完成挑战!", + "description_en": "Run your created spiders, scrape 10k and more results to finish the challenge.", + "difficulty": 3, + "path": "/spiders" + }, + { + "name": "scrape_100k", + 
"title_cn": "抓取 10 万条数据", + "title_en": "Scrape 100k records", + "description_cn": "运行您创建好的爬虫,抓取 10 万条及以上的结果数据,即可完成挑战!", + "description_en": "Run your created spiders, scrape 100k and more results to finish the challenge.", + "difficulty": 4, + "path": "/spiders" + }, + { + "name": "create_schedule", + "title_cn": "创建 1 个定时任务", + "title_en": "Create a schedule", + "description_cn": "在定时任务列表中,创建一个定时任务,正确设置好 Cron 表达式,即可完成挑战!", + "description_en": "In Schedule List page, create a schedule and configure cron expression to finish the task.", + "difficulty": 1, + "path": "/schedules" + }, + { + "name": "create_nodes", + "title_cn": "创建 1 个节点集群", + "title_en": "Create a node cluster", + "description_cn": "按照文档的部署指南,部署含有 3 个节点的集群,即可完成挑战!", + "description_en": "Deploy a 3-node cluster according to the deployment guidance in documentation to finish the task.", + "difficulty": 3, + "path": "/nodes" + }, + { + "name": "install_dep", + "title_cn": "安装 1 个依赖", + "title_en": "Install a dependency successfully", + "description_cn": "在 '节点列表->安装' 或 '节点详情->安装' 中,搜索并安装所需的 1 个依赖,即可完成挑战!", + "description_en": "In 'Node List -> Installation' or 'Node Detail -> Installation', search and install a dependency.", + "difficulty": 3, + "path": "/nodes" + }, + { + "name": "install_lang", + "title_cn": "安装 1 个语言环境", + "title_en": "Install a language successfully", + "description_cn": "在 '节点列表->安装' 或 '节点详情->安装' 中,点击安装所需的 1 个语言环境,即可完成挑战!", + "description_en": "In 'Node List -> Installation' or 'Node Detail -> Installation', install a language.", + "difficulty": 3, + "path": "/nodes" + }, + { + "name": "view_disclaimer", + "title_cn": "阅读免责声明", + "title_en": "View disclaimer", + "description_cn": "在左侧菜单栏,点击 '免责声明' 查看其内容,即可完成挑战!", + "description_en": "In the left side menu, click 'Disclaimer' and view its content to finish the challenge.", + "difficulty": 1, + "path": "/disclaimer" + }, + { + "name": "create_user", + "title_cn": "创建 1 个用户", + "title_en": "Create a user", + "description_cn": 
"在用户管理页面中创建一个新用户,即可完成挑战!", + "description_en": "In User Admin page, create a new user to finish the challenge.", + "difficulty": 1, + "path": "/users" + } +] \ No newline at end of file diff --git a/backend/entity/rpc.go b/backend/entity/rpc.go index 3f5ddcea..48f14b26 100644 --- a/backend/entity/rpc.go +++ b/backend/entity/rpc.go @@ -1,11 +1,11 @@ package entity type RpcMessage struct { - Id string `json:"id"` - Method string `json:"method"` - NodeId string `json:"node_id"` - Params map[string]string `json:"params"` - Timeout int `json:"timeout"` - Result string `json:"result"` - Error string `json:"error"` + Id string `json:"id"` // 消息ID + Method string `json:"method"` // 消息方法 + NodeId string `json:"node_id"` // 节点ID + Params map[string]string `json:"params"` // 参数 + Timeout int `json:"timeout"` // 超时 + Result string `json:"result"` // 结果 + Error string `json:"error"` // 错误 } diff --git a/backend/main.go b/backend/main.go index d53991e2..b494a8b3 100644 --- a/backend/main.go +++ b/backend/main.go @@ -9,6 +9,7 @@ import ( "crawlab/model" "crawlab/routes" "crawlab/services" + "crawlab/services/challenge" "crawlab/services/rpc" "github.com/apex/log" "github.com/gin-gonic/gin" @@ -91,6 +92,22 @@ func main() { panic(err) } log.Info("initialized dependency fetcher successfully") + + // 初始化挑战服务 + if err := challenge.InitChallengeService(); err != nil { + log.Error("init challenge service error:" + err.Error()) + debug.PrintStack() + panic(err) + } + log.Info("initialized challenge service successfully") + + // 初始化清理服务 + if err := services.InitCleanService(); err != nil { + log.Error("init clean service error:" + err.Error()) + debug.PrintStack() + panic(err) + } + log.Info("initialized clean service successfully") } // 初始化任务执行器 @@ -214,6 +231,7 @@ func main() { authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志 authGroup.GET("/tasks/:id/results", routes.GetTaskResults) // 任务结果 authGroup.GET("/tasks/:id/results/download", routes.DownloadTaskResultsCsv) // 下载任务结果 
+ authGroup.POST("/tasks/:id/restart", routes.RestartTask) // 重新开始任务 } // 定时任务 { @@ -231,6 +249,7 @@ func main() { authGroup.GET("/users/:id", routes.GetUser) // 用户详情 authGroup.POST("/users/:id", routes.PostUser) // 更改用户 authGroup.DELETE("/users/:id", routes.DeleteUser) // 删除用户 + authGroup.PUT("/users-add", routes.PutUser) // 添加用户 authGroup.GET("/me", routes.GetMe) // 获取自己账户 authGroup.POST("/me", routes.PostMe) // 修改自己账户 } @@ -254,6 +273,18 @@ func main() { authGroup.POST("/projects/:id", routes.PostProject) // 新增 authGroup.DELETE("/projects/:id", routes.DeleteProject) // 删除 } + // 挑战 + { + authGroup.GET("/challenges", routes.GetChallengeList) // 挑战列表 + authGroup.POST("/challenges-check", routes.CheckChallengeList) // 检查挑战列表 + } + // 操作 + { + //authGroup.GET("/actions", routes.GetActionList) // 操作列表 + //authGroup.GET("/actions/:id", routes.GetAction) // 操作 + authGroup.PUT("/actions", routes.PutAction) // 新增操作 + //authGroup.POST("/actions/:id", routes.PostAction) // 修改操作 + } // 统计数据 authGroup.GET("/stats/home", routes.GetHomeStats) // 首页统计数据 // 文件 @@ -262,7 +293,7 @@ func main() { authGroup.GET("/git/branches", routes.GetGitRemoteBranches) // 获取 Git 分支 authGroup.GET("/git/public-key", routes.GetGitSshPublicKey) // 获取 SSH 公钥 authGroup.GET("/git/commits", routes.GetGitCommits) // 获取 Git Commits - authGroup.POST("/git/checkout", routes.PostGitCheckout) // 获取 Git Commits + authGroup.POST("/git/checkout", routes.PostGitCheckout) // 获取 Git Commits } } diff --git a/backend/mock/spider.go b/backend/mock/spider.go index ef3e6104..e6c20a6b 100644 --- a/backend/mock/spider.go +++ b/backend/mock/spider.go @@ -1,6 +1,7 @@ package mock import ( + "crawlab/constants" "crawlab/model" "github.com/apex/log" "github.com/gin-gonic/gin" @@ -26,6 +27,7 @@ var SpiderList = []model.Spider{ LastRunTs: time.Now(), CreateTs: time.Now(), UpdateTs: time.Now(), + UserId: constants.ObjectIdNull, }, } diff --git a/backend/mock/spider_test.go b/backend/mock/spider_test.go index f4dbea63..11e2c9cd 
100644 --- a/backend/mock/spider_test.go +++ b/backend/mock/spider_test.go @@ -2,6 +2,7 @@ package mock import ( "bytes" + "crawlab/constants" "crawlab/model" "encoding/json" "github.com/globalsign/mgo/bson" @@ -61,6 +62,7 @@ func TestPostSpider(t *testing.T) { LastRunTs: time.Now(), CreateTs: time.Now(), UpdateTs: time.Now(), + UserId: constants.ObjectIdNull, } var resp Response var spiderId = "5d429e6c19f7abede924fee2" diff --git a/backend/model/action.go b/backend/model/action.go new file mode 100644 index 00000000..15406181 --- /dev/null +++ b/backend/model/action.go @@ -0,0 +1,162 @@ +package model + +import ( + "crawlab/constants" + "crawlab/database" + "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "runtime/debug" + "time" +) + +type Action struct { + Id bson.ObjectId `json:"_id" bson:"_id"` + UserId bson.ObjectId `json:"user_id" bson:"user_id"` + Type string `json:"type" bson:"type"` + + CreateTs time.Time `json:"create_ts" bson:"create_ts"` + UpdateTs time.Time `json:"update_ts" bson:"update_ts"` +} + +func (a *Action) Save() error { + s, c := database.GetCol("actions") + defer s.Close() + + a.UpdateTs = time.Now() + + if err := c.UpdateId(a.Id, a); err != nil { + debug.PrintStack() + return err + } + return nil +} + +func (a *Action) Add() error { + s, c := database.GetCol("actions") + defer s.Close() + + a.Id = bson.NewObjectId() + a.UpdateTs = time.Now() + a.CreateTs = time.Now() + if err := c.Insert(a); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + + return nil +} + +func GetAction(id bson.ObjectId) (Action, error) { + s, c := database.GetCol("actions") + defer s.Close() + var user Action + if err := c.Find(bson.M{"_id": id}).One(&user); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return user, err + } + return user, nil +} + +func GetActionList(filter interface{}, skip int, limit int, sortKey string) ([]Action, error) { + s, c := database.GetCol("actions") + defer s.Close() + + var actions 
[]Action + if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&actions); err != nil { + debug.PrintStack() + return actions, err + } + return actions, nil +} + +func GetActionListTotal(filter interface{}) (int, error) { + s, c := database.GetCol("actions") + defer s.Close() + + var result int + result, err := c.Find(filter).Count() + if err != nil { + return result, err + } + return result, nil +} + +// GetVisitDays returns the number of distinct calendar days (dates taken in the Asia/Shanghai timezone) on which the user has a "visit" action, or 0 when there is none. +func GetVisitDays(uid bson.ObjectId) (int, error) { + type ResData struct { + Days int `json:"days" bson:"days"` + } + s, c := database.GetCol("actions") + defer s.Close() + + pipeline := []bson.M{ + { + "$match": bson.M{ + "user_id": uid, + "type": constants.ActionTypeVisit, + }, + }, + { + "$addFields": bson.M{ + "date": bson.M{ + "$dateToString": bson.M{ + "format": "%Y%m%d", + "date": "$create_ts", + "timezone": "Asia/Shanghai", + }, + }, + }, + }, + { + "$group": bson.M{ + "_id": "$date", + }, + }, + {"$count": "days"}, // one document per distinct date reaches this stage, so the count is the number of days + } + + var resData []ResData + if err := c.Pipe(pipeline).All(&resData); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return 0, err + } + if len(resData) == 0 { // the pipeline emits no document at all when the user has no visit actions + return 0, nil + } + return resData[0].Days, nil +} + +func UpdateAction(id bson.ObjectId, item Action) error { + s, c := database.GetCol("actions") + defer s.Close() + + var result Action + if err := c.FindId(id).One(&result); err != nil { + debug.PrintStack() + return err + } + + if err := item.Save(); err != nil { + return err + } + return nil +} + +func RemoveAction(id bson.ObjectId) error { + s, c := database.GetCol("actions") + defer s.Close() + + var result Action + if err := c.FindId(id).One(&result); err != nil { + return err + } + + if err := c.RemoveId(id); err != nil { + return err + } + + return nil +} diff --git a/backend/model/challenge.go b/backend/model/challenge.go new file mode 100644 index 00000000..09f4db89 --- /dev/null +++ b/backend/model/challenge.go @@ -0,0 +1,187 @@ +package model + +import ( + "crawlab/database" + "github.com/apex/log" + 
"github.com/globalsign/mgo" + "github.com/globalsign/mgo/bson" + "runtime/debug" + "time" +) + +type Challenge struct { + Id bson.ObjectId `json:"_id" bson:"_id"` + Name string `json:"name" bson:"name"` + TitleCn string `json:"title_cn" bson:"title_cn"` + TitleEn string `json:"title_en" bson:"title_en"` + DescriptionCn string `json:"description_cn" bson:"description_cn"` + DescriptionEn string `json:"description_en" bson:"description_en"` + Difficulty int `json:"difficulty" bson:"difficulty"` + Path string `json:"path" bson:"path"` + + // 前端展示 + Achieved bool `json:"achieved" bson:"achieved"` + + CreateTs time.Time `json:"create_ts" bson:"create_ts"` + UpdateTs time.Time `json:"update_ts" bson:"update_ts"` +} + +func (ch *Challenge) Save() error { + s, c := database.GetCol("challenges") + defer s.Close() + + ch.UpdateTs = time.Now() + + if err := c.UpdateId(ch.Id, ch); err != nil { + debug.PrintStack() + return err + } + return nil +} + +func (ch *Challenge) Add() error { + s, c := database.GetCol("challenges") + defer s.Close() + + ch.Id = bson.NewObjectId() + ch.UpdateTs = time.Now() + ch.CreateTs = time.Now() + if err := c.Insert(ch); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + + return nil +} + +func GetChallenge(id bson.ObjectId) (Challenge, error) { + s, c := database.GetCol("challenges") + defer s.Close() + + var ch Challenge + if err := c.Find(bson.M{"_id": id}).One(&ch); err != nil { + if err != mgo.ErrNotFound { + log.Errorf(err.Error()) + debug.PrintStack() + return ch, err + } + } + + return ch, nil +} + +func GetChallengeByName(name string) (Challenge, error) { + s, c := database.GetCol("challenges") + defer s.Close() + + var ch Challenge + if err := c.Find(bson.M{"name": name}).One(&ch); err != nil { + if err != mgo.ErrNotFound { + log.Errorf(err.Error()) + debug.PrintStack() + return ch, err + } + } + + return ch, nil +} + +func GetChallengeList(filter interface{}, skip int, limit int, sortKey string) ([]Challenge, 
error) { + s, c := database.GetCol("challenges") + defer s.Close() + + var challenges []Challenge + if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&challenges); err != nil { + debug.PrintStack() + return challenges, err + } + + return challenges, nil +} + +func GetChallengeListWithAchieved(filter interface{}, skip int, limit int, sortKey string, uid bson.ObjectId) ([]Challenge, error) { + challenges, err := GetChallengeList(filter, skip, limit, sortKey) + if err != nil { + return challenges, err + } + + for i, ch := range challenges { + query := bson.M{ + "user_id": uid, + "challenge_id": ch.Id, + } + + list, err := GetChallengeAchievementList(query, 0, 1, "-_id") + if err != nil { + continue + } + + challenges[i].Achieved = len(list) > 0 + } + + return challenges, nil +} + +func GetChallengeListTotal(filter interface{}) (int, error) { + s, c := database.GetCol("challenges") + defer s.Close() + + var result int + result, err := c.Find(filter).Count() + if err != nil { + return result, err + } + return result, nil +} + +type ChallengeAchievement struct { + Id bson.ObjectId `json:"_id" bson:"_id"` + ChallengeId bson.ObjectId `json:"challenge_id" bson:"challenge_id"` + UserId bson.ObjectId `json:"user_id" bson:"user_id"` + + CreateTs time.Time `json:"create_ts" bson:"create_ts"` + UpdateTs time.Time `json:"update_ts" bson:"update_ts"` +} + +func (ca *ChallengeAchievement) Save() error { + s, c := database.GetCol("challenges_achievements") + defer s.Close() + + ca.UpdateTs = time.Now() // refresh the modification timestamp before persisting + + if err := c.UpdateId(ca.Id, ca); err != nil { // write the achievement record itself, not the collection handle c + debug.PrintStack() + return err + } + return nil +} + +func (ca *ChallengeAchievement) Add() error { + s, c := database.GetCol("challenges_achievements") + defer s.Close() + + ca.Id = bson.NewObjectId() + ca.UpdateTs = time.Now() + ca.CreateTs = time.Now() + if err := c.Insert(ca); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + + return nil +} + +func GetChallengeAchievementList(filter 
interface{}, skip int, limit int, sortKey string) ([]ChallengeAchievement, error) { + s, c := database.GetCol("challenges_achievements") + defer s.Close() + + var challengeAchievements []ChallengeAchievement + if err := c.Find(filter).Skip(skip).Limit(limit).Sort(sortKey).All(&challengeAchievements); err != nil { + debug.PrintStack() + return challengeAchievements, err + } + + return challengeAchievements, nil +} diff --git a/backend/model/node.go b/backend/model/node.go index 6e29a073..4d299f51 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -266,7 +266,7 @@ func GetNodeBaseInfo() (ip string, mac string, hostname string, key string, erro debug.PrintStack() return "", "", "", "", err } - return ip, mac, key, hostname, nil + return ip, mac, hostname, key, nil } // 根据redis的key值,重置node节点为offline diff --git a/backend/model/project.go b/backend/model/project.go index 92c72655..2889d6aa 100644 --- a/backend/model/project.go +++ b/backend/model/project.go @@ -15,11 +15,13 @@ type Project struct { Description string `json:"description" bson:"description"` Tags []string `json:"tags" bson:"tags"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` - // 前端展示 - Spiders []Spider `json:"spiders" bson:"spiders"` + Spiders []Spider `json:"spiders" bson:"spiders"` + Username string `json:"username" bson:"username"` + + UserId bson.ObjectId `json:"user_id" bson:"user_id"` + CreateTs time.Time `json:"create_ts" bson:"create_ts"` + UpdateTs time.Time `json:"update_ts" bson:"update_ts"` } func (p *Project) Save() error { @@ -89,15 +91,21 @@ func GetProject(id bson.ObjectId) (Project, error) { return p, nil } -func GetProjectList(filter interface{}, skip int, sortKey string) ([]Project, error) { +func GetProjectList(filter interface{}, sortKey string) ([]Project, error) { s, c := database.GetCol("projects") defer s.Close() var projects []Project - if err := 
c.Find(filter).Skip(skip).Limit(constants.Infinite).Sort(sortKey).All(&projects); err != nil { + if err := c.Find(filter).Sort(sortKey).All(&projects); err != nil { debug.PrintStack() return projects, err } + + for i, p := range projects { + // 获取用户名称 + user, _ := GetUser(p.UserId) + projects[i].Username = user.Username + } return projects, nil } @@ -144,3 +152,16 @@ func RemoveProject(id bson.ObjectId) error { return nil } + +func GetProjectCount(filter interface{}) (int, error) { + s, c := database.GetCol("projects") + defer s.Close() + + count, err := c.Find(filter).Count() + if err != nil { + return 0, err + } + + return count, nil +} + diff --git a/backend/model/schedule.go b/backend/model/schedule.go index a23b6973..ee4028af 100644 --- a/backend/model/schedule.go +++ b/backend/model/schedule.go @@ -29,6 +29,7 @@ type Schedule struct { // 前端展示 SpiderName string `json:"spider_name" bson:"spider_name"` + Username string `json:"user_name" bson:"user_name"` Nodes []Node `json:"nodes" bson:"nodes"` Message string `json:"message" bson:"message"` @@ -83,6 +84,10 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) { schedule.SpiderName = spider.Name } + // 获取用户名称 + user, _ := GetUser(schedule.UserId) + schedule.Username = user.Username + schs = append(schs, schedule) } return schs, nil @@ -92,11 +97,16 @@ func GetSchedule(id bson.ObjectId) (Schedule, error) { s, c := database.GetCol("schedules") defer s.Close() - var result Schedule - if err := c.FindId(id).One(&result); err != nil { - return result, err + var schedule Schedule + if err := c.FindId(id).One(&schedule); err != nil { + return schedule, err } - return result, nil + + // 获取用户名称 + user, _ := GetUser(schedule.UserId) + schedule.Username = user.Username + + return schedule, nil } func UpdateSchedule(id bson.ObjectId, item Schedule) error { @@ -147,11 +157,11 @@ func RemoveSchedule(id bson.ObjectId) error { return nil } -func GetScheduleCount() (int, error) { +func GetScheduleCount(filter 
interface{}) (int, error) { s, c := database.GetCol("schedules") defer s.Close() - count, err := c.Count() + count, err := c.Find(filter).Count() if err != nil { return 0, err } diff --git a/backend/model/spider.go b/backend/model/spider.go index 49f735a4..666ed7d1 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -33,6 +33,7 @@ type Spider struct { Remark string `json:"remark" bson:"remark"` // 备注 Src string `json:"src" bson:"src"` // 源码位置 ProjectId bson.ObjectId `json:"project_id" bson:"project_id"` // 项目ID + IsPublic bool `json:"is_public" bson:"is_public"` // 是否公开 // 自定义爬虫 Cmd string `json:"cmd" bson:"cmd"` // 执行命令 @@ -58,15 +59,22 @@ type Spider struct { // 长任务 IsLongTask bool `json:"is_long_task" bson:"is_long_task"` // 是否为长任务 + // 去重 + IsDedup bool `json:"is_dedup" bson:"is_dedup"` // 是否去重 + DedupField string `json:"dedup_field" bson:"dedup_field"` // 去重字段 + DedupMethod string `json:"dedup_method" bson:"dedup_method"` // 去重方式 + // 前端展示 LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间 LastStatus string `json:"last_status"` // 最后执行状态 Config entity.ConfigSpiderData `json:"config"` // 可配置爬虫配置 LatestTasks []Task `json:"latest_tasks"` // 最近任务列表 + Username string `json:"username"` // 用户名称 // 时间 - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` + UserId bson.ObjectId `json:"user_id" bson:"user_id"` + CreateTs time.Time `json:"create_ts" bson:"create_ts"` + UpdateTs time.Time `json:"update_ts" bson:"update_ts"` } // 更新爬虫 @@ -82,6 +90,7 @@ func (spider *Spider) Save() error { } if err := c.UpdateId(spider.Id, spider); err != nil { + log.Errorf(err.Error()) debug.PrintStack() return err } @@ -181,10 +190,22 @@ func GetSpiderList(filter interface{}, skip int, limit int, sortStr string) ([]S continue } + // 获取用户 + var user User + if spider.UserId.Valid() { + user, err = GetUser(spider.UserId) + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + } + // 赋值 
spiders[i].LastRunTs = task.CreateTs spiders[i].LastStatus = task.Status spiders[i].LatestTasks = latestTasks + spiders[i].Username = user.Username } count, _ := c.Find(filter).Count() @@ -220,13 +241,21 @@ func GetSpiderByName(name string) Spider { s, c := database.GetCol("spiders") defer s.Close() - var result Spider - if err := c.Find(bson.M{"name": name}).One(&result); err != nil && err != mgo.ErrNotFound { + var spider Spider + if err := c.Find(bson.M{"name": name}).One(&spider); err != nil && err != mgo.ErrNotFound { log.Errorf("get spider error: %s, spider_name: %s", err.Error(), name) //debug.PrintStack() - return result + return spider } - return result + + // 获取用户 + var user User + if spider.UserId.Valid() { + user, _ = GetUser(spider.UserId) + } + spider.Username = user.Username + + return spider } // 获取爬虫(根据ID) @@ -252,6 +281,14 @@ func GetSpider(id bson.ObjectId) (Spider, error) { } spider.Config = config } + + // 获取用户名称 + var user User + if spider.UserId.Valid() { + user, _ = GetUser(spider.UserId) + } + spider.Username = user.Username + return spider, nil } @@ -323,11 +360,11 @@ func RemoveAllSpider() error { } // 获取爬虫总数 -func GetSpiderCount() (int, error) { +func GetSpiderCount(filter interface{}) (int, error) { s, c := database.GetCol("spiders") defer s.Close() - count, err := c.Count() + count, err := c.Find(filter).Count() if err != nil { return 0, err } diff --git a/backend/model/task.go b/backend/model/task.go index abb5ffc5..75edd631 100644 --- a/backend/model/task.go +++ b/backend/model/task.go @@ -25,14 +25,17 @@ type Task struct { RuntimeDuration float64 `json:"runtime_duration" bson:"runtime_duration"` TotalDuration float64 `json:"total_duration" bson:"total_duration"` Pid int `json:"pid" bson:"pid"` - UserId bson.ObjectId `json:"user_id" bson:"user_id"` + RunType string `json:"run_type" bson:"run_type"` + ScheduleId bson.ObjectId `json:"schedule_id" bson:"schedule_id"` // 前端数据 SpiderName string `json:"spider_name"` NodeName string 
`json:"node_name"` + Username string `json:"username"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` + UserId bson.ObjectId `json:"user_id" bson:"user_id"` + CreateTs time.Time `json:"create_ts" bson:"create_ts"` + UpdateTs time.Time `json:"update_ts" bson:"update_ts"` } type TaskDailyItem struct { @@ -126,6 +129,10 @@ func GetTaskList(filter interface{}, skip int, limit int, sortKey string) ([]Tas if node, err := task.GetNode(); err == nil { tasks[i].NodeName = node.Name } + + // resolve the owner's username for display (lookup errors are ignored; the name is then left empty) + user, _ := GetUser(task.UserId) + tasks[i].Username = user.Username // write through the slice index, like NodeName above, so the value reaches the returned list } return tasks, nil } @@ -154,6 +161,11 @@ func GetTask(id string) (Task, error) { debug.PrintStack() return task, err } + + // resolve the owner's username for display (lookup errors are ignored; the name is then left empty) + user, _ := GetUser(task.UserId) + task.Username = user.Username + return task, nil } diff --git a/backend/model/user.go b/backend/model/user.go index 9dadec0f..074a197a 100644 --- a/backend/model/user.go +++ b/backend/model/user.go @@ -19,8 +19,9 @@ type User struct { Email string `json:"email" bson:"email"` Setting UserSetting `json:"setting" bson:"setting"` - CreateTs time.Time `json:"create_ts" bson:"create_ts"` - UpdateTs time.Time `json:"update_ts" bson:"update_ts"` + UserId bson.ObjectId `json:"user_id" bson:"user_id"` + CreateTs time.Time `json:"create_ts" bson:"create_ts"` + UpdateTs time.Time `json:"update_ts" bson:"update_ts"` } type UserSetting struct { diff --git a/backend/routes/action.go b/backend/routes/action.go new file mode 100644 index 00000000..f0d262ab --- /dev/null +++ b/backend/routes/action.go @@ -0,0 +1,114 @@ +package routes + +import ( + "crawlab/model" + "crawlab/services" + "github.com/gin-gonic/gin" + "github.com/globalsign/mgo/bson" + "net/http" +) + +func GetAction(c *gin.Context) { + id := c.Param("id") + + user, err := model.GetAction(bson.ObjectIdHex(id)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + 
Status: "ok", + Message: "success", + Data: user, + }) +} + +func GetActionList(c *gin.Context) { + pageNum := c.GetInt("page_num") + pageSize := c.GetInt("page_size") + + users, err := model.GetActionList(nil, (pageNum-1)*pageSize, pageSize, "-create_ts") + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + total, err := model.GetActionListTotal(nil) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, ListResponse{ + Status: "ok", + Message: "success", + Data: users, + Total: total, + }) +} + +func PutAction(c *gin.Context) { + // bind the request payload + var action model.Action + if err := c.ShouldBindJSON(&action); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + + action.UserId = services.GetCurrentUserId(c) + + if err := action.Add(); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func PostAction(c *gin.Context) { + id := c.Param("id") + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "invalid id") + return // stop here: bson.ObjectIdHex below panics on a malformed id + } + + var item model.Action + if err := c.ShouldBindJSON(&item); err != nil { + HandleError(http.StatusBadRequest, c, err) + return + } + + if err := model.UpdateAction(bson.ObjectIdHex(id), item); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} + +func DeleteAction(c *gin.Context) { + id := c.Param("id") + + if !bson.IsObjectIdHex(id) { + HandleErrorF(http.StatusBadRequest, c, "invalid id") + return + } + + // remove the action from the database + if err := model.RemoveAction(bson.ObjectIdHex(id)); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} diff --git a/backend/routes/challenge.go b/backend/routes/challenge.go new file 
mode 100644 index 00000000..1f03654a --- /dev/null +++ b/backend/routes/challenge.go @@ -0,0 +1,45 @@ +package routes + +import ( + "crawlab/constants" + "crawlab/model" + "crawlab/services" + "crawlab/services/challenge" + "github.com/gin-gonic/gin" + "net/http" +) + +func GetChallengeList(c *gin.Context) { + // 获取列表 + users, err := model.GetChallengeListWithAchieved(nil, 0, constants.Infinite, "create_ts", services.GetCurrentUserId(c)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 获取总数 + total, err := model.GetChallengeListTotal(nil) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + c.JSON(http.StatusOK, ListResponse{ + Status: "ok", + Message: "success", + Data: users, + Total: total, + }) +} + +func CheckChallengeList(c *gin.Context) { + uid := services.GetCurrentUserId(c) + if err := challenge.CheckChallengeAndUpdateAll(uid); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + }) +} diff --git a/backend/routes/config_spider.go b/backend/routes/config_spider.go index ac6a11e0..52b61ff5 100644 --- a/backend/routes/config_spider.go +++ b/backend/routes/config_spider.go @@ -51,6 +51,9 @@ func PutConfigSpider(c *gin.Context) { // 将FileId置空 spider.FileId = bson.ObjectIdHex(constants.ObjectIdNull) + // UserId + spider.UserId = services.GetCurrentUserId(c) + // 创建爬虫目录 spiderDir := filepath.Join(viper.GetString("spider.path"), spider.Name) if utils.Exists(spiderDir) { @@ -109,8 +112,12 @@ func UploadConfigSpider(c *gin.Context) { spider, err := model.GetSpider(bson.ObjectIdHex(id)) if err != nil { HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id)) + return } + // UserId + spider.UserId = services.GetCurrentUserId(c) + // 获取上传文件 file, header, err := c.Request.FormFile("file") if err != nil { @@ -174,6 +181,7 @@ func UploadConfigSpider(c *gin.Context) 
{ // 根据序列化后的数据处理爬虫文件 if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil { HandleError(http.StatusInternalServerError, c, err) + return } c.JSON(http.StatusOK, Response{ @@ -205,6 +213,11 @@ func PostConfigSpiderSpiderfile(c *gin.Context) { return } + // UserId + if !spider.UserId.Valid() { + spider.UserId = bson.ObjectIdHex(constants.ObjectIdNull) + } + // 反序列化 var configData entity.ConfigSpiderData if err := yaml.Unmarshal([]byte(content), &configData); err != nil { @@ -247,6 +260,11 @@ func PostConfigSpiderConfig(c *gin.Context) { return } + // UserId + if !spider.UserId.Valid() { + spider.UserId = bson.ObjectIdHex(constants.ObjectIdNull) + } + // 反序列化配置数据 var configData entity.ConfigSpiderData if err := c.ShouldBindJSON(&configData); err != nil { diff --git a/backend/routes/projects.go b/backend/routes/project.go similarity index 95% rename from backend/routes/projects.go rename to backend/routes/project.go index 34b2d7f4..f0dd1198 100644 --- a/backend/routes/projects.go +++ b/backend/routes/project.go @@ -4,6 +4,7 @@ import ( "crawlab/constants" "crawlab/database" "crawlab/model" + "crawlab/services" "github.com/gin-gonic/gin" "github.com/globalsign/mgo/bson" "net/http" @@ -18,8 +19,11 @@ func GetProjectList(c *gin.Context) { query["tags"] = tag } + // 获取校验 + query = services.GetAuthQuery(query, c) + // 获取列表 - projects, err := model.GetProjectList(query, 0, "+_id") + projects, err := model.GetProjectList(query, "+_id") if err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -74,6 +78,9 @@ func PutProject(c *gin.Context) { return } + // UserId + p.UserId = services.GetCurrentUserId(c) + if err := p.Add(); err != nil { HandleError(http.StatusInternalServerError, c, err) return diff --git a/backend/routes/schedule.go b/backend/routes/schedule.go index 3776019c..27ad7825 100644 --- a/backend/routes/schedule.go +++ b/backend/routes/schedule.go @@ -9,7 +9,12 @@ import ( ) func GetScheduleList(c *gin.Context) { - 
results, err := model.GetScheduleList(nil) + query := bson.M{} + + // 获取校验 + query = services.GetAuthQuery(query, c) + + results, err := model.GetScheduleList(query) if err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -77,7 +82,7 @@ func PutSchedule(c *gin.Context) { } // 加入用户ID - item.UserId = services.GetCurrentUser(c).Id + item.UserId = services.GetCurrentUserId(c) // 更新数据库 if err := model.AddSchedule(item); err != nil { diff --git a/backend/routes/spider.go b/backend/routes/spider.go index d195a5e7..c3dd4623 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -29,13 +29,14 @@ import ( // ======== 爬虫管理 ======== func GetSpiderList(c *gin.Context) { - pageNum, _ := c.GetQuery("page_num") - pageSize, _ := c.GetQuery("page_size") - keyword, _ := c.GetQuery("keyword") - pid, _ := c.GetQuery("project_id") - t, _ := c.GetQuery("type") - sortKey, _ := c.GetQuery("sort_key") - sortDirection, _ := c.GetQuery("sort_direction") + pageNum := c.Query("page_num") + pageSize := c.Query("page_size") + keyword := c.Query("keyword") + pid := c.Query("project_id") + t := c.Query("type") + sortKey := c.Query("sort_key") + sortDirection := c.Query("sort_direction") + ownerType := c.Query("owner_type") // 筛选-名称 filter := bson.M{ @@ -65,6 +66,21 @@ func GetSpiderList(c *gin.Context) { filter["project_id"] = bson.ObjectIdHex(pid) } + // 筛选-用户 + if ownerType == constants.OwnerTypeAll { + user := services.GetCurrentUser(c) + if user.Role == constants.RoleNormal { + filter["$or"] = []bson.M{ + {"user_id": services.GetCurrentUserId(c)}, + {"is_public": true}, + } + } + } else if ownerType == constants.OwnerTypeMe { + filter["user_id"] = services.GetCurrentUserId(c) + } else if ownerType == constants.OwnerTypePublic { + filter["is_public"] = true + } + // 排序 sortStr := "-_id" if sortKey != "" && sortDirection != "" { @@ -126,6 +142,11 @@ func PostSpider(c *gin.Context) { return } + // UserId + if !item.UserId.Valid() { + item.UserId = 
bson.ObjectIdHex(constants.ObjectIdNull) + } + if err := model.UpdateSpider(bson.ObjectIdHex(id), item); err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -137,6 +158,19 @@ func PostSpider(c *gin.Context) { return } + // 获取爬虫 + spider, err := model.GetSpider(bson.ObjectIdHex(id)) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 去重处理 + if err := services.UpdateSpiderDedup(spider); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", @@ -189,6 +223,9 @@ func PutSpider(c *gin.Context) { // 将FileId置空 spider.FileId = bson.ObjectIdHex(constants.ObjectIdNull) + // UserId + spider.UserId = services.GetCurrentUserId(c) + // 爬虫目录 spiderDir := filepath.Join(viper.GetString("spider.path"), spider.Name) @@ -274,6 +311,9 @@ func CopySpider(c *gin.Context) { return } + // UserId + spider.UserId = services.GetCurrentUserId(c) + // 复制爬虫 if err := services.CopySpider(spider, reqBody.Name); err != nil { HandleError(http.StatusInternalServerError, c, err) @@ -336,7 +376,12 @@ func UploadSpider(c *gin.Context) { var gfFile model.GridFs if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil { // 已经存在文件,则删除 - _ = gf.RemoveId(gfFile.Id) + if err := gf.RemoveId(gfFile.Id); err != nil { + log.Errorf("remove grid fs error: %s", err.Error()) + debug.PrintStack() + HandleError(http.StatusInternalServerError, c, err) + return + } } // 上传到GridFs @@ -365,6 +410,8 @@ func UploadSpider(c *gin.Context) { Type: constants.Customized, Src: filepath.Join(srcPath, spiderName), FileId: fid, + ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), + UserId: services.GetCurrentUserId(c), } if name != "" { spider.Name = name @@ -407,12 +454,12 @@ func UploadSpider(c *gin.Context) { } } - // 发起同步 - services.PublishAllSpiders() - // 获取爬虫 spider = model.GetSpiderByName(spiderName) + // 发起同步 + services.PublishSpider(spider) + 
c.JSON(http.StatusOK, Response{ Status: "ok", Message: "success", @@ -477,22 +524,32 @@ func UploadSpiderFromId(c *gin.Context) { // 判断文件是否已经存在 var gfFile model.GridFs - if err := gf.Find(bson.M{"filename": uploadFile.Filename}).One(&gfFile); err == nil { + if err := gf.Find(bson.M{"filename": spider.Name}).One(&gfFile); err == nil { // 已经存在文件,则删除 - _ = gf.RemoveId(gfFile.Id) + if err := gf.RemoveId(gfFile.Id); err != nil { + log.Errorf("remove grid fs error: %s", err.Error()) + debug.PrintStack() + HandleError(http.StatusInternalServerError, c, err) + return + } } // 上传到GridFs - fid, err := services.UploadToGridFs(uploadFile.Filename, tmpFilePath) + fid, err := services.UploadToGridFs(spider.Name, tmpFilePath) if err != nil { log.Errorf("upload to grid fs error: %s", err.Error()) debug.PrintStack() + HandleError(http.StatusInternalServerError, c, err) return } // 更新file_id spider.FileId = fid - _ = spider.Save() + if err := spider.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + // report the failure to the client instead of returning with no response written + HandleError(http.StatusInternalServerError, c, err) + return + } // 发起同步 services.PublishSpider(spider) @@ -614,10 +671,12 @@ func RunSelectedSpider(c *gin.Context) { } for _, node := range nodes { t := model.Task{ - SpiderId: taskParam.SpiderId, - NodeId: node.Id, - Param: taskParam.Param, - UserId: services.GetCurrentUser(c).Id, + SpiderId: taskParam.SpiderId, + NodeId: node.Id, + Param: taskParam.Param, + UserId: services.GetCurrentUserId(c), + RunType: constants.RunTypeAllNodes, + ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), } id, err := services.AddTask(t) @@ -631,9 +690,11 @@ func RunSelectedSpider(c *gin.Context) { } else if reqBody.RunType == constants.RunTypeRandom { // 随机 t := model.Task{ - SpiderId: taskParam.SpiderId, - Param: taskParam.Param, - UserId: services.GetCurrentUser(c).Id, + SpiderId: taskParam.SpiderId, + Param: taskParam.Param, + UserId: services.GetCurrentUserId(c), + RunType: constants.RunTypeRandom, + ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), } id, err := services.AddTask(t) 
if err != nil { @@ -645,10 +706,12 @@ func RunSelectedSpider(c *gin.Context) { // 指定节点 for _, nodeId := range reqBody.NodeIds { t := model.Task{ - SpiderId: taskParam.SpiderId, - NodeId: nodeId, - Param: taskParam.Param, - UserId: services.GetCurrentUser(c).Id, + SpiderId: taskParam.SpiderId, + NodeId: nodeId, + Param: taskParam.Param, + UserId: services.GetCurrentUserId(c), + RunType: constants.RunTypeSelectedNodes, + ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), } id, err := services.AddTask(t) @@ -796,7 +859,7 @@ func GetSpiderStats(c *gin.Context) { overview.AvgWaitDuration = overview.TotalWaitDuration / taskCount overview.AvgRuntimeDuration = overview.TotalRuntimeDuration / taskCount - items, err := model.GetDailyTaskStats(bson.M{"spider_id": spider.Id}) + items, err := model.GetDailyTaskStats(services.GetAuthQuery(bson.M{"spider_id": spider.Id}, c)) if err != nil { log.Errorf(err.Error()) HandleError(http.StatusInternalServerError, c, err) diff --git a/backend/routes/stats.go b/backend/routes/stats.go index 8590bbd7..46c1afc8 100644 --- a/backend/routes/stats.go +++ b/backend/routes/stats.go @@ -3,6 +3,7 @@ package routes import ( "crawlab/constants" "crawlab/model" + "crawlab/services" "github.com/gin-gonic/gin" "github.com/globalsign/mgo/bson" "net/http" @@ -14,6 +15,7 @@ func GetHomeStats(c *gin.Context) { SpiderCount int `json:"spider_count"` ActiveNodeCount int `json:"active_node_count"` ScheduleCount int `json:"schedule_count"` + ProjectCount int `json:"project_count"` } type Data struct { @@ -22,7 +24,7 @@ func GetHomeStats(c *gin.Context) { } // 任务总数 - taskCount, err := model.GetTaskCount(nil) + taskCount, err := model.GetTaskCount(bson.M{"user_id": services.GetCurrentUserId(c)}) if err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -36,21 +38,28 @@ func GetHomeStats(c *gin.Context) { } // 爬虫总数 - spiderCount, err := 
model.GetSpiderCount(bson.M{"user_id": services.GetCurrentUserId(c)}) if err != nil { HandleError(http.StatusInternalServerError, c, err) return } // 定时任务数 - scheduleCount, err := model.GetScheduleCount() + scheduleCount, err := model.GetScheduleCount(bson.M{"user_id": services.GetCurrentUserId(c)}) + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + + // 项目数 + projectCount, err := model.GetProjectCount(bson.M{"user_id": services.GetCurrentUserId(c)}) if err != nil { HandleError(http.StatusInternalServerError, c, err) return } // 每日任务数 - items, err := model.GetDailyTaskStats(bson.M{}) + items, err := model.GetDailyTaskStats(bson.M{"user_id": services.GetCurrentUserId(c)}) if err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -65,6 +74,7 @@ func GetHomeStats(c *gin.Context) { TaskCount: taskCount, SpiderCount: spiderCount, ScheduleCount: scheduleCount, + ProjectCount: projectCount, }, Daily: items, }, diff --git a/backend/routes/task.go b/backend/routes/task.go index 2880abb9..ff674766 100644 --- a/backend/routes/task.go +++ b/backend/routes/task.go @@ -13,11 +13,12 @@ import ( ) type TaskListRequestData struct { - PageNum int `form:"page_num"` - PageSize int `form:"page_size"` - NodeId string `form:"node_id"` - SpiderId string `form:"spider_id"` - Status string `form:"status"` + PageNum int `form:"page_num"` + PageSize int `form:"page_size"` + NodeId string `form:"node_id"` + SpiderId string `form:"spider_id"` + ScheduleId string `form:"schedule_id"` + Status string `form:"status"` } type TaskResultsRequestData struct { @@ -47,10 +48,16 @@ func GetTaskList(c *gin.Context) { if data.SpiderId != "" { query["spider_id"] = bson.ObjectIdHex(data.SpiderId) } - //新增根据任务状态获取task列表 + // 根据任务状态获取task列表 if data.Status != "" { query["status"] = data.Status } + if data.ScheduleId != "" { + query["schedule_id"] = bson.ObjectIdHex(data.ScheduleId) + } + + // 获取校验 + query = services.GetAuthQuery(query, c) // 获取任务列表 tasks, 
err := model.GetTaskList(query, (data.PageNum-1)*data.PageSize, data.PageSize, "-create_ts") @@ -112,10 +119,12 @@ func PutTask(c *gin.Context) { } for _, node := range nodes { t := model.Task{ - SpiderId: reqBody.SpiderId, - NodeId: node.Id, - Param: reqBody.Param, - UserId: services.GetCurrentUser(c).Id, + SpiderId: reqBody.SpiderId, + NodeId: node.Id, + Param: reqBody.Param, + UserId: services.GetCurrentUserId(c), + RunType: constants.RunTypeAllNodes, + ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), } id, err := services.AddTask(t) @@ -129,9 +138,11 @@ func PutTask(c *gin.Context) { } else if reqBody.RunType == constants.RunTypeRandom { // 随机 t := model.Task{ - SpiderId: reqBody.SpiderId, - Param: reqBody.Param, - UserId: services.GetCurrentUser(c).Id, + SpiderId: reqBody.SpiderId, + Param: reqBody.Param, + UserId: services.GetCurrentUserId(c), + RunType: constants.RunTypeRandom, + ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), } id, err := services.AddTask(t) if err != nil { @@ -143,10 +154,12 @@ func PutTask(c *gin.Context) { // 指定节点 for _, nodeId := range reqBody.NodeIds { t := model.Task{ - SpiderId: reqBody.SpiderId, - NodeId: nodeId, - Param: reqBody.Param, - UserId: services.GetCurrentUser(c).Id, + SpiderId: reqBody.SpiderId, + NodeId: nodeId, + Param: reqBody.Param, + UserId: services.GetCurrentUserId(c), + RunType: constants.RunTypeSelectedNodes, + ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), } id, err := services.AddTask(t) @@ -340,3 +353,15 @@ func CancelTask(c *gin.Context) { } HandleSuccess(c) } + +func RestartTask(c *gin.Context) { + id := c.Param("id") + + uid := services.GetCurrentUserId(c) + + if err := services.RestartTask(id, uid); err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + HandleSuccess(c) +} \ No newline at end of file diff --git a/backend/routes/user.go b/backend/routes/user.go index fcca967e..86d46a61 100644 --- a/backend/routes/user.go +++ b/backend/routes/user.go @@ -95,8 
+95,11 @@ func PutUser(c *gin.Context) { reqData.Role = constants.RoleNormal } + // UserId + uid := services.GetCurrentUserId(c) + // 添加用户 - if err := services.CreateNewUser(reqData.Username, reqData.Password, reqData.Role, reqData.Email); err != nil { + if err := services.CreateNewUser(reqData.Username, reqData.Password, reqData.Role, reqData.Email, uid); err != nil { HandleError(http.StatusInternalServerError, c, err) return } @@ -120,6 +123,10 @@ func PostUser(c *gin.Context) { return } + if item.UserId.Hex() == "" { + item.UserId = bson.ObjectIdHex(constants.ObjectIdNull) + } + if err := model.UpdateUser(bson.ObjectIdHex(id), item); err != nil { HandleError(http.StatusInternalServerError, c, err) return @@ -230,6 +237,11 @@ func PostMe(c *gin.Context) { user.Setting.WechatRobotWebhook = reqBody.Setting.WechatRobotWebhook } user.Setting.EnabledNotifications = reqBody.Setting.EnabledNotifications + + if user.UserId.Hex() == "" { + user.UserId = bson.ObjectIdHex(constants.ObjectIdNull) + } + if err := user.Save(); err != nil { HandleError(http.StatusInternalServerError, c, err) return diff --git a/backend/services/auth.go b/backend/services/auth.go new file mode 100644 index 00000000..096d9f14 --- /dev/null +++ b/backend/services/auth.go @@ -0,0 +1,20 @@ +package services + +import ( + "crawlab/constants" + "github.com/gin-gonic/gin" + "github.com/globalsign/mgo/bson" +) + +// GetAuthQuery returns query unchanged for admin users; for any other role it sets query["user_id"] to the current user's id (the passed map is modified in place). +func GetAuthQuery(query bson.M, c *gin.Context) bson.M { + user := GetCurrentUser(c) + if user.Role == constants.RoleAdmin { + // admins get all data + return query + } else { + // everyone else only gets their own data + if query == nil { + // writing to a nil map panics, so start from an empty filter + query = bson.M{} + } + query["user_id"] = user.Id + return query + } +} + diff --git a/backend/services/challenge/base.go b/backend/services/challenge/base.go new file mode 100644 index 00000000..a7758708 --- /dev/null +++ b/backend/services/challenge/base.go @@ -0,0 +1,138 @@ +package challenge + +import ( + "crawlab/constants" + "crawlab/model" + "encoding/json" + "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "io/ioutil" + 
"path" + "runtime/debug" +) + +type Service interface { + Check() (bool, error) +} + +func GetService(name string, uid bson.ObjectId) Service { + switch name { + case constants.ChallengeLogin7d: + return &Login7dService{UserId: uid} + case constants.ChallengeLogin30d: + return &Login30dService{UserId: uid} + case constants.ChallengeLogin90d: + return &Login90dService{UserId: uid} + case constants.ChallengeLogin180d: + return &Login180dService{UserId: uid} + case constants.ChallengeCreateCustomizedSpider: + return &CreateCustomizedSpiderService{UserId: uid} + case constants.ChallengeCreateConfigurableSpider: + return &CreateConfigurableSpiderService{UserId: uid} + case constants.ChallengeCreateSchedule: + return &CreateScheduleService{UserId: uid} + case constants.ChallengeCreateNodes: + return &CreateNodesService{UserId: uid} + case constants.ChallengeRunRandom: + return &RunRandomService{UserId: uid} + case constants.ChallengeScrape1k: + return &Scrape1kService{UserId: uid} + case constants.ChallengeScrape10k: + return &Scrape10kService{UserId: uid} + case constants.ChallengeScrape100k: + return &Scrape100kService{UserId: uid} + case constants.ChallengeInstallDep: + return &InstallDepService{UserId: uid} + case constants.ChallengeInstallLang: + return &InstallLangService{UserId: uid} + case constants.ChallengeViewDisclaimer: + return &ViewDisclaimerService{UserId: uid} + case constants.ChallengeCreateUser: + return &CreateUserService{UserId: uid} + } + return nil +} + +func AddChallengeAchievement(name string, uid bson.ObjectId) error { + ch, err := model.GetChallengeByName(name) + if err != nil { + return err + } + ca := model.ChallengeAchievement{ + ChallengeId: ch.Id, + UserId: uid, + } + if err := ca.Add(); err != nil { + return err + } + return nil +} + +// CheckChallengeAndUpdate runs the check registered for ch.Name and records an achievement for uid when the check passes and ch.Achieved is false. +func CheckChallengeAndUpdate(ch model.Challenge, uid bson.ObjectId) error { + svc := GetService(ch.Name, uid) + if svc == nil { + // GetService returns nil for names outside its switch; calling Check on a nil interface would panic + log.Warnf("no challenge service registered for %s", ch.Name) + return nil + } + achieved, err := svc.Check() + if err != nil { + return err + } + if achieved && !ch.Achieved { 
+ if err := AddChallengeAchievement(ch.Name, uid); err != nil { + return err + } + } + return nil +} + +// CheckChallengeAndUpdateAll checks every challenge for uid; a failing challenge is logged and skipped so the remaining ones are still evaluated. +func CheckChallengeAndUpdateAll(uid bson.ObjectId) error { + challenges, err := model.GetChallengeListWithAchieved(nil, 0, constants.Infinite, "-_id", uid) + if err != nil { + return err + } + for _, ch := range challenges { + if err := CheckChallengeAndUpdate(ch, uid); err != nil { + log.Errorf("check challenge %s error: %s", ch.Name, err.Error()) + continue + } + } + return nil +} + +func InitChallengeService() error { + // read the challenge definition file + contentBytes, err := ioutil.ReadFile(path.Join("data", "challenge_data.json")) + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + + // deserialize + var challenges []model.Challenge + if err := json.Unmarshal(contentBytes, &challenges); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + + for _, ch := range challenges { + chDb, err := model.GetChallengeByName(ch.Name) + if err != nil { + continue + } + if chDb.Name == "" { + if err := ch.Add(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + } else { + ch.Id = chDb.Id + ch.CreateTs = chDb.CreateTs + if err := ch.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + } + } + + return nil +} diff --git a/backend/services/challenge/create_configurable_spider.go b/backend/services/challenge/create_configurable_spider.go new file mode 100644 index 00000000..45e969f7 --- /dev/null +++ b/backend/services/challenge/create_configurable_spider.go @@ -0,0 +1,23 @@ +package challenge + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type CreateConfigurableSpiderService struct { + UserId bson.ObjectId +} + +func (s *CreateConfigurableSpiderService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "type": constants.Configurable, + } + _, count, err := model.GetSpiderList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return count > 0, nil +} diff --git 
a/backend/services/challenge/create_customized_spider.go b/backend/services/challenge/create_customized_spider.go new file mode 100644 index 00000000..6c61318f --- /dev/null +++ b/backend/services/challenge/create_customized_spider.go @@ -0,0 +1,23 @@ +package challenge + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type CreateCustomizedSpiderService struct { + UserId bson.ObjectId +} + +func (s *CreateCustomizedSpiderService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "type": constants.Customized, + } + _, count, err := model.GetSpiderList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return count > 0, nil +} diff --git a/backend/services/challenge/create_nodes.go b/backend/services/challenge/create_nodes.go new file mode 100644 index 00000000..42ec25f7 --- /dev/null +++ b/backend/services/challenge/create_nodes.go @@ -0,0 +1,22 @@ +package challenge + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type CreateNodesService struct { + UserId bson.ObjectId +} + +// Check reports whether at least 3 nodes have status constants.StatusOnline. +func (s *CreateNodesService) Check() (bool, error) { + query := bson.M{ + "status": constants.StatusOnline, + } + // NOTE(review): the filter is a node status, so query the node collection rather than schedules - confirm model.GetNodeList accepts this filter + list, err := model.GetNodeList(query) + if err != nil { + return false, err + } + return len(list) >= 3, nil +} diff --git a/backend/services/challenge/create_schedule.go b/backend/services/challenge/create_schedule.go new file mode 100644 index 00000000..3e0ce0e1 --- /dev/null +++ b/backend/services/challenge/create_schedule.go @@ -0,0 +1,21 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type CreateScheduleService struct { + UserId bson.ObjectId +} + +func (s *CreateScheduleService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + } + list, err := model.GetScheduleList(query) + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git 
a/backend/services/challenge/create_user.go b/backend/services/challenge/create_user.go new file mode 100644 index 00000000..e0272801 --- /dev/null +++ b/backend/services/challenge/create_user.go @@ -0,0 +1,21 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type CreateUserService struct { + UserId bson.ObjectId +} + +func (s *CreateUserService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + } + list, err := model.GetUserList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git a/backend/services/challenge/install_dep.go b/backend/services/challenge/install_dep.go new file mode 100644 index 00000000..4730249e --- /dev/null +++ b/backend/services/challenge/install_dep.go @@ -0,0 +1,23 @@ +package challenge + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type InstallDepService struct { + UserId bson.ObjectId +} + +func (s *InstallDepService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "type": constants.ActionTypeInstallDep, + } + list, err := model.GetActionList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git a/backend/services/challenge/install_lang.go b/backend/services/challenge/install_lang.go new file mode 100644 index 00000000..15732a2f --- /dev/null +++ b/backend/services/challenge/install_lang.go @@ -0,0 +1,23 @@ +package challenge + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type InstallLangService struct { + UserId bson.ObjectId +} + +func (s *InstallLangService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "type": constants.ActionTypeInstallLang, + } + list, err := model.GetActionList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git a/backend/services/challenge/login_180d.go 
b/backend/services/challenge/login_180d.go new file mode 100644 index 00000000..96cc9e26 --- /dev/null +++ b/backend/services/challenge/login_180d.go @@ -0,0 +1,18 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type Login180dService struct { + UserId bson.ObjectId +} + +func (s *Login180dService) Check() (bool, error) { + days, err := model.GetVisitDays(s.UserId) + if err != nil { + return false, err + } + return days >= 180, nil +} diff --git a/backend/services/challenge/login_30d.go b/backend/services/challenge/login_30d.go new file mode 100644 index 00000000..5234d5fe --- /dev/null +++ b/backend/services/challenge/login_30d.go @@ -0,0 +1,18 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type Login30dService struct { + UserId bson.ObjectId +} + +func (s *Login30dService) Check() (bool, error) { + days, err := model.GetVisitDays(s.UserId) + if err != nil { + return false, err + } + return days >= 30, nil +} diff --git a/backend/services/challenge/login_7d.go b/backend/services/challenge/login_7d.go new file mode 100644 index 00000000..91540423 --- /dev/null +++ b/backend/services/challenge/login_7d.go @@ -0,0 +1,18 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type Login7dService struct { + UserId bson.ObjectId +} + +func (s *Login7dService) Check() (bool, error) { + days, err := model.GetVisitDays(s.UserId) + if err != nil { + return false, err + } + return days >= 7, nil +} diff --git a/backend/services/challenge/login_90d.go b/backend/services/challenge/login_90d.go new file mode 100644 index 00000000..a8526b87 --- /dev/null +++ b/backend/services/challenge/login_90d.go @@ -0,0 +1,18 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type Login90dService struct { + UserId bson.ObjectId +} + +func (s *Login90dService) Check() (bool, error) { + days, err := model.GetVisitDays(s.UserId) 
+ if err != nil { + return false, err + } + return days >= 90, nil +} diff --git a/backend/services/challenge/run_random.go b/backend/services/challenge/run_random.go new file mode 100644 index 00000000..30c63f0c --- /dev/null +++ b/backend/services/challenge/run_random.go @@ -0,0 +1,25 @@ +package challenge + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type RunRandomService struct { + UserId bson.ObjectId +} + +func (s *RunRandomService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "run_type": constants.RunTypeRandom, + "status": constants.StatusFinished, + "schedule_id": bson.ObjectIdHex(constants.ObjectIdNull), + } + list, err := model.GetTaskList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git a/backend/services/challenge/scrape_100k.go b/backend/services/challenge/scrape_100k.go new file mode 100644 index 00000000..68a90eda --- /dev/null +++ b/backend/services/challenge/scrape_100k.go @@ -0,0 +1,24 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type Scrape100kService struct { + UserId bson.ObjectId +} + +func (s *Scrape100kService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "result_count": bson.M{ + "$gte": 100000, + }, + } + list, err := model.GetTaskList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git a/backend/services/challenge/scrape_10k.go b/backend/services/challenge/scrape_10k.go new file mode 100644 index 00000000..ae70b450 --- /dev/null +++ b/backend/services/challenge/scrape_10k.go @@ -0,0 +1,24 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type Scrape10kService struct { + UserId bson.ObjectId +} + +func (s *Scrape10kService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "result_count": bson.M{ + "$gte": 10000, + }, + } + list, err 
:= model.GetTaskList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git a/backend/services/challenge/scrape_1k.go b/backend/services/challenge/scrape_1k.go new file mode 100644 index 00000000..cad2469f --- /dev/null +++ b/backend/services/challenge/scrape_1k.go @@ -0,0 +1,24 @@ +package challenge + +import ( + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type Scrape1kService struct { + UserId bson.ObjectId +} + +func (s *Scrape1kService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "result_count": bson.M{ + "$gte": 1000, + }, + } + list, err := model.GetTaskList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git a/backend/services/challenge/view_disclaimer.go b/backend/services/challenge/view_disclaimer.go new file mode 100644 index 00000000..fc9fe21c --- /dev/null +++ b/backend/services/challenge/view_disclaimer.go @@ -0,0 +1,23 @@ +package challenge + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/globalsign/mgo/bson" +) + +type ViewDisclaimerService struct { + UserId bson.ObjectId +} + +func (s *ViewDisclaimerService) Check() (bool, error) { + query := bson.M{ + "user_id": s.UserId, + "type": constants.ActionTypeViewDisclaimer, + } + list, err := model.GetActionList(query, 0, 1, "-_id") + if err != nil { + return false, err + } + return len(list) > 0, nil +} diff --git a/backend/services/clean.go b/backend/services/clean.go new file mode 100644 index 00000000..bbd3571d --- /dev/null +++ b/backend/services/clean.go @@ -0,0 +1,122 @@ +package services + +import ( + "crawlab/constants" + "crawlab/model" + "github.com/apex/log" + "github.com/globalsign/mgo/bson" + "runtime/debug" +) + +func InitTaskCleanUserIds() { + adminUser, err := GetAdminUser() + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + tasks, err := model.GetTaskList(nil, 0, constants.Infinite, "+_id") + if err != 
nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + for _, t := range tasks { + if !t.ScheduleId.Valid() { + t.ScheduleId = bson.ObjectIdHex(constants.ObjectIdNull) + if err := t.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + } + + if !t.UserId.Valid() { + t.UserId = adminUser.Id + if err := t.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + } + } +} + +func InitProjectCleanUserIds() { + adminUser, err := GetAdminUser() + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + projects, err := model.GetProjectList(nil, "+_id") + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + for _, p := range projects { + if !p.UserId.Valid() { + p.UserId = adminUser.Id + if err := p.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + } + } +} + +func InitSpiderCleanUserIds() { + adminUser, err := GetAdminUser() + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + spiders, _ := model.GetSpiderAllList(nil) + for _, s := range spiders { + if !s.UserId.Valid() { + s.UserId = adminUser.Id + if err := s.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + } + } +} + +func InitScheduleCleanUserIds() { + adminUser, err := GetAdminUser() + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } + schedules, _ := model.GetScheduleList(nil) + for _, s := range schedules { + if !s.UserId.Valid() { + s.UserId = adminUser.Id + if err := s.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + } + } +} + +func InitCleanService() error { + if model.IsMaster() { + // 清理任务UserIds + InitTaskCleanUserIds() + // 清理项目UserIds + InitProjectCleanUserIds() + // 清理爬虫UserIds + InitSpiderCleanUserIds() + // 清理定时任务UserIds + InitScheduleCleanUserIds() + } + return nil +} diff --git a/backend/services/config_spider.go 
b/backend/services/config_spider.go index 29e1c2ca..68c170df 100644 --- a/backend/services/config_spider.go +++ b/backend/services/config_spider.go @@ -17,6 +17,7 @@ import ( "gopkg.in/yaml.v2" "os" "path/filepath" + "runtime/debug" "strings" ) @@ -214,7 +215,11 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con var gfFile model.GridFs if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil { // 已经存在文件,则删除 - _ = gf.RemoveId(gfFile.Id) + if err := gf.RemoveId(gfFile.Id); err != nil { + log.Errorf("remove grid fs error: %s", err.Error()) + debug.PrintStack() + return err + } } // 上传到GridFs diff --git a/backend/services/git.go b/backend/services/git.go index 18d679de..e3d28934 100644 --- a/backend/services/git.go +++ b/backend/services/git.go @@ -289,8 +289,16 @@ func SyncSpiderGit(s model.Spider) (err error) { // 检查是否为 Scrapy sync := spider_handler.SpiderSync{Spider: s} sync.CheckIsScrapy() + + // 同步到GridFS + if err := UploadSpiderToGridFsFromMaster(s); err != nil { + SaveSpiderGitSyncError(s, err.Error()) + return err + } + // 如果没有错误,则保存空字符串 SaveSpiderGitSyncError(s, "") + return nil } log.Error(err.Error()) @@ -315,6 +323,13 @@ func SyncSpiderGit(s model.Spider) (err error) { return err } + // 获取更新后的爬虫 + s, err = model.GetSpider(s.Id) + if err != nil { + SaveSpiderGitSyncError(s, err.Error()) + return err + } + // 检查是否为 Scrapy sync := spider_handler.SpiderSync{Spider: s} sync.CheckIsScrapy() diff --git a/backend/services/schedule.go b/backend/services/schedule.go index ad41b969..78d430b1 100644 --- a/backend/services/schedule.go +++ b/backend/services/schedule.go @@ -51,11 +51,13 @@ func AddScheduleTask(s model.Schedule) func() { } for _, node := range nodes { t := model.Task{ - Id: id.String(), - SpiderId: s.SpiderId, - NodeId: node.Id, - Param: param, - UserId: s.UserId, + Id: id.String(), + SpiderId: s.SpiderId, + NodeId: node.Id, + Param: param, + UserId: s.UserId, + RunType: constants.RunTypeAllNodes, 
+ ScheduleId: s.Id, } if _, err := AddTask(t); err != nil { @@ -65,10 +67,12 @@ func AddScheduleTask(s model.Schedule) func() { } else if s.RunType == constants.RunTypeRandom { // 随机 t := model.Task{ - Id: id.String(), - SpiderId: s.SpiderId, - Param: param, - UserId: s.UserId, + Id: id.String(), + SpiderId: s.SpiderId, + Param: param, + UserId: s.UserId, + RunType: constants.RunTypeRandom, + ScheduleId: s.Id, } if _, err := AddTask(t); err != nil { log.Errorf(err.Error()) @@ -79,11 +83,13 @@ func AddScheduleTask(s model.Schedule) func() { // 指定节点 for _, nodeId := range s.NodeIds { t := model.Task{ - Id: id.String(), - SpiderId: s.SpiderId, - NodeId: nodeId, - Param: param, - UserId: s.UserId, + Id: id.String(), + SpiderId: s.SpiderId, + NodeId: nodeId, + Param: param, + UserId: s.UserId, + RunType: constants.RunTypeSelectedNodes, + ScheduleId: s.Id, } if _, err := AddTask(t); err != nil { diff --git a/backend/services/spider.go b/backend/services/spider.go index 77bb19ab..2a8a3d48 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -60,7 +60,12 @@ func UploadSpiderToGridFsFromMaster(spider model.Spider) error { var gfFile model.GridFs if err := gf.Find(bson.M{"filename": spiderZipFileName}).One(&gfFile); err == nil { // 已经存在文件,则删除 - _ = gf.RemoveId(gfFile.Id) + log.Errorf(gfFile.Id.Hex() + " already exists. 
removing...") + if err := gf.RemoveId(gfFile.Id); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } } // 上传到GridFs @@ -72,7 +77,9 @@ func UploadSpiderToGridFsFromMaster(spider model.Spider) error { // 保存爬虫 FileId spider.FileId = fid - _ = spider.Save() + if err := spider.Save(); err != nil { + return err + } // 获取爬虫同步实例 spiderSync := spider_handler.SpiderSync{ @@ -102,27 +109,33 @@ func UploadToGridFs(fileName string, filePath string) (fid bson.ObjectId, err er // 创建一个新GridFS文件 f, err := gf.Create(fileName) if err != nil { + log.Errorf("create file error: " + err.Error()) debug.PrintStack() return } - //分片读取爬虫zip文件 + // 分片读取爬虫zip文件 err = ReadFileByStep(filePath, WriteToGridFS, f) if err != nil { + log.Errorf("read file by step error: " + err.Error()) debug.PrintStack() return "", err } // 删除zip文件 if err = os.Remove(filePath); err != nil { + log.Errorf("remove file error: " + err.Error()) debug.PrintStack() return } + // 关闭文件,提交写入 if err = f.Close(); err != nil { + log.Errorf("close file error: " + err.Error()) debug.PrintStack() return "", err } + // 文件ID fid = f.Id().(bson.ObjectId) @@ -183,8 +196,14 @@ func PublishSpider(spider model.Spider) { // 查询gf file,不存在则标记为爬虫文件不存在 gfFile = model.GetGridFs(spider.FileId) if gfFile == nil { - spider.FileId = constants.ObjectIdNull - _ = spider.Save() + log.Errorf("get grid fs file error: cannot find grid fs file") + log.Errorf("grid fs file_id: " + spider.FileId.Hex()) + log.Errorf("spider_name: " + spider.Name) + debug.PrintStack() + //spider.FileId = constants.ObjectIdNull + //if err := spider.Save(); err != nil { + // return + //} return } } @@ -208,6 +227,7 @@ func PublishSpider(spider model.Spider) { spiderSync.CheckIsScrapy() return } + // md5文件不存在,则下载 md5 := filepath.Join(path, spider_handler.Md5File) if !utils.Exists(md5) { @@ -215,6 +235,7 @@ func PublishSpider(spider model.Spider) { spiderSync.RemoveDownCreate(gfFile.Md5) return } + // md5值不一样,则下载 md5Str := utils.GetSpiderMd5Str(md5) if 
gfFile.Md5 != md5Str { @@ -412,7 +433,29 @@ func CopySpider(spider model.Spider, newName string) error { return nil } -func InitDemoSpiders () { +func UpdateSpiderDedup(spider model.Spider) error { + s, c := database.GetCol(spider.Col) + defer s.Close() + + if !spider.IsDedup { + _ = c.DropIndex(spider.DedupField) + //if err := c.DropIndex(spider.DedupField); err != nil { + // return err + //} + return nil + } + + if err := c.EnsureIndex(mgo.Index{ + Key: []string{spider.DedupField}, + Unique: true, + }); err != nil { + return err + } + + return nil +} + +func InitDemoSpiders() { // 添加Demo爬虫 templateSpidersDir := "./template/spiders" for _, info := range utils.ListDir(templateSpidersDir) { @@ -471,6 +514,7 @@ func InitDemoSpiders () { ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), FileId: bson.ObjectIdHex(constants.ObjectIdNull), Cmd: configData.Cmd, + UserId: bson.ObjectIdHex(constants.ObjectIdNull), } if err := spider.Add(); err != nil { log.Errorf("add spider error: " + err.Error()) @@ -497,6 +541,7 @@ func InitDemoSpiders () { ProjectId: bson.ObjectIdHex(constants.ObjectIdNull), FileId: bson.ObjectIdHex(constants.ObjectIdNull), Config: configData, + UserId: bson.ObjectIdHex(constants.ObjectIdNull), } if err := spider.Add(); err != nil { log.Errorf("add spider error: " + err.Error()) @@ -543,6 +588,9 @@ func InitSpiderService() error { if err := GitCron.Start(); err != nil { return err } + + // 清理UserId + InitSpiderCleanUserIds() } return nil diff --git a/backend/services/spider_handler/spider.go b/backend/services/spider_handler/spider.go index ab1d5774..189fed60 100644 --- a/backend/services/spider_handler/spider.go +++ b/backend/services/spider_handler/spider.go @@ -45,13 +45,18 @@ func (s *SpiderSync) CheckIsScrapy() { if s.Spider.Type == constants.Configurable { return } + if viper.GetString("setting.checkScrapy") != "Y" { + return + } s.Spider.IsScrapy = utils.Exists(path.Join(s.Spider.Src, "scrapy.cfg")) - // TODO: 暂时停用自动检测Scrapy项目功能 - //if err := 
s.Spider.Save(); err != nil { - // log.Errorf(err.Error()) - // debug.PrintStack() - // return - //} + if s.Spider.IsScrapy { + s.Spider.Cmd = "scrapy crawl" + } + if err := s.Spider.Save(); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return + } } func (s *SpiderSync) AfterRemoveDownCreate() { diff --git a/backend/services/task.go b/backend/services/task.go index 469fa8da..0a9392f9 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -23,6 +23,7 @@ import ( "runtime" "runtime/debug" "strconv" + "strings" "sync" "syscall" "time" @@ -107,18 +108,20 @@ func AssignTask(task model.Task) error { } // 设置环境变量 -func SetEnv(cmd *exec.Cmd, envs []model.Env, taskId string, dataCol string) *exec.Cmd { +func SetEnv(cmd *exec.Cmd, envs []model.Env, task model.Task, spider model.Spider) *exec.Cmd { // 默认把Node.js的全局node_modules加入环境变量 envPath := os.Getenv("PATH") homePath := os.Getenv("HOME") nodeVersion := "v8.12.0" nodePath := path.Join(homePath, ".nvm/versions/node", nodeVersion, "lib/node_modules") - _ = os.Setenv("PATH", nodePath+":"+envPath) + if !strings.Contains(envPath, nodePath) { + _ = os.Setenv("PATH", nodePath+":"+envPath) + } _ = os.Setenv("NODE_PATH", nodePath) // 默认环境变量 - cmd.Env = append(os.Environ(), "CRAWLAB_TASK_ID="+taskId) - cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+dataCol) + cmd.Env = append(os.Environ(), "CRAWLAB_TASK_ID="+task.Id) + cmd.Env = append(cmd.Env, "CRAWLAB_COLLECTION="+spider.Col) cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_HOST="+viper.GetString("mongo.host")) cmd.Env = append(cmd.Env, "CRAWLAB_MONGO_PORT="+viper.GetString("mongo.port")) if viper.GetString("mongo.db") != "" { @@ -136,6 +139,13 @@ func SetEnv(cmd *exec.Cmd, envs []model.Env, taskId string, dataCol string) *exe cmd.Env = append(cmd.Env, "PYTHONUNBUFFERED=0") cmd.Env = append(cmd.Env, "PYTHONIOENCODING=utf-8") cmd.Env = append(cmd.Env, "TZ=Asia/Shanghai") + cmd.Env = append(cmd.Env, "CRAWLAB_DEDUP_FIELD="+spider.DedupField) + cmd.Env = 
append(cmd.Env, "CRAWLAB_DEDUP_METHOD="+spider.DedupMethod) + if spider.IsDedup { + cmd.Env = append(cmd.Env, "CRAWLAB_IS_DEDUP=1") + } else { + cmd.Env = append(cmd.Env, "CRAWLAB_IS_DEDUP=0") + } //任务环境变量 for _, env := range envs { @@ -270,7 +280,7 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e envs = append(envs, model.Env{Name: "CRAWLAB_SETTING_" + envName, Value: envValue}) } } - cmd = SetEnv(cmd, envs, t.Id, s.Col) + cmd = SetEnv(cmd, envs, t, s) // 起一个goroutine来监控进程 ch := utils.TaskExecChanMap.ChanBlocked(t.Id) @@ -455,7 +465,7 @@ func ExecuteTask(id int) { } // 开始执行任务 - log.Infof(GetWorkerPrefix(id) + "开始执行任务(ID:" + t.Id + ")") + log.Infof(GetWorkerPrefix(id) + "start task (id:" + t.Id + ")") // 储存任务 _ = t.Save() @@ -529,7 +539,7 @@ func ExecuteTask(id int) { // 统计时长 duration := toc.Sub(tic).Seconds() durationStr := strconv.FormatFloat(duration, 'f', 6, 64) - log.Infof(GetWorkerPrefix(id) + "任务(ID:" + t.Id + ")" + "执行完毕. 消耗时间:" + durationStr + "秒") + log.Infof(GetWorkerPrefix(id) + "task (id:" + t.Id + ")" + " finished. 
elapsed:" + durationStr + " sec") } func SpiderFileCheck(t model.Task, spider model.Spider) error { @@ -668,6 +678,35 @@ func CancelTask(id string) (err error) { return nil } +func RestartTask(id string, uid bson.ObjectId) (err error) { + // 获取任务 + oldTask, err := model.GetTask(id) + if err != nil { + log.Errorf("task not found, task id : %s, error: %s", id, err.Error()) + debug.PrintStack() + return err + } + + newTask := model.Task{ + SpiderId: oldTask.SpiderId, + NodeId: oldTask.NodeId, + Param: oldTask.Param, + UserId: uid, + RunType: oldTask.RunType, + ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull), + } + + // 加入任务队列 + _, err = AddTask(newTask) + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return err + } + + return nil +} + func AddTask(t model.Task) (string, error) { // 生成任务ID id := uuid.NewV4() diff --git a/backend/services/user.go b/backend/services/user.go index a01e721b..adc56136 100644 --- a/backend/services/user.go +++ b/backend/services/user.go @@ -14,7 +14,7 @@ import ( ) func InitUserService() error { - _ = CreateNewUser("admin", "admin", constants.RoleAdmin, "") + _ = CreateNewUser("admin", "admin", constants.RoleAdmin, "", bson.ObjectIdHex(constants.ObjectIdNull)) return nil } @@ -90,12 +90,13 @@ func CheckToken(tokenStr string) (user model.User, err error) { return } -func CreateNewUser(username string, password string, role string, email string) error { +func CreateNewUser(username string, password string, role string, email string, uid bson.ObjectId) error { user := model.User{ Username: strings.ToLower(username), Password: utils.EncryptPassword(password), Role: role, Email: email, + UserId: uid, Setting: model.UserSetting{ NotificationTrigger: constants.NotificationTriggerNever, EnabledNotifications: []string{ @@ -112,6 +113,18 @@ func CreateNewUser(username string, password string, role string, email string) } func GetCurrentUser(c *gin.Context) *model.User { - data, _ := c.Get("currentUser") + data, _ := 
c.Get(constants.ContextUser) return data.(*model.User) } + +func GetCurrentUserId(c *gin.Context) bson.ObjectId { + return GetCurrentUser(c).Id +} + +func GetAdminUser() (user *model.User, err error) { + u, err := model.GetUserByUsername("admin") + if err != nil { + return user, err + } + return &u, nil +} diff --git a/backend/utils/chan_test.go b/backend/utils/chan_test.go index 7b5f1bba..4bc75917 100644 --- a/backend/utils/chan_test.go +++ b/backend/utils/chan_test.go @@ -2,75 +2,77 @@ package utils import ( . "github.com/smartystreets/goconvey/convey" + "sync" "testing" ) func TestNewChanMap(t *testing.T) { - mapTest := make(map[string]chan string) + mapTest := sync.Map{} chanTest := make(chan string) test := "test" Convey("Call NewChanMap to generate ChanMap", t, func() { - mapTest[test] = chanTest + mapTest.Store("test", chanTest) chanMapTest := ChanMap{mapTest} chanMap := NewChanMap() - chanMap.m[test] = chanTest + chanMap.m.Store("test", chanTest) Convey(test, func() { - So(chanMap, ShouldResemble, &chanMapTest) + v1, ok := chanMap.m.Load("test") + So(ok, ShouldBeTrue) + v2, ok := chanMapTest.m.Load("test") + So(ok, ShouldBeTrue) + So(v1, ShouldResemble, v2) }) - }) } func TestChan(t *testing.T) { - mapTest := make(map[string]chan string) + mapTest := sync.Map{} chanTest := make(chan string) - mapTest["test"] = chanTest + mapTest.Store("test", chanTest) chanMapTest := ChanMap{mapTest} Convey("Test Chan use exist key", t, func() { - ch1 := chanMapTest.Chan( - "test") + ch1 := chanMapTest.Chan("test") Convey("ch1 should equal chanTest", func() { So(ch1, ShouldEqual, chanTest) }) - }) Convey("Test Chan use no-exist key", t, func() { ch2 := chanMapTest.Chan("test2") Convey("ch2 should equal chanMapTest.m[test2]", func() { - - So(chanMapTest.m["test2"], ShouldEqual, ch2) + v, ok := chanMapTest.m.Load("test2") + So(ok, ShouldBeTrue) + So(v, ShouldEqual, ch2) }) Convey("Cap of chanMapTest.m[test2] should equal 10", func() { - So(10, ShouldEqual, 
cap(chanMapTest.m["test2"])) + So(10, ShouldEqual, cap(ch2)) }) }) } func TestChanBlocked(t *testing.T) { - mapTest := make(map[string]chan string) + mapTest := sync.Map{} chanTest := make(chan string) - mapTest["test"] = chanTest + mapTest.Store("test", chanTest) chanMapTest := ChanMap{mapTest} Convey("Test Chan use exist key", t, func() { - ch1 := chanMapTest.ChanBlocked( - "test") + ch1 := chanMapTest.ChanBlocked("test") Convey("ch1 should equal chanTest", func() { So(ch1, ShouldEqual, chanTest) }) - }) Convey("Test Chan use no-exist key", t, func() { ch2 := chanMapTest.ChanBlocked("test2") Convey("ch2 should equal chanMapTest.m[test2]", func() { - - So(chanMapTest.m["test2"], ShouldEqual, ch2) + v, ok := chanMapTest.m.Load("test2") + So(ok, ShouldBeTrue) + So(v, ShouldEqual, ch2) }) Convey("Cap of chanMapTest.m[test2] should equal 10", func() { - So(0, ShouldEqual, cap(chanMapTest.m["test2"])) + So(0, ShouldEqual, cap(ch2)) }) }) } diff --git a/backend/utils/file.go b/backend/utils/file.go index 072930cf..040b78de 100644 --- a/backend/utils/file.go +++ b/backend/utils/file.go @@ -149,10 +149,9 @@ func DeCompress(srcFile *os.File, dstPath string) error { } // 如果文件目录不存在,则创建一个 - dirPath := filepath.Dir(innerFile.Name) + dirPath := filepath.Join(dstPath, filepath.Dir(innerFile.Name)) if !Exists(dirPath) { - err = os.MkdirAll(filepath.Join(dstPath, dirPath), os.ModeDir|os.ModePerm) - if err != nil { + if err = os.MkdirAll(dirPath, os.ModeDir|os.ModePerm); err != nil { log.Errorf("Unzip File Error : " + err.Error()) debug.PrintStack() return err @@ -168,7 +167,8 @@ func DeCompress(srcFile *os.File, dstPath string) error { } // 创建新文件 - newFile, err := os.OpenFile(filepath.Join(dstPath, innerFile.Name), os.O_RDWR|os.O_CREATE|os.O_TRUNC, info.Mode()) + newFilePath := filepath.Join(dstPath, innerFile.Name) + newFile, err := os.OpenFile(newFilePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, info.Mode()) if err != nil { log.Errorf("Unzip File Error : " + err.Error()) 
debug.PrintStack() diff --git a/docker-compose.yml b/docker-compose.yml index e9cb08cf..7f4f95da 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,9 +19,9 @@ services: # CRAWLAB_LOG_LEVEL: "info" # log level 日志级别. 默认为 info # CRAWLAB_LOG_ISDELETEPERIODICALLY: "N" # whether to periodically delete log files 是否周期性删除日志文件. 默认不删除 # CRAWLAB_LOG_DELETEFREQUENCY: "@hourly" # frequency of deleting log files 删除日志文件的频率. 默认为每小时 + # CRAWLAB_TASK_WORKERS: 8 # number of task executors 任务执行器个数(并行执行任务数) # CRAWLAB_SERVER_REGISTER_TYPE: "mac" # node register type 节点注册方式. 默认为 mac 地址,也可设置为 ip(防止 mac 地址冲突) # CRAWLAB_SERVER_REGISTER_IP: "127.0.0.1" # node register ip 节点注册IP. 节点唯一识别号,只有当 CRAWLAB_SERVER_REGISTER_TYPE 为 "ip" 时才生效 - # CRAWLAB_TASK_WORKERS: 8 # number of task executors 任务执行器个数(并行执行任务数) # CRAWLAB_SERVER_LANG_NODE: "Y" # whether to pre-install Node.js 预安装 Node.js 语言环境 # CRAWLAB_SERVER_LANG_JAVA: "Y" # whether to pre-install Java 预安装 Java 语言环境 # CRAWLAB_SERVER_LANG_DOTNET: "Y" # whether to pre-install .Net core 预安装 .Net Core 语言环境 @@ -30,6 +30,7 @@ services: # CRAWLAB_SETTING_ENABLETUTORIAL: "N" # whether to enable tutorial 是否启用教程 # CRAWLAB_SETTING_RUNONMASTER: "N" # whether to run on master node 是否在主节点上运行任务 # CRAWLAB_SETTING_DEMOSPIDERS: "Y" # whether to init demo spiders 是否使用Demo爬虫 + # CRAWLAB_SETTING_CHECKSCRAPY: "Y" # whether to automatically check if the spider is scrapy 是否自动检测爬虫为scrapy # CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.exmaple.com # STMP server address STMP 服务器地址 # CRAWLAB_NOTIFICATION_MAIL_PORT: 465 # STMP server port STMP 服务器端口 # CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@exmaple.com # sender email 发送者邮箱 diff --git a/frontend/src/App.vue b/frontend/src/App.vue index 93d41827..1917a490 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -51,6 +51,11 @@ export default { // remove loading-placeholder const elLoading = document.querySelector('#loading-placeholder') elLoading.remove() + + // send visit event + await 
this.$request.put('/actions', { + type: 'visit' + }) } } diff --git a/frontend/src/api/request.js b/frontend/src/api/request.js index 8504f624..3724de17 100644 --- a/frontend/src/api/request.js +++ b/frontend/src/api/request.js @@ -33,16 +33,19 @@ const request = (method, path, params, data, others = {}) => { return Promise.reject(response) }).catch((e) => { let response = e.response + if (!response) { + return e + } if (response.status === 400) { Message.error(response.data.error) } if (response.status === 401 && router.currentRoute.path !== '/login') { - console.log('login') router.push('/login') } if (response.status === 500) { Message.error(response.data.error) } + return e }) } diff --git a/frontend/src/components/Common/CrawlConfirmDialog.vue b/frontend/src/components/Common/CrawlConfirmDialog.vue index 001be5b5..89765d2d 100644 --- a/frontend/src/components/Common/CrawlConfirmDialog.vue +++ b/frontend/src/components/Common/CrawlConfirmDialog.vue @@ -34,7 +34,8 @@ /> - + - +
- 我已阅读并同意 《免责声明》 所有内容 + + 我已阅读并同意 + + 《免责声明》 + + 所有内容 + + + I have read and agree all content in + + Disclaimer + +
- 跳转到任务详情页 + {{$t('Redirect to task detail')}} +
+
+ + {{$t('Retry (Maximum 5 Times)')}}
@@ -129,6 +145,7 @@ export default { nodeList: [] }, isAllowDisclaimer: true, + isRetry: false, isRedirect: true, isLoading: false, isParametersVisible: false, @@ -142,6 +159,9 @@ export default { ...mapState('setting', [ 'setting' ]), + ...mapState('lang', [ + 'lang' + ]), isConfirmDisabled () { if (this.isLoading) return true if (!this.isAllowDisclaimer) return true @@ -309,7 +329,7 @@ export default { margin-bottom: 20px; } - .crawl-confirm-dialog >>> .disclaimer-wrapper a { + .crawl-confirm-dialog >>> .checkbox-wrapper a { color: #409eff; } diff --git a/frontend/src/components/Config/ConfigList.vue b/frontend/src/components/Config/ConfigList.vue index d4d83119..f84873de 100644 --- a/frontend/src/components/Config/ConfigList.vue +++ b/frontend/src/components/Config/ConfigList.vue @@ -131,14 +131,38 @@
- + {{$t('Run')}} + + {{$t('Convert to Customized')}} + - + {{$t('Save')}}
@@ -303,7 +327,7 @@
- + {{$t('Save')}}
@@ -316,7 +340,13 @@
- + {{$t('Save')}} @@ -330,7 +360,10 @@ diff --git a/frontend/src/components/File/FileDetail.vue b/frontend/src/components/File/FileDetail.vue index ee88cef7..cc55542a 100644 --- a/frontend/src/components/File/FileDetail.vue +++ b/frontend/src/components/File/FileDetail.vue @@ -1,12 +1,18 @@ diff --git a/frontend/src/components/Settings/GitSettings.vue b/frontend/src/components/Settings/GitSettings.vue index c80e5427..477133c4 100644 --- a/frontend/src/components/Settings/GitSettings.vue +++ b/frontend/src/components/Settings/GitSettings.vue @@ -323,7 +323,7 @@ export default { } } finally { this.isGitResetLoading = false - await this.updateGit() + // await this.updateGit() } }) this.$st.sendEv('爬虫详情', 'Git 设置', '点击重置') diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 899d9d44..25cf246d 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -222,6 +222,16 @@ export default { 'Add Variable': '添加变量', 'Copy Spider': '复制爬虫', 'New Spider Name': '新爬虫名称', + 'All Spiders': '所有爬虫', + 'My Spiders': '我的爬虫', + 'Public Spiders': '公共爬虫', + 'Is Public': '是否公共', + 'Owner': '所有者', + 'Convert to Customized': '转化为自定义', + 'Is De-Duplicated': '是否去重', + 'Please enter de-duplicated field': '请输入去重字段', + 'Overwrite': '覆盖', + 'Ignore': '忽略', // 爬虫列表 'Name': '名称', @@ -256,6 +266,9 @@ export default { 'Empty results': '空结果', 'Navigate to Spider': '导航到爬虫', 'Navigate to Node': '导航到节点', + 'Restart': '重新运行', + 'Redirect to task detail': '跳转到任务详情页', + 'Retry (Maximum 5 Times)': '是否重试(最多 5 次)', // 任务列表 'Node': '节点', @@ -269,6 +282,7 @@ export default { // 项目 'All Tags': '全部标签', + 'Projects': '项目', 'Project Name': '项目名称', 'Project Description': '项目描述', 'Tags': '标签', @@ -291,6 +305,7 @@ export default { 'Cron': 'Cron', 'Cron Expression': 'Cron 表达式', 'Cron expression is invalid': 'Cron 表达式不正确', + 'View Tasks': '查看任务', // 网站 'Site': '网站', @@ -306,6 +321,13 @@ export default { 'Home Page Response Time (sec)': '首页响应时间(秒)', 'Home Page Response Status Code': '首页响应状态码', + 
// 反馈 + 'Feedback': '反馈', + 'Feedbacks': '反馈', + 'Wechat': '微信', + 'Content': '内容', + 'Rating': '评分', + // 用户 'Super Admin': '超级管理员', @@ -361,7 +383,11 @@ export default { 'Are you sure to delete this node?': '你确定要删除该节点?', 'Are you sure to run this spider?': '你确定要运行该爬虫?', 'Are you sure to delete this file/directory?': '你确定要删除该文件/文件夹?', + 'Are you sure to convert this spider to customized spider?': '你确定要转化该爬虫为自定义爬虫?', + 'Are you sure to delete this task?': '您确定要删除该任务?', 'Added spider successfully': '成功添加爬虫', + 'Converted successfully': '成功转化', + 'Converted unsuccessfully': '未成功转化', 'Uploaded spider files successfully': '成功上传爬虫文件', 'Node info has been saved successfully': '节点信息已成功保存', 'A task has been scheduled successfully': '已经成功派发一个任务', @@ -419,6 +445,7 @@ export default { 'How to Upgrade': '升级方式', 'Release': '发布', 'Add Wechat to join discussion group': '添加微信 tikazyq1 加入交流群', + 'Submitted successfully': '提交成功', // 登录 'Sign in': '登录', @@ -459,6 +486,14 @@ export default { 'General': '通用', 'Enable Tutorial': '启用教程', + // 挑战 + 'Challenge': '挑战', + 'Challenges': '挑战', + 'Difficulty': '难度', + 'Achieved': '已达成', + 'Not Achieved': '未达成', + 'Start Challenge': '开始挑战', + // 全局 'Related Documentation': '相关文档', 'Click to view related Documentation': '点击查看相关文档', @@ -584,6 +619,11 @@ docker run -d --restart always --name crawlab_worker \\ 'Are you sure to stop selected items?': '您是否确认停止所选项?', 'Sent signals to cancel selected tasks': '已经向所选任务发送取消任务信号', 'Copied successfully': '已成功复制', + 'You have started the challenge.': '您已开始挑战', + 'Please enter your email': '请输入您的邮箱', + 'Please enter your Wechat account': '请输入您的微信账号', + 'Please enter your feedback content': '请输入您的反馈内容', + 'No response from the server. Please make sure your server is running correctly. 
You can also refer to the documentation to solve this issue.': '服务器无响应,请保证您的服务器正常运行。您也可以参考文档来解决这个问题(文档链接在下方)', // 其他 'Star crawlab-team/crawlab on GitHub': '在 GitHub 上为 Crawlab 加星吧' diff --git a/frontend/src/router/index.js b/frontend/src/router/index.js index 69b7f35b..c334360d 100644 --- a/frontend/src/router/index.js +++ b/frontend/src/router/index.js @@ -192,12 +192,51 @@ export const constantRouterMap = [ } ] }, + { + path: '/challenges', + component: Layout, + meta: { + title: 'ChallengeList', + icon: 'fa fa-flash' + }, + children: [ + { + path: '', + name: 'ChallengeList', + component: () => import('../views/challenge/ChallengeList'), + meta: { + title: 'Challenges', + icon: 'fa fa-flash' + } + } + ] + }, + { + path: '/feedback', + component: Layout, + meta: { + title: 'Feedback', + icon: 'fa fa-commenting-o' + }, + children: [ + { + path: '', + name: 'Feedback', + component: () => import('../views/feedback/Feedback'), + meta: { + title: 'Feedback', + icon: 'fa fa-commenting' + } + } + ] + }, { path: '/users', component: Layout, meta: { title: 'User', - icon: 'fa fa-user' + icon: 'fa fa-users', + isNew: true }, children: [ { @@ -206,7 +245,7 @@ export const constantRouterMap = [ component: () => import('../views/user/UserList'), meta: { title: 'Users', - icon: 'fa fa-user' + icon: 'fa fa-users' } } ] diff --git a/frontend/src/store/modules/spider.js b/frontend/src/store/modules/spider.js index fe982ded..1f6e2b1e 100644 --- a/frontend/src/store/modules/spider.js +++ b/frontend/src/store/modules/spider.js @@ -50,7 +50,10 @@ const state = { templateList: [], // spider file tree - fileTree: {} + fileTree: {}, + + // config list ts + configListTs: undefined } const getters = {} @@ -110,6 +113,9 @@ const mutations = { }, SET_SPIDER_SCRAPY_PIPELINES (state, value) { state.spiderScrapyPipelines = value + }, + SET_CONFIG_LIST_TS (state, value) { + state.configListTs = value } } diff --git a/frontend/src/store/modules/task.js b/frontend/src/store/modules/task.js index 
67f6a153..23a08bda 100644 --- a/frontend/src/store/modules/task.js +++ b/frontend/src/store/modules/task.js @@ -15,7 +15,8 @@ const state = { filter: { node_id: '', spider_id: '', - status: '' + status: '', + schedule_id: '' }, // pagination pageNum: 1, @@ -122,7 +123,8 @@ const actions = { page_size: state.pageSize, node_id: state.filter.node_id || undefined, spider_id: state.filter.spider_id || undefined, - status: state.filter.status || undefined + status: state.filter.status || undefined, + schedule_id: state.filter.schedule_id || undefined }) .then(response => { commit('SET_TASK_LIST', response.data.data || []) @@ -140,6 +142,12 @@ const actions = { ids: ids }) }, + restartTask ({ state, dispatch }, id) { + return request.post(`/tasks/${id}/restart`) + .then(() => { + dispatch('getTaskList') + }) + }, getTaskLog ({ state, commit }, id) { return request.get(`/tasks/${id}/log`) .then(response => { diff --git a/frontend/src/store/modules/user.js b/frontend/src/store/modules/user.js index 4bb6e918..cc517f8b 100644 --- a/frontend/src/store/modules/user.js +++ b/frontend/src/store/modules/user.js @@ -71,20 +71,16 @@ const user = { actions: { // 登录 - login ({ commit }, userInfo) { + async login ({ commit }, userInfo) { const username = userInfo.username.trim() - return new Promise((resolve, reject) => { - request.post('/login', { username, password: userInfo.password }) - .then(response => { - const token = response.data.data - commit('SET_TOKEN', token) - window.localStorage.setItem('token', token) - resolve() - }) - .catch(error => { - reject(error) - }) - }) + let res + res = await request.post('/login', { username, password: userInfo.password }) + if (res.status === 200) { + const token = res.data.data + commit('SET_TOKEN', token) + window.localStorage.setItem('token', token) + } + return res }, // 获取用户信息 @@ -152,7 +148,7 @@ const user = { // 添加用户 addUser ({ dispatch, commit, state }) { - return request.put('/users', state.userForm) + return 
request.put('/users-add', state.userForm) }, // 新增全局变量 addGlobalVariable ({ commit, state }) { diff --git a/frontend/src/utils/request.js b/frontend/src/utils/request.js deleted file mode 100644 index 7217fd63..00000000 --- a/frontend/src/utils/request.js +++ /dev/null @@ -1,73 +0,0 @@ -import axios from 'axios' -import { Message, MessageBox } from 'element-ui' -import store from '../store' -import { getToken } from '@/utils/auth' - -// 创建axios实例 -const service = axios.create({ - baseURL: process.env.BASE_API, // api 的 base_url - timeout: 5000 // 请求超时时间 -}) - -// request拦截器 -service.interceptors.request.use( - config => { - if (store.getters.token) { - config.headers['X-Token'] = getToken() // 让每个请求携带自定义token 请根据实际情况自行修改 - } - return config - }, - error => { - // Do something with request error - console.log(error) // for debug - Promise.reject(error) - } -) - -// response 拦截器 -service.interceptors.response.use( - response => { - /** - * code为非20000是抛错 可结合自己业务进行修改 - */ - const res = response.data - if (res.code !== 20000) { - Message({ - message: res.message, - type: 'error', - duration: 5 * 1000 - }) - - // 50008:非法的token; 50012:其他客户端登录了; 50014:Token 过期了; - if (res.code === 50008 || res.code === 50012 || res.code === 50014) { - MessageBox.confirm( - '你已被登出,可以取消继续留在该页面,或者重新登录', - '确定登出', - { - confirmButtonText: '重新登录', - cancelButtonText: '取消', - type: 'warning' - } - ).then(() => { - store.dispatch('FedLogOut').then(() => { - location.reload() // 为了重新实例化vue-router对象 避免bug - }) - }) - } - return Promise.reject(new Error('error')) - } else { - return response.data - } - }, - error => { - console.log('err' + error) // for debug - Message({ - message: error.message, - type: 'error', - duration: 5 * 1000 - }) - return Promise.reject(error) - } -) - -export default service diff --git a/frontend/src/views/challenge/ChallengeList.vue b/frontend/src/views/challenge/ChallengeList.vue new file mode 100644 index 00000000..a73888d7 --- /dev/null +++ 
b/frontend/src/views/challenge/ChallengeList.vue @@ -0,0 +1,198 @@ + + + + + diff --git a/frontend/src/views/doc/Disclaimer.vue b/frontend/src/views/doc/Disclaimer.vue index b05f8e96..d66c6614 100644 --- a/frontend/src/views/doc/Disclaimer.vue +++ b/frontend/src/views/doc/Disclaimer.vue @@ -60,6 +60,11 @@ This Disclaimer and privacy protection statement (hereinafter referred to as "di 8. 传播:任何公司或个人在网络上发布,传播我们软件的行为都是允许的,但因公司或个人传播软件可能造成的任何法律和刑事事件 Crawlab 开发组不负任何责任。 ` } + }, + mounted () { + this.$request.put('/actions', { + type: 'view_disclaimer' + }) } } diff --git a/frontend/src/views/feedback/Feedback.vue b/frontend/src/views/feedback/Feedback.vue new file mode 100644 index 00000000..1f22c56a --- /dev/null +++ b/frontend/src/views/feedback/Feedback.vue @@ -0,0 +1,175 @@ + + + + + diff --git a/frontend/src/views/home/Home.vue b/frontend/src/views/home/Home.vue index ebfb7563..8412f393 100644 --- a/frontend/src/views/home/Home.vue +++ b/frontend/src/views/home/Home.vue @@ -3,20 +3,33 @@
  • - - - - - - - - - - -
    {{overviewStats[m.name]}}
    -
    -
    -
    +
    + + +
    +
    +
    +
    + {{overviewStats[m.name]}} +
    +
    + {{$t(m.label)}} +
    +
    +
    + + + + + + + + + + + + +
@@ -41,10 +54,11 @@ export default { overviewStats: {}, dailyTasks: [], metrics: [ - { name: 'task_count', label: 'Total Tasks', icon: ['fa', 'play'], color: '#f56c6c', path: 'tasks' }, - { name: 'spider_count', label: 'Spiders', icon: ['fa', 'bug'], color: '#67c23a', path: 'spiders' }, - { name: 'active_node_count', label: 'Active Nodes', icon: ['fa', 'server'], color: '#409EFF', path: 'nodes' }, - { name: 'schedule_count', label: 'Schedules', icon: ['fa', 'clock'], color: '#409EFF', path: 'schedules' } + { name: 'task_count', label: 'Total Tasks', icon: 'fa fa-check', color: 'blue', path: 'tasks' }, + { name: 'spider_count', label: 'Spiders', icon: 'fa fa-bug', color: 'green', path: 'spiders' }, + { name: 'active_node_count', label: 'Active Nodes', icon: 'fa fa-server', color: 'red', path: 'nodes' }, + { name: 'schedule_count', label: 'Schedules', icon: 'fa fa-clock-o', color: 'orange', path: 'schedules' }, + { name: 'project_count', label: 'Projects', icon: 'fa fa-code-fork', color: 'grey', path: 'projects' } ] } }, @@ -105,45 +119,73 @@ export default { margin-right: 0; } - .metric-item { - flex-basis: 25%; + .metric-item:hover { + transform: scale(1.05); + transition: transform 0.5s ease; + } - .metric-card:hover { + .metric-item { + flex-basis: 20%; + height: 64px; + display: flex; + color: white; + cursor: pointer; + transform: scale(1); + transition: transform 0.5s ease; + + .metric-icon { + display: inline-flex; + width: 64px; + align-items: center; + justify-content: center; + border-top-left-radius: 5px; + border-bottom-left-radius: 5px; + font-size: 24px; + + svg { + width: 24px; + } } - .metric-card { - margin-right: 30px; - cursor: pointer; + .metric-content { + display: flex; + width: calc(100% - 80px); + align-items: center; + opacity: 0.85; + font-size: 14px; + padding-left: 15px; + border-top-right-radius: 5px; + border-bottom-right-radius: 5px; - .icon-col { - text-align: right; - - i { - margin-bottom: 15px; - font-size: 56px; - } + 
.metric-number { + font-weight: bolder; + margin-bottom: 5px; } + } - .text-col { - padding-left: 20px; - height: 76px; - text-align: center; + .metric-icon.blue, + .metric-content.blue { + background: #409eff; + } - .label { - cursor: pointer; - font-size: 16px; - display: block; - height: 24px; - color: grey; - font-weight: 900; - } + .metric-icon.green, + .metric-content.green { + background: #67c23a; + } - .value { - font-size: 24px; - display: block; - height: 32px; - } - } + .metric-icon.red, + .metric-content.red { + background: #f56c6c; + } + + .metric-icon.orange, + .metric-content.orange { + background: #E6A23C; + } + + .metric-icon.grey, + .metric-content.grey { + background: #97a8be; } } } diff --git a/frontend/src/views/layout/components/Navbar.vue b/frontend/src/views/layout/components/Navbar.vue index 7dfa804f..53a59a7f 100644 --- a/frontend/src/views/layout/components/Navbar.vue +++ b/frontend/src/views/layout/components/Navbar.vue @@ -208,7 +208,9 @@ docker-compose up -d }, logout () { this.$store.dispatch('user/logout') + this.$store.dispatch('delAllViews') this.$router.push('/login') + this.$st.sendEv('全局', '登出') }, setLang (lang) { window.localStorage.setItem('lang', lang) diff --git a/frontend/src/views/login/index.vue b/frontend/src/views/login/index.vue index 04cf9c74..9de1d26b 100644 --- a/frontend/src/views/login/index.vue +++ b/frontend/src/views/login/index.vue @@ -150,18 +150,41 @@ export default { }, methods: { handleLogin () { - this.$refs.loginForm.validate(valid => { - if (valid) { - this.loading = true - this.$store.dispatch('user/login', this.loginForm).then(() => { - this.loading = false - this.$router.push({ path: this.redirect || '/' }) - this.$store.dispatch('user/getInfo') - }).catch(() => { - this.$message.error(this.$t('Error when logging in (Please read documentation Q&A)')) - this.loading = false + this.$refs.loginForm.validate(async valid => { + if (!valid) return + this.loading = true + const res = await 
this.$store.dispatch('user/login', this.loginForm) + if (res.status === 200) { + // success + this.$router.push({ path: this.redirect || '/' }) + this.$st.sendEv('全局', '登录', '成功') + await this.$store.dispatch('user/getInfo') + } else if (res.message === 'Network Error' || !res.response) { + // no response + this.$message({ + type: 'error', + message: this.$t('No response from the server. Please make sure your server is running correctly. You can also refer to the documentation to solve this issue.'), + customClass: 'message-error', + duration: 5000 }) + this.$st.sendEv('全局', '登录', '服务器无响应') + } else if (res.response.status === 401) { + // incorrect username or password + this.$message({ + type: 'error', + message: '[401] ' + this.$t('Incorrect username or password') + }) + this.$st.sendEv('全局', '登录', '用户名密码错误') + } else { + // other error + this.$message({ + type: 'error', + message: `[${res.response.status}] ${res.response.data.error}`, + customClass: 'message-error' + }) + this.$st.sendEv('全局', '登录', '其他错误') } + this.loading = false }) }, handleSignup () { @@ -171,9 +194,11 @@ export default { this.$store.dispatch('user/register', this.loginForm).then(() => { this.handleLogin() this.loading = false + this.$st.sendEv('全局', '注册', '成功') }).catch(err => { this.$message.error(this.$t(err)) this.loading = false + this.$st.sendEv('全局', '注册', '失败') }) } }) @@ -363,6 +388,11 @@ const initCanvas = () => { left: 0; } } + + .message-error .el-message__content { + width: 360px; + line-height: 18px; + }