diff --git a/backend/main.go b/backend/main.go
index e83eec31..7e9d1369 100644
--- a/backend/main.go
+++ b/backend/main.go
@@ -247,7 +247,8 @@ func main() {
 			// 文件
 			authGroup.GET("/file", routes.GetFile) // 获取文件
 			// Git
-			authGroup.GET("/git/branches", routes.GetGitBranches) // 获取 Git 分支
+			authGroup.GET("/git/branches", routes.GetGitBranches)       // List Git branches
+			authGroup.GET("/git/public-key", routes.GetGitSshPublicKey) // Get the SSH public key
 		}
 
 	}
diff --git a/backend/model/spider.go b/backend/model/spider.go
index b0a3a8d6..61fb53ec 100644
--- a/backend/model/spider.go
+++ b/backend/model/spider.go
@@ -53,6 +53,7 @@ type Spider struct {
 	GitPassword      string `json:"git_password" bson:"git_password"`             // Git 密码
 	GitAutoSync      bool   `json:"git_auto_sync" bson:"git_auto_sync"`           // Git 是否自动同步
 	GitSyncFrequency string `json:"git_sync_frequency" bson:"git_sync_frequency"` // Git 同步频率
+	GitSyncError     string `json:"git_sync_error" bson:"git_sync_error"`         // Last Git sync error
 
 	// 前端展示
 	LastRunTs  time.Time `json:"last_run_ts"`  // 最后一次执行时间
@@ -166,6 +167,13 @@ func GetSpiderList(filter interface{}, skip int, limit int, sortStr string) ([]S
 	return spiders, count, nil
 }
 
+// GetSpiderAllList returns the spiders matching filter by calling
+// GetSpiderList with skip 0, limit constants.Infinite and sort string "_id".
+func GetSpiderAllList(filter interface{}) (spiders []Spider, err error) {
+	spiders, _, err = GetSpiderList(filter, 0, constants.Infinite, "_id")
+	return spiders, err
+}
+
 // 获取爬虫(根据FileId)
 func GetSpiderByFileId(fileId bson.ObjectId) *Spider {
 	s, c := database.GetCol("spiders")
diff --git a/backend/routes/git.go b/backend/routes/git.go
index 4c46d2cd..56fbc683 100644
--- a/backend/routes/git.go
+++ b/backend/routes/git.go
@@ -19,3 +19,12 @@ func GetGitBranches(c *gin.Context) {
 	})
 }
 
+// GetGitSshPublicKey responds with HTTP 200 and the value returned by
+// services.GetGitSshPublicKey as the response Data.
+func GetGitSshPublicKey(c *gin.Context) {
+	c.JSON(http.StatusOK, Response{
+		Status:  "ok",
+		Message: "success",
+		Data:    services.GetGitSshPublicKey(),
+	})
+}
diff --git a/backend/routes/spider.go b/backend/routes/spider.go
index eeab65cd..39314970 100644
--- a/backend/routes/spider.go
+++ 
b/backend/routes/spider.go
@@ -119,6 +119,12 @@ func PostSpider(c *gin.Context) {
 		return
 	}
 
+	// Refresh the Git cron jobs
+	if err := services.GitCron.Update(); err != nil {
+		HandleError(http.StatusInternalServerError, c, err)
+		return
+	}
+
 	c.JSON(http.StatusOK, Response{
 		Status:  "ok",
 		Message: "success",
@@ -197,6 +203,12 @@ func PutSpider(c *gin.Context) {
 		return
 	}
 
+	// Refresh the Git cron jobs
+	if err := services.GitCron.Update(); err != nil {
+		HandleError(http.StatusInternalServerError, c, err)
+		return
+	}
+
 	c.JSON(http.StatusOK, Response{
 		Status:  "ok",
 		Message: "success",
@@ -434,6 +446,12 @@ func DeleteSpider(c *gin.Context) {
 		return
 	}
 
+	// Refresh the Git cron jobs
+	if err := services.GitCron.Update(); err != nil {
+		HandleError(http.StatusInternalServerError, c, err)
+		return
+	}
+
 	c.JSON(http.StatusOK, Response{
 		Status:  "ok",
 		Message: "success",
diff --git a/backend/services/git.go b/backend/services/git.go
index c294955f..14fcb52d 100644
--- a/backend/services/git.go
+++ b/backend/services/git.go
@@ -2,13 +2,18 @@ package services
 
 import (
 	"bytes"
+	"crawlab/lib/cron"
 	"crawlab/model"
+	"crawlab/services/spider_handler"
 	"crawlab/utils"
 	"fmt"
 	"github.com/apex/log"
+	"github.com/globalsign/mgo/bson"
 	"gopkg.in/src-d/go-git.v4"
 	"gopkg.in/src-d/go-git.v4/config"
 	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/transport/ssh"
+	"io/ioutil"
 	"net/url"
 	"os"
 	"os/exec"
@@ -18,6 +23,21 @@ import (
 	"strings"
 )
 
+var GitCron *GitCronScheduler
+
+type GitCronScheduler struct {
+	cron *cron.Cron
+}
+
+// SaveSpiderGitSyncError stores errMsg ("" clears it) as the spider's GitSyncError and saves the spider; a save failure is only logged.
+func SaveSpiderGitSyncError(s model.Spider, errMsg string) {
+	s.GitSyncError = errMsg
+	if err := s.Save(); err != nil {
+		log.Error(err.Error())
+		debug.PrintStack()
+	}
+}
+
 func GetGitBranches(url string) (branches []string, err error) {
 	var stdout bytes.Buffer
 	var stderr bytes.Buffer
@@ -81,6 +101,7 @@ func SyncSpiderGit(s model.Spider) (err error) {
 	if err != nil {
 		log.Error(err.Error())
 		debug.PrintStack()
+		SaveSpiderGitSyncError(s, err.Error())
 		return err
 	}
 
@@ -89,6 
+110,7 @@ func SyncSpiderGit(s model.Spider) (err error) {
 	if s.GitUsername != "" && s.GitPassword != "" {
 		u, err := url.Parse(s.GitUrl)
 		if err != nil {
+			SaveSpiderGitSyncError(s, err.Error())
 			return err
 		}
 		gitUrl = fmt.Sprintf(
@@ -110,13 +132,37 @@ func SyncSpiderGit(s model.Spider) (err error) {
 	if err != nil {
 		log.Error(err.Error())
 		debug.PrintStack()
+		SaveSpiderGitSyncError(s, err.Error())
 		return err
 	}
 
+	// Build SSH credentials for remotes that are not HTTP(S). auth stays nil
+	// for HTTP(S) remotes and when the private key cannot be loaded.
+	var auth ssh.AuthMethod
+	if !strings.HasPrefix(s.GitUrl, "http") {
+		// url.Parse rejects scp-like remotes ("git@host:path"); for those the
+		// user name is the text before "@".
+		var sshUser string
+		if u, err := url.Parse(s.GitUrl); err == nil {
+			sshUser = u.User.Username()
+		} else if i := strings.Index(s.GitUrl, "@"); i > 0 {
+			sshUser = s.GitUrl[:i]
+		}
+		// Assign auth only on success: a nil *ssh.PublicKeys stored in the
+		// interface would make auth non-nil.
+		keys, err := ssh.NewPublicKeysFromFile(sshUser, path.Join(os.Getenv("HOME"), ".ssh", "id_rsa"), "")
+		if err != nil {
+			log.Error(err.Error())
+		} else {
+			auth = keys
+		}
+	}
+
 	// 获取 repo
 	_ = repo.Fetch(&git.FetchOptions{
 		RemoteName: "origin",
 		Force:      true,
+		Auth:       auth,
 	})
 
 	// 获得 WorkTree
@@ -124,15 +170,23 @@ func SyncSpiderGit(s model.Spider) (err error) {
 	if err != nil {
 		log.Error(err.Error())
 		debug.PrintStack()
+		SaveSpiderGitSyncError(s, err.Error())
 		return err
 	}
 
 	// 拉取 repo
 	if err := wt.Pull(&git.PullOptions{
 		RemoteName: "origin",
+		Auth:       auth,
 	}); err != nil {
+		if err == git.NoErrAlreadyUpToDate {
+			// Nothing new to pull: clear any stored sync error
+			SaveSpiderGitSyncError(s, "")
+			return nil
+		}
 		log.Error(err.Error())
 		debug.PrintStack()
+		SaveSpiderGitSyncError(s, err.Error())
 		return err
 	}
 
@@ -142,13 +196,150 @@ func SyncSpiderGit(s model.Spider) (err error) {
 	}); err != nil {
 		log.Error(err.Error())
 		debug.PrintStack()
+		SaveSpiderGitSyncError(s, err.Error())
 		return err
 	}
 
 	// 同步到GridFS
 	if err := UploadSpiderToGridFsFromMaster(s); err != nil {
+		SaveSpiderGitSyncError(s, err.Error())
+		return err
+	}
+
+	// Check whether the spider is a Scrapy project
+	sync := spider_handler.SpiderSync{Spider: s}
+	sync.CheckIsScrapy()
+
+	// Sync succeeded: clear any stored sync error
+	SaveSpiderGitSyncError(s, "")
+
+	return nil
+}
+
+// Start starts the Git sync cron, loads the job list once, and starts a
+// second cron that reloads the job list every 30 seconds.
+func (g *GitCronScheduler) Start() error {
+	// The refresher runs on its own cron because Update removes every entry
+	// of g.cron.
+	c := cron.New(cron.WithSeconds())
+
+	// Start the cron that runs the per-spider sync jobs
+	g.cron.Start()
+
+	// Load the initial job list
+	if err := g.Update(); err != nil {
+		log.Errorf("update scheduler error: %s", err.Error())
+		debug.PrintStack()
+		return err
+	}
+
+	// Reload the job list every 30 seconds
+	spec := "*/30 * * * * *"
+	if _, err := c.AddFunc(spec, UpdateGitCron); err != nil {
+		log.Errorf("add func update schedulers error: %s", err.Error())
+		debug.PrintStack()
 		return err
 	}
 
+	// Start the refresher cron; its entry never fires unless it is started.
+	c.Start()
+
 	return nil
 }
+
+// RemoveAll removes every entry currently registered on g.cron.
+func (g *GitCronScheduler) RemoveAll() {
+	for _, e := range g.cron.Entries() {
+		g.cron.Remove(e.ID)
+	}
+}
+
+// Update rebuilds the job list: it removes every existing entry and adds one
+// sync job per spider that has git_auto_sync enabled. A spider whose job
+// cannot be added does not stop the remaining spiders from being scheduled;
+// the first such error is returned after the loop.
+func (g *GitCronScheduler) Update() error {
+	// Drop all existing jobs
+	g.RemoveAll()
+
+	// Fetch the spiders that have Git auto sync enabled
+	spiders, err := model.GetSpiderAllList(bson.M{"git_auto_sync": true})
+	if err != nil {
+		log.Errorf("get spider list error: %s", err.Error())
+		debug.PrintStack()
+		return err
+	}
+
+	// Schedule one job per spider
+	var firstErr error
+	for _, s := range spiders {
+		if err := g.AddJob(s); err != nil {
+			log.Errorf("add job error: %s, job: %s, cron: %s", err.Error(), s.Name, s.GitSyncFrequency)
+			debug.PrintStack()
+			if firstErr == nil {
+				firstErr = err
+			}
+		}
+	}
+
+	return firstErr
+}
+
+// AddJob registers a job on g.cron that syncs s from Git on the schedule
+// given by s.GitSyncFrequency.
+func (g *GitCronScheduler) AddJob(s model.Spider) error {
+	spec := s.GitSyncFrequency
+
+	// Register the sync job
+	_, err := g.cron.AddFunc(spec, AddGitCronJob(s))
+	if err != nil {
+		log.Errorf("add func task error: %s", err.Error())
+		debug.PrintStack()
+		return err
+	}
+
+	return nil
+}
+
+// AddGitCronJob returns the cron job function that runs SyncSpiderGit for s
+// and logs any error it returns.
+func AddGitCronJob(s model.Spider) func() {
+	return func() {
+		if err := SyncSpiderGit(s); err != nil {
+			log.Error(err.Error())
+			debug.PrintStack()
+		}
+	}
+}
+
+// UpdateGitCron reloads the jobs of the package-level GitCron scheduler and
+// logs any error; it is the function Start schedules every 30 seconds.
+func UpdateGitCron() {
+	if err := GitCron.Update(); err != nil {
+		log.Error(err.Error())
+	}
+}
+
+// GetGitSshPublicKey returns the content of $HOME/.ssh/id_rsa.pub. When the
+// .ssh directory or either key file is missing, a key pair is generated
+// with ssh-keygen first. It returns "" when generation or reading fails.
+func GetGitSshPublicKey() string {
+	sshDir := path.Join(os.Getenv("HOME"), ".ssh")
+	keyPath := path.Join(sshDir, "id_rsa")
+	if !utils.Exists(sshDir) || !utils.Exists(keyPath) || !utils.Exists(keyPath+".pub") {
+		// Pass each argument separately (exec does not go through a shell, so
+		// a single command string and $HOME would not be interpreted), and
+		// wait for ssh-keygen to finish before reading the key it writes.
+		cmd := exec.Command("ssh-keygen", "-q", "-t", "rsa", "-N", "", "-f", keyPath)
+		if err := cmd.Run(); err != nil {
+			log.Error(err.Error())
+			debug.PrintStack()
+			return ""
+		}
+	}
+	content, err := ioutil.ReadFile(keyPath + ".pub")
+	if err != nil {
+		return ""
+	}
+ return string(content) +} diff --git a/backend/services/spider.go b/backend/services/spider.go index a3ef2426..5b50bbf0 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -264,12 +264,13 @@ func RemoveSpider(id string) error { // 启动爬虫服务 func InitSpiderService() error { // 构造定时任务执行器 - c := cron.New(cron.WithSeconds()) - if _, err := c.AddFunc("0 * * * * *", PublishAllSpiders); err != nil { + cPub := cron.New(cron.WithSeconds()) + if _, err := cPub.AddFunc("0 * * * * *", PublishAllSpiders); err != nil { return err } + // 启动定时任务 - c.Start() + cPub.Start() if model.IsMaster() { // 添加Demo爬虫 @@ -374,6 +375,16 @@ func InitSpiderService() error { // 发布所有爬虫 PublishAllSpiders() + + // 构造 Git 定时任务 + GitCron = &GitCronScheduler{ + cron: cron.New(cron.WithSeconds()), + } + + // 启动 Git 定时任务 + if err := GitCron.Start(); err != nil { + return err + } } return nil diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue index 7ca24bef..0b58fbbc 100644 --- a/frontend/src/components/InfoView/SpiderInfoView.vue +++ b/frontend/src/components/InfoView/SpiderInfoView.vue @@ -180,7 +180,9 @@ export default { this.$message.success(this.$t('Spider info has been saved successfully')) } await this.$store.dispatch('spider/getSpiderData', this.$route.params.id) - await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id) + if (this.spiderForm.is_scrapy) { + await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id) + } }) this.$st.sendEv('爬虫详情', '概览', '保存') }, diff --git a/frontend/src/components/Settings/GitSettings.vue b/frontend/src/components/Settings/GitSettings.vue index a1557135..939c0a71 100644 --- a/frontend/src/components/Settings/GitSettings.vue +++ b/frontend/src/components/Settings/GitSettings.vue @@ -98,6 +98,34 @@ /> + + + {{spiderForm.git_sync_error}} + + + + + {{sshPublicKey}} + + + + {{$t('Copy')}} + + +
0) { - this.$set(this.spiderForm, 'git_branch', this.gitBranches[0]) + try { + const res = await this.$request.get('/git/branches', { url: this.spiderForm.git_url }) + this.gitBranches = res.data.data + if (!this.spiderForm.git_branch && this.gitBranches.length > 0) { + this.$set(this.spiderForm, 'git_branch', this.gitBranches[0]) + } + } finally { + this.isGitBranchesLoading = false } - this.isGitBranchesLoading = false }, async onSync () { this.isGitSyncLoading = true @@ -185,6 +217,7 @@ export default { } } finally { this.isGitSyncLoading = false + await this.$store.dispatch('spider/getSpiderData', this.$route.params.id) } }, onReset () { @@ -207,12 +240,24 @@ export default { this.isGitResetLoading = false } }) + }, + async getSshPublicKey () { + const res = await this.$request.get('/git/public-key') + this.sshPublicKey = res.data.data + }, + copySshPublicKey () { + const el = document.querySelector('#ssh-public-key') + el.focus() + el.setSelectionRange(0, this.sshPublicKey.length) + document.execCommand('copy') + this.$message.success(this.$t('SSH Public Key is copied to the clipboard')) } }, async created () { if (this.spiderForm.git_url) { this.onGitUrlChange() } + await this.getSshPublicKey() } } @@ -226,6 +271,39 @@ export default { width: 640px; } + .git-settings .git-settings-form >>> .el-alert { + padding: 0 5px; + margin: 0; + } + + .git-settings .git-settings-form >>> .el-alert__description { + padding: 0; + margin: 0; + font-size: 14px; + line-height: 24px; + } + + .git-settings .git-settings-form .copy { + display: inline; + line-height: 14px; + position: absolute; + top: 5px; + right: 5px; + cursor: pointer; + } + + .git-settings .git-settings-form .copy { + } + + #ssh-public-key { + position: absolute; + z-index: -1; + top: 0; + left: 0; + height: 0; + /*visibility: hidden;*/ + } + .git-settings .title { border-bottom: 1px solid #DCDFE6; padding-bottom: 15px; diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 2ece3075..2c938e47 
100644
--- a/frontend/src/i18n/zh.js
+++ b/frontend/src/i18n/zh.js
@@ -77,6 +77,7 @@ export default {
   'Auto Sync': '自动同步',
   'Sync Frequency': '同步频率',
   'Reset': '重置',
+  'Copy': '复制',
 
   // 主页
   'Total Tasks': '总任务数',
@@ -208,6 +209,7 @@ export default {
   'Git Username': 'Git 用户名',
   'Git Password': 'Git 密码',
   'Has Credential': '需要验证',
+  'SSH Public Key': 'SSH 公钥',
 
   // 爬虫列表
   'Name': '名称',
@@ -542,6 +544,7 @@ docker run -d --restart always --name crawlab_worker \\
   'Git has been synchronized successfully': 'Git 已经成功同步',
   'Git has been reset successfully': 'Git 已经成功重置',
   'This would delete all files of the spider. Are you sure to continue?': '重置将删除该爬虫所有文件,您希望继续吗?',
+  'SSH Public Key is copied to the clipboard': 'SSH 公钥已复制到剪切板',
 
   // 其他
   'Star crawlab-team/crawlab on GitHub': '在 GitHub 上为 Crawlab 加星吧'