diff --git a/.github/workflows/dockerpush.yml b/.github/workflows/dockerpush.yml new file mode 100644 index 00000000..00de65ec --- /dev/null +++ b/.github/workflows/dockerpush.yml @@ -0,0 +1,53 @@ +name: Docker + +on: + push: + # Publish `master` as Docker `latest` image. + branches: + - master + - release + + # Publish `v1.2.3` tags as releases. + tags: + - v* + + # Run tests for any PRs. + pull_request: + +env: + IMAGE_NAME: tikazyq/crawlab + +jobs: + # Push image to Docker Hub. + # See also https://docs.docker.com/docker-hub/builds/ + push: + runs-on: ubuntu-latest + if: github.event_name == 'push' + + steps: + - uses: actions/checkout@v2 + + - name: Build image + run: docker build . --file Dockerfile --tag tikazyq/crawlab + + - name: Log into registry + run: echo ${{ secrets.DOCKER_PASSWORD}} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin + + - name: Push image + run: | + IMAGE_ID=tikazyq/$IMAGE_NAME + + # Strip git ref prefix from version + VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') + + # Strip "v" prefix from tag name + [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') + + # Use Docker `latest` tag convention + [ "$VERSION" == "master" ] && VERSION=latest + + echo IMAGE_ID=$IMAGE_ID + echo VERSION=$VERSION + + docker tag image $IMAGE_ID:$VERSION + docker push $IMAGE_ID:$VERSION diff --git a/CHANGELOG-zh.md b/CHANGELOG-zh.md index 38738789..e4f87aeb 100644 --- a/CHANGELOG-zh.md +++ b/CHANGELOG-zh.md @@ -1,3 +1,13 @@ +# 0.4.7 (unknown) +### 功能 / 优化 +- **更好的支持 Scrapy**. 爬虫识别,`settings.py` 配置,日志级别选择,爬虫选择. [#435](https://github.com/crawlab-team/crawlab/issues/435) +- **Git 同步**. 允许用户将 Git 项目同步到 Crawlab. +- **长任务支持**. 用户可以添加长任务爬虫,这些爬虫可以跑长期运行的任务. [#425](https://github.com/crawlab-team/crawlab/issues/425) +- **爬虫列表优化**. 分状态任务列数统计,任务列表详情弹出框,图例. [#425](https://github.com/crawlab-team/crawlab/issues/425) +- **版本升级检测**. 检测最新版本,通知用户升级. 
+ +### Bug 修复 + # 0.4.6 (2020-02-13) ### 功能 / 优化 - **Node.js SDK**. 用户可以将 SDK 应用到他们的 Node.js 爬虫中. diff --git a/CHANGELOG.md b/CHANGELOG.md index 707dd09d..5248c661 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +# 0.4.7 (unknown) +### Features / Enhancement +- **Better Support for Scrapy**. Spiders identification, `settings.py` configuration, log level selection, spider selection. [#435](https://github.com/crawlab-team/crawlab/issues/435) +- **Git Sync**. Allow users to sync git projects to Crawlab. +- **Long Task Support**. Users can add long-task spiders which are supposed to run without finishing. [#425](https://github.com/crawlab-team/crawlab/issues/425) +- **Spider List Optimization**. Tasks count by status, tasks detail popup, legend. [#425](https://github.com/crawlab-team/crawlab/issues/425) +- **Upgrade Check**. Check the latest version and notify users to upgrade. + +### Bug Fixes + # 0.4.6 (2020-02-13) ### Features / Enhancement - **SDK for Node.js**. Users can apply SDK in their Node.js spiders. 
diff --git a/backend/entity/version.go b/backend/entity/version.go new file mode 100644 index 00000000..97a0278d --- /dev/null +++ b/backend/entity/version.go @@ -0,0 +1,23 @@ +package entity + +type Release struct { + Name string `json:"name"` + Draft bool `json:"draft"` + PreRelease bool `json:"pre_release"` + PublishedAt string `json:"published_at"` + Body string `json:"body"` +} + +type ReleaseSlices []Release + +func (r ReleaseSlices) Len() int { + return len(r) +} + +func (r ReleaseSlices) Less(i, j int) bool { + return r[i].PublishedAt < r[j].PublishedAt +} + +func (r ReleaseSlices) Swap(i, j int) { + r[i], r[j] = r[j], r[i] +} diff --git a/backend/main.go b/backend/main.go index 7e9d1369..6c00c797 100644 --- a/backend/main.go +++ b/backend/main.go @@ -133,7 +133,8 @@ func main() { anonymousGroup.PUT("/users", routes.PutUser) // 添加用户 anonymousGroup.GET("/setting", routes.GetSetting) // 获取配置信息 // release版本 - anonymousGroup.GET("/version", routes.GetVersion) // 获取发布的版本 + anonymousGroup.GET("/version", routes.GetVersion) // 获取发布的版本 + anonymousGroup.GET("/releases/latest", routes.GetLatestRelease) // 获取最近发布的版本 } authGroup := app.Group("/", middlewares.AuthorizationMiddleware()) { diff --git a/backend/model/spider.go b/backend/model/spider.go index 61fb53ec..a741fc89 100644 --- a/backend/model/spider.go +++ b/backend/model/spider.go @@ -55,10 +55,14 @@ type Spider struct { GitSyncFrequency string `json:"git_sync_frequency" bson:"git_sync_frequency"` // Git 同步频率 GitSyncError string `json:"git_sync_error" bson:"git_sync_error"` // Git 同步错误 + // 长任务 + IsLongTask bool `json:"is_long_task" bson:"is_long_task"` // 是否为长任务 + // 前端展示 - LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间 - LastStatus string `json:"last_status"` // 最后执行状态 - Config entity.ConfigSpiderData `json:"config"` // 可配置爬虫配置 + LastRunTs time.Time `json:"last_run_ts"` // 最后一次执行时间 + LastStatus string `json:"last_status"` // 最后执行状态 + Config entity.ConfigSpiderData `json:"config"` // 可配置爬虫配置 + 
LatestTasks []Task `json:"latest_tasks"` // 最近任务列表 // 时间 CreateTs time.Time `json:"create_ts" bson:"create_ts"` @@ -124,6 +128,18 @@ func (spider *Spider) GetLastTask() (Task, error) { return tasks[0], nil } +// 爬虫正在运行的任务 +func (spider *Spider) GetLatestTasks(latestN int) (tasks []Task, err error) { + tasks, err = GetTaskList(bson.M{"spider_id": spider.Id}, 0, latestN, "-create_ts") + if err != nil { + return tasks, err + } + if tasks == nil { + return tasks, err + } + return tasks, nil +} + // 删除爬虫 func (spider *Spider) Delete() error { s, c := database.GetCol("spiders") @@ -157,9 +173,18 @@ func GetSpiderList(filter interface{}, skip int, limit int, sortStr string) ([]S continue } + // 获取正在运行的爬虫 + latestTasks, err := spider.GetLatestTasks(50) // TODO: latestN 暂时写死,后面加入数据库 + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + continue + } + // 赋值 spiders[i].LastRunTs = task.CreateTs spiders[i].LastStatus = task.Status + spiders[i].LatestTasks = latestTasks } count, _ := c.Find(filter).Count() diff --git a/backend/routes/spider.go b/backend/routes/spider.go index 39314970..dc84f462 100644 --- a/backend/routes/spider.go +++ b/backend/routes/spider.go @@ -35,13 +35,23 @@ func GetSpiderList(c *gin.Context) { sortKey, _ := c.GetQuery("sort_key") sortDirection, _ := c.GetQuery("sort_direction") - // 筛选 + // 筛选-名称 filter := bson.M{ "name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}}, } + + // 筛选-类型 if t != "" && t != "all" { filter["type"] = t } + + // 筛选-是否为长任务 + if t == "long-task" { + delete(filter, "type") + filter["is_long_task"] = true + } + + // 筛选-项目 if pid == "" { // do nothing } else if pid == constants.ObjectIdNull { diff --git a/backend/routes/version.go b/backend/routes/version.go new file mode 100644 index 00000000..ec3b80c7 --- /dev/null +++ b/backend/routes/version.go @@ -0,0 +1,20 @@ +package routes + +import ( + "crawlab/services" + "github.com/gin-gonic/gin" + "net/http" +) + +func GetLatestRelease(c *gin.Context) { + 
latestRelease, err := services.GetLatestRelease() + if err != nil { + HandleError(http.StatusInternalServerError, c, err) + return + } + c.JSON(http.StatusOK, Response{ + Status: "ok", + Message: "success", + Data: latestRelease, + }) +} diff --git a/backend/services/task.go b/backend/services/task.go index e940b325..76aeed83 100644 --- a/backend/services/task.go +++ b/backend/services/task.go @@ -350,10 +350,9 @@ func SaveTaskResultCount(id string) func() { func ExecuteTask(id int) { if flag, ok := LockList.Load(id); ok { if flag.(bool) { - log.Debugf(GetWorkerPrefix(id) + "正在执行任务...") + log.Debugf(GetWorkerPrefix(id) + "running tasks...") return } - } // 上锁 @@ -378,6 +377,7 @@ func ExecuteTask(id int) { // 节点队列 queueCur := "tasks:node:" + node.Id.Hex() + // 节点队列任务 var msg string if msg, err = database.RedisClient.LPop(queueCur); err != nil { @@ -387,6 +387,7 @@ func ExecuteTask(id int) { } } + // 如果没有获取到任务,返回 if msg == "" { return } @@ -504,6 +505,8 @@ func ExecuteTask(id int) { log.Errorf(GetWorkerPrefix(id) + err.Error()) return } + + // 统计数据 t.Status = constants.StatusFinished // 任务状态: 已完成 t.FinishTs = time.Now() // 结束时间 t.RuntimeDuration = t.FinishTs.Sub(t.StartTs).Seconds() // 运行时长 @@ -849,6 +852,14 @@ func SendNotifications(u model.User, t model.Task, s model.Spider) { } } +func UnlockLongTask(s model.Spider, n model.Node) { + if s.IsLongTask { + colName := "long-tasks" + key := fmt.Sprintf("%s:%s", s.Id.Hex(), n.Id.Hex()) + _ = database.RedisClient.HDel(colName, key) + } +} + func InitTaskExecutor() error { c := cron.New(cron.WithSeconds()) Exec = &Executor{ diff --git a/backend/services/version.go b/backend/services/version.go new file mode 100644 index 00000000..34df7b22 --- /dev/null +++ b/backend/services/version.go @@ -0,0 +1,29 @@ +package services + +import ( + "crawlab/entity" + "github.com/apex/log" + "github.com/imroc/req" + "runtime/debug" + "sort" +) + +func GetLatestRelease() (release entity.Release, err error) { + res, err := 
req.Get("https://api.github.com/repos/crawlab-team/crawlab/releases") + if err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return release, err + } + + var releaseDataList entity.ReleaseSlices + if err := res.ToJSON(&releaseDataList); err != nil { + log.Errorf(err.Error()) + debug.PrintStack() + return release, err + } + + sort.Sort(releaseDataList) + + return releaseDataList[len(releaseDataList)-1], nil +} diff --git a/frontend/src/App.vue b/frontend/src/App.vue index ec60d28b..93d41827 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -35,33 +35,6 @@ export default { }, methods: {}, async mounted () { - // window.setUseStats = (value) => { - // document.querySelector('.el-message__closeBtn').click() - // if (value === 1) { - // this.$st.sendPv('/allow_stats') - // this.$st.sendEv('全局', '允许/禁止统计', '允许') - // } else { - // this.$st.sendPv('/disallow_stats') - // this.$st.sendEv('全局', '允许/禁止统计', '禁止') - // } - // localStorage.setItem('useStats', value) - // } - - // first-time user - // if (this.useStats === undefined || this.useStats === null) { - // this.$message({ - // type: 'info', - // dangerouslyUseHTMLString: true, - // showClose: true, - // duration: 0, - // message: '

' + this.$t('Do you allow us to collect some statistics to improve Crawlab?') + '

' + - // '
' + - // '' + - // '' + - // '
' - // }) - // } - // set uid if first visit if (this.uid === undefined || this.uid === null) { localStorage.setItem('uid', this.$utils.encrypt.UUID()) @@ -71,6 +44,13 @@ export default { if (this.sid === undefined || this.sid === null) { sessionStorage.setItem('sid', this.$utils.encrypt.UUID()) } + + // get latest version + await this.$store.dispatch('version/getLatestRelease') + + // remove loading-placeholder + const elLoading = document.querySelector('#loading-placeholder') + elLoading.remove() } } diff --git a/frontend/src/components/Common/CrawlConfirmDialog.vue b/frontend/src/components/Common/CrawlConfirmDialog.vue index 6233d81c..1d5e2d7a 100644 --- a/frontend/src/components/Common/CrawlConfirmDialog.vue +++ b/frontend/src/components/Common/CrawlConfirmDialog.vue @@ -68,7 +68,7 @@ 我已阅读并同意 《免责声明》 所有内容 -
+
跳转到任务详情页
@@ -149,10 +149,15 @@ export default { this.$refs['form'].validate(async valid => { if (!valid) return + let param = this.form.param + if (this.spiderForm.type === 'customized' && this.spiderForm.is_scrapy) { + param = `${this.form.spider} --loglevel=${this.form.scrapy_log_level} ${this.form.param}` + } + const res = await this.$store.dispatch('spider/crawlSpider', { spiderId: this.spiderId, nodeIds: this.form.nodeIds, - param: `${this.form.spider} --loglevel=${this.form.scrapy_log_level} ${this.form.param}`, + param, runType: this.form.runType }) @@ -163,10 +168,12 @@ export default { this.$emit('close') this.$st.sendEv('爬虫确认', '确认运行', this.form.runType) - if (this.isRedirect) { + if (this.isRedirect && !this.spiderForm.is_long_task) { this.$router.push('/tasks/' + id) this.$st.sendEv('爬虫确认', '跳转到任务详情') } + + this.$emit('confirm') }) }, onClickDisclaimer () { diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue index 0b58fbbc..02a22818 100644 --- a/frontend/src/components/InfoView/SpiderInfoView.vue +++ b/frontend/src/components/InfoView/SpiderInfoView.vue @@ -76,6 +76,14 @@ /> + + + + + diff --git a/frontend/src/components/Status/StatusLegend.vue b/frontend/src/components/Status/StatusLegend.vue new file mode 100644 index 00000000..7a4e443f --- /dev/null +++ b/frontend/src/components/Status/StatusLegend.vue @@ -0,0 +1,40 @@ + + + + + diff --git a/frontend/src/components/Status/StatusTag.vue b/frontend/src/components/Status/StatusTag.vue index 29f53fcd..273ba8e4 100644 --- a/frontend/src/components/Status/StatusTag.vue +++ b/frontend/src/components/Status/StatusTag.vue @@ -21,7 +21,8 @@ export default { running: { label: 'Running', type: 'warning' }, finished: { label: 'Finished', type: 'success' }, error: { label: 'Error', type: 'danger' }, - cancelled: { label: 'Cancelled', type: 'info' } + cancelled: { label: 'Cancelled', type: 'info' }, + abnormal: { label: 'Abnormal', type: 'danger' } } } }, @@ 
-43,6 +44,8 @@ export default { icon () { if (this.status === 'finished') { return 'el-icon-check' + } else if (this.status === 'pending') { + return 'el-icon-loading' } else if (this.status === 'running') { return 'el-icon-loading' } else if (this.status === 'error') { @@ -50,9 +53,10 @@ export default { } else if (this.status === 'cancelled') { return 'el-icon-video-pause' } else if (this.status === 'abnormal') { + return 'el-icon-warning' + } else { return 'el-icon-question' } - return '' } } } diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 2c938e47..5c7e03e2 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -39,6 +39,7 @@ export default { Error: '错误', NA: '未知', Cancelled: '已取消', + Abnormal: '异常', // 操作 Add: '添加', @@ -78,6 +79,8 @@ export default { 'Sync Frequency': '同步频率', 'Reset': '重置', 'Copy': '复制', + 'Upgrade': '版本升级', + 'Ok': '确定', // 主页 'Total Tasks': '总任务数', @@ -210,6 +213,10 @@ export default { 'Git Password': 'Git 密码', 'Has Credential': '需要验证', 'SSH Public Key': 'SSH 公钥', + 'Is Long Task': '是否为长任务', + 'Long Task': '长任务', + 'Running Task Count': '运行中的任务数', + 'Running Tasks': '运行中的任务', // 爬虫列表 'Name': '名称', @@ -390,6 +397,9 @@ export default { 'New directory name': '新目录名称', 'Enter new file name': '输入新文件名称', 'New file name': '新文件名称', + 'Release Note': '发布记录', + 'How to Upgrade': '升级方式', + 'Release': '发布', // 登录 'Sign in': '登录', diff --git a/frontend/src/store/modules/task.js b/frontend/src/store/modules/task.js index 595ec7b0..85270729 100644 --- a/frontend/src/store/modules/task.js +++ b/frontend/src/store/modules/task.js @@ -174,10 +174,13 @@ const actions = { link.remove() }, cancelTask ({ state, dispatch }, id) { - return request.post(`/tasks/${id}/cancel`) - .then(() => { - dispatch('getTaskData', id) - }) + return new Promise(resolve => { + request.post(`/tasks/${id}/cancel`) + .then(res => { + dispatch('getTaskData', id) + resolve(res) + }) + }) } } diff --git a/frontend/src/store/modules/version.js 
b/frontend/src/store/modules/version.js index 873f21bd..6fa9375a 100644 --- a/frontend/src/store/modules/version.js +++ b/frontend/src/store/modules/version.js @@ -1,5 +1,11 @@ +import request from '../../api/request' + const state = { - version: '' + version: '', + latestRelease: { + name: '', + body: '' + } } const getters = {} @@ -7,10 +13,20 @@ const getters = {} const mutations = { SET_VERSION: (state, value) => { state.version = value + }, + SET_LATEST_RELEASE: (state, value) => { + state.latestRelease = value } } -const actions = {} +const actions = { + async getLatestRelease ({ commit }) { + const res = await request.get('/releases/latest') + if (!res.data.error) { + commit('SET_LATEST_RELEASE', res.data.data) + } + } +} export default { namespaced: true, diff --git a/frontend/src/utils/scrapy.js b/frontend/src/utils/scrapy.js index e6028ca6..63681a78 100644 --- a/frontend/src/utils/scrapy.js +++ b/frontend/src/utils/scrapy.js @@ -198,6 +198,15 @@ export default { 'TELNETCONSOLE_HOST', 'TELNETCONSOLE_PASSWORD', 'TELNETCONSOLE_PORT', - 'TELNETCONSOLE_USERNAME' + 'TELNETCONSOLE_USERNAME', + 'REDIS_ITEMS_KEY', + 'REDIS_ITEMS_SERIALIZER', + 'REDIS_HOST', + 'REDIS_PORT', + 'REDIS_URL', + 'REDIS_PARAMS', + 'REDIS_START_URLS_AS_SET', + 'REDIS_START_URLS_KEY', + 'REDIS_ENCODING' ] } diff --git a/frontend/src/views/layout/components/Navbar.vue b/frontend/src/views/layout/components/Navbar.vue index 10c98427..72555bd7 100644 --- a/frontend/src/views/layout/components/Navbar.vue +++ b/frontend/src/views/layout/components/Navbar.vue @@ -1,5 +1,27 @@