diff --git a/.gitignore b/.gitignore index c366ea24..622a9c22 100644 --- a/.gitignore +++ b/.gitignore @@ -113,3 +113,6 @@ node_modules/ # .DS_Store .DS_Store + +.docks +.docs \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..8c08a065 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,32 @@ +# 0.2.3 (unreleased) +### Features / Enhancements +- **CLI**: Allow users to run Crawlab programs from the command-line interface. + +# 0.2.2 (2019-05-30) +### Features / Enhancements +- **Automatic Extract Fields**: Automatically extract data fields from list pages for configurable spiders. +- **Download Results**: Allow downloading results as a CSV file. +- **Baidu Tongji**: Allow users to choose to report usage info to Baidu Tongji. + +### Bug Fixes +- **Results Page Pagination**: Fixed the pagination of the results page so that it works correctly. [#45](https://github.com/tikazyq/crawlab/issues/45) +- **Schedule Tasks Duplicated Triggers**: Set Flask `DEBUG` to `False` so that scheduled tasks won't trigger twice. [#32](https://github.com/tikazyq/crawlab/issues/32) +- **Frontend Environment**: Added `VUE_APP_BASE_URL` as a production-mode environment variable so that API calls don't always go to `localhost` in deployed environments. [#30](https://github.com/tikazyq/crawlab/issues/30) + +# 0.2.1 (2019-05-27) +- **Configurable Spider**: Allow users to create a spider to crawl data without coding. + +# 0.2 (2019-05-10) + +- **Advanced Stats**: Advanced analytics in spider detail view. +- **Sites Data**: Added sites list (China) for users to check info such as robots.txt and home page response time/code. + +# 0.1.1 (2019-04-23) + +- **Basic Stats**: Users can view basic stats such as the number of failed tasks and the number of results in the spiders and tasks pages. +- **Near Realtime Task Info**: Periodically (every 5 sec) poll data from the server so that task info can be viewed in a near-realtime fashion. 
+- **Scheduled Tasks**: Allow users to set up cron-like scheduled/periodical tasks using apscheduler. + +# 0.1 (2019-04-17) + +- **Initial Release** diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 00000000..5ca0b085 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,59 @@ +pipeline { + agent { + node { + label 'crawlab' + } + } + + environment { + HOME = '/home/yeqing' + NODE_HOME = '/home/yeqing/.nvm/versions/node/v8.12.0' + ROOT_DIR = "/home/yeqing/jenkins_home/workspace/crawlab_${GIT_BRANCH}" + PYTHON_HOME = '/home/yeqing/.pyenv/shims' + } + + stages { + stage('Setup') { + steps { + echo "Running Setup..." + // sh '. /home/yeqing/.profile' + script { + if (env.GIT_BRANCH == 'develop') { + env.MODE = 'test' + } else if (env.GIT_BRANCH == 'master') { + env.MODE = 'production' + } else { + env.MODE = 'test' + } + } + } + } + stage('Build Frontend') { + steps { + echo "Building frontend..." + // sh "${NODE_HOME}/bin/node ${NODE_HOME}/bin/npm install -g yarn pm2 --registry=http://registry.npm.taobao.org/" + sh "cd ${ROOT_DIR}/frontend && ${NODE_HOME}/bin/node ${NODE_HOME}/bin/yarn install --registry=http://registry.npm.taobao.org/ --scripts-prepend-node-path=${NODE_HOME}/bin/node" + sh "cd ${ROOT_DIR}/frontend && ${NODE_HOME}/bin/node ${ROOT_DIR}/frontend/node_modules/.bin/vue-cli-service build --mode=${MODE}" + } + } + stage('Build Backend') { + steps { + echo "Building backend..." + sh "${PYTHON_HOME}/pip install -r ${ROOT_DIR}/crawlab/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple" + } + } + stage('Test') { + steps { + echo 'Testing..' + } + } + stage('Deploy') { + steps { + echo 'Deploying....' 
+ sh "${NODE_HOME}/bin/node ${NODE_HOME}/bin/pm2 restart app" + sh "${NODE_HOME}/bin/node ${NODE_HOME}/bin/pm2 restart run_flower" + sh "${NODE_HOME}/bin/node ${NODE_HOME}/bin/pm2 restart run_worker" + } + } + } +} \ No newline at end of file diff --git a/README-zh.md b/README-zh.md index ab6cbda7..2f407f00 100644 --- a/README-zh.md +++ b/README-zh.md @@ -1,18 +1,20 @@ # Crawlab -![](https://img.shields.io/badge/版本-v0.2.1-blue.svg) +![](http://114.67.75.98:8081/buildStatus/icon?job=crawlab%2Fdevelop) +![](https://img.shields.io/badge/版本-v0.2.3-blue.svg) +中文 | [English](https://github.com/tikazyq/crawlab/blob/master/README.md) + 基于Celery的爬虫分布式爬虫管理平台,支持多种编程语言以及多种爬虫框架. -[查看演示 Demo](http://139.129.230.98:8080) - -[English Documentation](https://github.com/tikazyq/crawlab/blob/master/README.md) +[查看演示 Demo](http://114.67.75.98:8080) | [文档](https://tikazyq.github.io/crawlab) ## 要求 -- Python3 +- Python 3.6+ +- Node.js 8.12+ - MongoDB - Redis @@ -35,20 +37,7 @@ npm install ## 快速开始 ```bash -# 启动后端API -python app.py - -# 启动Flower服务 -python ./bin/run_flower.py - -# 启动worker -python ./bin/run_worker.py -``` - -```bash -# 运行前端 -cd frontend -npm run serve +python manage.py serve ``` ## 截图 @@ -177,6 +166,8 @@ Crawlab使用起来很方便,也很通用,可以适用于几乎任何主流 - [ ] 登录和用户管理 - [ ] 全局搜索 +## 社区 & 赞助 + 如果您觉得Crawlab对您的日常开发或公司有帮助,请加作者微信 tikazyq1 并注明"Crawlab",作者会将你拉入群。或者,您可以扫下方支付宝二维码给作者打赏去升级团队协作软件或买一杯咖啡。

diff --git a/README.md b/README.md index da51832e..4eb0e9ca 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,20 @@ # Crawlab -![](https://img.shields.io/badge/version-v0.2.1-blue.svg) +![](http://114.67.75.98:8081/buildStatus/icon?job=crawlab%2Fdevelop) +![](https://img.shields.io/badge/version-v0.2.3-blue.svg) - + +[中文](https://github.com/tikazyq/crawlab/blob/master/README-zh.md) | English + Celery-based web crawler admin platform for managing distributed web spiders regardless of languages and frameworks. -[Demo](http://139.129.230.98:8080) - -[中文文档](https://github.com/tikazyq/crawlab/blob/master/README-zh.md) +[Demo](http://114.67.75.98:8080) | [Documentation](https://tikazyq.github.io/crawlab) ## Pre-requisite -- Python3 +- Python 3.6+ +- Node.js 8.12+ - MongoDB - Redis @@ -35,20 +37,7 @@ Please edit configuration file `config.py` to configure api and database connect ## Quick Start ```bash -# Start backend API -python app.py - -# Start Flower service -python ./bin/run_flower.py - -# Start worker -python ./bin/run_worker.py -``` - -```bash -# run frontend client -cd frontend -npm run serve +python manage.py serve ``` ## Screenshot @@ -166,12 +155,14 @@ Crawlab is easy to use, general enough to adapt spiders in any language and any - [ ] More spider examples ##### Frontend -- [ ] Task Stats/Analytics -- [ ] Table Filters +- [x] Task Stats/Analytics +- [x] Table Filters - [x] Multi-Language Support (中文) - [ ] Login & User Management - [ ] General Search +## Community & Sponsorship + If you feel Crawlab could benefit your daily work or your company, please add the author's Wechat account noting "Crawlab" to enter the discussion group. Or you scan the Alipay QR code below to give us a reward to upgrade our teamwork software or buy a coffee.

diff --git a/crawlab/__init__.py b/crawlab/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/crawlab/bin/run_app.py b/crawlab/bin/run_app.py deleted file mode 100644 index 999d7278..00000000 --- a/crawlab/bin/run_app.py +++ /dev/null @@ -1,17 +0,0 @@ -import sys -import os - -# make sure the working directory is in system path -file_dir = os.path.dirname(os.path.realpath(__file__)) -root_path = os.path.abspath(os.path.join(file_dir, '..')) -sys.path.append(root_path) - -from config import PROJECT_LOGS_FOLDER, FLASK_HOST, FLASK_PORT -from manage import app - -# create folder if it does not exist -if not os.path.exists(PROJECT_LOGS_FOLDER): - os.makedirs(PROJECT_LOGS_FOLDER) - -# run app instance -app.run(host=FLASK_HOST, port=FLASK_PORT) diff --git a/crawlab/bin/start_flower.sh b/crawlab/bin/start_flower.sh deleted file mode 100755 index 98339f7d..00000000 --- a/crawlab/bin/start_flower.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env bash -celery flower --broker=mongodb://localhost:27017 diff --git a/crawlab/config/config.py b/crawlab/config/config.py index 4f0a85db..e235d400 100644 --- a/crawlab/config/config.py +++ b/crawlab/config/config.py @@ -51,5 +51,5 @@ MONGO_DB = 'crawlab_test' # Flask 变量 DEBUG = False -FLASK_HOST = '127.0.0.1' +FLASK_HOST = '0.0.0.0' FLASK_PORT = 8000 diff --git a/crawlab/bin/run_flower.py b/crawlab/flower.py similarity index 100% rename from crawlab/bin/run_flower.py rename to crawlab/flower.py diff --git a/crawlab/requirements.txt b/crawlab/requirements.txt index 0a1b7f13..e9559317 100644 --- a/crawlab/requirements.txt +++ b/crawlab/requirements.txt @@ -10,6 +10,7 @@ Babel==2.6.0 beautifulsoup4==4.7.1 billiard==3.6.0.0 bs4==0.0.1 +bson==0.5.8 cachetools==3.1.0 celery==4.3.0 certifi==2019.3.9 @@ -20,9 +21,11 @@ coloredlogs==10.0 constantly==15.1.0 cryptography==2.6.1 cssselect==1.0.3 +csvalidate==1.1.1 Flask==1.0.2 Flask-APScheduler==1.11.0 Flask-Cors==3.0.7 +Flask-CSV==1.2.0 Flask-RESTful==0.3.7 
flask-restplus==0.12.1 flower==0.9.3 @@ -42,6 +45,7 @@ jsonschema==3.0.1 kombu==4.5.0 lxml==4.3.3 MarkupSafe==1.1.1 +marshmallow==2.19.2 mongoengine==0.17.0 multidict==4.5.2 parsel==1.5.1 diff --git a/crawlab/routes/tasks.py b/crawlab/routes/tasks.py index 5ae7648b..a318803b 100644 --- a/crawlab/routes/tasks.py +++ b/crawlab/routes/tasks.py @@ -1,6 +1,9 @@ import json import os import sys +from time import time + +from flask_csv import send_csv try: from _signal import SIGKILL @@ -178,7 +181,7 @@ class TaskApi(BaseApi): if not col_name: return [] fields = get_spider_col_fields(col_name) - items = db_manager.list(col_name, {'task_id': id}) + items = db_manager.list(col_name, {'task_id': id}, skip=page_size * (page_num - 1), limit=page_size) return { 'status': 'ok', 'fields': jsonify(fields), @@ -213,3 +216,16 @@ class TaskApi(BaseApi): 'id': id, 'status': 'ok', } + + def download_results(self, id: str): + task = db_manager.get('tasks', id=id) + spider = db_manager.get('spiders', id=task['spider_id']) + col_name = spider.get('col') + if not col_name: + return send_csv([], f'results_{col_name}_{round(time())}.csv') + items = db_manager.list(col_name, {'task_id': id}, limit=999999999) + fields = get_spider_col_fields(col_name, task_id=id, limit=999999999) + return send_csv(items, + filename=f'results_{col_name}_{round(time())}.csv', + fields=fields, + encoding='utf-8') diff --git a/crawlab/utils/spider.py b/crawlab/utils/spider.py index 9a2b48df..d8995028 100644 --- a/crawlab/utils/spider.py +++ b/crawlab/utils/spider.py @@ -41,12 +41,17 @@ def get_spider_type(path: str) -> SpiderType: return SpiderType.SCRAPY -def get_spider_col_fields(col_name: str) -> list: +def get_spider_col_fields(col_name: str, task_id: str = None, limit: int = 100) -> list: """ Get spider collection fields :param col_name: collection name + :param task_id: task_id + :param limit: limit """ - items = db_manager.list(col_name, {}, limit=100, sort_key='_id') + filter_ = {} + if task_id is not 
None: + filter_['task_id'] = task_id + items = db_manager.list(col_name, filter_, limit=limit, sort_key='_id') fields = set() for item in items: for k in item.keys(): diff --git a/crawlab/bin/run_worker.py b/crawlab/worker.py similarity index 100% rename from crawlab/bin/run_worker.py rename to crawlab/worker.py diff --git a/docs/.DS_Store b/docs/.DS_Store deleted file mode 100644 index 9cbf3ccd..00000000 Binary files a/docs/.DS_Store and /dev/null differ diff --git a/docs/_book/Architecture/App.html b/docs/Architecture/App.html similarity index 88% rename from docs/_book/Architecture/App.html rename to docs/Architecture/App.html index 9d763304..56e27371 100644 --- a/docs/_book/Architecture/App.html +++ b/docs/Architecture/App.html @@ -397,7 +397,7 @@ @@ -407,10 +407,6 @@ - - - - diff --git a/docs/_book/Architecture/Celery.html b/docs/Architecture/Celery.html similarity index 88% rename from docs/_book/Architecture/Celery.html rename to docs/Architecture/Celery.html index 38ef5b59..2f2bea93 100644 --- a/docs/_book/Architecture/Celery.html +++ b/docs/Architecture/Celery.html @@ -397,7 +397,7 @@ @@ -407,10 +407,6 @@ - - - - diff --git a/docs/_book/Architecture/index.html b/docs/Architecture/index.html similarity index 87% rename from docs/_book/Architecture/index.html rename to docs/Architecture/index.html index de925ce3..c271a5db 100644 --- a/docs/_book/Architecture/index.html +++ b/docs/Architecture/index.html @@ -397,7 +397,7 @@ @@ -407,10 +407,6 @@ - - - - diff --git a/docs/_book/Concept/Deploy.html b/docs/Concept/Deploy.html similarity index 88% rename from docs/_book/Concept/Deploy.html rename to docs/Concept/Deploy.html index 1026cf5a..f788cb78 100644 --- a/docs/_book/Concept/Deploy.html +++ b/docs/Concept/Deploy.html @@ -399,7 +399,7 @@ @@ -409,10 +409,6 @@ - - - - diff --git a/docs/_book/Concept/Node.html b/docs/Concept/Node.html similarity index 89% rename from docs/_book/Concept/Node.html rename to docs/Concept/Node.html index 80428986..4a40be7c 100644 --- 
a/docs/_book/Concept/Node.html +++ b/docs/Concept/Node.html @@ -398,7 +398,7 @@ @@ -408,10 +408,6 @@ - - - - diff --git a/docs/_book/Concept/Spider.html b/docs/Concept/Spider.html similarity index 89% rename from docs/_book/Concept/Spider.html rename to docs/Concept/Spider.html index 3021513d..1bfba7ec 100644 --- a/docs/_book/Concept/Spider.html +++ b/docs/Concept/Spider.html @@ -403,7 +403,7 @@ @@ -413,10 +413,6 @@ - - - - diff --git a/docs/_book/Concept/Task.html b/docs/Concept/Task.html similarity index 88% rename from docs/_book/Concept/Task.html rename to docs/Concept/Task.html index 6b2584cd..d31824cc 100644 --- a/docs/_book/Concept/Task.html +++ b/docs/Concept/Task.html @@ -398,7 +398,7 @@ @@ -408,10 +408,6 @@ - - - - diff --git a/docs/_book/Concept/index.html b/docs/Concept/index.html similarity index 88% rename from docs/_book/Concept/index.html rename to docs/Concept/index.html index 9af87425..92e359c1 100644 --- a/docs/_book/Concept/index.html +++ b/docs/Concept/index.html @@ -397,7 +397,7 @@ @@ -407,10 +407,6 @@ - - - - diff --git a/docs/_book/Examples/index.html b/docs/Examples/index.html similarity index 87% rename from docs/_book/Examples/index.html rename to docs/Examples/index.html index f936aeaa..af2ddff6 100644 --- a/docs/_book/Examples/index.html +++ b/docs/Examples/index.html @@ -397,7 +397,7 @@ @@ -407,10 +407,6 @@ - - - - diff --git a/docs/Functions/FunctionList.md b/docs/Functions/FunctionList.md new file mode 100644 index 00000000..636126fa --- /dev/null +++ b/docs/Functions/FunctionList.md @@ -0,0 +1,61 @@ +# 功能列表 + +类别 | 功能名称 | 已统计 | 备注 +--- | --- | --- | --- +全局 | 打开页面 | Y | _trackPageview +全局 | 切换中英文 | Y +全局 | 允许/禁止统计 | Y +节点 | 刷新 | Y +节点 | 查看 | Y +节点 | 删除 | Y +节点详情 | 保存 | Y +节点详情 | 切换节点 | Y +爬虫 | 部署所有爬虫 | Y +爬虫 | 导入爬虫 | Y +爬虫 | 添加爬虫-可配置爬虫 | Y +爬虫 | 添加爬虫-自定义爬虫 | Y +爬虫 | 刷新 | Y +爬虫 | 查看 | Y +爬虫 | 删除 | Y +爬虫 | 部署 | Y +爬虫 | 运行 | Y +爬虫 | 搜索网站 | Y +爬虫详情 | 切换爬虫 | Y +爬虫详情 | 切换标签 | Y +爬虫详情-概览 | 保存 | Y +爬虫详情-概览 | 部署 | Y +爬虫详情-概览 | 运行 | Y 
+爬虫详情-环境 | 添加 | Y +爬虫详情-环境 | 删除 | Y +爬虫详情-环境 | 保存 | Y +爬虫详情-配置 | 保存 | Y +爬虫详情-配置 | 预览 | Y +爬虫详情-配置 | 提取字段 | Y +爬虫详情-配置 | 运行 | Y +爬虫详情-配置 | 添加字段 | Y +爬虫详情-配置 | 更改字段 | Y +爬虫详情-配置 | 删除字段 | Y +爬虫详情-配置 | 设置详情页URL | Y +任务 | 选择节点 | Y +任务 | 选择爬虫 | Y +任务 | 点击爬虫详情 | Y +任务 | 点击节点详情 | Y +任务 | 搜索 | Y +任务 | 查看 | Y +任务 | 删除 | Y +任务详情 | 切换标签 | Y +任务详情-概览 | 点击爬虫详情 | Y +任务详情-概览 | 点击节点详情 | Y +任务详情-结果 | 下载CSV | Y +定时任务 | 添加 | Y +定时任务 | 修改 | Y +定时任务 | 删除 | Y +定时任务 | 提交 | Y +部署 | 刷新 | Y +网站 | 搜索 | Y +网站 | 选择主类别 | Y +网站 | 选择类别 | Y +网站 | 点击域名 | Y +网站 | 点击爬虫数 | Y +网站 | 点击Robots协议 | N + diff --git a/docs/_book/QuickStart/Installation.html b/docs/QuickStart/Installation.html similarity index 88% rename from docs/_book/QuickStart/Installation.html rename to docs/QuickStart/Installation.html index 49bad4b8..3e23c4a9 100644 --- a/docs/_book/QuickStart/Installation.html +++ b/docs/QuickStart/Installation.html @@ -408,7 +408,7 @@ npm install @@ -418,10 +418,6 @@ npm install - - - - diff --git a/docs/_book/QuickStart/Run.html b/docs/QuickStart/Run.html similarity index 91% rename from docs/_book/QuickStart/Run.html rename to docs/QuickStart/Run.html index 6018fc53..c8beac8f 100644 --- a/docs/_book/QuickStart/Run.html +++ b/docs/QuickStart/Run.html @@ -440,7 +440,7 @@ npm run serve @@ -450,10 +450,6 @@ npm run serve - - - - diff --git a/docs/_book/QuickStart/index.html b/docs/QuickStart/index.html similarity index 88% rename from docs/_book/QuickStart/index.html rename to docs/QuickStart/index.html index 9691dcad..0885c354 100644 --- a/docs/_book/QuickStart/index.html +++ b/docs/QuickStart/index.html @@ -401,7 +401,7 @@ @@ -411,10 +411,6 @@ - - - - diff --git a/docs/_book/img/crawlab-architecture 2.png b/docs/_book/img/crawlab-architecture 2.png deleted file mode 100644 index fcac460f..00000000 Binary files a/docs/_book/img/crawlab-architecture 2.png and /dev/null differ diff --git a/docs/_book/img/screenshot-home.png b/docs/_book/img/screenshot-home.png deleted file mode 100644 index 
650dca47..00000000 Binary files a/docs/_book/img/screenshot-home.png and /dev/null differ diff --git a/docs/_book/img/screenshot-node-detail.png b/docs/_book/img/screenshot-node-detail.png deleted file mode 100644 index 3d323172..00000000 Binary files a/docs/_book/img/screenshot-node-detail.png and /dev/null differ diff --git a/docs/_book/img/screenshot-nodes.png b/docs/_book/img/screenshot-nodes.png deleted file mode 100644 index 88fc7489..00000000 Binary files a/docs/_book/img/screenshot-nodes.png and /dev/null differ diff --git a/docs/_book/img/screenshot-spider-detail-overview.png b/docs/_book/img/screenshot-spider-detail-overview.png deleted file mode 100644 index 8745c451..00000000 Binary files a/docs/_book/img/screenshot-spider-detail-overview.png and /dev/null differ diff --git a/docs/_book/img/screenshot-spider-import.png b/docs/_book/img/screenshot-spider-import.png deleted file mode 100644 index d2ca7c17..00000000 Binary files a/docs/_book/img/screenshot-spider-import.png and /dev/null differ diff --git a/docs/_book/img/screenshot-spiders.png b/docs/_book/img/screenshot-spiders.png deleted file mode 100644 index b23310d7..00000000 Binary files a/docs/_book/img/screenshot-spiders.png and /dev/null differ diff --git a/docs/_book/img/screenshot-task-detail-log.png b/docs/_book/img/screenshot-task-detail-log.png deleted file mode 100644 index 7e3ee387..00000000 Binary files a/docs/_book/img/screenshot-task-detail-log.png and /dev/null differ diff --git a/docs/_book/img/screenshot-task-detail-overview.png b/docs/_book/img/screenshot-task-detail-overview.png deleted file mode 100644 index fbb339e8..00000000 Binary files a/docs/_book/img/screenshot-task-detail-overview.png and /dev/null differ diff --git a/docs/_book/img/screenshot-task-detail-results.png b/docs/_book/img/screenshot-task-detail-results.png deleted file mode 100644 index 8623fb33..00000000 Binary files a/docs/_book/img/screenshot-task-detail-results.png and /dev/null differ diff --git 
a/docs/_book/img/screenshot-tasks.png b/docs/_book/img/screenshot-tasks.png deleted file mode 100644 index ab5585da..00000000 Binary files a/docs/_book/img/screenshot-tasks.png and /dev/null differ diff --git a/docs/_book/gitbook/fonts/fontawesome/FontAwesome.otf b/docs/gitbook/fonts/fontawesome/FontAwesome.otf similarity index 100% rename from docs/_book/gitbook/fonts/fontawesome/FontAwesome.otf rename to docs/gitbook/fonts/fontawesome/FontAwesome.otf diff --git a/docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.eot b/docs/gitbook/fonts/fontawesome/fontawesome-webfont.eot similarity index 100% rename from docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.eot rename to docs/gitbook/fonts/fontawesome/fontawesome-webfont.eot diff --git a/docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.svg b/docs/gitbook/fonts/fontawesome/fontawesome-webfont.svg similarity index 100% rename from docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.svg rename to docs/gitbook/fonts/fontawesome/fontawesome-webfont.svg diff --git a/docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.ttf b/docs/gitbook/fonts/fontawesome/fontawesome-webfont.ttf similarity index 100% rename from docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.ttf rename to docs/gitbook/fonts/fontawesome/fontawesome-webfont.ttf diff --git a/docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.woff b/docs/gitbook/fonts/fontawesome/fontawesome-webfont.woff similarity index 100% rename from docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.woff rename to docs/gitbook/fonts/fontawesome/fontawesome-webfont.woff diff --git a/docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.woff2 b/docs/gitbook/fonts/fontawesome/fontawesome-webfont.woff2 similarity index 100% rename from docs/_book/gitbook/fonts/fontawesome/fontawesome-webfont.woff2 rename to docs/gitbook/fonts/fontawesome/fontawesome-webfont.woff2 diff --git a/docs/_book/gitbook/gitbook-plugin-fontsettings/fontsettings.js 
b/docs/gitbook/gitbook-plugin-fontsettings/fontsettings.js similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-fontsettings/fontsettings.js rename to docs/gitbook/gitbook-plugin-fontsettings/fontsettings.js diff --git a/docs/_book/gitbook/gitbook-plugin-fontsettings/website.css b/docs/gitbook/gitbook-plugin-fontsettings/website.css similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-fontsettings/website.css rename to docs/gitbook/gitbook-plugin-fontsettings/website.css diff --git a/docs/_book/gitbook/gitbook-plugin-highlight/ebook.css b/docs/gitbook/gitbook-plugin-highlight/ebook.css similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-highlight/ebook.css rename to docs/gitbook/gitbook-plugin-highlight/ebook.css diff --git a/docs/_book/gitbook/gitbook-plugin-highlight/website.css b/docs/gitbook/gitbook-plugin-highlight/website.css similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-highlight/website.css rename to docs/gitbook/gitbook-plugin-highlight/website.css diff --git a/docs/_book/gitbook/gitbook-plugin-lunr/lunr.min.js b/docs/gitbook/gitbook-plugin-lunr/lunr.min.js similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-lunr/lunr.min.js rename to docs/gitbook/gitbook-plugin-lunr/lunr.min.js diff --git a/docs/_book/gitbook/gitbook-plugin-lunr/search-lunr.js b/docs/gitbook/gitbook-plugin-lunr/search-lunr.js similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-lunr/search-lunr.js rename to docs/gitbook/gitbook-plugin-lunr/search-lunr.js diff --git a/docs/_book/gitbook/gitbook-plugin-search/lunr.min.js b/docs/gitbook/gitbook-plugin-search/lunr.min.js similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-search/lunr.min.js rename to docs/gitbook/gitbook-plugin-search/lunr.min.js diff --git a/docs/_book/gitbook/gitbook-plugin-search/search-engine.js b/docs/gitbook/gitbook-plugin-search/search-engine.js similarity index 100% rename from 
docs/_book/gitbook/gitbook-plugin-search/search-engine.js rename to docs/gitbook/gitbook-plugin-search/search-engine.js diff --git a/docs/_book/gitbook/gitbook-plugin-search/search.css b/docs/gitbook/gitbook-plugin-search/search.css similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-search/search.css rename to docs/gitbook/gitbook-plugin-search/search.css diff --git a/docs/_book/gitbook/gitbook-plugin-search/search.js b/docs/gitbook/gitbook-plugin-search/search.js similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-search/search.js rename to docs/gitbook/gitbook-plugin-search/search.js diff --git a/docs/_book/gitbook/gitbook-plugin-sharing/buttons.js b/docs/gitbook/gitbook-plugin-sharing/buttons.js similarity index 100% rename from docs/_book/gitbook/gitbook-plugin-sharing/buttons.js rename to docs/gitbook/gitbook-plugin-sharing/buttons.js diff --git a/docs/_book/gitbook/gitbook.js b/docs/gitbook/gitbook.js similarity index 100% rename from docs/_book/gitbook/gitbook.js rename to docs/gitbook/gitbook.js diff --git a/docs/_book/gitbook/images/apple-touch-icon-precomposed-152.png b/docs/gitbook/images/apple-touch-icon-precomposed-152.png similarity index 100% rename from docs/_book/gitbook/images/apple-touch-icon-precomposed-152.png rename to docs/gitbook/images/apple-touch-icon-precomposed-152.png diff --git a/docs/_book/gitbook/images/favicon.ico b/docs/gitbook/images/favicon.ico similarity index 100% rename from docs/_book/gitbook/images/favicon.ico rename to docs/gitbook/images/favicon.ico diff --git a/docs/_book/gitbook/style.css b/docs/gitbook/style.css similarity index 100% rename from docs/_book/gitbook/style.css rename to docs/gitbook/style.css diff --git a/docs/_book/gitbook/theme.js b/docs/gitbook/theme.js similarity index 100% rename from docs/_book/gitbook/theme.js rename to docs/gitbook/theme.js diff --git a/docs/_book/index.html b/docs/index.html similarity index 94% rename from docs/_book/index.html rename to 
docs/index.html index 02bbe10d..c9f4837f 100644 --- a/docs/_book/index.html +++ b/docs/index.html @@ -548,7 +548,7 @@ MONGO_DB = 'crawlab_test' @@ -558,10 +558,6 @@ MONGO_DB = 'crawlab_test' - - - - diff --git a/docs/_book/search_index.json b/docs/search_index.json similarity index 100% rename from docs/_book/search_index.json rename to docs/search_index.json diff --git a/frontend/.env.production b/frontend/.env.production index a8b89254..58fe7f33 100644 --- a/frontend/.env.production +++ b/frontend/.env.production @@ -1,2 +1,2 @@ NODE_ENV='production' -VUE_APP_BASE_URL=http://crawlab.cn:8000/api +VUE_APP_BASE_URL='http://114.67.75.98:8000/api' diff --git a/frontend/.env.test b/frontend/.env.test new file mode 100644 index 00000000..acff44de --- /dev/null +++ b/frontend/.env.test @@ -0,0 +1,2 @@ +NODE_ENV='test' +VUE_APP_BASE_URL='http://114.67.75.98:8000/api' diff --git a/frontend/package.json b/frontend/package.json index 701463a3..e751c9f9 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "crawlab", - "version": "0.2.1", + "version": "0.2.3", "private": true, "scripts": { "serve": "cross-env NODE_ENV=development vue-cli-service serve --ip=0.0.0.0", diff --git a/frontend/src/App.vue b/frontend/src/App.vue index 975a627f..06a41cce 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -29,9 +29,11 @@ export default { localStorage.setItem('useStats', value) document.querySelector('.el-message__closeBtn').click() if (value === 1) { - window._hmt.push(['_trackPageview', '/allow_stats']) + this.$st.sendPv('/allow_stats') + this.$st.sendEv('全局', '允许/禁止统计', 'value', 'allow') } else { - window._hmt.push(['_trackPageview', '/disallow_stats']) + this.$st.sendPv('/disallow_stats') + this.$st.sendEv('全局', '允许/禁止统计', 'value', 'disallow') } } diff --git a/frontend/src/api/request.js b/frontend/src/api/request.js index 4aaa0c04..1bc4d57e 100644 --- a/frontend/src/api/request.js +++ b/frontend/src/api/request.js @@ -1,6 +1,6 @@ 
import axios from 'axios' -let baseUrl = process.env.VUE_APP_API_BASE_URL ? process.env.VUE_APP_API_BASE_URL : 'http://localhost:8000/api' +let baseUrl = process.env.VUE_APP_BASE_URL ? process.env.VUE_APP_BASE_URL : 'http://localhost:8000/api' // console.log(process.env) // const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api' diff --git a/frontend/src/components/Config/ConfigList.vue b/frontend/src/components/Config/ConfigList.vue index f147cdc0..7bef92d5 100644 --- a/frontend/src/components/Config/ConfigList.vue +++ b/frontend/src/components/Config/ConfigList.vue @@ -163,6 +163,7 @@ export default { this.spiderForm.crawl_type = value }, onSave () { + this.$st.sendEv('爬虫详情-配置', '保存') return new Promise((resolve, reject) => { this.saveLoading = true this.$store.dispatch('spider/updateSpiderFields') @@ -207,6 +208,7 @@ export default { .finally(() => { this.previewLoading = false }) + this.$st.sendEv('爬虫详情-配置', '预览') }) }, onCrawl () { @@ -219,6 +221,7 @@ export default { .then(() => { this.$message.success(this.$t(`Spider task has been scheduled`)) }) + this.$st.sendEv('爬虫详情-配置', '运行') }) }, onExtractFields () { @@ -239,6 +242,7 @@ export default { .finally(() => { this.extractFieldsLoading = false }) + this.$st.sendEv('爬虫详情-配置', '提取字段') }) }, renderHeader (h, { column }) { diff --git a/frontend/src/components/Environment/EnvironmentList.vue b/frontend/src/components/Environment/EnvironmentList.vue index 9e0b1413..a46432c0 100644 --- a/frontend/src/components/Environment/EnvironmentList.vue +++ b/frontend/src/components/Environment/EnvironmentList.vue @@ -49,10 +49,11 @@ export default { name: '', value: '' }) - console.log(this.spiderForm) + this.$st.sendEv('爬虫详情-环境', '添加') }, deleteEnv (index) { this.spiderForm.envs.splice(index, 1) + this.$st.sendEv('爬虫详情-环境', '删除') }, save () { this.$store.dispatch('spider/updateSpiderEnvs') @@ -62,6 +63,7 @@ export default { .catch(error => { this.$message.error(error) }) + this.$st.sendEv('爬虫详情-环境', '保存') } 
} } diff --git a/frontend/src/components/InfoView/NodeInfoView.vue b/frontend/src/components/InfoView/NodeInfoView.vue index 69a3601a..bc6e687a 100644 --- a/frontend/src/components/InfoView/NodeInfoView.vue +++ b/frontend/src/components/InfoView/NodeInfoView.vue @@ -55,6 +55,7 @@ export default { }) } }) + this.$st.sendEv('节点详情', '保存') } } } diff --git a/frontend/src/components/InfoView/SpiderInfoView.vue b/frontend/src/components/InfoView/SpiderInfoView.vue index e896bbc0..d095f81a 100644 --- a/frontend/src/components/InfoView/SpiderInfoView.vue +++ b/frontend/src/components/InfoView/SpiderInfoView.vue @@ -127,6 +127,7 @@ export default { .then(() => { this.$message.success(this.$t(`Spider task has been scheduled`)) }) + this.$st.sendEv('爬虫详情-概览', '运行') }) } }) @@ -149,6 +150,7 @@ export default { .then(() => { this.$message.success(this.$t(`Spider has been deployed`)) }) + this.$st.sendEv('爬虫详情-概览', '部署') }) } }) @@ -165,6 +167,7 @@ export default { }) } }) + this.$st.sendEv('爬虫详情-概览', '保存') }, fetchSiteSuggestions (keyword, callback) { this.$request.get('/sites', { diff --git a/frontend/src/components/Overview/TaskOverview.vue b/frontend/src/components/Overview/TaskOverview.vue index d306bb41..603f28ef 100644 --- a/frontend/src/components/Overview/TaskOverview.vue +++ b/frontend/src/components/Overview/TaskOverview.vue @@ -52,9 +52,11 @@ export default { methods: { onClickNodeTitle () { this.$router.push(`/nodes/${this.nodeForm._id}`) + this.$st.sendEv('任务详情-概览', '点击节点详情') }, onClickSpiderTitle () { this.$router.push(`/spiders/${this.spiderForm._id}`) + this.$st.sendEv('任务详情-概览', '点击爬虫详情') } }, created () { diff --git a/frontend/src/components/TableView/FieldsTableView.vue b/frontend/src/components/TableView/FieldsTableView.vue index ab2376f7..836a8f49 100644 --- a/frontend/src/components/TableView/FieldsTableView.vue +++ b/frontend/src/components/TableView/FieldsTableView.vue @@ -109,14 +109,17 @@ export default { type: 'css', extract_type: 'text' }) + 
this.$st.sendEv('爬虫详情-配置', '添加字段') }, deleteField (index) { this.fields.splice(index, 1) + this.$st.sendEv('爬虫详情-配置', '删除字段') }, onNameChange (row) { if (this.fields.filter(d => d.name === row.name).length > 1) { this.$message.error(this.$t(`Duplicated field names for ${row.name}`)) } + this.$st.sendEv('爬虫详情-配置', '更改字段') }, onCheck (row) { this.fields.forEach(d => { @@ -124,6 +127,7 @@ export default { this.$set(d, 'is_detail', false) } }) + this.$st.sendEv('爬虫详情-配置', '设置详情页URL') } } } diff --git a/frontend/src/components/TableView/GeneralTableView.vue b/frontend/src/components/TableView/GeneralTableView.vue index 97bd3b38..25cd1923 100644 --- a/frontend/src/components/TableView/GeneralTableView.vue +++ b/frontend/src/components/TableView/GeneralTableView.vue @@ -58,18 +58,18 @@ export default { computed: { filteredData () { return this.data - .map(d => d) - .filter((d, index) => { - // pagination - const pageNum = this.pageNum - const pageSize = this.pageSize - return (pageSize * (pageNum - 1) <= index) && (index < pageSize * pageNum) - }) + // .map(d => d) + // .filter((d, index) => { + // // pagination + // const pageNum = this.pageNum + // const pageSize = this.pageSize + // return (pageSize * (pageNum - 1) <= index) && (index < pageSize * pageNum) + // }) } }, methods: { onPageChange () { - this.$emit('page-change') + this.$emit('page-change', { pageNum: this.pageNum, pageSize: this.pageSize }) } } } diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index 35d7eaf8..18813c3a 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -56,6 +56,8 @@ export default { 'Stop': '停止', 'Preview': '预览', 'Extract Fields': '提取字段', + 'Download': '下载', + 'Download CSV': '下载CSV', // 主页 'Total Tasks': '总任务数', diff --git a/frontend/src/main.js b/frontend/src/main.js index 721f0f63..a3008fae 100644 --- a/frontend/src/main.js +++ b/frontend/src/main.js @@ -12,8 +12,6 @@ import 'font-awesome/scss/font-awesome.scss'// FontAwesome import 
'codemirror/lib/codemirror.css' -// import ba from 'vue-ba' - import App from './App' import store from './store' import router from './router' @@ -23,6 +21,7 @@ import '@/permission' // permission control import request from './api/request' import i18n from './i18n' +import utils from './utils' Vue.use(ElementUI, { locale }) @@ -45,6 +44,12 @@ window._hmt = window._hmt || []; // inject request api Vue.prototype.$request = request +// inject utils +Vue.prototype.$utils = utils + +// inject stats +Vue.prototype.$st = utils.stats + const app = new Vue({ el: '#app', i18n, diff --git a/frontend/src/router/index.js b/frontend/src/router/index.js index 4eddc102..0bfae74f 100644 --- a/frontend/src/router/index.js +++ b/frontend/src/router/index.js @@ -1,6 +1,8 @@ import Vue from 'vue' import Router from 'vue-router' +import stats from '../utils/stats' + /* Layout */ import Layout from '../views/layout/Layout' @@ -224,9 +226,7 @@ router.beforeEach((to, from, next) => { router.afterEach((to, from, next) => { if (to.path) { - if (localStorage.getItem('useStats') !== '0') { - window._hmt.push(['_trackPageview', to.path]) - } + stats.sendPv(to.path) } }) diff --git a/frontend/src/store/modules/task.js b/frontend/src/store/modules/task.js index 02a23841..906f9f69 100644 --- a/frontend/src/store/modules/task.js +++ b/frontend/src/store/modules/task.js @@ -18,7 +18,7 @@ const state = { pageNum: 0, pageSize: 10, // results - resultsPageNum: 0, + resultsPageNum: 1, resultsPageSize: 10 } diff --git a/frontend/src/utils/index.js b/frontend/src/utils/index.js new file mode 100644 index 00000000..d5a118af --- /dev/null +++ b/frontend/src/utils/index.js @@ -0,0 +1,5 @@ +import stats from './stats' + +export default { + stats +} diff --git a/frontend/src/utils/stats.js b/frontend/src/utils/stats.js new file mode 100644 index 00000000..f2fa0186 --- /dev/null +++ b/frontend/src/utils/stats.js @@ -0,0 +1,12 @@ +export default { + sendPv (page) { + if (localStorage.getItem('useStats') !== 
'0') { + window._hmt.push(['_trackPageview', page]) + } + }, + sendEv (category, eventName, optLabel, optValue) { + if (localStorage.getItem('useStats') !== '0') { + window._hmt.push(['_trackEvent', category, eventName, optLabel, optValue]) + } + } +} diff --git a/frontend/src/views/deploy/DeployList.vue b/frontend/src/views/deploy/DeployList.vue index 5cc6d118..16c8d319 100644 --- a/frontend/src/views/deploy/DeployList.vue +++ b/frontend/src/views/deploy/DeployList.vue @@ -134,6 +134,7 @@ export default { }, onRefresh () { this.$store.dispatch('deploy/getDeployList') + this.$st.sendEv('部署', '刷新') }, onView (row) { this.$router.push(`/deploys/${row._id}`) diff --git a/frontend/src/views/layout/components/Navbar.vue b/frontend/src/views/layout/components/Navbar.vue index 5b1f97f8..eae27a91 100644 --- a/frontend/src/views/layout/components/Navbar.vue +++ b/frontend/src/views/layout/components/Navbar.vue @@ -57,6 +57,8 @@ export default { window.localStorage.setItem('lang', lang) this.$i18n.locale = lang this.$store.commit('lang/SET_LANG', lang) + + this.$st.sendEv('全局', '切换中英文', 'lang', lang) } } } diff --git a/frontend/src/views/node/NodeDetail.vue b/frontend/src/views/node/NodeDetail.vue index e95eb472..7de7a677 100644 --- a/frontend/src/views/node/NodeDetail.vue +++ b/frontend/src/views/node/NodeDetail.vue @@ -47,6 +47,7 @@ export default { }, onNodeChange (id) { this.$router.push(`/nodes/${id}`) + this.$st.sendEv('节点详情', '切换节点') } }, created () { diff --git a/frontend/src/views/node/NodeList.vue b/frontend/src/views/node/NodeList.vue index 35dc7d18..be916f44 100644 --- a/frontend/src/views/node/NodeList.vue +++ b/frontend/src/views/node/NodeList.vue @@ -123,8 +123,7 @@ export default { } }, methods: { - onSearch (value) { - console.log(value) + onSearch () { }, onAdd () { this.$store.commit('node/SET_NODE_FORM', []) @@ -133,6 +132,7 @@ export default { }, onRefresh () { this.$store.dispatch('node/getNodeList') + this.$st.sendEv('节点', '刷新') }, onSubmit () { const 
vm = this @@ -159,7 +159,6 @@ export default { this.dialogVisible = false }, onEdit (row) { - console.log(row) this.isEditMode = true this.$store.commit('node/SET_NODE_FORM', row) this.dialogVisible = true @@ -177,10 +176,13 @@ export default { message: 'Deleted successfully' }) }) + this.$st.sendEv('节点', '删除', 'id', row._id) }) }, onView (row) { this.$router.push(`/nodes/${row._id}`) + + this.$st.sendEv('节点', '查看', 'id', row._id) }, onPageChange () { this.$store.dispatch('node/getNodeList') diff --git a/frontend/src/views/schedule/ScheduleList.vue b/frontend/src/views/schedule/ScheduleList.vue index e9fe0bce..7a1fc9ba 100644 --- a/frontend/src/views/schedule/ScheduleList.vue +++ b/frontend/src/views/schedule/ScheduleList.vue @@ -160,6 +160,7 @@ export default { this.isEdit = false this.dialogVisible = true this.$store.commit('schedule/SET_SCHEDULE_FORM', {}) + this.$st.sendEv('定时任务', '添加') }, onAddSubmit () { this.$refs.scheduleForm.validate(res => { @@ -179,6 +180,7 @@ export default { }) } }) + this.$st.sendEv('定时任务', '提交') }, isShowRun () { }, @@ -186,6 +188,7 @@ export default { this.$store.commit('schedule/SET_SCHEDULE_FORM', row) this.dialogVisible = true this.isEdit = true + this.$st.sendEv('定时任务', '修改', 'id', row._id) }, onRemove (row) { this.$store.dispatch('schedule/removeSchedule', row._id) @@ -195,6 +198,7 @@ export default { this.$message.success(`Schedule "${row.name}" has been removed`) }, 100) }) + this.$st.sendEv('定时任务', '删除', 'id', row._id) }, onCrawl () { } diff --git a/frontend/src/views/site/SiteList.vue b/frontend/src/views/site/SiteList.vue index 98fa1365..450ec16b 100644 --- a/frontend/src/views/site/SiteList.vue +++ b/frontend/src/views/site/SiteList.vue @@ -8,11 +8,11 @@ v-model="keyword"> + clearable filterable @change="onSelectMainCategory"> + clearable filterable @change="onSelectCategory"> @@ -84,7 +85,7 @@