diff --git a/README-zh.md b/README-zh.md index 701b314a..a4f19b13 100644 --- a/README-zh.md +++ b/README-zh.md @@ -62,6 +62,10 @@ npm run serve  +## 使用流程 + + + ## 架构 Crawlab的架构跟Celery非常相似,但是加入了包括前端、爬虫、Flower在内的额外模块,以支持爬虫管理的功能。 diff --git a/crawlab/config.py b/crawlab/config.py index 7467582e..4ede83b7 100644 --- a/crawlab/config.py +++ b/crawlab/config.py @@ -2,7 +2,7 @@ # 爬虫源码路径 PROJECT_SOURCE_FILE_FOLDER = '../spiders' # 配置python虚拟环境的路径 -PYTHON_ENV_PATH="/Users/chennan/Desktop/2019/env/bin/python" +PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python' # 爬虫部署路径 PROJECT_DEPLOY_FILE_FOLDER = '../deployfile' @@ -31,4 +31,3 @@ MONGO_DB = 'crawlab_test' DEBUG = True FLASK_HOST = '127.0.0.1' FLASK_PORT = 8000 - diff --git a/crawlab/requirements.txt b/crawlab/requirements.txt new file mode 100644 index 00000000..599ffcd2 --- /dev/null +++ b/crawlab/requirements.txt @@ -0,0 +1,31 @@ +amqp==2.4.2 +aniso8601==6.0.0 +APScheduler==3.6.0 +Babel==2.6.0 +billiard==3.6.0.0 +celery==4.3.0 +certifi==2019.3.9 +chardet==3.0.4 +Click==7.0 +coloredlogs==10.0 +Flask==1.0.2 +Flask-Cors==3.0.7 +Flask-RESTful==0.3.7 +flower==0.9.3 +humanfriendly==4.18 +idna==2.8 +itsdangerous==1.1.0 +Jinja2==2.10 +kombu==4.5.0 +MarkupSafe==1.1.1 +mongoengine==0.17.0 +pymongo==3.7.2 +pytz==2018.9 +redis==3.2.1 +requests==2.21.0 +six==1.12.0 +tornado==5.1.1 +tzlocal==1.5.1 +urllib3==1.24.1 +vine==1.3.0 +Werkzeug==0.15.2 diff --git a/crawlab/routes/tasks.py b/crawlab/routes/tasks.py index 7d0a6376..769b0558 100644 --- a/crawlab/routes/tasks.py +++ b/crawlab/routes/tasks.py @@ -10,6 +10,7 @@ from utils import jsonify from utils.spider import get_spider_col_fields from utils.log import other + class TaskApi(BaseApi): col_name = 'tasks' diff --git a/docs/img/Crawlab用户使用流程图.png b/docs/img/Crawlab用户使用流程图.png deleted file mode 100644 index ea1ae46f..00000000 Binary files a/docs/img/Crawlab用户使用流程图.png and /dev/null differ diff --git a/frontend/src/components/BackToTop/index 2.vue b/frontend/src/components/BackToTop/index 2.vue deleted file mode 100644 index 39977178..00000000 --- a/frontend/src/components/BackToTop/index 2.vue +++ /dev/null @@ -1,116 +0,0 @@ - - - - - 回到顶部 - - - - - - - - - - - diff --git a/frontend/src/components/Breadcrumb/index 2.vue b/frontend/src/components/Breadcrumb/index 2.vue deleted file mode 100644 index 87a389b8..00000000 --- a/frontend/src/components/Breadcrumb/index 2.vue +++ /dev/null @@ -1,86 +0,0 @@ - - - - - {{$t(item.meta.title) }} - {{ $t(item.meta.title) }} - - - - - - - - diff --git a/frontend/src/components/Charts/keyboard 2.vue b/frontend/src/components/Charts/keyboard 2.vue deleted file mode 100644 index 857b26ae..00000000 --- a/frontend/src/components/Charts/keyboard 2.vue +++ /dev/null @@ -1,156 +0,0 @@ - - - - - diff --git a/frontend/src/components/Charts/mixins/resize 2.js b/frontend/src/components/Charts/mixins/resize 2.js deleted file mode 100644 index c4c432f4..00000000 --- a/frontend/src/components/Charts/mixins/resize 2.js +++ /dev/null @@ -1,32 +0,0 @@ -import { debounce } from '@/utils' - -export default { - data() { - return { - sidebarElm: null - } - }, - mounted() { - this.__resizeHandler = debounce(() => { - if (this.chart) { - this.chart.resize() - } - }, 100) - window.addEventListener('resize', this.__resizeHandler) - - this.sidebarElm = document.getElementsByClassName('sidebar-container')[0] - this.sidebarElm && this.sidebarElm.addEventListener('transitionend', this.sidebarResizeHandler) - }, - beforeDestroy() { - window.removeEventListener('resize', this.__resizeHandler) - - this.sidebarElm && this.sidebarElm.removeEventListener('transitionend', this.sidebarResizeHandler) - }, - methods: { - sidebarResizeHandler(e) { - if (e.propertyName === 'width') { - this.__resizeHandler() - } - } - } -} diff --git a/frontend/src/components/Common/DialogView 2.vue b/frontend/src/components/Common/DialogView 2.vue deleted file mode 100644 index 7976171e..00000000 --- a/frontend/src/components/Common/DialogView 2.vue +++ /dev/null @@ -1,162 +0,0 @@ - - - - - {{message}} - - - - - - - - - - - - - - Cancel - Confirm - - - - - - - - - diff --git a/frontend/src/components/DndList/index 2.vue b/frontend/src/components/DndList/index 2.vue deleted file mode 100644 index 7587daa7..00000000 --- a/frontend/src/components/DndList/index 2.vue +++ /dev/null @@ -1,157 +0,0 @@ - - - - {{ list1Title }} - - - {{ element.id }}[{{ element.author }}] {{ element.title }} - - - - - - - - - - {{ list2Title }} - - - {{ element.id }} [{{ element.author }}] {{ element.title }} - - - - - - - - - diff --git a/frontend/src/components/DragSelect/index 2.vue b/frontend/src/components/DragSelect/index 2.vue deleted file mode 100644 index 513be006..00000000 --- a/frontend/src/components/DragSelect/index 2.vue +++ /dev/null @@ -1,61 +0,0 @@ - - - - - - - - - diff --git a/frontend/src/components/Dropzone/index 2.vue b/frontend/src/components/Dropzone/index 2.vue deleted file mode 100644 index 15d811d9..00000000 --- a/frontend/src/components/Dropzone/index 2.vue +++ /dev/null @@ -1,297 +0,0 @@ - - - - - - - - - diff --git a/frontend/src/components/ErrorLog/index 2.vue b/frontend/src/components/ErrorLog/index 2.vue deleted file mode 100644 index c46cf110..00000000 --- a/frontend/src/components/ErrorLog/index 2.vue +++ /dev/null @@ -1,63 +0,0 @@ - - - - - - - - - - - - - - Msg: - {{ scope.row.err.message }} - - - - Info: - {{ scope.row.vm.$vnode.tag }} error in {{ scope.row.info }} - - - - Url: - {{ scope.row.url }} - - - - - - {{ scope.row.err.stack }} - - - - - - - - - - - diff --git a/frontend/src/components/FileList/FileList 2.vue b/frontend/src/components/FileList/FileList 2.vue deleted file mode 100644 index 20a43bec..00000000 --- a/frontend/src/components/FileList/FileList 2.vue +++ /dev/null @@ -1,200 +0,0 @@ - - - - - - - - {{currentPath}} - - - - - - - - {{$t('Choose Folder')}} - - - - - - - - - - - - - - {{item.path}} - - - - - - - - - - - - - - diff --git a/frontend/src/components/GithubCorner/index 2.vue b/frontend/src/components/GithubCorner/index 2.vue deleted file mode 100644 index b5da8874..00000000 --- a/frontend/src/components/GithubCorner/index 2.vue +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - - - - diff --git a/frontend/src/components/Hamburger/index 2.vue b/frontend/src/components/Hamburger/index 2.vue deleted file mode 100644 index 220d67ec..00000000 --- a/frontend/src/components/Hamburger/index 2.vue +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - - - - diff --git a/frontend/src/components/HeaderSearch/index 2.vue b/frontend/src/components/HeaderSearch/index 2.vue deleted file mode 100644 index ab0d556a..00000000 --- a/frontend/src/components/HeaderSearch/index 2.vue +++ /dev/null @@ -1,187 +0,0 @@ - - - - - - - - - - - - diff --git a/frontend/src/components/ImageCropper/index 2.vue b/frontend/src/components/ImageCropper/index 2.vue deleted file mode 100644 index 04b1ede9..00000000 --- a/frontend/src/components/ImageCropper/index 2.vue +++ /dev/null @@ -1,1420 +0,0 @@ - - - - - - - - - - - - - - - {{ lang.hint }} - {{ lang.noSupported }} - - - - {{ errorMsg }} - - - {{ lang.btn.off }} - - - - - - - - - - - - - - - - - - - - ↺ - ↻ - - - - - - - {{ lang.preview }} - - - - {{ lang.preview }} - - - - - - {{ lang.btn.back }} - {{ lang.btn.save }} - - - - - - {{ lang.loading }} - - - - - {{ errorMsg }} - - - {{ lang.success }} - - - - {{ lang.btn.back }} - {{ lang.btn.close }} - - - - - - - - - - - - diff --git a/frontend/src/components/InfoView/NodeInfoView 2.vue b/frontend/src/components/InfoView/NodeInfoView 2.vue deleted file mode 100644 index 69a3601a..00000000 --- a/frontend/src/components/InfoView/NodeInfoView 2.vue +++ /dev/null @@ -1,73 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - {{$t('Save')}} - - - - - - - diff --git a/frontend/src/components/JsonEditor/index 2.vue b/frontend/src/components/JsonEditor/index 2.vue deleted file mode 100644 index d68b8c05..00000000 --- a/frontend/src/components/JsonEditor/index 2.vue +++ /dev/null @@ -1,72 +0,0 @@ - - - - - - - - - diff --git a/frontend/src/components/Kanban/index 2.vue b/frontend/src/components/Kanban/index 2.vue deleted file mode 100644 index dadeb648..00000000 --- a/frontend/src/components/Kanban/index 2.vue +++ /dev/null @@ -1,89 +0,0 @@ - - - - {{ headerText }} - - - - {{ element.name }} {{ element.id }} - - - - - - - diff --git a/frontend/src/components/LangSelect/index 2.vue b/frontend/src/components/LangSelect/index 2.vue deleted file mode 100644 index fea7ba71..00000000 --- a/frontend/src/components/LangSelect/index 2.vue +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - 中文 - English - Español - - - - - diff --git a/frontend/src/components/MDinput/index 2.vue b/frontend/src/components/MDinput/index 2.vue deleted file mode 100644 index 7ede73db..00000000 --- a/frontend/src/components/MDinput/index 2.vue +++ /dev/null @@ -1,354 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/frontend/src/components/MarkdownEditor/defaultOptions 2.js b/frontend/src/components/MarkdownEditor/defaultOptions 2.js deleted file mode 100644 index 303aa13d..00000000 --- a/frontend/src/components/MarkdownEditor/defaultOptions 2.js +++ /dev/null @@ -1,31 +0,0 @@ -// doc: https://nhnent.github.io/tui.editor/api/latest/ToastUIEditor.html#ToastUIEditor -export default { - minHeight: '200px', - previewStyle: 'vertical', - useCommandShortcut: true, - useDefaultHTMLSanitizer: true, - usageStatistics: false, - hideModeSwitch: false, - toolbarItems: [ - 'heading', - 'bold', - 'italic', - 'strike', - 'divider', - 'hr', - 'quote', - 'divider', - 'ul', - 'ol', - 'task', - 'indent', - 'outdent', - 'divider', - 'table', - 'image', - 'link', - 'divider', - 'code', - 'codeblock' - ] -} diff --git a/frontend/src/components/Overview/NodeOverview 2.vue b/frontend/src/components/Overview/NodeOverview 2.vue deleted file mode 100644 index e6412343..00000000 --- a/frontend/src/components/Overview/NodeOverview 2.vue +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/frontend/src/components/Pagination/index 2.vue b/frontend/src/components/Pagination/index 2.vue deleted file mode 100644 index 8d104e84..00000000 --- a/frontend/src/components/Pagination/index 2.vue +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - - - - diff --git a/frontend/src/components/PanThumb/index 2.vue b/frontend/src/components/PanThumb/index 2.vue deleted file mode 100644 index ec549f43..00000000 --- a/frontend/src/components/PanThumb/index 2.vue +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - - - - - - - - diff --git a/frontend/src/components/Screenfull/index 2.vue b/frontend/src/components/Screenfull/index 2.vue deleted file mode 100644 index 479bf3e3..00000000 --- a/frontend/src/components/Screenfull/index 2.vue +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - - diff --git a/frontend/src/components/ScrollPane/index 2.vue b/frontend/src/components/ScrollPane/index 2.vue deleted file mode 100644 index a55f799f..00000000 --- a/frontend/src/components/ScrollPane/index 2.vue +++ /dev/null @@ -1,81 +0,0 @@ - - - - - - - - - diff --git a/frontend/src/components/Share/dropdownMenu 2.vue b/frontend/src/components/Share/dropdownMenu 2.vue deleted file mode 100644 index f7b1dd2c..00000000 --- a/frontend/src/components/Share/dropdownMenu 2.vue +++ /dev/null @@ -1,100 +0,0 @@ - - - - {{ title }} - - {{ item.title }} - {{ item.title }} - - - - - - - - diff --git a/frontend/src/components/SizeSelect/index 2.vue b/frontend/src/components/SizeSelect/index 2.vue deleted file mode 100644 index 6d3cd43a..00000000 --- a/frontend/src/components/SizeSelect/index 2.vue +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - - {{ - item.label }} - - - - - diff --git a/frontend/src/components/Sticky/index 2.vue b/frontend/src/components/Sticky/index 2.vue deleted file mode 100644 index 5624a989..00000000 --- a/frontend/src/components/Sticky/index 2.vue +++ /dev/null @@ -1,88 +0,0 @@ - - - - - sticky - - - - - - diff --git a/frontend/src/components/SvgIcon/index 2.vue b/frontend/src/components/SvgIcon/index 2.vue deleted file mode 100644 index 56972328..00000000 --- a/frontend/src/components/SvgIcon/index 2.vue +++ /dev/null @@ -1,43 +0,0 @@ - - - - - - - - - diff --git a/frontend/src/components/TableView/DeployTableView 2.vue b/frontend/src/components/TableView/DeployTableView 2.vue deleted file mode 100644 index 3b6666bf..00000000 --- a/frontend/src/components/TableView/DeployTableView 2.vue +++ /dev/null @@ -1,73 +0,0 @@ - - - - {{title}} - - - - - - - {{scope.row.node_id}} - - - - - {{scope.row.spider_name}} - - - - - - - - - - - diff --git a/frontend/src/components/TextHoverEffect/Mallki 2.vue b/frontend/src/components/TextHoverEffect/Mallki 2.vue deleted file mode 100644 index 4ea29fc2..00000000 --- a/frontend/src/components/TextHoverEffect/Mallki 2.vue +++ /dev/null @@ -1,113 +0,0 @@ - - - {{ text }} - - - - - - - - diff --git a/frontend/src/components/ThemePicker/index 2.vue b/frontend/src/components/ThemePicker/index 2.vue deleted file mode 100644 index 332b07e7..00000000 --- a/frontend/src/components/ThemePicker/index 2.vue +++ /dev/null @@ -1,148 +0,0 @@ - - - - - - - diff --git a/frontend/src/components/TreeTable/eval 2.js b/frontend/src/components/TreeTable/eval 2.js deleted file mode 100644 index d9b89e1c..00000000 --- a/frontend/src/components/TreeTable/eval 2.js +++ /dev/null @@ -1,29 +0,0 @@ -/** -* @Author: jianglei -* @Date: 2017-10-12 12:06:49 -*/ -'use strict' -import Vue from 'vue' -export default function treeToArray(data, expandAll, parent = null, level = null) { - let tmp = [] - Array.from(data).forEach(function(record) { - if (record._expanded === undefined) { - Vue.set(record, '_expanded', expandAll) - } - let _level = 1 - if (level !== undefined && level !== null) { - _level = level + 1 - } - Vue.set(record, '_level', _level) - // 如果有父元素 - if (parent) { - Vue.set(record, 'parent', parent) - } - tmp.push(record) - if (record.children && record.children.length > 0) { - const children = treeToArray(record.children, expandAll, record, _level) - tmp = tmp.concat(children) - } - }) - return tmp -} diff --git a/frontend/src/components/Upload/singleImage 2.vue b/frontend/src/components/Upload/singleImage 2.vue deleted file mode 100644 index 291e4cf1..00000000 --- a/frontend/src/components/Upload/singleImage 2.vue +++ /dev/null @@ -1,132 +0,0 @@ - - - - - 将文件拖到此处,或点击上传 - - - - - - - - - - - - - - - diff --git a/frontend/src/components/Upload/singleImage2.vue b/frontend/src/components/Upload/singleImage2.vue deleted file mode 100644 index cf4dc0b7..00000000 --- a/frontend/src/components/Upload/singleImage2.vue +++ /dev/null @@ -1,127 +0,0 @@ - - - - - Drag或点击上传 - - - - - - - - - - - - - - - diff --git a/frontend/src/components/Upload/singleImage3.vue b/frontend/src/components/Upload/singleImage3.vue deleted file mode 100644 index 2cce98da..00000000 --- a/frontend/src/components/Upload/singleImage3.vue +++ /dev/null @@ -1,154 +0,0 @@ - - - - - 将文件拖到此处,或点击上传 - - - - - - - - - - - - - - - - - - - - - - - diff --git a/frontend/src/components/UploadExcel/index 2.vue b/frontend/src/components/UploadExcel/index 2.vue deleted file mode 100644 index a6b8dbce..00000000 --- a/frontend/src/components/UploadExcel/index 2.vue +++ /dev/null @@ -1,136 +0,0 @@ - - - - - Drop excel file here or - Browse - - - - - - - diff --git a/frontend/src/i18n/en 2.js b/frontend/src/i18n/en 2.js deleted file mode 100644 index b1c6ea43..00000000 --- a/frontend/src/i18n/en 2.js +++ /dev/null @@ -1 +0,0 @@ -export default {} diff --git a/frontend/src/i18n/zh.js b/frontend/src/i18n/zh.js index b044e7ba..25176002 100644 --- a/frontend/src/i18n/zh.js +++ b/frontend/src/i18n/zh.js @@ -126,5 +126,5 @@ export default { 'Node info has been saved successfully': '节点信息已成功保存', 'Are you sure to deploy this spider?': '你确定要部署该爬虫?', 'Are you sure to delete this spider?': '你确定要删除该爬虫?', - 'Spider info has been saved successfully': '爬虫信息已成功保存', + 'Spider info has been saved successfully': '爬虫信息已成功保存' } diff --git a/frontend/src/icons/index 2.js b/frontend/src/icons/index 2.js deleted file mode 100644 index d9fe4d86..00000000 --- a/frontend/src/icons/index 2.js +++ /dev/null @@ -1,9 +0,0 @@ -import Vue from 'vue' -import SvgIcon from '@/components/SvgIcon' // svg组件 - -// register globally -Vue.component('svg-icon', SvgIcon) - -const requireAll = requireContext => requireContext.keys().map(requireContext) -const req = require.context('./svg', false, /\.svg$/) -requireAll(req) diff --git a/frontend/src/icons/svg/example 2.svg b/frontend/src/icons/svg/example 2.svg deleted file mode 100644 index 46f42b53..00000000 --- a/frontend/src/icons/svg/example 2.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/frontend/src/router/index 2.js b/frontend/src/router/index 2.js deleted file mode 100644 index 46b1e741..00000000 --- a/frontend/src/router/index 2.js +++ /dev/null @@ -1,205 +0,0 @@ -import Vue from 'vue' -import Router from 'vue-router' - -/* Layout */ -import Layout from '../views/layout/Layout' - -// in development-env not use lazy-loading, because lazy-loading too many pages will cause webpack hot update too slow. so only in production use lazy-loading; -// detail: https://panjiachen.github.io/vue-element-admin-site/#/lazy-loading - -Vue.use(Router) - -/** - * hidden: true if `hidden:true` will not show in the sidebar(default is false) - * alwaysShow: true if set true, will always show the root menu, whatever its child routes length - * if not set alwaysShow, only more than one route under the children - * it will becomes nested mode, otherwise not show the root menu - * redirect: noredirect if `redirect:noredirect` will no redirect in the breadcrumb - * name:'router-name' the name is used by (must set!!!) - * meta : { - title: 'title' the name show in submenu and breadcrumb (recommend set) - icon: 'svg-name' the icon show in the sidebar - breadcrumb: false if false, the item will hidden in breadcrumb(default is true) - } - **/ -export const constantRouterMap = [ - { path: '/login', component: () => import('../views/login/index'), hidden: true }, - { path: '/404', component: () => import('../views/404'), hidden: true }, - { path: '/', redirect: '/home' }, - - // Crawlab Pages - { - path: '/home', - component: Layout, - children: [ - { - path: '', - component: () => import('../views/home/Home'), - meta: { - title: 'Home', - icon: 'fa fa-home' - } - } - ] - }, - { - name: 'Node', - path: '/nodes', - component: Layout, - meta: { - title: 'Node', - icon: 'fa fa-server' - }, - children: [ - { - path: '', - name: 'NodeList', - component: () => import('../views/node/NodeList'), - meta: { - title: 'Nodes', - icon: 'fa fa-server' - } - }, - { - path: ':id', - name: 'NodeDetail', - component: () => import('../views/node/NodeDetail'), - meta: { - title: 'Node Detail', - icon: 'fa fa-circle-o' - }, - hidden: true - } - ] - }, - { - name: 'Spider', - path: '/spiders', - component: Layout, - meta: { - title: 'Spider', - icon: 'fa fa-bug' - }, - children: [ - { - path: '', - name: 'SpiderList', - component: () => import('../views/spider/SpiderList'), - meta: { - title: 'Spiders', - icon: 'fa fa-bug' - } - }, - { - path: ':id', - name: 'SpiderDetail', - component: () => import('../views/spider/SpiderDetail'), - meta: { - title: 'Spider Detail', - icon: 'fa fa-circle-o' - }, - hidden: true - } - ] - }, - { - name: 'Task', - path: '/tasks', - component: Layout, - meta: { - title: 'Task', - icon: 'fa fa-list' - }, - children: [ - { - path: '', - name: 'TaskList', - component: () => import('../views/task/TaskList'), - meta: { - title: 'Tasks', - icon: 'fa fa-list' - } - }, - { - path: ':id', - name: 'TaskDetail', - component: () => import('../views/task/TaskDetail'), - meta: { - title: 'Task Detail', - icon: 'fa fa-circle-o' - }, - hidden: true - } - ] - }, - { - name: 'Schedule', - path: '/schedules', - component: Layout, - meta: { - title: 'Schedules', - icon: 'fa fa-calendar' - }, - hidden: true, - children: [ - { - path: '', - name: 'ScheduleList', - component: () => import('../views/schedule/ScheduleList'), - meta: { - title: 'Schedules', - icon: 'fa fa-calendar' - } - } - ] - }, - { - name: 'Deploy', - path: '/deploys', - component: Layout, - meta: { - title: 'Deploy', - icon: 'fa fa-cloud' - }, - children: [ - { - path: '', - name: 'DeployList', - component: () => import('../views/deploy/DeployList'), - meta: { - title: 'Deploys', - icon: 'fa fa-cloud' - } - }, - { - path: ':id', - name: 'DeployDetail', - component: () => import('../views/deploy/DeployDetail'), - meta: { - title: 'Deploy Detail', - icon: 'fa fa-circle-o' - }, - hidden: true - } - ] - }, - - { path: '*', redirect: '/404', hidden: true } -] - -const router = new Router({ - // mode: 'history', //后端支持可开 - scrollBehavior: () => ({ y: 0 }), - routes: constantRouterMap -}) - -router.beforeEach((to, from, next) => { - if (to.meta && to.meta.title) { - window.document.title = `Crawlab - ${to.meta.title}` - } else { - window.document.title = 'Crawlab' - } - next() -}) - -export default router diff --git a/frontend/src/store/getters 2.js b/frontend/src/store/getters 2.js deleted file mode 100644 index 7fbf1f4f..00000000 --- a/frontend/src/store/getters 2.js +++ /dev/null @@ -1,9 +0,0 @@ -const getters = { - sidebar: state => state.app.sidebar, - device: state => state.app.device, - token: state => state.user.token, - avatar: state => state.user.avatar, - name: state => state.user.name, - roles: state => state.user.roles -} -export default getters diff --git a/frontend/src/store/modules/app 2.js b/frontend/src/store/modules/app 2.js deleted file mode 100644 index 5f5c8adf..00000000 --- a/frontend/src/store/modules/app 2.js +++ /dev/null @@ -1,43 +0,0 @@ -import Cookies from 'js-cookie' - -const app = { - state: { - sidebar: { - opened: !+Cookies.get('sidebarStatus'), - withoutAnimation: false - }, - device: 'desktop' - }, - mutations: { - TOGGLE_SIDEBAR: state => { - if (state.sidebar.opened) { - Cookies.set('sidebarStatus', 1) - } else { - Cookies.set('sidebarStatus', 0) - } - state.sidebar.opened = !state.sidebar.opened - state.sidebar.withoutAnimation = false - }, - CLOSE_SIDEBAR: (state, withoutAnimation) => { - Cookies.set('sidebarStatus', 1) - state.sidebar.opened = false - state.sidebar.withoutAnimation = withoutAnimation - }, - TOGGLE_DEVICE: (state, device) => { - state.device = device - } - }, - actions: { - ToggleSideBar: ({ commit }) => { - commit('TOGGLE_SIDEBAR') - }, - CloseSideBar ({ commit }, { withoutAnimation }) { - commit('CLOSE_SIDEBAR', withoutAnimation) - }, - ToggleDevice ({ commit }, device) { - commit('TOGGLE_DEVICE', device) - } - } -} - -export default app diff --git a/frontend/src/styles/element-ui 2.scss b/frontend/src/styles/element-ui 2.scss deleted file mode 100644 index e60a687f..00000000 --- a/frontend/src/styles/element-ui 2.scss +++ /dev/null @@ -1,30 +0,0 @@ -//to reset element-ui default css -.el-upload { - input[type="file"] { - display: none !important; - } -} - -.el-upload__input { - display: none; -} - -//暂时性解决diolag 问题 https://github.com/ElemeFE/element/issues/2461 -.el-dialog { - transform: none; - left: 0; - position: relative; - margin: 0 auto; -} - -//element ui upload -.upload-container { - .el-upload { - width: 100%; - - .el-upload-dragger { - width: 100%; - height: 200px; - } - } -} diff --git a/frontend/src/utils/auth 2.js b/frontend/src/utils/auth 2.js deleted file mode 100644 index 87696c7d..00000000 --- a/frontend/src/utils/auth 2.js +++ /dev/null @@ -1,15 +0,0 @@ -import Cookies from 'js-cookie' - -const TokenKey = 'Admin-Token' - -export function getToken () { - return Cookies.get(TokenKey) -} - -export function setToken (token) { - return Cookies.set(TokenKey, token) -} - -export function removeToken () { - return Cookies.remove(TokenKey) -} diff --git a/frontend/src/views/404 2.vue b/frontend/src/views/404 2.vue deleted file mode 100644 index 7d7a670e..00000000 --- a/frontend/src/views/404 2.vue +++ /dev/null @@ -1,228 +0,0 @@ - - - - - - - - - - - OOPS! - 版权所有 - 华尔街见闻 - - {{ message }} - 请检查您输入的网址是否正确,请点击以下按钮返回主页或者发送错误报告 - 返回首页 - - - - - - - - diff --git a/frontend/src/views/dashboard/index 2.vue b/frontend/src/views/dashboard/index 2.vue deleted file mode 100644 index c411c98f..00000000 --- a/frontend/src/views/dashboard/index 2.vue +++ /dev/null @@ -1,32 +0,0 @@ - - - name:{{ name }} - roles:{{ role }} - - - - - - diff --git a/frontend/src/views/dashboard/index.vue b/frontend/src/views/dashboard/index.vue deleted file mode 100644 index c411c98f..00000000 --- a/frontend/src/views/dashboard/index.vue +++ /dev/null @@ -1,32 +0,0 @@ - - - name:{{ name }} - roles:{{ role }} - - - - - - diff --git a/frontend/src/views/deploy/DeployDetail 2.vue b/frontend/src/views/deploy/DeployDetail 2.vue deleted file mode 100644 index df930ccc..00000000 --- a/frontend/src/views/deploy/DeployDetail 2.vue +++ /dev/null @@ -1,15 +0,0 @@ - - - NodeDetail - - - - - - diff --git a/frontend/src/views/form/index 2.vue b/frontend/src/views/form/index 2.vue deleted file mode 100644 index 7fd42157..00000000 --- a/frontend/src/views/form/index 2.vue +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create - Cancel - - - - - - - - diff --git a/frontend/src/views/form/index.vue b/frontend/src/views/form/index.vue deleted file mode 100644 index 7fd42157..00000000 --- a/frontend/src/views/form/index.vue +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Create - Cancel - - - - - - - - diff --git a/frontend/src/views/home/Home 2.vue b/frontend/src/views/home/Home 2.vue deleted file mode 100644 index e721a95c..00000000 --- a/frontend/src/views/home/Home 2.vue +++ /dev/null @@ -1,160 +0,0 @@ - - - - - - - - - - - - {{$t(m.label)}} - - - {{overviewStats[m.name]}} - - - - - - - - - {{$t('Daily New Tasks')}} - - - - - - - - - diff --git a/frontend/src/views/layout/Layout 2.vue b/frontend/src/views/layout/Layout 2.vue deleted file mode 100644 index bf284ee6..00000000 --- a/frontend/src/views/layout/Layout 2.vue +++ /dev/null @@ -1,79 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/frontend/src/views/layout/components/AppMain 2.vue b/frontend/src/views/layout/components/AppMain 2.vue deleted file mode 100644 index fae53f18..00000000 --- a/frontend/src/views/layout/components/AppMain 2.vue +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - diff --git a/frontend/src/views/layout/components/Sidebar/Item 2.vue b/frontend/src/views/layout/components/Sidebar/Item 2.vue deleted file mode 100644 index 148b4eee..00000000 --- a/frontend/src/views/layout/components/Sidebar/Item 2.vue +++ /dev/null @@ -1,34 +0,0 @@ - diff --git a/frontend/src/views/layout/mixin/ResizeHandler 2.js b/frontend/src/views/layout/mixin/ResizeHandler 2.js deleted file mode 100644 index e4f2c097..00000000 --- a/frontend/src/views/layout/mixin/ResizeHandler 2.js +++ /dev/null @@ -1,41 +0,0 @@ -import store from '@/store' - -const { body } = document -const WIDTH = 1024 -const RATIO = 3 - -export default { - watch: { - $route (route) { - if (this.device === 'mobile' && this.sidebar.opened) { - store.dispatch('CloseSideBar', { withoutAnimation: false }) - } - } - }, - beforeMount () { - window.addEventListener('resize', this.resizeHandler) - }, - mounted () { - const isMobile = this.isMobile() - if (isMobile) { - store.dispatch('ToggleDevice', 'mobile') - store.dispatch('CloseSideBar', { withoutAnimation: true }) - } - }, - methods: { - isMobile () { - const rect = body.getBoundingClientRect() - return rect.width - RATIO < WIDTH - }, - resizeHandler () { - if (!document.hidden) { - const isMobile = this.isMobile() - store.dispatch('ToggleDevice', isMobile ? 'mobile' : 'desktop') - - if (isMobile) { - store.dispatch('CloseSideBar', { withoutAnimation: true }) - } - } - } - } -} diff --git a/frontend/src/views/login/index 2.vue b/frontend/src/views/login/index 2.vue deleted file mode 100644 index 70e28313..00000000 --- a/frontend/src/views/login/index 2.vue +++ /dev/null @@ -1,209 +0,0 @@ - - - - Crawlab - - - - - - - - - - - - - - - - - - Sign in - - - - username: admin - password: admin - - - - - - - - - - diff --git a/frontend/src/views/nested/menu1/index 2.vue b/frontend/src/views/nested/menu1/index 2.vue deleted file mode 100644 index fdba73a4..00000000 --- a/frontend/src/views/nested/menu1/index 2.vue +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/frontend/src/views/nested/menu1/index.vue b/frontend/src/views/nested/menu1/index.vue deleted file mode 100644 index fdba73a4..00000000 --- a/frontend/src/views/nested/menu1/index.vue +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-1/index 2.vue b/frontend/src/views/nested/menu1/menu1-1/index 2.vue deleted file mode 100644 index 824b2cb6..00000000 --- a/frontend/src/views/nested/menu1/menu1-1/index 2.vue +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-1/index.vue b/frontend/src/views/nested/menu1/menu1-1/index.vue deleted file mode 100644 index 824b2cb6..00000000 --- a/frontend/src/views/nested/menu1/menu1-1/index.vue +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-2/index 2.vue b/frontend/src/views/nested/menu1/menu1-2/index 2.vue deleted file mode 100644 index 0c86276e..00000000 --- a/frontend/src/views/nested/menu1/menu1-2/index 2.vue +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-2/index.vue b/frontend/src/views/nested/menu1/menu1-2/index.vue deleted file mode 100644 index 0c86276e..00000000 --- a/frontend/src/views/nested/menu1/menu1-2/index.vue +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-2/menu1-2-1/index 2.vue b/frontend/src/views/nested/menu1/menu1-2/menu1-2-1/index 2.vue deleted file mode 100644 index f87d88f4..00000000 --- a/frontend/src/views/nested/menu1/menu1-2/menu1-2-1/index 2.vue +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-2/menu1-2-1/index.vue b/frontend/src/views/nested/menu1/menu1-2/menu1-2-1/index.vue deleted file mode 100644 index f87d88f4..00000000 --- a/frontend/src/views/nested/menu1/menu1-2/menu1-2-1/index.vue +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-2/menu1-2-2/index 2.vue b/frontend/src/views/nested/menu1/menu1-2/menu1-2-2/index 2.vue deleted file mode 100644 index d88789f2..00000000 --- a/frontend/src/views/nested/menu1/menu1-2/menu1-2-2/index 2.vue +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-2/menu1-2-2/index.vue b/frontend/src/views/nested/menu1/menu1-2/menu1-2-2/index.vue deleted file mode 100644 index d88789f2..00000000 --- a/frontend/src/views/nested/menu1/menu1-2/menu1-2-2/index.vue +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-3/index 2.vue b/frontend/src/views/nested/menu1/menu1-3/index 2.vue deleted file mode 100644 index f7cd0738..00000000 --- a/frontend/src/views/nested/menu1/menu1-3/index 2.vue +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/frontend/src/views/nested/menu1/menu1-3/index.vue b/frontend/src/views/nested/menu1/menu1-3/index.vue deleted file mode 100644 index f7cd0738..00000000 --- a/frontend/src/views/nested/menu1/menu1-3/index.vue +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/frontend/src/views/nested/menu2/index 2.vue b/frontend/src/views/nested/menu2/index 2.vue deleted file mode 100644 index 19dd48f0..00000000 --- a/frontend/src/views/nested/menu2/index 2.vue +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/frontend/src/views/nested/menu2/index.vue b/frontend/src/views/nested/menu2/index.vue deleted file mode 100644 index 19dd48f0..00000000 --- a/frontend/src/views/nested/menu2/index.vue +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/frontend/src/views/node/NodeDetail 2.vue b/frontend/src/views/node/NodeDetail 2.vue deleted file mode 100644 index e95eb472..00000000 --- a/frontend/src/views/node/NodeDetail 2.vue +++ /dev/null @@ -1,98 +0,0 @@ - - - - - {{$t('Node')}}: - - - - - - - - - - - - {{$t('Deployed Spiders')}} - - - - - - - - - diff --git a/frontend/src/views/result/ResultDetail 2.vue b/frontend/src/views/result/ResultDetail 2.vue deleted file mode 100644 index 339d56b0..00000000 --- a/frontend/src/views/result/ResultDetail 2.vue +++ /dev/null @@ -1,95 +0,0 @@ - - - - - Spider: - - - - - - - - - - - - - - - - - - - - diff --git a/frontend/src/views/schedule/ScheduleList 2.vue b/frontend/src/views/schedule/ScheduleList 2.vue deleted file mode 100644 index d1b8a5bb..00000000 --- a/frontend/src/views/schedule/ScheduleList 2.vue +++ /dev/null @@ -1,15 +0,0 @@ - - - Schedule List - - - - - - diff --git a/frontend/src/views/spider/SpiderDetail 2.vue b/frontend/src/views/spider/SpiderDetail 2.vue deleted file mode 100644 index ff6a1333..00000000 --- a/frontend/src/views/spider/SpiderDetail 2.vue +++ /dev/null @@ -1,99 +0,0 @@ - - - - - {{$t('Spider')}}: - - - - - - - - - - - - - - - - - - - - diff --git a/frontend/src/views/table/index 2.vue b/frontend/src/views/table/index 2.vue deleted file mode 100644 index 2dd6e81f..00000000 --- a/frontend/src/views/table/index 2.vue +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - {{ scope.$index }} - - - - - {{ scope.row.title }} - - - - - {{ scope.row.author }} - - - - - {{ scope.row.pageviews }} - - - - - {{ scope.row.status }} - - - - - - {{ scope.row.display_time }} - - - - - - - diff --git a/frontend/src/views/table/index.vue b/frontend/src/views/table/index.vue deleted file mode 100644 index 2dd6e81f..00000000 --- a/frontend/src/views/table/index.vue +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - {{ scope.$index }} - - - - - {{ scope.row.title }} - - - - - {{ scope.row.author }} - - - - - {{ scope.row.pageviews }} - - - - - {{ scope.row.status }} - - - - - - {{ scope.row.display_time }} - - - - - - - diff --git a/frontend/src/views/task/TaskDetail 2.vue b/frontend/src/views/task/TaskDetail 2.vue deleted file mode 100644 index d9e89826..00000000 --- a/frontend/src/views/task/TaskDetail 2.vue +++ /dev/null @@ -1,104 +0,0 @@ - - - - - - - - - - - {{taskLog}} - - - - - - - - - - - - - diff --git a/frontend/src/views/tree/index 2.vue b/frontend/src/views/tree/index 2.vue deleted file mode 100644 index dfabddea..00000000 --- a/frontend/src/views/tree/index 2.vue +++ /dev/null @@ -1,77 +0,0 @@ - - - - - - - - - - diff --git a/frontend/src/views/tree/index.vue b/frontend/src/views/tree/index.vue deleted file mode 100644 index dfabddea..00000000 --- a/frontend/src/views/tree/index.vue +++ /dev/null @@ -1,77 +0,0 @@ - - - - - - - - - - diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index a45310f5..00000000 --- a/requirements.txt +++ /dev/null @@ -1,66 +0,0 @@ -amqp==2.4.1 -aniso8601==4.1.0 -APScheduler==3.5.3 -asn1crypto==0.24.0 -attrs==18.2.0 -Automat==0.7.0 -Babel==2.6.0 -billiard==3.5.0.5 -celery==4.2.1 -certifi==2018.11.29 -cffi==1.11.5 -chardet==3.0.4 -Click==7.0 -constantly==15.1.0 -cryptography==2.5 -cssselect==1.0.3 -Django==2.1.7 -django-cors-headers==2.4.0 -dnspython==1.16.0 -docopt==0.6.2 -eventlet==0.24.1 -Flask==1.0.2 -Flask-Cors==3.0.7 -Flask-RESTful==0.3.7 -Flask-Uploads==0.2.1 -flower==0.9.2 -gerapy==0.8.5 -greenlet==0.4.15 -gunicorn==19.9.0 -hyperlink==18.0.0 -idna==2.8 -incremental==17.5.0 -itsdangerous==1.1.0 -Jinja2==2.10 -kombu==4.3.0 -lxml==4.3.1 -MarkupSafe==1.1.0 -mongoengine==0.16.3 -monotonic==1.5 -parsel==1.5.1 -pyasn1==0.4.5 -pyasn1-modules==0.2.4 -pycparser==2.19 -PyDispatcher==2.0.5 -PyHamcrest==1.9.0 -pymongo==3.7.2 -PyMySQL==0.9.3 -pyOpenSSL==19.0.0 -python-scrapyd-api==2.1.2 -pytz==2018.9 -queuelib==1.5.0 -redis==3.1.0 -requests==2.21.0 -Scrapy==1.6.0 -scrapy-redis==0.6.8 -scrapy-splash==0.7.2 -service-identity==18.1.0 -six==1.12.0 -tornado==5.1.1 -Twisted==18.9.0 -tzlocal==1.5.1 -urllib3==1.24.1 -vine==1.2.0 -w3lib==1.20.0 -Werkzeug==0.14.1 -zope.interface==4.6.0 diff --git a/spiders/article/article_spider 2.js b/spiders/article/article_spider 2.js deleted file mode 100644 index c94f2820..00000000 --- a/spiders/article/article_spider 2.js +++ /dev/null @@ -1,61 +0,0 @@ -const puppeteer = require('puppeteer'); -const MongoClient = require('mongodb').MongoClient; - -(async () => { - // browser - const browser = await (puppeteer.launch({ - headless: true - })); - - // page - const page = await browser.newPage(); - - // open database connection - const client = await MongoClient.connect('mongodb://127.0.0.1:27017'); - let db = await client.db('crawlab_test'); - const colName = process.env.CRAWLAB_COLLECTION || 'results'; - const col = db.collection(colName); - const col_src = db.collection('results'); - - const results = await col_src.find({content: {$exists: false}}).toArray(); - for (let i = 0; i < results.length; i++) { - let item = results[i]; - - // define article anchor - let anchor; - if (item.source === 'juejin') { - anchor = '.article-content'; - } else if (item.source === 'segmentfault') { - anchor = '.article'; - } else if (item.source === 'csdn') { - anchor = '#content_views'; - } else { - continue; - } - - console.log(`anchor: ${anchor}`); - - // navigate to the article - try { - await page.goto(item.url, {waitUntil: 'domcontentloaded'}); - await page.waitFor(2000); - } catch (e) { - console.error(e); - continue; - } - - // scrape article content - item.content = await page.$eval(anchor, el => el.innerHTML); - - // save to database - await col.save(item); - console.log(`saved item: ${JSON.stringify(item)}`) - } - - // close mongodb - client.close(); - - // close browser - browser.close(); - -})(); \ No newline at end of file diff --git a/spiders/csdn/csdn_spider 2.js b/spiders/csdn/csdn_spider 2.js deleted file mode 100644 index edda3b00..00000000 --- a/spiders/csdn/csdn_spider 2.js +++ /dev/null @@ -1,83 +0,0 @@ -const puppeteer = require('puppeteer'); -const MongoClient = require('mongodb').MongoClient; - -(async () => { - // browser - const browser = await (puppeteer.launch({ - headless: true - })); - - // define start url - const url = 'https://www.csdn.net'; - - // start a new page - const page = await browser.newPage(); - - // navigate to url - try { - await page.goto(url, {waitUntil: 'domcontentloaded'}); - await page.waitFor(2000); - } catch (e) { - console.error(e); - - // close browser - browser.close(); - - // exit code 1 indicating an error happened - code = 1; - process.emit("exit "); - process.reallyExit(code); - - return - } - - // scroll down to fetch more data - for (let i = 0; i < 100; i++) { - console.log('Pressing PageDown...'); - await page.keyboard.press('PageDown', 200); - await page.waitFor(100); - } - - // scrape data - const results = await page.evaluate(() => { - let results = []; - document.querySelectorAll('#feedlist_id > li').forEach(el => { - const $a = el.querySelector('.title > h2 > a'); - if (!$a) return; - results.push({ - url: $a.getAttribute('href'), - title: $a.innerText - }); - }); - return results; - }); - - // open database connection - const client = await MongoClient.connect('mongodb://127.0.0.1:27017'); - let db = await client.db('crawlab_test'); - const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin'; - const taskId = process.env.CRAWLAB_TASK_ID; - const col = db.collection(colName); - - // save to database - for (let i = 0; i < results.length; i++) { - // de-duplication - const r = await col.findOne({url: results[i]}); - if (r) continue; - - // assign taskID - results[i].task_id = taskId; - results[i].source = 'csdn'; - - // insert row - await col.insertOne(results[i]); - } - - console.log(`results.length: ${results.length}`); - - // close database connection - client.close(); - - // shutdown browser - browser.close(); -})(); \ No newline at end of file diff --git a/spiders/example_juejin/juejin/spiders/__init__ 2.py b/spiders/example_juejin/juejin/spiders/__init__ 2.py deleted file mode 100644 index ebd689ac..00000000 --- a/spiders/example_juejin/juejin/spiders/__init__ 2.py +++ /dev/null @@ -1,4 +0,0 @@ -# This package will contain the spiders of your Scrapy project -# -# Please refer to the documentation for information on how to create and manage -# your spiders. diff --git a/spiders/juejin_node/juejin_spider 2.js b/spiders/juejin_node/juejin_spider 2.js deleted file mode 100644 index 3cf2bcac..00000000 --- a/spiders/juejin_node/juejin_spider 2.js +++ /dev/null @@ -1,82 +0,0 @@ -const puppeteer = require('puppeteer'); -const MongoClient = require('mongodb').MongoClient; - -(async () => { - // browser - const browser = await (puppeteer.launch({ - headless: true - })); - - // define start url - const url = 'https://juejin.im'; - - // start a new page - const page = await browser.newPage(); - - // navigate to url - try { - await page.goto(url, {waitUntil: 'domcontentloaded'}); - await page.waitFor(2000); - } catch (e) { - console.error(e); - - // close browser - browser.close(); - - // exit code 1 indicating an error happened - code = 1; - process.emit("exit "); - process.reallyExit(code); - - return - } - - // scroll down to fetch more data - for (let i = 0; i < 100; i++) { - console.log('Pressing PageDown...'); - await page.keyboard.press('PageDown', 200); - await page.waitFor(100); - } - - // scrape data - const results = await page.evaluate(() => { - let results = []; - document.querySelectorAll('.entry-list > .item').forEach(el => { - if (!el.querySelector('.title')) return; - results.push({ - url: 'https://juejin.com' + el.querySelector('.title').getAttribute('href'), - title: el.querySelector('.title').innerText - }); - }); - return results; - }); - - // open database connection - const client = await MongoClient.connect('mongodb://127.0.0.1:27017'); - let db = await client.db('crawlab_test'); - const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin'; - const taskId = process.env.CRAWLAB_TASK_ID; - const col = db.collection(colName); - - // save to database - for (let i = 0; i < results.length; i++) { - // de-duplication - const r = await col.findOne({url: results[i]}); - if (r) continue; - - // assign taskID - results[i].task_id = taskId; - results[i].source = 'juejin'; - - // insert row - await col.insertOne(results[i]); - } - - console.log(`results.length: ${results.length}`); - - // close database connection - client.close(); - - // shutdown browser - browser.close(); -})(); \ No newline at end of file diff --git a/spiders/segmentfault/segmentfault_spider 2.js b/spiders/segmentfault/segmentfault_spider 2.js deleted file mode 100644 index 834b61cc..00000000 --- a/spiders/segmentfault/segmentfault_spider 2.js +++ /dev/null @@ -1,81 +0,0 @@ -const puppeteer = require('puppeteer'); -const MongoClient = require('mongodb').MongoClient; - -(async () => { - // browser - const browser = await (puppeteer.launch({ - headless: true - })); - - // define start url - const url = 'https://segmentfault.com/newest'; - - // start a new page - const page = await browser.newPage(); - - // navigate to url - try { - await page.goto(url, {waitUntil: 'domcontentloaded'}); - await page.waitFor(2000); - } catch (e) { - console.error(e); - - // close browser - browser.close(); - - // exit code 1 indicating an error happened - code = 1; - process.emit("exit "); - process.reallyExit(code); - - return - } - - // scroll down to fetch more data - for (let i = 0; i < 10; i++) { - console.log('Pressing PageDown...'); - await page.keyboard.press('PageDown', 200); - await page.waitFor(500); - } - - // scrape data - const results = await page.evaluate(() => { - let results = []; - document.querySelectorAll('.news-list .news-item').forEach(el => { - results.push({ - url: 'https://segmentfault.com' + el.querySelector('.news__item-info > a').getAttribute('href'), - title: el.querySelector('.news__item-title').innerText - }) - }); - return results; - }); - - // open database connection - const client = await MongoClient.connect('mongodb://127.0.0.1:27017'); - let db = await client.db('crawlab_test'); - const colName = process.env.CRAWLAB_COLLECTION || 'results_segmentfault'; - const taskId = process.env.CRAWLAB_TASK_ID; - const col = db.collection(colName); - - // save to database - for (let i = 0; i < results.length; i++) { - // de-duplication - const r = await col.findOne({url: results[i]}); - if (r) continue; - - // assign taskID - results[i].task_id = taskId; - results[i].source = 'segmentfault'; - - // insert row - await col.insertOne(results[i]); - } - - console.log(`results.length: ${results.length}`); - - // close database connection - client.close(); - - // shutdown browser - browser.close(); -})(); \ No newline at end of file
- {{taskLog}} -