Merge pull request #493 from crawlab-team/develop

Develop
This commit is contained in:
Marvin Zhang
2020-02-01 10:27:41 +08:00
committed by GitHub
10 changed files with 205 additions and 65 deletions

View File

@@ -1,3 +1,16 @@
# 0.4.5 (unknown)
### 功能 / 优化
- **交互式教程**. 引导用户了解 Crawlab 的主要功能.
### Bug 修复
- **点击删除按钮导致跳转**. [#480](https://github.com/crawlab-team/crawlab/issues/480)
- **无法在空爬虫里创建文件**. [#479](https://github.com/crawlab-team/crawlab/issues/479)
- **下载结果错误**. [#465](https://github.com/crawlab-team/crawlab/issues/465)
- **crawlab-sdk CLI 错误**. [#458](https://github.com/crawlab-team/crawlab/issues/458)
- **页面刷新问题**. [#441](https://github.com/crawlab-team/crawlab/issues/441)
- **修复“删除爬虫后获取所有爬虫”错误**.
- **修复 i18n warning 警告**.
# 0.4.4 (2020-01-17)
### 功能 / 优化

View File

@@ -1,3 +1,16 @@
# 0.4.5 (unknown)
### Features / Enhancement
- **Interactive Tutorial**. Guide users through the main functionalities of Crawlab.
### Bug Fixes
- **Click delete button results in redirect**. [#480](https://github.com/crawlab-team/crawlab/issues/480)
- **Unable to create files in an empty spider**. [#479](https://github.com/crawlab-team/crawlab/issues/479)
- **Download results error**. [#465](https://github.com/crawlab-team/crawlab/issues/465)
- **crawlab-sdk CLI error**. [#458](https://github.com/crawlab-team/crawlab/issues/458)
- **Page refresh issue**. [#441](https://github.com/crawlab-team/crawlab/issues/441)
- **Getting all spiders after deleting a spider**.
- **i18n warning**.
# 0.4.4 (2020-01-17)
### Features / Enhancement
- **Email Notification**. Allow users to send email notifications.

View File

@@ -2,11 +2,14 @@
import scrapy
import re
from config_spider.items import Item
from urllib.parse import urljoin
from urllib.parse import urljoin, urlparse
def get_real_url(response, url):
    """Resolve a link extracted from a page into an absolute URL.

    :param response: object exposing the page address as ``response.url``.
    :param url: the extracted link; may be absolute, protocol-relative
        (``//host/path``) or relative to the page.
    :return: the absolute URL as a string.
    """
    # Already absolute: require the full "scheme://" prefix so that a relative
    # path which merely starts with "http" (e.g. "https-guide.html") is still
    # resolved against the page instead of being returned untouched.
    if re.search(r'^https?://', url):
        return url
    elif re.search(r'^\/\/', url):
        # Protocol-relative link: reuse the page's scheme. The ':' separator
        # is required, otherwise the result would be "https//host/path".
        u = urlparse(response.url)
        return u.scheme + ':' + url
    # Anything else is resolved relative to the page address.
    return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):

View File

@@ -1,5 +1,14 @@
<template>
<div class="config-list">
<!--tour-->
<v-tour
name="spider-detail-config"
:steps="tourSteps"
:callbacks="tourCallbacks"
:options="$utils.tour.getOptions(true)"
/>
<!--./tour-->
<!--preview results-->
<el-dialog :visible.sync="dialogVisible"
:title="$t('Preview Results')"
@@ -57,6 +66,7 @@
<li class="item">
<label>{{$t('Start URL')}}: </label>
<el-input
id="start-url"
v-model="spiderForm.config.start_url"
:placeholder="$t('Start URL')"
:class="startUrlClass"
@@ -65,6 +75,7 @@
<li class="item">
<label>{{$t('Start Stage')}}: </label>
<el-select
id="start-stage"
v-model="spiderForm.config.start_stage"
:placeholder="$t('Start Stage')"
:class="startStageClass"
@@ -120,12 +131,12 @@
<div class="button-group-container">
<div class="button-group">
<el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
<el-button id="btn-run" type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
<!-- <el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">-->
<!-- {{$t('ExtractFields')}}-->
<!-- </el-button>-->
<!-- <el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>-->
<el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
<el-button id="btn-save" type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
</div>
</div>
</el-row>
@@ -141,7 +152,7 @@
<template slot="title">
<ul class="stage-list">
<!--actions-->
<li class="stage-item" style="min-width: 80px; flex-basis: 80px; justify-content: flex-end"
<li class="stage-item actions" style="min-width: 80px; flex-basis: 80px; justify-content: flex-end"
@click="$event.stopPropagation()">
<i class="action-item el-icon-copy-document" @click="onCopyStage(stage)"></i>
<i class="action-item el-icon-remove-outline" @click="onRemoveStage(stage)"></i>
@@ -377,6 +388,105 @@ export default {
lineNumbers: true,
line: true,
matchBrackets: true
},
tourSteps: [
// stage
{
target: '.config-list .el-tabs__nav.is-top',
content: this.$t('You can switch to each section of configurable spider.')
},
{
target: '#start-url',
content: this.$t('Here is the starting URL of the spider.')
},
{
target: '#start-stage',
content: this.$t('Here is the starting stage of the spider.<br><br>A <strong>Stage</strong> is basically a callback in the Scrapy spider.')
},
{
target: '#btn-run',
content: this.$t('You can run a spider task.<br><br>Spider will be automatically saved when clicking on this button.')
},
{
target: '.stage-item.actions',
content: this.$t('Add/duplicate/delete a stage.'),
params: {
placement: 'right'
}
},
{
target: '.fields-table-view td.action',
content: this.$t('Add/duplicate/delete an extract field in the stage.'),
params: {
placement: 'right'
}
},
{
target: '.stage-item:nth-child(3)',
content: this.$t('You can decide whether this is a list page.<br><br>Click on the CSS/XPath tag to enter the selector expression for list items.<br>For example, "<code>ul > li</code>"'),
params: {
placement: 'top'
}
},
{
target: '.stage-item:nth-child(4)',
content: this.$t('You can decide whether this is a list page with pagination.<br><br>Click on the CSS/XPath tag to enter the selector expression for the pagination.<br>For example, "<code>a.next</code>"'),
params: {
placement: 'top'
}
},
{
target: '.fields-table-view',
content: this.$t('You should enter necessary information for all fields in the stage.'),
params: {
placement: 'top'
}
},
{
target: '.fields-table-view tr:nth-child(1) td:nth-child(7)',
content: this.$t('If you have multiple stages, e.g. list page + detail page, you should select the next stage in the detail link\'s field.'),
params: {
placement: 'top'
}
},
// process
{
target: '#tab-process',
content: this.$t('You can view the<br> visualization of the stage<br> workflow.')
},
// settings
{
target: '#tab-settings',
content: this.$t('You can add the settings here, which will be loaded in the Scrapy\'s <code>settings.py</code> file.<br><br>JSON and Array data are supported.')
},
// Spiderfile
{
target: '#tab-spiderfile',
content: this.$t('You can edit the <code>Spiderfile</code> here.<br><br>For more information, please refer to the <a href="https://docs.crawlab.cn/Usage/Spider/ConfigurableSpider.html" target="_blank" style="color: #409EFF">Documentation (Chinese)</a>.')
}
],
tourCallbacks: {
onStop: () => {
this.$utils.tour.finishTour('spider-detail-config')
},
onPreviousStep: (currentStep) => {
if (currentStep === 10) {
this.activeTab = 'stages'
} else if (currentStep === 11) {
this.activeTab = 'process'
} else if (currentStep === 12) {
this.activeTab = 'settings'
}
},
onNextStep: (currentStep) => {
if (currentStep === 9) {
this.activeTab = 'process'
} else if (currentStep === 10) {
this.activeTab = 'settings'
} else if (currentStep === 11) {
this.activeTab = 'spiderfile'
}
}
}
}
},

View File

@@ -10,7 +10,8 @@ const i18n = new VueI18n({
messages: {
en,
zh
}
},
silentTranslationWarn: true
})
export default i18n

View File

@@ -434,6 +434,19 @@ docker run -d --restart always --name crawlab_worker \\
'You can edit, save, rename<br> and delete the selected file <br>in this box.': '在这个栏位中,您可以<br>编辑、保存、重命名、<br>删除所选择的文件',
'Here you can add environment variables that will be passed to the spider program when running a task.': '这里您可以添加环境变量,这些环境变量会被传入运行的爬虫程序中',
'You can add, edit and delete schedules (cron jobs) for the spider.': '您可以添加、修改、删除爬虫的定时任务',
'You can switch to each section of configurable spider.': '您可以切换到可配置爬虫的每一个部分',
'Here is the starting URL of the spider.': '这里是爬虫的起始URL',
'Here is the starting stage of the spider.<br><br>A <strong>Stage</strong> is basically a callback in the Scrapy spider.': '这里是爬虫的起始阶段<br><br><strong>阶段</strong>就是 Scrapy 爬虫中的回调函数',
'You can run a spider task.<br><br>Spider will be automatically saved when clicking on this button.': '您可以运行爬虫任务<br><br>点击该按钮会自动保存爬虫',
'Add/duplicate/delete a stage.': '添加/复制/删除阶段',
'Add/duplicate/delete an extract field in the stage.': '添加/复制/删除该阶段下的抓取字段',
'You can decide whether this is a list page.<br><br>Click on the CSS/XPath tag to enter the selector expression for list items.<br>For example, "<code>ul > li</code>"': '您可以决定这是否为一个列表页<br><br>点击 CSS/XPath 标签来输入列表元素的选择器表达式<br>例如 "<code>ul > li</code>"',
'You can decide whether this is a list page with pagination.<br><br>Click on the CSS/XPath tag to enter the selector expression for the pagination.<br>For example, "<code>a.next</code>"': '您可以决定这是否为一个含分页的列表页<br><br>点击 CSS/XPath 标签来输入分页的选择器表达式<br>例如 "<code>a.next</code>"',
'You should enter necessary information for all fields in the stage.': '您应该输入该阶段下所有字段的信息',
'If you have multiple stages, e.g. list page + detail page, you should select the next stage in the detail link\'s field.': '如果您有多个阶段,例如列表页+详情页,您应该在详情页链接字段中选择下一个阶段',
'You can view the<br> visualization of the stage<br> workflow.': '您可以查看阶段工作流的<br>可视化界面',
'You can add the settings here, which will be loaded in the Scrapy\'s <code>settings.py</code> file.<br><br>JSON and Array data are supported.': '您可以在这里添加设置,它们会在 Scrapy 中的 <code>settings.py</code> 中被加载<br><br>JSON 和数组都支持',
'You can edit the <code>Spiderfile</code> here.<br><br>For more information, please refer to the <a href="https://docs.crawlab.cn/Usage/Spider/ConfigurableSpider.html" target="_blank" style="color: #409EFF">Documentation (Chinese)</a>.': '您可以在这里编辑 <code>Spiderfile</code><br><br>更多信息, 请参考 <a href="https://docs.crawlab.cn/Usage/Spider/ConfigurableSpider.html" target="_blank" style="color: #409EFF">文档</a>.',
// 其他
'Star crawlab-team/crawlab on GitHub': '在 GitHub 上为 Crawlab 加星吧'

View File

@@ -47,35 +47,6 @@ export const constantRouterMap = [
}
]
},
{
path: '/nodes',
component: Layout,
meta: {
title: 'Node',
icon: 'fa fa-server'
},
children: [
{
path: '',
name: 'NodeList',
component: () => import('../views/node/NodeList'),
meta: {
title: 'Nodes',
icon: 'fa fa-server'
}
},
{
path: ':id',
name: 'NodeDetail',
component: () => import('../views/node/NodeDetail'),
meta: {
title: 'Node Detail',
icon: 'fa fa-circle-o'
},
hidden: true
}
]
},
{
path: '/spiders',
component: Layout,
@@ -155,21 +126,49 @@ export const constantRouterMap = [
]
},
{
path: '/sites',
path: '/nodes',
component: Layout,
hidden: true,
meta: {
title: 'Site',
icon: 'fa fa-sitemap'
title: 'Node',
icon: 'fa fa-server'
},
children: [
{
path: '',
name: 'SiteList',
component: () => import('../views/site/SiteList'),
name: 'NodeList',
component: () => import('../views/node/NodeList'),
meta: {
title: 'Sites',
icon: 'fa fa-sitemap'
title: 'Nodes',
icon: 'fa fa-server'
}
},
{
path: ':id',
name: 'NodeDetail',
component: () => import('../views/node/NodeDetail'),
meta: {
title: 'Node Detail',
icon: 'fa fa-circle-o'
},
hidden: true
}
]
},
{
path: '/disclaimer',
component: Layout,
meta: {
title: 'Disclaimer',
icon: 'fa fa-exclamation-triangle'
},
children: [
{
path: '',
name: 'Disclaimer',
component: () => import('../views/doc/Disclaimer'),
meta: {
title: 'Disclaimer',
icon: 'fa fa-exclamation-triangle'
}
}
]
@@ -193,25 +192,6 @@ export const constantRouterMap = [
}
]
},
{
path: '/disclaimer',
component: Layout,
meta: {
title: 'Disclaimer',
icon: 'fa fa-exclamation-triangle'
},
children: [
{
path: '',
name: 'Disclaimer',
component: () => import('../views/doc/Disclaimer'),
meta: {
title: 'Disclaimer',
icon: 'fa fa-exclamation-triangle'
}
}
]
},
{
path: '/setting',
component: Layout,

View File

@@ -184,6 +184,12 @@ export default {
setTimeout(() => {
this.$refs['config'].update()
}, 0)
if (!this.$utils.tour.isFinishedTour('spider-detail-config')) {
setTimeout(() => {
this.$tours['spider-detail-config'].start()
}, 100)
}
}
this.$st.sendEv('爬虫详情', '切换标签', tab.name)
},

View File

@@ -38,13 +38,15 @@
<!--table list-->
<el-table :data="filteredTableData"
ref="table"
class="table"
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
border
row-key="_id"
@row-click="onRowClick"
@selection-change="onSelectionChange">
>
<el-table-column type="selection" width="55"/>
<el-table-column type="selection" width="55" reserve-selection/>
<template v-for="col in columns">
<el-table-column v-if="col.name === 'spider_name'"
:key="col.name"
@@ -240,7 +242,6 @@ export default {
},
methods: {
onSearch (value) {
console.log(value)
},
onRefresh () {
this.$store.dispatch('task/getTaskList')
@@ -267,6 +268,7 @@ export default {
message: '删除任务成功'
})
this.$store.dispatch('task/getTaskList')
this.$refs['table'].clearSelection()
return
}
this.$message({

View File

@@ -41,6 +41,5 @@ services:
- "/opt/crawlab/redis/data:/data"
splash: # use Splash to run spiders on dynamic pages
image: scrapinghub/splash
container_name: splash
# ports:
# - "8050:8050"