mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
@@ -1,3 +1,16 @@
|
||||
# 0.4.5 (unknown)
|
||||
### 功能 / 优化
|
||||
- **交互式教程**. 引导用户了解 Crawlab 的主要功能.
|
||||
|
||||
### Bug 修复
|
||||
- **点击删除按钮导致跳转**. [#480](https://github.com/crawlab-team/crawlab/issues/480)
|
||||
- **无法在空爬虫里创建文件**. [#479](https://github.com/crawlab-team/crawlab/issues/479)
|
||||
- **下载结果错误**. [#465](https://github.com/crawlab-team/crawlab/issues/465)
|
||||
- **crawlab-sdk CLI 错误**. [#458](https://github.com/crawlab-team/crawlab/issues/458)
|
||||
- **页面刷新问题**. [#441](https://github.com/crawlab-team/crawlab/issues/441)
|
||||
- **修复“删除爬虫后获取所有爬虫”错误**.
|
||||
- **修复 i18n warning 警告**.
|
||||
|
||||
# 0.4.4 (2020-01-17)
|
||||
|
||||
### 功能 / 优化
|
||||
|
||||
13
CHANGELOG.md
13
CHANGELOG.md
@@ -1,3 +1,16 @@
|
||||
# 0.4.5 (unknown)
|
||||
### Features / Enhancement
|
||||
- **Interactive Tutorial**. Guide users through the main functionalities of Crawlab.
|
||||
|
||||
### Bug Fixes
|
||||
- **Click delete button results in redirect**. [#480](https://github.com/crawlab-team/crawlab/issues/480)
|
||||
- **Unable to create files in an empty spider**. [#479](https://github.com/crawlab-team/crawlab/issues/479)
|
||||
- **Download results error**. [#465](https://github.com/crawlab-team/crawlab/issues/465)
|
||||
- **crawlab-sdk CLI error**. [#458](https://github.com/crawlab-team/crawlab/issues/458)
|
||||
- **Page refresh issue**. [#441](https://github.com/crawlab-team/crawlab/issues/441)
|
||||
- **Error when getting all spiders after deleting a spider**.
|
||||
- **i18n warning**.
|
||||
|
||||
# 0.4.4 (2020-01-17)
|
||||
### Features / Enhancement
|
||||
- **Email Notification**. Allow users to send email notifications.
|
||||
|
||||
@@ -2,11 +2,14 @@
|
||||
import scrapy
|
||||
import re
|
||||
from config_spider.items import Item
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
def get_real_url(response, url):
    """Resolve a link extracted from a page into an absolute URL.

    :param response: object exposing the page address as ``response.url``;
        it is used as the base for resolving ``url``.
    :param url: link text as found in the page. May be absolute
        (``http://`` / ``https://``), protocol-relative (``//host/path``)
        or relative to the page.
    :return: the absolute URL as a string.
    """
    # Already absolute: require the full "scheme://" prefix so that relative
    # paths which merely start with "http" (e.g. "http-codes.html") are still
    # resolved against the page URL below.
    if re.search(r'^https?://', url):
        return url
    elif re.search(r'^\/\/', url):
        # Protocol-relative link: reuse the scheme of the current page.
        # The ":" separator is required, otherwise the result would be
        # "https//host/path" instead of "https://host/path".
        u = urlparse(response.url)
        return u.scheme + ':' + url
    # Anything else is resolved relative to the page URL.
    return urljoin(response.url, url)
|
||||
|
||||
class ConfigSpider(scrapy.Spider):
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
<template>
|
||||
<div class="config-list">
|
||||
<!--tour-->
|
||||
<v-tour
|
||||
name="spider-detail-config"
|
||||
:steps="tourSteps"
|
||||
:callbacks="tourCallbacks"
|
||||
:options="$utils.tour.getOptions(true)"
|
||||
/>
|
||||
<!--./tour-->
|
||||
|
||||
<!--preview results-->
|
||||
<el-dialog :visible.sync="dialogVisible"
|
||||
:title="$t('Preview Results')"
|
||||
@@ -57,6 +66,7 @@
|
||||
<li class="item">
|
||||
<label>{{$t('Start URL')}}: </label>
|
||||
<el-input
|
||||
id="start-url"
|
||||
v-model="spiderForm.config.start_url"
|
||||
:placeholder="$t('Start URL')"
|
||||
:class="startUrlClass"
|
||||
@@ -65,6 +75,7 @@
|
||||
<li class="item">
|
||||
<label>{{$t('Start Stage')}}: </label>
|
||||
<el-select
|
||||
id="start-stage"
|
||||
v-model="spiderForm.config.start_stage"
|
||||
:placeholder="$t('Start Stage')"
|
||||
:class="startStageClass"
|
||||
@@ -120,12 +131,12 @@
|
||||
|
||||
<div class="button-group-container">
|
||||
<div class="button-group">
|
||||
<el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
|
||||
<el-button id="btn-run" type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
|
||||
<!-- <el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">-->
|
||||
<!-- {{$t('ExtractFields')}}-->
|
||||
<!-- </el-button>-->
|
||||
<!-- <el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>-->
|
||||
<el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
|
||||
<el-button id="btn-save" type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
|
||||
</div>
|
||||
</div>
|
||||
</el-row>
|
||||
@@ -141,7 +152,7 @@
|
||||
<template slot="title">
|
||||
<ul class="stage-list">
|
||||
<!--actions-->
|
||||
<li class="stage-item" style="min-width: 80px; flex-basis: 80px; justify-content: flex-end"
|
||||
<li class="stage-item actions" style="min-width: 80px; flex-basis: 80px; justify-content: flex-end"
|
||||
@click="$event.stopPropagation()">
|
||||
<i class="action-item el-icon-copy-document" @click="onCopyStage(stage)"></i>
|
||||
<i class="action-item el-icon-remove-outline" @click="onRemoveStage(stage)"></i>
|
||||
@@ -377,6 +388,105 @@ export default {
|
||||
lineNumbers: true,
|
||||
line: true,
|
||||
matchBrackets: true
|
||||
},
|
||||
tourSteps: [
|
||||
// stage
|
||||
{
|
||||
target: '.config-list .el-tabs__nav.is-top',
|
||||
content: this.$t('You can switch to each section of configurable spider.')
|
||||
},
|
||||
{
|
||||
target: '#start-url',
|
||||
content: this.$t('Here is the starting URL of the spider.')
|
||||
},
|
||||
{
|
||||
target: '#start-stage',
|
||||
content: this.$t('Here is the starting stage of the spider.<br><br>A <strong>Stage</strong> is basically a callback in the Scrapy spider.')
|
||||
},
|
||||
{
|
||||
target: '#btn-run',
|
||||
content: this.$t('You can run a spider task.<br><br>Spider will be automatically saved when clicking on this button.')
|
||||
},
|
||||
{
|
||||
target: '.stage-item.actions',
|
||||
content: this.$t('Add/duplicate/delete a stage.'),
|
||||
params: {
|
||||
placement: 'right'
|
||||
}
|
||||
},
|
||||
{
|
||||
target: '.fields-table-view td.action',
|
||||
content: this.$t('Add/duplicate/delete an extract field in the stage.'),
|
||||
params: {
|
||||
placement: 'right'
|
||||
}
|
||||
},
|
||||
{
|
||||
target: '.stage-item:nth-child(3)',
|
||||
content: this.$t('You can decide whether this is a list page.<br><br>Click on the CSS/XPath tag to enter the selector expression for list items.<br>For example, "<code>ul > li</code>"'),
|
||||
params: {
|
||||
placement: 'top'
|
||||
}
|
||||
},
|
||||
{
|
||||
target: '.stage-item:nth-child(4)',
|
||||
content: this.$t('You can decide whether this is a list page with pagination.<br><br>Click on the CSS/XPath tag to enter the selector expression for the pagination.<br>For example, "<code>a.next</code>"'),
|
||||
params: {
|
||||
placement: 'top'
|
||||
}
|
||||
},
|
||||
{
|
||||
target: '.fields-table-view',
|
||||
content: this.$t('You should enter necessary information for all fields in the stage.'),
|
||||
params: {
|
||||
placement: 'top'
|
||||
}
|
||||
},
|
||||
{
|
||||
target: '.fields-table-view tr:nth-child(1) td:nth-child(7)',
|
||||
content: this.$t('If you have multiple stages, e.g. list page + detail page, you should select the next stage in the detail link\'s field.'),
|
||||
params: {
|
||||
placement: 'top'
|
||||
}
|
||||
},
|
||||
// process
|
||||
{
|
||||
target: '#tab-process',
|
||||
content: this.$t('You can view the<br> visualization of the stage<br> workflow.')
|
||||
},
|
||||
// settings
|
||||
{
|
||||
target: '#tab-settings',
|
||||
content: this.$t('You can add the settings here, which will be loaded in the Scrapy\'s <code>settings.py</code> file.<br><br>JSON and Array data are supported.')
|
||||
},
|
||||
// Spiderfile
|
||||
{
|
||||
target: '#tab-spiderfile',
|
||||
content: this.$t('You can edit the <code>Spiderfile</code> here.<br><br>For more information, please refer to the <a href="https://docs.crawlab.cn/Usage/Spider/ConfigurableSpider.html" target="_blank" style="color: #409EFF">Documentation (Chinese)</a>.')
|
||||
}
|
||||
],
|
||||
tourCallbacks: {
|
||||
onStop: () => {
|
||||
this.$utils.tour.finishTour('spider-detail-config')
|
||||
},
|
||||
onPreviousStep: (currentStep) => {
|
||||
if (currentStep === 10) {
|
||||
this.activeTab = 'stages'
|
||||
} else if (currentStep === 11) {
|
||||
this.activeTab = 'process'
|
||||
} else if (currentStep === 12) {
|
||||
this.activeTab = 'settings'
|
||||
}
|
||||
},
|
||||
onNextStep: (currentStep) => {
|
||||
if (currentStep === 9) {
|
||||
this.activeTab = 'process'
|
||||
} else if (currentStep === 10) {
|
||||
this.activeTab = 'settings'
|
||||
} else if (currentStep === 11) {
|
||||
this.activeTab = 'spiderfile'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -10,7 +10,8 @@ const i18n = new VueI18n({
|
||||
messages: {
|
||||
en,
|
||||
zh
|
||||
}
|
||||
},
|
||||
silentTranslationWarn: true
|
||||
})
|
||||
|
||||
export default i18n
|
||||
|
||||
@@ -434,6 +434,19 @@ docker run -d --restart always --name crawlab_worker \\
|
||||
'You can edit, save, rename<br> and delete the selected file <br>in this box.': '在这个栏位中,您可以<br>编辑、保存、重命名、<br>删除所选择的文件',
|
||||
'Here you can add environment variables that will be passed to the spider program when running a task.': '这里您可以添加环境变量,这些环境变量会被传入运行的爬虫程序中',
|
||||
'You can add, edit and delete schedules (cron jobs) for the spider.': '您可以添加、修改、删除爬虫的定时任务',
|
||||
'You can switch to each section of configurable spider.': '您可以切换到可配置爬虫的每一个部分',
|
||||
'Here is the starting URL of the spider.': '这里是爬虫的起始URL',
|
||||
'Here is the starting stage of the spider.<br><br>A <strong>Stage</strong> is basically a callback in the Scrapy spider.': '这里是爬虫的起始阶段<br><br><strong>阶段</strong>就是 Scrapy 爬虫中的回调函数',
|
||||
'You can run a spider task.<br><br>Spider will be automatically saved when clicking on this button.': '您可以运行爬虫任务<br><br>点击该按钮会自动保存爬虫',
|
||||
'Add/duplicate/delete a stage.': '添加/复制/删除阶段',
|
||||
'Add/duplicate/delete an extract field in the stage.': '添加/复制/删除该阶段下的抓取字段',
|
||||
'You can decide whether this is a list page.<br><br>Click on the CSS/XPath tag to enter the selector expression for list items.<br>For example, "<code>ul > li</code>"': '您可以决定这是否为一个列表页<br><br>点击 CSS/XPath 标签来输入列表元素的选择器表达式<br>例如 "<code>ul > li</code>"',
|
||||
'You can decide whether this is a list page with pagination.<br><br>Click on the CSS/XPath tag to enter the selector expression for the pagination.<br>For example, "<code>a.next</code>"': '您可以决定这是否为一个含分页的列表页<br><br>点击 CSS/XPath 标签来输入分页的选择器表达式<br>例如 "<code>a.next</code>"',
|
||||
'You should enter necessary information for all fields in the stage.': '您应该输入该阶段下所有字段的信息',
|
||||
'If you have multiple stages, e.g. list page + detail page, you should select the next stage in the detail link\'s field.': '如果您有多个阶段,例如列表页+详情页,您应该在详情页链接字段中选择下一个阶段',
|
||||
'You can view the<br> visualization of the stage<br> workflow.': '您可以查看阶段工作流的<br>可视化界面',
|
||||
'You can add the settings here, which will be loaded in the Scrapy\'s <code>settings.py</code> file.<br><br>JSON and Array data are supported.': '您可以在这里添加设置,它们会在 Scrapy 中的 <code>settings.py</code> 中被加载<br><br>JSON 和数组都支持',
|
||||
'You can edit the <code>Spiderfile</code> here.<br><br>For more information, please refer to the <a href="https://docs.crawlab.cn/Usage/Spider/ConfigurableSpider.html" target="_blank" style="color: #409EFF">Documentation (Chinese)</a>.': '您可以在这里编辑 <code>Spiderfile</code><br><br>更多信息, 请参考 <a href="https://docs.crawlab.cn/Usage/Spider/ConfigurableSpider.html" target="_blank" style="color: #409EFF">文档</a>.',
|
||||
|
||||
// 其他
|
||||
'Star crawlab-team/crawlab on GitHub': '在 GitHub 上为 Crawlab 加星吧'
|
||||
|
||||
@@ -47,35 +47,6 @@ export const constantRouterMap = [
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
path: '/nodes',
|
||||
component: Layout,
|
||||
meta: {
|
||||
title: 'Node',
|
||||
icon: 'fa fa-server'
|
||||
},
|
||||
children: [
|
||||
{
|
||||
path: '',
|
||||
name: 'NodeList',
|
||||
component: () => import('../views/node/NodeList'),
|
||||
meta: {
|
||||
title: 'Nodes',
|
||||
icon: 'fa fa-server'
|
||||
}
|
||||
},
|
||||
{
|
||||
path: ':id',
|
||||
name: 'NodeDetail',
|
||||
component: () => import('../views/node/NodeDetail'),
|
||||
meta: {
|
||||
title: 'Node Detail',
|
||||
icon: 'fa fa-circle-o'
|
||||
},
|
||||
hidden: true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
path: '/spiders',
|
||||
component: Layout,
|
||||
@@ -155,21 +126,49 @@ export const constantRouterMap = [
|
||||
]
|
||||
},
|
||||
{
|
||||
path: '/sites',
|
||||
path: '/nodes',
|
||||
component: Layout,
|
||||
hidden: true,
|
||||
meta: {
|
||||
title: 'Site',
|
||||
icon: 'fa fa-sitemap'
|
||||
title: 'Node',
|
||||
icon: 'fa fa-server'
|
||||
},
|
||||
children: [
|
||||
{
|
||||
path: '',
|
||||
name: 'SiteList',
|
||||
component: () => import('../views/site/SiteList'),
|
||||
name: 'NodeList',
|
||||
component: () => import('../views/node/NodeList'),
|
||||
meta: {
|
||||
title: 'Sites',
|
||||
icon: 'fa fa-sitemap'
|
||||
title: 'Nodes',
|
||||
icon: 'fa fa-server'
|
||||
}
|
||||
},
|
||||
{
|
||||
path: ':id',
|
||||
name: 'NodeDetail',
|
||||
component: () => import('../views/node/NodeDetail'),
|
||||
meta: {
|
||||
title: 'Node Detail',
|
||||
icon: 'fa fa-circle-o'
|
||||
},
|
||||
hidden: true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
path: '/disclaimer',
|
||||
component: Layout,
|
||||
meta: {
|
||||
title: 'Disclaimer',
|
||||
icon: 'fa fa-exclamation-triangle'
|
||||
},
|
||||
children: [
|
||||
{
|
||||
path: '',
|
||||
name: 'Disclaimer',
|
||||
component: () => import('../views/doc/Disclaimer'),
|
||||
meta: {
|
||||
title: 'Disclaimer',
|
||||
icon: 'fa fa-exclamation-triangle'
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -193,25 +192,6 @@ export const constantRouterMap = [
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
path: '/disclaimer',
|
||||
component: Layout,
|
||||
meta: {
|
||||
title: 'Disclaimer',
|
||||
icon: 'fa fa-exclamation-triangle'
|
||||
},
|
||||
children: [
|
||||
{
|
||||
path: '',
|
||||
name: 'Disclaimer',
|
||||
component: () => import('../views/doc/Disclaimer'),
|
||||
meta: {
|
||||
title: 'Disclaimer',
|
||||
icon: 'fa fa-exclamation-triangle'
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
path: '/setting',
|
||||
component: Layout,
|
||||
|
||||
@@ -184,6 +184,12 @@ export default {
|
||||
setTimeout(() => {
|
||||
this.$refs['config'].update()
|
||||
}, 0)
|
||||
|
||||
if (!this.$utils.tour.isFinishedTour('spider-detail-config')) {
|
||||
setTimeout(() => {
|
||||
this.$tours['spider-detail-config'].start()
|
||||
}, 100)
|
||||
}
|
||||
}
|
||||
this.$st.sendEv('爬虫详情', '切换标签', tab.name)
|
||||
},
|
||||
|
||||
@@ -38,13 +38,15 @@
|
||||
|
||||
<!--table list-->
|
||||
<el-table :data="filteredTableData"
|
||||
ref="table"
|
||||
class="table"
|
||||
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
|
||||
border
|
||||
row-key="_id"
|
||||
@row-click="onRowClick"
|
||||
@selection-change="onSelectionChange">
|
||||
>
|
||||
<el-table-column type="selection" width="55"/>
|
||||
<el-table-column type="selection" width="55" reserve-selection/>
|
||||
<template v-for="col in columns">
|
||||
<el-table-column v-if="col.name === 'spider_name'"
|
||||
:key="col.name"
|
||||
@@ -240,7 +242,6 @@ export default {
|
||||
},
|
||||
methods: {
|
||||
onSearch (value) {
|
||||
console.log(value)
|
||||
},
|
||||
onRefresh () {
|
||||
this.$store.dispatch('task/getTaskList')
|
||||
@@ -267,6 +268,7 @@ export default {
|
||||
message: '删除任务成功'
|
||||
})
|
||||
this.$store.dispatch('task/getTaskList')
|
||||
this.$refs['table'].clearSelection()
|
||||
return
|
||||
}
|
||||
this.$message({
|
||||
|
||||
@@ -41,6 +41,5 @@ services:
|
||||
- "/opt/crawlab/redis/data:/data"
|
||||
splash: # use Splash to run spiders on dynamic pages
|
||||
image: scrapinghub/splash
|
||||
container_name: splash
|
||||
# ports:
|
||||
# - "8050:8050"
|
||||
|
||||
Reference in New Issue
Block a user