mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
准备可配置爬虫自定义设置变量
This commit is contained in:
@@ -1,12 +1,12 @@
|
||||
package entity
|
||||
|
||||
type Field struct {
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Css string `yaml:"css" json:"css"`
|
||||
Xpath string `yaml:"xpath" json:"xpath"`
|
||||
Attr string `yaml:"attr" json:"attr"`
|
||||
NextStage string `yaml:"next_stage" json:"next_stage"`
|
||||
Remark string `yaml:"remark" json:"remark"`
|
||||
type ConfigSpiderData struct {
|
||||
Version string `yaml:"version" json:"version"`
|
||||
Engine string `yaml:"engine" json:"engine"`
|
||||
StartUrl string `yaml:"start_url" json:"start_url"`
|
||||
StartStage string `yaml:"start_stage" json:"start_stage"`
|
||||
Stages map[string]Stage `yaml:"stages" json:"stages"`
|
||||
Settings map[string]string `yaml:"settings" json:"settings"`
|
||||
}
|
||||
|
||||
type Stage struct {
|
||||
@@ -20,10 +20,11 @@ type Stage struct {
|
||||
Fields []Field `yaml:"fields" json:"fields"`
|
||||
}
|
||||
|
||||
type ConfigSpiderData struct {
|
||||
Version string `yaml:"version" json:"version"`
|
||||
Engine string `yaml:"engine" json:"engine"`
|
||||
StartUrl string `yaml:"start_url" json:"start_url"`
|
||||
StartStage string `yaml:"start_stage" json:"start_stage"`
|
||||
Stages map[string]Stage `yaml:"stages" json:"stages"`
|
||||
type Field struct {
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Css string `yaml:"css" json:"css"`
|
||||
Xpath string `yaml:"xpath" json:"xpath"`
|
||||
Attr string `yaml:"attr" json:"attr"`
|
||||
NextStage string `yaml:"next_stage" json:"next_stage"`
|
||||
Remark string `yaml:"remark" json:"remark"`
|
||||
}
|
||||
|
||||
@@ -205,8 +205,8 @@ func PostConfigSpiderSpiderfile(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
// 校验configData
|
||||
if err := services.ValidateSpiderfile(configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
@@ -217,6 +217,12 @@ func PostConfigSpiderSpiderfile(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
|
||||
@@ -219,12 +219,18 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
|
||||
// 环境变量配置
|
||||
envs := s.Envs
|
||||
if s.Type == constants.Configurable {
|
||||
// 数据库配置
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_HOST", Value: viper.GetString("mongo.host")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PORT", Value: viper.GetString("mongo.port")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_DB", Value: viper.GetString("mongo.db")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_USERNAME", Value: viper.GetString("mongo.username")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_PASSWORD", Value: viper.GetString("mongo.password")})
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_MONGO_AUTHSOURCE", Value: viper.GetString("mongo.authSource")})
|
||||
|
||||
// 设置配置
|
||||
for envName, envValue := range s.Config.Settings {
|
||||
envs = append(envs, model.Env{Name: "CRAWLAB_SETTING_" + envName, Value: envValue})
|
||||
}
|
||||
}
|
||||
cmd = SetEnv(cmd, envs, t.Id, s.Col)
|
||||
|
||||
|
||||
@@ -23,3 +23,5 @@ stages:
|
||||
fields:
|
||||
- name: "description"
|
||||
css: "#product_description + p"
|
||||
settings:
|
||||
ROBOTSTXT_OBEY: true
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
BOT_NAME = 'config_spider'
|
||||
BOT_NAME = 'Crawlab Configurable Spider'
|
||||
|
||||
SPIDER_MODULES = ['config_spider.spiders']
|
||||
NEWSPIDER_MODULE = 'config_spider.spiders'
|
||||
@@ -88,3 +88,9 @@ ITEM_PIPELINES = {
|
||||
#HTTPCACHE_DIR = 'httpcache'
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
||||
|
||||
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWALAB_SETTING_')]:
|
||||
setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '')
|
||||
setting_value = os.environ.get('setting_env_name')
|
||||
locals()[setting_name] = setting_value
|
||||
|
||||
|
||||
@@ -182,6 +182,20 @@
|
||||
</el-tab-pane>
|
||||
<!--./Graph-->
|
||||
|
||||
<!--Setting-->
|
||||
<el-tab-pane name="setting" :label="$t('Setting')">
|
||||
<setting-fields-table-view
|
||||
type="list"
|
||||
:fields="spiderForm.settings"
|
||||
/>
|
||||
<fields-table-view
|
||||
type="list"
|
||||
title="List Page Fields"
|
||||
:fields="stage.fields"
|
||||
/>
|
||||
</el-tab-pane>
|
||||
<!--./Setting-->
|
||||
|
||||
<!--Spiderfile-->
|
||||
<el-tab-pane name="spiderfile" label="Spiderfile">
|
||||
<div class="spiderfile-actions">
|
||||
@@ -209,10 +223,12 @@ import CrawlConfirmDialog from '../Common/CrawlConfirmDialog'
|
||||
import 'codemirror/lib/codemirror.js'
|
||||
import 'codemirror/mode/yaml/yaml.js'
|
||||
import FileDetail from '../File/FileDetail'
|
||||
import SettingFieldsTableView from '../TableView/SettingFieldsTableView'
|
||||
|
||||
export default {
|
||||
name: 'ConfigList',
|
||||
components: {
|
||||
SettingFieldsTableView,
|
||||
FileDetail,
|
||||
CrawlConfirmDialog,
|
||||
FieldsTableView
|
||||
|
||||
349
frontend/src/components/TableView/SettingFieldsTableView.vue
Normal file
349
frontend/src/components/TableView/SettingFieldsTableView.vue
Normal file
@@ -0,0 +1,349 @@
|
||||
<template>
|
||||
<div class="setting-fields-table-view">
|
||||
<!-- <el-row class="button-group-container">-->
|
||||
<!-- <label class="title">{{$t(this.title)}}</label>-->
|
||||
<!-- <div class="button-group">-->
|
||||
<!-- <el-button type="primary" size="small" @click="addField" icon="el-icon-plus">{{$t('Add Field')}}</el-button>-->
|
||||
<!-- </div>-->
|
||||
<!-- </el-row>-->
|
||||
<el-row>
|
||||
<el-table :data="fields"
|
||||
class="table edit"
|
||||
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
|
||||
:cell-style="getCellClassStyle"
|
||||
>
|
||||
<el-table-column class-name="action" width="80px" align="right">
|
||||
<template slot-scope="scope">
|
||||
<i class="action-item el-icon-copy-document" @click="onCopyField(scope.row)"></i>
|
||||
<i class="action-item el-icon-remove-outline" @click="onRemoveField(scope.row)"></i>
|
||||
<i class="action-item el-icon-circle-plus-outline" @click="onAddField(scope.row)"></i>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Field Name')" width="150px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.name"
|
||||
:placeholder="$t('Field Name')"
|
||||
@change="onNameChange(scope.row)"
|
||||
/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Selector Type')" width="150px" align="center" class-name="selector-type">
|
||||
<template slot-scope="scope">
|
||||
<span class="button-selector-item" @click="onClickSelectorType(scope.row, 'css')">
|
||||
<el-tag
|
||||
:class="scope.row.css ? 'active' : 'inactive'"
|
||||
type="success"
|
||||
>
|
||||
CSS
|
||||
</el-tag>
|
||||
</span>
|
||||
<span class="button-selector-item" @click="onClickSelectorType(scope.row, 'xpath')">
|
||||
<el-tag
|
||||
:class="scope.row.xpath ? 'active' : 'inactive'"
|
||||
type="primary"
|
||||
>
|
||||
XPath
|
||||
</el-tag>
|
||||
</span>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Selector')" width="200px">
|
||||
<template slot-scope="scope">
|
||||
<template v-if="scope.row.css">
|
||||
<el-input v-model="scope.row.css" :placeholder="$t('CSS / XPath')"></el-input>
|
||||
</template>
|
||||
<template v-else>
|
||||
<el-input v-model="scope.row.xpath" :placeholder="$t('CSS / XPath')"></el-input>
|
||||
</template>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Is Attribute')" width="150px" align="center">
|
||||
<template slot-scope="scope">
|
||||
<span class="button-selector-item" @click="onClickIsAttribute(scope.row, false)">
|
||||
<el-tag
|
||||
:class="!scope.row.attr ? 'active' : 'inactive'"
|
||||
type="success"
|
||||
>
|
||||
{{$t('Text')}}
|
||||
</el-tag>
|
||||
</span>
|
||||
<span class="button-selector-item" @click="onClickIsAttribute(scope.row, true)">
|
||||
<el-tag
|
||||
:class="scope.row.attr ? 'active' : 'inactive'"
|
||||
type="primary"
|
||||
>
|
||||
{{$t('Attribute')}}
|
||||
</el-tag>
|
||||
</span>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Attribute')" width="200px">
|
||||
<template slot-scope="scope">
|
||||
<template v-if="scope.row.attr">
|
||||
<el-input v-model="scope.row.attr" :placeholder="$t('Attribute')"/>
|
||||
</template>
|
||||
<template v-else>
|
||||
<span style="margin-left: 15px; color: lightgrey">
|
||||
N/A
|
||||
</span>
|
||||
</template>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Next Stage')" width="250px">
|
||||
<template slot-scope="scope">
|
||||
<el-select
|
||||
v-model="scope.row.next_stage"
|
||||
:class="!scope.row.next_stage ? 'disabled' : ''"
|
||||
@change="onChangeNextStage(scope.row)"
|
||||
>
|
||||
<el-option :label="$t('No Next Stage')" value=""/>
|
||||
<el-option v-for="n in stageNames" :key="n" :label="n" :value="n"/>
|
||||
</el-select>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Remark')" width="auto" min-width="120px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.remark" :placeholder="$t('Remark')"/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</el-row>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import {
|
||||
mapState
|
||||
} from 'vuex'
|
||||
|
||||
export default {
|
||||
name: 'SettingFieldsTableView',
|
||||
props: {
|
||||
type: {
|
||||
type: String,
|
||||
default: 'list'
|
||||
},
|
||||
title: {
|
||||
type: String,
|
||||
default: ''
|
||||
},
|
||||
stageNames: {
|
||||
type: Array,
|
||||
default () {
|
||||
return []
|
||||
}
|
||||
},
|
||||
fields: {
|
||||
type: Array,
|
||||
default () {
|
||||
return []
|
||||
}
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
...mapState('spider', [
|
||||
'spiderForm'
|
||||
])
|
||||
},
|
||||
methods: {
|
||||
addField () {
|
||||
this.fields.push({
|
||||
type: 'css',
|
||||
extract_type: 'text'
|
||||
})
|
||||
this.$st.sendEv('爬虫详情-配置', '添加字段')
|
||||
},
|
||||
deleteField (index) {
|
||||
this.fields.splice(index, 1)
|
||||
this.$st.sendEv('爬虫详情-配置', '删除字段')
|
||||
},
|
||||
onNameChange (row) {
|
||||
if (this.fields.filter(d => d.name === row.name).length > 1) {
|
||||
this.$message.error(this.$t(`Duplicated field names for ${row.name}`))
|
||||
}
|
||||
this.$st.sendEv('爬虫详情-配置', '更改字段')
|
||||
},
|
||||
onCheck (row) {
|
||||
this.fields.forEach(d => {
|
||||
if (row.name !== d.name) {
|
||||
this.$set(d, 'is_detail', false)
|
||||
}
|
||||
})
|
||||
this.$st.sendEv('爬虫详情-配置', '设置详情页URL')
|
||||
},
|
||||
onClickSelectorType (row, selectorType) {
|
||||
if (selectorType === 'css') {
|
||||
if (row.xpath) this.$set(row, 'xpath', '')
|
||||
if (!row.css) this.$set(row, 'css', 'body')
|
||||
} else {
|
||||
if (row.css) this.$set(row, 'css', '')
|
||||
if (!row.xpath) this.$set(row, 'xpath', '//body')
|
||||
}
|
||||
},
|
||||
onClickIsAttribute (row, isAttribute) {
|
||||
if (!isAttribute) {
|
||||
// 文本
|
||||
if (row.attr) this.$set(row, 'attr', '')
|
||||
} else {
|
||||
// 属性
|
||||
if (!row.attr) this.$set(row, 'attr', 'href')
|
||||
}
|
||||
},
|
||||
onCopyField (row) {
|
||||
for (let i = 0; i < this.fields.length; i++) {
|
||||
if (row.name === this.fields[i].name) {
|
||||
this.fields.splice(i, 0, JSON.parse(JSON.stringify(row)))
|
||||
break
|
||||
}
|
||||
}
|
||||
},
|
||||
onRemoveField (row) {
|
||||
for (let i = 0; i < this.fields.length; i++) {
|
||||
if (row.name === this.fields[i].name) {
|
||||
this.fields.splice(i, 1)
|
||||
break
|
||||
}
|
||||
}
|
||||
if (this.fields.length === 0) {
|
||||
this.fields.push({
|
||||
css: 'body',
|
||||
next_stage: ''
|
||||
})
|
||||
}
|
||||
},
|
||||
onAddField (row) {
|
||||
for (let i = 0; i < this.fields.length; i++) {
|
||||
if (row.name === this.fields[i].name) {
|
||||
this.fields.splice(i + 1, 0, {
|
||||
name: `field_${Math.floor(new Date().getTime()).toString()}`,
|
||||
css: 'body',
|
||||
next_stage: ''
|
||||
})
|
||||
break
|
||||
}
|
||||
}
|
||||
},
|
||||
getCellClassStyle ({ row, columnIndex }) {
|
||||
if (columnIndex === 1) {
|
||||
// 字段名称
|
||||
if (!row.name) {
|
||||
return {
|
||||
'border': '1px solid red'
|
||||
}
|
||||
}
|
||||
} else if (columnIndex === 3) {
|
||||
// 选择器
|
||||
if (!row.css && !row.xpath) {
|
||||
return {
|
||||
'border': '1px solid red'
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
onChangeNextStage (row) {
|
||||
this.fields.forEach(f => {
|
||||
if (f.name !== row.name) {
|
||||
this.$set(f, 'next_stage', '')
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.el-table.edit >>> .el-table__body td {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-table__body td .cell {
|
||||
padding: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-input__inner:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-input__inner {
|
||||
height: 36px;
|
||||
border: none;
|
||||
border-radius: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-select .el-input .el-select__caret {
|
||||
line-height: 36px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .button-selector-item {
|
||||
cursor: pointer;
|
||||
margin: 0 5px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-tag.inactive {
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action {
|
||||
background: none !important;
|
||||
border: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr {
|
||||
border: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr th {
|
||||
border-right: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr td:nth-child(2) {
|
||||
border-left: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit >>> tr td {
|
||||
border-right: 1px solid rgb(220, 223, 230);
|
||||
}
|
||||
|
||||
.el-table.edit::before {
|
||||
background: none;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action-item {
|
||||
font-size: 14px;
|
||||
margin-right: 5px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .action-item:last-child {
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
.button-group-container {
|
||||
/*display: inline-block;*/
|
||||
/*width: 100%;*/
|
||||
}
|
||||
|
||||
.button-group-container .title {
|
||||
float: left;
|
||||
line-height: 32px;
|
||||
}
|
||||
|
||||
.button-group-container .button-group {
|
||||
float: right;
|
||||
}
|
||||
|
||||
.action-button-group {
|
||||
display: flex;
|
||||
margin-left: 10px;
|
||||
}
|
||||
|
||||
.action-button-group >>> .el-checkbox__label {
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-select.disabled .el-input__inner {
|
||||
color: lightgrey;
|
||||
}
|
||||
</style>
|
||||
Reference in New Issue
Block a user