mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
updated configurable spider
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
aiohttp==3.5.4
|
||||
amqp==2.4.2
|
||||
aniso8601==6.0.0
|
||||
Appium-Python-Client==0.40
|
||||
APScheduler==3.6.0
|
||||
asn1crypto==0.24.0
|
||||
async-timeout==3.0.1
|
||||
@@ -26,6 +27,8 @@ Flask-Cors==3.0.7
|
||||
Flask-RESTful==0.3.7
|
||||
flask-restplus==0.12.1
|
||||
flower==0.9.3
|
||||
gevent==1.4.0
|
||||
greenlet==0.4.15
|
||||
gunicorn==19.9.0
|
||||
html5lib==1.0.1
|
||||
humanfriendly==4.18
|
||||
@@ -55,6 +58,8 @@ python-dateutil==2.8.0
|
||||
pytz==2018.9
|
||||
queuelib==1.5.0
|
||||
redis==3.2.1
|
||||
redisbeat==1.1.4
|
||||
reppy==0.4.12
|
||||
requests==2.21.0
|
||||
Scrapy==1.6.0
|
||||
selenium==3.141.0
|
||||
|
||||
@@ -5,6 +5,7 @@ import subprocess
|
||||
from datetime import datetime
|
||||
from random import random
|
||||
|
||||
import gevent
|
||||
import requests
|
||||
from bson import ObjectId
|
||||
from flask import current_app, request
|
||||
@@ -23,7 +24,8 @@ from tasks.spider import execute_spider, execute_config_spider
|
||||
from utils import jsonify
|
||||
from utils.deploy import zip_file, unzip_file
|
||||
from utils.file import get_file_suffix_stats, get_file_suffix
|
||||
from utils.spider import get_lang_by_stats, get_last_n_run_errors_count, get_last_n_day_tasks_count
|
||||
from utils.spider import get_lang_by_stats, get_last_n_run_errors_count, get_last_n_day_tasks_count, get_list_page_data, \
|
||||
get_detail_page_data
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument('file', type=FileStorage, location='files')
|
||||
@@ -71,9 +73,12 @@ class SpiderApi(BaseApi):
|
||||
# Configurable Spider
|
||||
########################
|
||||
|
||||
# spider crawl fields
|
||||
# spider crawl fields for list page
|
||||
('fields', str),
|
||||
|
||||
# spider crawl fields for detail page
|
||||
('detail_fields', str),
|
||||
|
||||
# spider crawl type
|
||||
('crawl_type', str),
|
||||
|
||||
@@ -442,13 +447,22 @@ class SpiderApi(BaseApi):
|
||||
|
||||
def update_fields(self, id: str):
    """
    Update list page fields variables for configurable spiders.

    Parses the request arguments, decodes the JSON-encoded ``fields``
    argument and stores the result on the spider document.
    :param id: spider_id
    """
    decoded_fields = json.loads(self.parser.parse_args().fields)
    db_manager.update_one(
        col_name='spiders',
        id=id,
        values={'fields': decoded_fields},
    )
|
||||
|
||||
def update_detail_fields(self, id: str):
    """
    Update detail page fields variables for configurable spiders.

    Parses the request arguments, decodes the JSON-encoded
    ``detail_fields`` argument and stores the result on the spider document.
    :param id: spider_id
    """
    decoded_fields = json.loads(self.parser.parse_args().detail_fields)
    db_manager.update_one(
        col_name='spiders',
        id=id,
        values={'detail_fields': decoded_fields},
    )
|
||||
|
||||
def preview_crawl(self, id: str):
|
||||
spider = db_manager.get(col_name='spiders', id=id)
|
||||
|
||||
@@ -489,25 +503,8 @@ class SpiderApi(BaseApi):
|
||||
'error': 'item_selector should not be empty'
|
||||
}, 400
|
||||
|
||||
# TODO: enable xpath
|
||||
data = []
|
||||
items = sel.cssselect(spider['item_selector'])
|
||||
for item in items:
|
||||
row = {}
|
||||
for f in spider['fields']:
|
||||
if f['type'] == QueryType.CSS:
|
||||
# css selector
|
||||
res = item.cssselect(f['query'])
|
||||
else:
|
||||
# xpath
|
||||
res = item.xpath(f['query'])
|
||||
data = get_list_page_data(spider, sel)[:10]
|
||||
|
||||
if len(res) > 0:
|
||||
if f['extract_type'] == ExtractType.TEXT:
|
||||
row[f['name']] = res[0].text
|
||||
else:
|
||||
row[f['name']] = res[0].get(f['attribute'])
|
||||
data.append(row)
|
||||
return {
|
||||
'status': 'ok',
|
||||
'items': data
|
||||
@@ -517,7 +514,23 @@ class SpiderApi(BaseApi):
|
||||
pass
|
||||
|
||||
elif spider['crawl_type'] == CrawlType.LIST_DETAIL:
|
||||
pass
|
||||
data = get_list_page_data(spider, sel)[:10]
|
||||
|
||||
ev_list = []
|
||||
for idx, d in enumerate(data):
|
||||
for f in spider['fields']:
|
||||
if f.get('is_detail'):
|
||||
url = d.get(f['name'])
|
||||
if url is not None:
|
||||
ev_list.append(gevent.spawn(get_detail_page_data, url, spider, idx, data))
|
||||
break
|
||||
|
||||
gevent.joinall(ev_list)
|
||||
|
||||
return {
|
||||
'status': 'ok',
|
||||
'items': data
|
||||
}
|
||||
|
||||
|
||||
class SpiderImportApi(Resource):
|
||||
|
||||
@@ -11,6 +11,15 @@ from spiders.db import spider
|
||||
|
||||
|
||||
class SpidersItem(scrapy.Item):
|
||||
fields = {f['name']: scrapy.Field() for f in spider['fields']}
|
||||
if spider['crawl_type'] == 'list':
|
||||
fields = {f['name']: scrapy.Field() for f in spider['fields']}
|
||||
elif spider['crawl_type'] == 'detail':
|
||||
fields = {f['name']: scrapy.Field() for f in spider['detail_fields']}
|
||||
elif spider['crawl_type'] == 'list-detail':
|
||||
fields = {f['name']: scrapy.Field() for f in (spider['fields'] + spider['detail_fields'])}
|
||||
else:
|
||||
fields = {}
|
||||
|
||||
# basic fields
|
||||
fields['_id'] = scrapy.Field()
|
||||
fields['task_id'] = scrapy.Field()
|
||||
|
||||
@@ -7,57 +7,110 @@ from spiders.db import spider
|
||||
from spiders.items import SpidersItem
|
||||
|
||||
|
||||
class NormalSpiderSpider(scrapy.Spider):
|
||||
def get_detail_url(item):
    """
    Return the detail page URL carried by a scraped list item.

    The first list-page field flagged ``is_detail`` in the spider
    configuration decides which value of ``item`` is returned.
    :param item: scraped list item exposing ``get(name)``
    :return: the item's value for the flagged field, or None when no
        field is flagged
    """
    flagged = next((fld for fld in spider['fields'] if fld.get('is_detail')), None)
    if flagged is None:
        return None
    return item.get(flagged['name'])
|
||||
|
||||
|
||||
def get_spiders_item(sel, fields, item=None):
    """
    Fill an item with values extracted from ``sel`` according to ``fields``.

    Each field config is a mapping with the keys ``name``, ``type``
    ('xpath' selects XPath, anything else CSS), ``query``, ``extract_type``
    ('text' extracts text content, anything else an attribute) and, for
    attribute extraction, ``attribute``.
    :param sel: selector or response exposing ``xpath()`` and ``css()``
    :param fields: iterable of field configs
    :param item: item to extend in place; a new SpidersItem is created
        when omitted
    :return: the populated item
    """
    if item is None:
        item = SpidersItem()

    for f in fields:
        wants_text = f['extract_type'] == 'text'

        if f['type'] == 'xpath':
            if wants_text:
                query = f['query'] + '/text()'
            else:
                # XPath addresses an attribute as `@name`; the previous
                # `/@("name")` form is not a valid XPath expression.
                attribute = f['attribute']
                query = f['query'] + f'/@{attribute}'
            item[f['name']] = sel.xpath(query).extract_first()
        else:
            if wants_text:
                query = f['query'] + '::text'
            else:
                attribute = f['attribute']
                query = f['query'] + f'::attr("{attribute}")'
            item[f['name']] = sel.css(query).extract_first()

    return item
|
||||
|
||||
|
||||
def get_list_items(response):
    """
    Select the list item nodes of a page using the spider's item selector.

    :param response: response exposing ``xpath()`` and ``css()``
    :return: whatever the chosen selector method returns for
        ``spider['item_selector']``
    """
    use_xpath = spider['item_selector_type'] == 'xpath'
    # anything other than 'xpath' is treated as a css selector
    run_query = response.xpath if use_xpath else response.css
    return run_query(spider['item_selector'])
|
||||
|
||||
|
||||
def get_next_url(response):
    """
    Return the absolute URL of the next page, or None when there is none.

    The ``href`` of the element matched by the spider's pagination
    selector is resolved against ``response.url``, so absolute,
    root-relative (``/p/2``), document-relative (``page2.html``) and
    scheme-relative (``//host/p/2``) links all produce a URL with a scheme.
    :param response: response exposing ``url``, ``xpath()`` and ``css()``
    :return: absolute next page URL, or None when no pagination selector
        is configured or no usable href is found
    """
    from urllib.parse import urljoin

    selector = spider.get('pagination_selector')
    if not selector:
        # an empty selector is treated like a missing one
        return None

    if spider['pagination_selector_type'] == 'xpath':
        href = response.xpath(selector + '/@href').extract_first()
    else:
        href = response.css(selector + '::attr("href")').extract_first()

    if href is None:
        return None

    href = href.strip()
    if not href:
        # an empty href would resolve to the current page again
        return None

    return urljoin(response.url, href)
|
||||
|
||||
|
||||
class ConfigSpiderSpider(scrapy.Spider):
|
||||
name = 'config_spider'
|
||||
# allowed_domains = []
|
||||
start_urls = [spider['start_url']]
|
||||
|
||||
def parse(self, response):
|
||||
if spider['item_selector_type'] == 'xpath':
|
||||
# xpath selector
|
||||
items = response.xpath(spider['item_selector'])
|
||||
else:
|
||||
# css selector
|
||||
items = response.css(spider['item_selector'])
|
||||
for _item in items:
|
||||
item = SpidersItem()
|
||||
for f in spider['fields']:
|
||||
if f['type'] == 'xpath':
|
||||
# xpath selector
|
||||
if f['extract_type'] == 'text':
|
||||
# text content
|
||||
query = f['query'] + '/text()'
|
||||
else:
|
||||
# attribute
|
||||
attribute = f["attribute"]
|
||||
query = f['query'] + f'/@("{attribute}")'
|
||||
item[f['name']] = _item.xpath(query).extract_first()
|
||||
|
||||
else:
|
||||
# css selector
|
||||
if f['extract_type'] == 'text':
|
||||
# text content
|
||||
query = f['query'] + '::text'
|
||||
else:
|
||||
# attribute
|
||||
attribute = f["attribute"]
|
||||
query = f['query'] + f'::attr("{attribute}")'
|
||||
item[f['name']] = _item.css(query).extract_first()
|
||||
|
||||
if spider['crawl_type'] == 'list':
|
||||
items = get_list_items(response)
|
||||
# list page only
|
||||
for _item in items:
|
||||
item = get_spiders_item(sel=_item, fields=spider['fields'])
|
||||
yield item
|
||||
|
||||
# pagination
|
||||
if spider.get('pagination_selector') is not None:
|
||||
if spider['pagination_selector_type'] == 'xpath':
|
||||
# xpath selector
|
||||
next_url = response.xpath(spider['pagination_selector'] + '/@href').extract_first()
|
||||
else:
|
||||
# css selector
|
||||
next_url = response.css(spider['pagination_selector'] + '::attr("href")').extract_first()
|
||||
|
||||
# found next url
|
||||
next_url = get_next_url(response)
|
||||
if next_url is not None:
|
||||
if not next_url.startswith('http') and not next_url.startswith('//'):
|
||||
u = urlparse(response.url)
|
||||
next_url = f'{u.scheme}://{u.netloc}{next_url}'
|
||||
yield scrapy.Request(url=next_url)
|
||||
|
||||
elif spider['crawl_type'] == 'detail':
|
||||
# TODO: detail page onlny
|
||||
# detail page only
|
||||
pass
|
||||
|
||||
elif spider['crawl_type'] == 'list-detail':
|
||||
# list page + detail page
|
||||
items = get_list_items(response)
|
||||
for _item in items:
|
||||
item = get_spiders_item(sel=_item, fields=spider['fields'])
|
||||
detail_url = get_detail_url(item)
|
||||
if detail_url is not None:
|
||||
yield scrapy.Request(url=detail_url,
|
||||
callback=self.parse_detail,
|
||||
meta={
|
||||
'item': item
|
||||
})
|
||||
next_url = get_next_url(response)
|
||||
if next_url is not None:
|
||||
yield scrapy.Request(url=next_url)
|
||||
|
||||
def parse_detail(self, response):
|
||||
item = get_spiders_item(sel=response, fields=spider['detail_fields'], item=response.meta['item'])
|
||||
yield item
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import os
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from bson import ObjectId
|
||||
from lxml import etree
|
||||
|
||||
from constants.spider import FILE_SUFFIX_LANG_MAPPING, LangType, SUFFIX_IGNORE, SpiderType
|
||||
from constants.spider import FILE_SUFFIX_LANG_MAPPING, LangType, SUFFIX_IGNORE, SpiderType, QueryType, ExtractType
|
||||
from constants.task import TaskStatus
|
||||
from db.manager import db_manager
|
||||
|
||||
@@ -69,3 +71,53 @@ def get_last_n_day_tasks_count(spider_id: ObjectId, n: int) -> list:
|
||||
'$gte': (datetime.now() - timedelta(n))
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
def _extract_fields(node, fields):
    """
    Evaluate every field rule against ``node`` and collect the first matches.

    Fields whose query matches nothing are left out of the result.
    :param node: element exposing ``cssselect()`` and ``xpath()``
    :param fields: iterable of field configs with the keys ``name``,
        ``type``, ``query``, ``extract_type`` and, for attribute
        extraction, ``attribute``
    :return: dict mapping field name to extracted value
    """
    row = {}
    for f in fields:
        if f['type'] == QueryType.CSS:
            # css selector
            res = node.cssselect(f['query'])
        else:
            # xpath
            res = node.xpath(f['query'])

        if len(res) == 0:
            continue

        first = res[0]
        if isinstance(first, str):
            # XPath queries ending in @attr or text() yield strings rather
            # than elements; the string already is the extracted value.
            row[f['name']] = str(first)
        elif f['extract_type'] == ExtractType.TEXT:
            row[f['name']] = first.text
        else:
            row[f['name']] = first.get(f['attribute'])
    return row


def get_list_page_data(spider, sel):
    """
    Extract one row per list item from a parsed list page.

    :param spider: spider config providing ``item_selector_type``,
        ``item_selector`` and ``fields``
    :param sel: parsed page exposing ``xpath()`` and ``cssselect()``
    :return: list of dicts, one per matched item, keyed by field name
    """
    if spider['item_selector_type'] == QueryType.XPATH:
        items = sel.xpath(spider['item_selector'])
    else:
        items = sel.cssselect(spider['item_selector'])
    return [_extract_fields(item, spider['fields']) for item in items]


def get_detail_page_data(url, spider, idx, data, timeout=30):
    """
    Fetch a detail page and merge its extracted fields into ``data[idx]``.

    :param url: detail page URL taken from the list row; a relative URL is
        resolved against ``spider['start_url']``
    :param spider: spider config providing ``detail_fields``
    :param idx: index of the row in ``data`` to update
    :param data: list of row dicts, updated in place
    :param timeout: seconds to wait for the HTTP request; without a timeout
        a stalled server would block the caller indefinitely
    """
    from urllib.parse import urljoin

    # NOTE(review): assumes the list page was fetched from
    # spider['start_url'] -- confirm against the caller. Absolute URLs
    # pass through urljoin unchanged.
    url = urljoin(spider.get('start_url') or '', url)

    r = requests.get(url, timeout=timeout)

    sel = etree.HTML(r.content)

    # assign values
    data[idx].update(_extract_fields(sel, spider['detail_fields']))
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<el-table :data="previewCrawlData"
|
||||
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
|
||||
border>
|
||||
<el-table-column v-for="(f, index) in spiderForm.fields"
|
||||
<el-table-column v-for="(f, index) in fields"
|
||||
:label="f.name"
|
||||
:key="index"
|
||||
min-width="100px">
|
||||
@@ -20,8 +20,9 @@
|
||||
</el-dialog>
|
||||
<!--./preview results-->
|
||||
|
||||
<!--config detail-->
|
||||
<el-row>
|
||||
<el-col :span="11" offset="1">
|
||||
<el-col :span="11" :offset="1">
|
||||
<el-form label-width="150px">
|
||||
<el-form-item :label="$t('Crawl Type')">
|
||||
<el-button-group>
|
||||
@@ -72,76 +73,40 @@
|
||||
</el-form>
|
||||
</el-col>
|
||||
</el-row>
|
||||
<!--./config detail-->
|
||||
|
||||
<!--button group-->
|
||||
<el-row style="margin-top: 10px">
|
||||
<div class="button-group-wrapper">
|
||||
<div class="button-group">
|
||||
<el-button type="primary" @click="addField" icon="el-icon-plus">{{$t('Add Field')}}</el-button>
|
||||
</div>
|
||||
<div class="button-group">
|
||||
<el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
|
||||
<el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>
|
||||
<el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
|
||||
</div>
|
||||
<el-row class="button-group-container">
|
||||
<div class="button-group">
|
||||
<el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
|
||||
<el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>
|
||||
<el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
|
||||
</div>
|
||||
</el-row>
|
||||
<!--./button group-->
|
||||
|
||||
<!--field list-->
|
||||
<el-row style="margin-top: 10px;">
|
||||
<el-table :data="spiderForm.fields"
|
||||
class="table edit"
|
||||
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
|
||||
border>
|
||||
<el-table-column :label="$t('Field Name')" width="200px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.name" :placeholder="$t('Field Name')"></el-input>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Query Type')" width="200px">
|
||||
<template slot-scope="scope">
|
||||
<el-select v-model="scope.row.type" :placeholder="$t('Query Type')">
|
||||
<el-option value="css" :label="$t('CSS Selector')"></el-option>
|
||||
<el-option value="xpath" :label="$t('XPath')"></el-option>
|
||||
</el-select>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Query')" width="250px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.query" :placeholder="$t('Query')"></el-input>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Extract Type')" width="120px">
|
||||
<template slot-scope="scope">
|
||||
<el-select v-model="scope.row.extract_type" :placeholder="$t('Extract Type')">
|
||||
<el-option value="text" :label="$t('Text')"></el-option>
|
||||
<el-option value="attribute" :label="$t('Attribute')"></el-option>
|
||||
</el-select>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Attribute')" width="250px">
|
||||
<template slot-scope="scope">
|
||||
<template v-if="scope.row.extract_type === 'attribute'">
|
||||
<el-input v-model="scope.row.attribute"
|
||||
:placeholder="$t('Attribute')">
|
||||
</el-input>
|
||||
</template>
|
||||
<template v-else>
|
||||
</template>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Action')" fixed="right">
|
||||
<template slot-scope="scope">
|
||||
<div class="action-button-group">
|
||||
<el-button size="mini" icon="el-icon-delete" type="danger"
|
||||
@click="deleteField(scope.$index)"></el-button>
|
||||
</div>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
<!--list field list-->
|
||||
<el-row v-if="['list','list-detail'].includes(spiderForm.crawl_type)"
|
||||
class="list-fields-container">
|
||||
<fields-table-view
|
||||
type="list"
|
||||
title="List Page Fields"
|
||||
:fields="spiderForm.fields"
|
||||
/>
|
||||
</el-row>
|
||||
<!--./field list-->
|
||||
<!--./list field list-->
|
||||
|
||||
<!--detail field list-->
|
||||
<el-row v-if="['detail','list-detail'].includes(spiderForm.crawl_type)"
|
||||
class="detail-fields-container"
|
||||
style="margin-top: 10px;">
|
||||
<fields-table-view
|
||||
type="detail"
|
||||
title="Detail Page Fields"
|
||||
:fields="spiderForm.detail_fields"
|
||||
/>
|
||||
</el-row>
|
||||
<!--./detail field list-->
|
||||
</div>
|
||||
</template>
|
||||
|
||||
@@ -149,9 +114,11 @@
|
||||
import {
|
||||
mapState
|
||||
} from 'vuex'
|
||||
import FieldsTableView from '../TableView/FieldsTableView'
|
||||
|
||||
export default {
|
||||
name: 'ConfigList',
|
||||
components: { FieldsTableView },
|
||||
data () {
|
||||
return {
|
||||
crawlTypeList: [
|
||||
@@ -168,18 +135,20 @@ export default {
|
||||
...mapState('spider', [
|
||||
'spiderForm',
|
||||
'previewCrawlData'
|
||||
])
|
||||
]),
|
||||
fields () {
|
||||
if (this.spiderForm.crawl_type === 'list') {
|
||||
return this.spiderForm.fields
|
||||
} else if (this.spiderForm.crawl_type === 'detail') {
|
||||
return this.spiderForm.detail_fields
|
||||
} else if (this.spiderForm.crawl_type === 'list-detail') {
|
||||
return this.spiderForm.fields.concat(this.spiderForm.detail_fields)
|
||||
} else {
|
||||
return []
|
||||
}
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
addField () {
|
||||
this.spiderForm.fields.push({
|
||||
type: 'css',
|
||||
extract_type: 'text'
|
||||
})
|
||||
},
|
||||
deleteField (index) {
|
||||
this.spiderForm.fields.splice(index, 1)
|
||||
},
|
||||
onSelectCrawlType (value) {
|
||||
this.spiderForm.crawl_type = value
|
||||
},
|
||||
@@ -201,6 +170,9 @@ export default {
|
||||
this.saveLoading = false
|
||||
})
|
||||
})
|
||||
.then(() => {
|
||||
this.$store.dispatch('spider/updateSpiderDetailFields')
|
||||
})
|
||||
.catch(() => {
|
||||
this.$message.error(this.$t('Something wrong happened'))
|
||||
this.saveLoading = false
|
||||
@@ -241,6 +213,7 @@ export default {
|
||||
}
|
||||
},
|
||||
created () {
|
||||
// fields for list page
|
||||
if (!this.spiderForm.fields) {
|
||||
this.spiderForm.fields = []
|
||||
for (let i = 0; i < 3; i++) {
|
||||
@@ -251,6 +224,19 @@ export default {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// fields for detail page
|
||||
if (!this.spiderForm.detail_fields) {
|
||||
this.spiderForm.detail_fields = []
|
||||
for (let i = 0; i < 3; i++) {
|
||||
this.spiderForm.detail_fields.push({
|
||||
name: `field_${i + 1}`,
|
||||
type: 'css',
|
||||
extract_type: 'text'
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (!this.spiderForm.crawl_type) this.$set(this.spiderForm, 'crawl_type', 'list')
|
||||
if (!this.spiderForm.start_url) this.$set(this.spiderForm, 'start_url', 'http://example.com')
|
||||
if (!this.spiderForm.item_selector_type) this.$set(this.spiderForm, 'item_selector_type', 'css')
|
||||
@@ -261,43 +247,29 @@ export default {
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.el-table {
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-table__body td {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-table__body td .cell {
|
||||
padding: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-input__inner:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-input__inner {
|
||||
height: 36px;
|
||||
border: none;
|
||||
border-radius: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-select .el-input .el-select__caret {
|
||||
line-height: 36px;
|
||||
}
|
||||
|
||||
.button-group-wrapper {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
.button-group-container {
|
||||
margin-top: 10px;
|
||||
border-bottom: 1px dashed #dcdfe6;
|
||||
padding-bottom: 20px;
|
||||
}
|
||||
|
||||
.button-group {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
.action-button-group {
|
||||
margin-left: 10px;
|
||||
.list-fields-container {
|
||||
margin-top: 20px;
|
||||
border-bottom: 1px dashed #dcdfe6;
|
||||
padding-bottom: 20px;
|
||||
}
|
||||
|
||||
.detail-fields-container {
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
.title {
|
||||
color: #606266;
|
||||
font-size: 14px;
|
||||
}
|
||||
</style>
|
||||
|
||||
179
frontend/src/components/TableView/FieldsTableView.vue
Normal file
179
frontend/src/components/TableView/FieldsTableView.vue
Normal file
@@ -0,0 +1,179 @@
|
||||
<template>
|
||||
<div class="fields-table-view">
|
||||
<el-row class="button-group-container">
|
||||
<label class="title">{{$t(this.title)}}</label>
|
||||
<div class="button-group">
|
||||
<el-button type="primary" size="small" @click="addField" icon="el-icon-plus">{{$t('Add Field')}}</el-button>
|
||||
</div>
|
||||
</el-row>
|
||||
<el-row>
|
||||
<el-table :data="fields"
|
||||
class="table edit"
|
||||
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
|
||||
border>
|
||||
<el-table-column v-if="type === 'list' && spiderForm.crawl_type === 'list-detail'"
|
||||
:label="$t('Detail Page URL')"
|
||||
align="center">
|
||||
<template slot-scope="scope">
|
||||
<el-checkbox v-model="scope.row.is_detail"
|
||||
@change="onCheck(scope.row)">
|
||||
</el-checkbox>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Field Name')" width="200px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.name" :placeholder="$t('Field Name')"
|
||||
@change="onNameChange(scope.row)"></el-input>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Query Type')" width="200px">
|
||||
<template slot-scope="scope">
|
||||
<el-select v-model="scope.row.type" :placeholder="$t('Query Type')">
|
||||
<el-option value="css" :label="$t('CSS Selector')"></el-option>
|
||||
<el-option value="xpath" :label="$t('XPath')"></el-option>
|
||||
</el-select>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Query')" width="250px">
|
||||
<template slot-scope="scope">
|
||||
<el-input v-model="scope.row.query" :placeholder="$t('Query')"></el-input>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Extract Type')" width="120px">
|
||||
<template slot-scope="scope">
|
||||
<el-select v-model="scope.row.extract_type" :placeholder="$t('Extract Type')">
|
||||
<el-option value="text" :label="$t('Text')"></el-option>
|
||||
<el-option value="attribute" :label="$t('Attribute')"></el-option>
|
||||
</el-select>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Attribute')" width="250px">
|
||||
<template slot-scope="scope">
|
||||
<template v-if="scope.row.extract_type === 'attribute'">
|
||||
<el-input v-model="scope.row.attribute"
|
||||
:placeholder="$t('Attribute')">
|
||||
</el-input>
|
||||
</template>
|
||||
<template v-else>
|
||||
</template>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column :label="$t('Action')" fixed="right" min-width="100px">
|
||||
<template slot-scope="scope">
|
||||
<div class="action-button-group">
|
||||
<el-button size="mini"
|
||||
style="margin-left:10px"
|
||||
icon="el-icon-delete"
|
||||
type="danger"
|
||||
@click="deleteField(scope.$index)">
|
||||
</el-button>
|
||||
</div>
|
||||
</template>
|
||||
</el-table-column>
|
||||
</el-table>
|
||||
</el-row>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import {
|
||||
mapState
|
||||
} from 'vuex'
|
||||
|
||||
// Editable table of field extraction rules for a configurable spider.
// The `fields` array passed in by the parent is edited in place.
export default {
  name: 'FieldsTableView',
  props: {
    // 'list' or 'detail'; the "Detail Page URL" column is only rendered
    // for the list table of a list-detail spider
    type: {
      type: String,
      default: 'list'
    },
    // i18n key shown as the table heading
    title: {
      type: String,
      default: ''
    },
    // field rule objects: { name, type, query, extract_type, attribute, is_detail }
    fields: {
      type: Array,
      default () {
        return []
      }
    }
  },
  computed: {
    ...mapState('spider', [
      'spiderForm'
    ])
  },
  methods: {
    // Append a blank rule defaulting to a CSS selector with text extraction.
    addField () {
      this.fields.push({
        type: 'css',
        extract_type: 'text'
      })
    },
    // Remove the rule at the given table index.
    deleteField (index) {
      this.fields.splice(index, 1)
    },
    // Warn when the edited name is used by more than one rule.
    onNameChange (row) {
      if (this.fields.filter(d => d.name === row.name).length > 1) {
        this.$message.error(this.$t(`Duplicated field names for ${row.name}`))
      }
    },
    // Keep at most one rule flagged as the detail page URL.
    onCheck (row) {
      this.fields.forEach(d => {
        // Compare by identity, not by name: rows added via addField have
        // no name yet and names may be duplicated, so a name comparison
        // would leave several rows checked.
        if (d !== row) {
          this.$set(d, 'is_detail', false)
        }
      })
    }
  }
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.el-table.edit >>> .el-table__body td {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-table__body td .cell {
|
||||
padding: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-input__inner:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-input__inner {
|
||||
height: 36px;
|
||||
border: none;
|
||||
border-radius: 0;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.el-table.edit >>> .el-select .el-input .el-select__caret {
|
||||
line-height: 36px;
|
||||
}
|
||||
|
||||
.button-group-container {
|
||||
/*display: inline-block;*/
|
||||
/*width: 100%;*/
|
||||
}
|
||||
|
||||
.button-group-container .title {
|
||||
float: left;
|
||||
line-height: 32px;
|
||||
}
|
||||
|
||||
.button-group-container .button-group {
|
||||
float: right;
|
||||
}
|
||||
|
||||
.action-button-group {
|
||||
display: flex;
|
||||
margin-left: 10px;
|
||||
}
|
||||
|
||||
.action-button-group >>> .el-checkbox__label {
|
||||
font-size: 12px;
|
||||
}
|
||||
</style>
|
||||
@@ -128,6 +128,9 @@ export default {
|
||||
'Pagination Selector Type': '分页项选择器类别',
|
||||
'Preview Results': '预览结果',
|
||||
'Obey robots.txt': '遵守Robots协议',
|
||||
'List Page Fields': '列表页字段',
|
||||
'Detail Page Fields': '详情页字段',
|
||||
'Detail Page URL': '详情页URL',
|
||||
|
||||
// 爬虫列表
|
||||
'Name': '名称',
|
||||
|
||||
Reference in New Issue
Block a user