mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-31 18:10:50 +01:00
@@ -50,6 +50,6 @@ MONGO_PORT = 27017
|
|||||||
MONGO_DB = 'crawlab_test'
|
MONGO_DB = 'crawlab_test'
|
||||||
|
|
||||||
# Flask 变量
|
# Flask 变量
|
||||||
DEBUG = True
|
DEBUG = False
|
||||||
FLASK_HOST = '127.0.0.1'
|
FLASK_HOST = '127.0.0.1'
|
||||||
FLASK_PORT = 8000
|
FLASK_PORT = 8000
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import shutil
|
|||||||
import subprocess
|
import subprocess
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from random import random
|
from random import random
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import gevent
|
import gevent
|
||||||
import requests
|
import requests
|
||||||
@@ -25,7 +26,7 @@ from utils import jsonify
|
|||||||
from utils.deploy import zip_file, unzip_file
|
from utils.deploy import zip_file, unzip_file
|
||||||
from utils.file import get_file_suffix_stats, get_file_suffix
|
from utils.file import get_file_suffix_stats, get_file_suffix
|
||||||
from utils.spider import get_lang_by_stats, get_last_n_run_errors_count, get_last_n_day_tasks_count, get_list_page_data, \
|
from utils.spider import get_lang_by_stats, get_last_n_run_errors_count, get_last_n_day_tasks_count, get_list_page_data, \
|
||||||
get_detail_page_data
|
get_detail_page_data, generate_urls
|
||||||
|
|
||||||
parser = reqparse.RequestParser()
|
parser = reqparse.RequestParser()
|
||||||
parser.add_argument('file', type=FileStorage, location='files')
|
parser.add_argument('file', type=FileStorage, location='files')
|
||||||
@@ -85,6 +86,9 @@ class SpiderApi(BaseApi):
|
|||||||
# spider start url
|
# spider start url
|
||||||
('start_url', str),
|
('start_url', str),
|
||||||
|
|
||||||
|
# url pattern: support generation of urls with patterns
|
||||||
|
('url_pattern', str),
|
||||||
|
|
||||||
# spider item selector
|
# spider item selector
|
||||||
('item_selector', str),
|
('item_selector', str),
|
||||||
|
|
||||||
@@ -98,7 +102,7 @@ class SpiderApi(BaseApi):
|
|||||||
('pagination_selector_type', str),
|
('pagination_selector_type', str),
|
||||||
|
|
||||||
# whether to obey robots.txt
|
# whether to obey robots.txt
|
||||||
('obey_robots_txt', str),
|
('obey_robots_txt', bool),
|
||||||
)
|
)
|
||||||
|
|
||||||
def get(self, id=None, action=None):
|
def get(self, id=None, action=None):
|
||||||
@@ -478,20 +482,29 @@ class SpiderApi(BaseApi):
|
|||||||
}, 400
|
}, 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
r = requests.get(spider['start_url'], headers={
|
r = None
|
||||||
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
|
for url in generate_urls(spider['start_url']):
|
||||||
})
|
r = requests.get(url, headers={
|
||||||
|
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
|
||||||
|
})
|
||||||
|
break
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
return {
|
return {
|
||||||
'status': 'ok',
|
'status': 'ok',
|
||||||
'error': 'connection error'
|
'error': 'connection error'
|
||||||
}, 500
|
}, 500
|
||||||
|
|
||||||
if r.status_code != 200:
|
if not r:
|
||||||
return {
|
return {
|
||||||
'status': 'ok',
|
'status': 'ok',
|
||||||
'error': 'status code is not 200, but %s' % r.status_code
|
'error': 'response is not returned'
|
||||||
}
|
}, 500
|
||||||
|
|
||||||
|
if r and r.status_code != 200:
|
||||||
|
return {
|
||||||
|
'status': 'ok',
|
||||||
|
'error': 'status code is not 200, but %s' % r.status_code
|
||||||
|
}, r.status_code
|
||||||
|
|
||||||
# get html parse tree
|
# get html parse tree
|
||||||
sel = etree.HTML(r.content)
|
sel = etree.HTML(r.content)
|
||||||
@@ -502,10 +515,21 @@ class SpiderApi(BaseApi):
|
|||||||
def _get_text_child_tags(sel):
|
def _get_text_child_tags(sel):
|
||||||
tags = []
|
tags = []
|
||||||
for tag in sel.iter():
|
for tag in sel.iter():
|
||||||
if tag.text is not None:
|
if tag.text is not None and tag.text.strip() != '':
|
||||||
tags.append(tag)
|
tags.append(tag)
|
||||||
return tags
|
return tags
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_a_child_tags(sel):
    """Collect the followable ``<a>`` elements found under ``sel``.

    Every element yielded by ``sel.iter()`` is inspected. An element is kept
    when its tag is ``a`` and it carries an ``href`` attribute that does not
    start with ``#`` (in-page anchor) or ``javascript`` (script pseudo-link).

    :param sel: element tree node exposing ``iter()``; the yielded elements
        must expose ``tag`` and ``get()``
    :return: list of matching elements, in iteration order
    """
    tags = []
    for tag in sel.iter():
        if tag.tag != 'a':
            continue
        # look the attribute up once instead of once per condition
        href = tag.get('href')
        if href is not None and not href.startswith(('#', 'javascript')):
            tags.append(tag)
    return tags
|
||||||
|
|
||||||
def preview_crawl(self, id: str):
|
def preview_crawl(self, id: str):
|
||||||
spider = db_manager.get(col_name='spiders', id=id)
|
spider = db_manager.get(col_name='spiders', id=id)
|
||||||
|
|
||||||
@@ -544,6 +568,9 @@ class SpiderApi(BaseApi):
|
|||||||
if f.get('is_detail'):
|
if f.get('is_detail'):
|
||||||
url = d.get(f['name'])
|
url = d.get(f['name'])
|
||||||
if url is not None:
|
if url is not None:
|
||||||
|
if not url.startswith('http') and not url.startswith('//'):
|
||||||
|
u = urlparse(spider['start_url'])
|
||||||
|
url = f'{u.scheme}://{u.netloc}{url}'
|
||||||
ev_list.append(gevent.spawn(get_detail_page_data, url, spider, idx, data))
|
ev_list.append(gevent.spawn(get_detail_page_data, url, spider, idx, data))
|
||||||
break
|
break
|
||||||
|
|
||||||
@@ -566,7 +593,7 @@ class SpiderApi(BaseApi):
|
|||||||
sel = self._get_html(spider)
|
sel = self._get_html(spider)
|
||||||
|
|
||||||
# when error happens, return
|
# when error happens, return
|
||||||
if type(sel) == type(tuple):
|
if type(sel) == tuple:
|
||||||
return sel
|
return sel
|
||||||
|
|
||||||
list_tag_list = []
|
list_tag_list = []
|
||||||
@@ -592,15 +619,54 @@ class SpiderApi(BaseApi):
|
|||||||
|
|
||||||
# find the list tag with the most child text tags
|
# find the list tag with the most child text tags
|
||||||
_tag_list = []
|
_tag_list = []
|
||||||
_max_tag = None
|
max_tag = None
|
||||||
_max_num = 0
|
max_num = 0
|
||||||
for tag in list_tag_list:
|
for tag in list_tag_list:
|
||||||
_child_text_tags = self._get_text_child_tags(tag[0])
|
_child_text_tags = self._get_text_child_tags(tag[0])
|
||||||
if len(_child_text_tags) > _max_num:
|
if len(_child_text_tags) > max_num:
|
||||||
_max_tag = tag
|
max_tag = tag
|
||||||
_max_num = len(_child_text_tags)
|
max_num = len(_child_text_tags)
|
||||||
|
|
||||||
# TODO: extract list fields
|
# get list item selector
|
||||||
|
item_selector = None
|
||||||
|
if max_tag.get('id') is not None:
|
||||||
|
item_selector = f'#{max_tag.get("id")} > {max_tag.getchildren()[0].tag}'
|
||||||
|
elif max_tag.get('class') is not None:
|
||||||
|
if len(sel.cssselect(f'.{max_tag.get("class")}')) == 1:
|
||||||
|
item_selector = f'.{max_tag.get("class")} > {max_tag.getchildren()[0].tag}'
|
||||||
|
|
||||||
|
# get list fields
|
||||||
|
fields = []
|
||||||
|
if item_selector is not None:
|
||||||
|
for i, tag in enumerate(self._get_text_child_tags(max_tag[0])):
|
||||||
|
if tag.get('class') is not None:
|
||||||
|
cls_str = '.'.join([x for x in tag.get("class").split(' ') if x != ''])
|
||||||
|
# print(tag.tag + '.' + cls_str)
|
||||||
|
if len(tag.cssselect(f'{tag.tag}.{cls_str}')) == 1:
|
||||||
|
fields.append({
|
||||||
|
'name': f'field{i + 1}',
|
||||||
|
'type': 'css',
|
||||||
|
'extract_type': 'text',
|
||||||
|
'query': f'{tag.tag}.{cls_str}',
|
||||||
|
})
|
||||||
|
|
||||||
|
for i, tag in enumerate(self._get_a_child_tags(max_tag[0])):
|
||||||
|
# if the tag is <a...></a>, extract its href
|
||||||
|
if tag.get('class') is not None:
|
||||||
|
cls_str = '.'.join([x for x in tag.get("class").split(' ') if x != ''])
|
||||||
|
fields.append({
|
||||||
|
'name': f'field{i + 1}_url',
|
||||||
|
'type': 'css',
|
||||||
|
'extract_type': 'attribute',
|
||||||
|
'attribute': 'href',
|
||||||
|
'query': f'{tag.tag}.{cls_str}',
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'status': 'ok',
|
||||||
|
'item_selector': item_selector,
|
||||||
|
'fields': fields
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class SpiderImportApi(Resource):
|
class SpiderImportApi(Resource):
|
||||||
|
|||||||
@@ -1,10 +1,15 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import scrapy
|
import scrapy
|
||||||
|
|
||||||
from spiders.db import spider
|
from spiders.db import spider
|
||||||
from spiders.items import SpidersItem
|
from spiders.items import SpidersItem
|
||||||
|
from spiders.utils import generate_urls
|
||||||
|
|
||||||
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')))
|
||||||
|
|
||||||
|
|
||||||
def get_detail_url(item):
|
def get_detail_url(item):
|
||||||
@@ -75,8 +80,10 @@ def get_next_url(response):
|
|||||||
|
|
||||||
class ConfigSpiderSpider(scrapy.Spider):
|
class ConfigSpiderSpider(scrapy.Spider):
|
||||||
name = 'config_spider'
|
name = 'config_spider'
|
||||||
# allowed_domains = []
|
|
||||||
start_urls = [spider['start_url']]
|
def start_requests(self):
|
||||||
|
for url in generate_urls(spider['start_url']):
|
||||||
|
yield scrapy.Request(url=url)
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
|
|
||||||
@@ -91,7 +98,7 @@ class ConfigSpiderSpider(scrapy.Spider):
|
|||||||
yield scrapy.Request(url=next_url)
|
yield scrapy.Request(url=next_url)
|
||||||
|
|
||||||
elif spider['crawl_type'] == 'detail':
|
elif spider['crawl_type'] == 'detail':
|
||||||
# TODO: detail page onlny
|
# TODO: detail page only
|
||||||
# detail page only
|
# detail page only
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
|
import itertools
|
||||||
import os
|
import os
|
||||||
|
import re
from typing import Iterator
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
@@ -121,3 +124,51 @@ def get_detail_page_data(url, spider, idx, data):
|
|||||||
# assign values
|
# assign values
|
||||||
for k, v in row.items():
|
for k, v in row.items():
|
||||||
data[idx][k] = v
|
data[idx][k] = v
|
||||||
|
|
||||||
|
|
||||||
|
def generate_urls(base_url: str) -> Iterator[str]:
    """Expand a URL pattern into every concrete URL it describes.

    Two kinds of placeholder are recognised in ``base_url``:

    * ``{min,max}`` - an inclusive integer range, e.g. ``/page/{1,3}``
    * ``[a,b,c]`` - a comma-separated list of literal strings

    All combinations of the placeholders are generated. Number ranges vary
    slowest (in order of appearance), string lists vary fastest, regardless
    of where they sit in the URL. A URL without placeholders is yielded
    unchanged, once. A range whose minimum exceeds its maximum is empty, so
    no URL is produced at all.

    :param base_url: URL, optionally containing placeholders
    :return: generator yielding the expanded URLs
    :raises ValueError: if a range bound cannot be converted to ``int``
    """
    template = base_url

    # (placeholder key, candidate values) pairs: number ranges first, then string lists
    axes = []

    # number ranges
    for i, (low, high) in enumerate(re.findall(r'{(\d+),(\d+)}', base_url)):
        try:
            values = range(int(low), int(high) + 1)
        except ValueError as err:
            # keep the original failure attached for debugging
            raise ValueError(f'{base_url} is not a valid URL pattern') from err

        key = '{' + f'n{i}' + '}'
        axes.append((key, values))

        # swap the first remaining occurrence of this pattern for its key
        template = template.replace('{' + low + ',' + high + '}', key, 1)

    # string lists
    for i, items in enumerate(re.findall(r'\[([\w\-,]+)\]', base_url)):
        key = '{' + f's{i}' + '}'
        axes.append((key, items.split(',')))

        # swap the first remaining occurrence of this pattern for its key
        template = template.replace('[' + items + ']', key, 1)

    # combine together: one URL per element of the cartesian product
    keys = [key for key, _ in axes]
    for combination in itertools.product(*(values for _, values in axes)):
        url = template
        for key, value in zip(keys, combination):
            url = url.replace(key, str(value), 1)
        yield url
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
API_BASE_URL=http://localhost:5000/api
|
NODE_ENV='development'
|
||||||
|
VUE_APP_BASE_URL=http://localhost:8000/api
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
API_BASE_URL=http://139.129.230.98:8000/api
|
NODE_ENV='production'
|
||||||
|
VUE_APP_BASE_URL=http://crawlab.cn:8000/api
|
||||||
|
|||||||
@@ -13,5 +13,8 @@ module.exports = {
|
|||||||
},
|
},
|
||||||
parserOptions: {
|
parserOptions: {
|
||||||
parser: 'babel-eslint'
|
parser: 'babel-eslint'
|
||||||
|
},
|
||||||
|
globals: {
|
||||||
|
'_hmt': 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,15 +1,18 @@
|
|||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
|
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
|
||||||
<meta name="renderer" content="webkit">
|
<meta name="renderer" content="webkit">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
|
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
|
||||||
|
<script>
|
||||||
|
|
||||||
|
</script>
|
||||||
<title>Crawlab</title>
|
<title>Crawlab</title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<!--<script src=<%= BASE_URL %>/tinymce4.7.5/tinymce.min.js></script>-->
|
<!--<script src=<%= BASE_URL %>/tinymce4.7.5/tinymce.min.js></script>-->
|
||||||
<div id="app"></div>
|
<div id="app"></div>
|
||||||
<!-- built files will be auto injected -->
|
<!-- built files will be auto injected -->
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "crawlab",
|
"name": "crawlab",
|
||||||
"version": "0.2.0",
|
"version": "0.2.1",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"serve": "cross-env NODE_ENV=development vue-cli-service serve --ip=0.0.0.0",
|
"serve": "cross-env NODE_ENV=development vue-cli-service serve --ip=0.0.0.0",
|
||||||
"serve-prod": "cross-env NODE_ENV=production vue-cli-service serve --mode=production --ip=0.0.0.0",
|
"serve:prod": "cross-env NODE_ENV=production vue-cli-service serve --mode=production --ip=0.0.0.0",
|
||||||
"config": "vue ui",
|
"config": "vue ui",
|
||||||
"build": "vue-cli-service build",
|
"build:prod": "vue-cli-service build --mode production",
|
||||||
"lint": "vue-cli-service lint",
|
"lint": "vue-cli-service lint",
|
||||||
"test:unit": "vue-cli-service test:unit"
|
"test:unit": "vue-cli-service test:unit"
|
||||||
},
|
},
|
||||||
@@ -23,6 +23,7 @@
|
|||||||
"nprogress": "0.2.0",
|
"nprogress": "0.2.0",
|
||||||
"path": "^0.12.7",
|
"path": "^0.12.7",
|
||||||
"vue": "^2.5.22",
|
"vue": "^2.5.22",
|
||||||
|
"vue-ba": "^1.2.5",
|
||||||
"vue-codemirror-lite": "^1.0.4",
|
"vue-codemirror-lite": "^1.0.4",
|
||||||
"vue-i18n": "^8.9.0",
|
"vue-i18n": "^8.9.0",
|
||||||
"vue-router": "^3.0.1",
|
"vue-router": "^3.0.1",
|
||||||
|
|||||||
@@ -10,8 +10,45 @@ import DialogView from './components/Common/DialogView'
|
|||||||
|
|
||||||
export default {
|
export default {
|
||||||
name: 'App',
|
name: 'App',
|
||||||
|
data () {
|
||||||
|
return {
|
||||||
|
msgPopup: undefined
|
||||||
|
}
|
||||||
|
},
|
||||||
components: {
|
components: {
|
||||||
DialogView
|
DialogView
|
||||||
|
},
|
||||||
|
computed: {
|
||||||
|
useStats () {
|
||||||
|
return localStorage.getItem('useStats')
|
||||||
|
}
|
||||||
|
},
|
||||||
|
methods: {},
|
||||||
|
mounted () {
|
||||||
|
window.setUseStats = (value) => {
|
||||||
|
localStorage.setItem('useStats', value)
|
||||||
|
document.querySelector('.el-message__closeBtn').click()
|
||||||
|
if (value === 1) {
|
||||||
|
_hmt.push(['_trackPageview', '/allow_stats'])
|
||||||
|
} else {
|
||||||
|
_hmt.push(['_trackPageview', '/disallow_stats'])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// first-time user
|
||||||
|
if (this.useStats === undefined || this.useStats === null) {
|
||||||
|
this.$message({
|
||||||
|
type: 'info',
|
||||||
|
dangerouslyUseHTMLString: true,
|
||||||
|
showClose: true,
|
||||||
|
duration: 0,
|
||||||
|
message: this.$t('<p>Do you allow us to collect some statistics to improve Crawlab?</p>' +
|
||||||
|
'<div style="text-align: center;margin-top: 10px;">' +
|
||||||
|
'<button class="message-btn" onclick="setUseStats(1)">' + this.$t('Yes') + '</button>' +
|
||||||
|
'<button class="message-btn" onclick="setUseStats(0)">' + this.$t('No') + '</button>' +
|
||||||
|
'</div>')
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
@@ -52,4 +89,31 @@ export default {
|
|||||||
.el-form .el-form-item {
|
.el-form .el-form-item {
|
||||||
margin-bottom: 10px;
|
margin-bottom: 10px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.message-btn {
|
||||||
|
margin: 0 5px;
|
||||||
|
padding: 5px 10px;
|
||||||
|
background: transparent;
|
||||||
|
color: #909399;
|
||||||
|
font-size: 12px;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.message-btn:hover {
|
||||||
|
opacity: 0.8;
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
.message-btn.success {
|
||||||
|
background: #67c23a;
|
||||||
|
border-color: #67c23a;
|
||||||
|
color: #fff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.message-btn.danger {
|
||||||
|
background: #f56c6c;
|
||||||
|
border-color: #f56c6c;
|
||||||
|
color: #fff;
|
||||||
|
}
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
@@ -1,15 +1,12 @@
|
|||||||
import axios from 'axios'
|
import axios from 'axios'
|
||||||
|
|
||||||
let baseUrl = 'http://localhost:8000/api'
|
// Base URL of the backend API, taken from the build environment.
// VUE_APP_API_BASE_URL keeps priority for setups that already define it;
// VUE_APP_BASE_URL is the name the project's .env files use.
// Falls back to the local development server when neither is set.
let baseUrl = process.env.VUE_APP_API_BASE_URL || process.env.VUE_APP_BASE_URL || 'http://localhost:8000/api'
|
||||||
if (process.env.NODE_ENV === 'production') {
|
|
||||||
baseUrl = 'http://139.129.230.98:8000/api'
|
|
||||||
}
|
|
||||||
// console.log(process.env)
|
// console.log(process.env)
|
||||||
// const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api'
|
// const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api'
|
||||||
|
|
||||||
const request = (method, path, params, data) => {
|
const request = (method, path, params, data) => {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const url = `${baseUrl}${path}`
|
const url = baseUrl + path
|
||||||
axios({
|
axios({
|
||||||
method,
|
method,
|
||||||
url,
|
url,
|
||||||
|
|||||||
@@ -34,12 +34,15 @@
|
|||||||
</el-button>
|
</el-button>
|
||||||
</el-button-group>
|
</el-button-group>
|
||||||
</el-form-item>
|
</el-form-item>
|
||||||
<el-form-item :label="$t('Start URL')">
|
<el-form-item :label="$t('Start URL')" required>
|
||||||
<el-input v-model="spiderForm.start_url" :placeholder="$t('Start URL')"></el-input>
|
<el-input v-model="spiderForm.start_url" :placeholder="$t('Start URL')"></el-input>
|
||||||
</el-form-item>
|
</el-form-item>
|
||||||
<el-form-item :label="$t('Obey robots.txt')">
|
<el-form-item :label="$t('Obey robots.txt')">
|
||||||
<el-switch v-model="spiderForm.obey_robots_txt" :placeholder="$t('Obey robots.txt')"></el-switch>
|
<el-switch v-model="spiderForm.obey_robots_txt" :placeholder="$t('Obey robots.txt')"></el-switch>
|
||||||
</el-form-item>
|
</el-form-item>
|
||||||
|
<!--<el-form-item :label="$t('URL Pattern')">-->
|
||||||
|
<!--<el-input v-model="spiderForm.url_pattern" :placeholder="$t('URL Pattern')"></el-input>-->
|
||||||
|
<!--</el-form-item>-->
|
||||||
</el-form>
|
</el-form>
|
||||||
</el-col>
|
</el-col>
|
||||||
<el-col :span="11" :offset="1">
|
<el-col :span="11" :offset="1">
|
||||||
@@ -79,7 +82,8 @@
|
|||||||
<el-row class="button-group-container">
|
<el-row class="button-group-container">
|
||||||
<div class="button-group">
|
<div class="button-group">
|
||||||
<el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
|
<el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
|
||||||
<el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">{{$t('Extract Fields')}}</el-button>
|
<el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">{{$t('Extract Fields')}}
|
||||||
|
</el-button>
|
||||||
<el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>
|
<el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>
|
||||||
<el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
|
<el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
|
||||||
</div>
|
</div>
|
||||||
@@ -214,6 +218,24 @@ export default {
|
|||||||
})
|
})
|
||||||
},
|
},
|
||||||
onExtractFields () {
|
onExtractFields () {
|
||||||
|
this.onSave()
|
||||||
|
.then(() => {
|
||||||
|
this.extractFieldsLoading = true
|
||||||
|
this.$store.dispatch('spider/extractFields')
|
||||||
|
.then(response => {
|
||||||
|
if (response.data.item_selector) {
|
||||||
|
this.$set(this.spiderForm, 'item_selector', response.data.item_selector)
|
||||||
|
this.$set(this.spiderForm, 'item_selector_type', 'css')
|
||||||
|
}
|
||||||
|
|
||||||
|
if (response.data.fields && response.data.fields.length) {
|
||||||
|
this.spiderForm.fields = response.data.fields
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.finally(() => {
|
||||||
|
this.extractFieldsLoading = false
|
||||||
|
})
|
||||||
|
})
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
created () {
|
created () {
|
||||||
@@ -245,7 +267,7 @@ export default {
|
|||||||
if (!this.spiderForm.start_url) this.$set(this.spiderForm, 'start_url', 'http://example.com')
|
if (!this.spiderForm.start_url) this.$set(this.spiderForm, 'start_url', 'http://example.com')
|
||||||
if (!this.spiderForm.item_selector_type) this.$set(this.spiderForm, 'item_selector_type', 'css')
|
if (!this.spiderForm.item_selector_type) this.$set(this.spiderForm, 'item_selector_type', 'css')
|
||||||
if (!this.spiderForm.pagination_selector_type) this.$set(this.spiderForm, 'pagination_selector_type', 'css')
|
if (!this.spiderForm.pagination_selector_type) this.$set(this.spiderForm, 'pagination_selector_type', 'css')
|
||||||
if (!this.spiderForm.obey_robots_txt) this.$set(this.spiderForm, 'obey_robots_txt', true)
|
if (this.spiderForm.obey_robots_txt === undefined) this.$set(this.spiderForm, 'obey_robots_txt', true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ import 'font-awesome/scss/font-awesome.scss'// FontAwesome
|
|||||||
|
|
||||||
import 'codemirror/lib/codemirror.css'
|
import 'codemirror/lib/codemirror.css'
|
||||||
|
|
||||||
|
// import ba from 'vue-ba'
|
||||||
|
|
||||||
import App from './App'
|
import App from './App'
|
||||||
import store from './store'
|
import store from './store'
|
||||||
import router from './router'
|
import router from './router'
|
||||||
@@ -24,8 +26,23 @@ import i18n from './i18n'
|
|||||||
|
|
||||||
Vue.use(ElementUI, { locale })
|
Vue.use(ElementUI, { locale })
|
||||||
|
|
||||||
|
// Vue.use(ba, 'c35e3a563a06caee2524902c81975add')
|
||||||
|
// Vue.use(ba, {
|
||||||
|
// siteId: 'c35e3a563a06caee2524902c81975add'
|
||||||
|
// })
|
||||||
|
|
||||||
Vue.config.productionTip = false
|
Vue.config.productionTip = false
|
||||||
|
|
||||||
|
// 百度统计
|
||||||
|
window._hmt = window._hmt || [];
|
||||||
|
(function () {
|
||||||
|
let hm = document.createElement('script')
|
||||||
|
hm.src = 'https://hm.baidu.com/hm.js?c35e3a563a06caee2524902c81975add'
|
||||||
|
let s = document.getElementsByTagName('script')[0]
|
||||||
|
s.parentNode.insertBefore(hm, s)
|
||||||
|
})()
|
||||||
|
|
||||||
|
// inject request api
|
||||||
Vue.prototype.$request = request
|
Vue.prototype.$request = request
|
||||||
|
|
||||||
const app = new Vue({
|
const app = new Vue({
|
||||||
|
|||||||
@@ -222,4 +222,12 @@ router.beforeEach((to, from, next) => {
|
|||||||
next()
|
next()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Report every completed navigation as a page view, unless the user opted out.
// After-hooks only receive the target and source routes; there is no `next` to call.
router.afterEach((to) => {
  if (!to.path) return

  // localStorage stores strings: '0' means the user declined statistics
  if (localStorage.getItem('useStats') === '0') return

  // make sure the analytics queue exists before pushing to it
  window._hmt = window._hmt || []
  window._hmt.push(['_trackPageview', to.path])
})
|
||||||
|
|
||||||
export default router
|
export default router
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import file from './modules/file'
|
|||||||
import schedule from './modules/schedule'
|
import schedule from './modules/schedule'
|
||||||
import lang from './modules/lang'
|
import lang from './modules/lang'
|
||||||
import site from './modules/site'
|
import site from './modules/site'
|
||||||
|
import stats from './modules/stats'
|
||||||
import getters from './getters'
|
import getters from './getters'
|
||||||
|
|
||||||
Vue.use(Vuex)
|
Vue.use(Vuex)
|
||||||
@@ -29,7 +30,9 @@ const store = new Vuex.Store({
|
|||||||
file,
|
file,
|
||||||
schedule,
|
schedule,
|
||||||
lang,
|
lang,
|
||||||
site
|
site,
|
||||||
|
// 百度统计
|
||||||
|
stats
|
||||||
},
|
},
|
||||||
getters
|
getters
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ const actions = {
|
|||||||
// configurable spider
|
// configurable spider
|
||||||
crawl_type: state.spiderForm.crawl_type,
|
crawl_type: state.spiderForm.crawl_type,
|
||||||
start_url: state.spiderForm.start_url,
|
start_url: state.spiderForm.start_url,
|
||||||
|
url_pattern: state.spiderForm.url_pattern,
|
||||||
item_selector: state.spiderForm.item_selector,
|
item_selector: state.spiderForm.item_selector,
|
||||||
item_selector_type: state.spiderForm.item_selector_type,
|
item_selector_type: state.spiderForm.item_selector_type,
|
||||||
pagination_selector: state.spiderForm.pagination_selector,
|
pagination_selector: state.spiderForm.pagination_selector,
|
||||||
@@ -207,6 +208,9 @@ const actions = {
|
|||||||
.then(response => {
|
.then(response => {
|
||||||
commit('SET_PREVIEW_CRAWL_DATA', response.data.items)
|
commit('SET_PREVIEW_CRAWL_DATA', response.data.items)
|
||||||
})
|
})
|
||||||
|
},
|
||||||
|
extractFields ({ state, commit }) {
|
||||||
|
return request.post(`/spiders/${state.spiderForm._id}/extract_fields`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
14
frontend/src/store/modules/stats.js
Normal file
14
frontend/src/store/modules/stats.js
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
const state = {}
|
||||||
|
const getters = {
|
||||||
|
useStats () {
|
||||||
|
return localStorage.getItem('useStats')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const mutations = {}
|
||||||
|
const actions = {}
|
||||||
|
export default {
|
||||||
|
state,
|
||||||
|
getters,
|
||||||
|
mutations,
|
||||||
|
actions
|
||||||
|
}
|
||||||
@@ -85,6 +85,9 @@ export default {
|
|||||||
this.dailyTasks = response.data.daily_tasks
|
this.dailyTasks = response.data.daily_tasks
|
||||||
this.initEchartsDailyTasks()
|
this.initEchartsDailyTasks()
|
||||||
})
|
})
|
||||||
|
},
|
||||||
|
mounted () {
|
||||||
|
// this.$ba.trackPageview('/')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -2,15 +2,15 @@
|
|||||||
<div class="tags-view-container">
|
<div class="tags-view-container">
|
||||||
<scroll-pane ref="scrollPane" class="tags-view-wrapper">
|
<scroll-pane ref="scrollPane" class="tags-view-wrapper">
|
||||||
<router-link
|
<router-link
|
||||||
v-for="tag in visitedViews"
|
v-for="tag in visitedViews"
|
||||||
ref="tag"
|
ref="tag"
|
||||||
:class="isActive(tag)?'active':''"
|
:class="isActive(tag)?'active':''"
|
||||||
:to="{ path: tag.path, query: tag.query, fullPath: tag.fullPath }"
|
:to="{ path: tag.path, query: tag.query, fullPath: tag.fullPath }"
|
||||||
:key="tag.path"
|
:key="tag.path"
|
||||||
tag="span"
|
tag="span"
|
||||||
class="tags-view-item"
|
class="tags-view-item"
|
||||||
@click.middle.native="closeSelectedTag(tag)"
|
@click.middle.native="closeSelectedTag(tag)"
|
||||||
@contextmenu.prevent.native="openMenu(tag,$event)">
|
@contextmenu.prevent.native="openMenu(tag,$event)">
|
||||||
{{ $t(generateTitle(tag.title)) }}
|
{{ $t(generateTitle(tag.title)) }}
|
||||||
<span v-if="!tag.meta.affix" class="el-icon-close" @click.prevent.stop="closeSelectedTag(tag)"/>
|
<span v-if="!tag.meta.affix" class="el-icon-close" @click.prevent.stop="closeSelectedTag(tag)"/>
|
||||||
</router-link>
|
</router-link>
|
||||||
@@ -47,7 +47,7 @@ export default {
|
|||||||
return this.$store.state.tagsView.visitedViews
|
return this.$store.state.tagsView.visitedViews
|
||||||
},
|
},
|
||||||
routers () {
|
routers () {
|
||||||
return this.$store.state.permission.routers
|
return this.$store.state.permission ? this.$store.state.permission.routers : []
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
watch: {
|
watch: {
|
||||||
|
|||||||
@@ -8401,6 +8401,14 @@ vm-browserify@0.0.4:
|
|||||||
dependencies:
|
dependencies:
|
||||||
indexof "0.0.1"
|
indexof "0.0.1"
|
||||||
|
|
||||||
|
vue-ba@^1.2.5:
|
||||||
|
version "1.2.5"
|
||||||
|
resolved "https://registry.npm.taobao.org/vue-ba/download/vue-ba-1.2.5.tgz#fef30732ee749a65a81a4f47113527ee41fda64b"
|
||||||
|
integrity sha1-/vMHMu50mmWoGk9HETUn7kH9pks=
|
||||||
|
dependencies:
|
||||||
|
deep-equal "^1.0.1"
|
||||||
|
vue "^2.3.3"
|
||||||
|
|
||||||
vue-codemirror-lite@^1.0.4:
|
vue-codemirror-lite@^1.0.4:
|
||||||
version "1.0.4"
|
version "1.0.4"
|
||||||
resolved "http://registry.npm.taobao.org/vue-codemirror-lite/download/vue-codemirror-lite-1.0.4.tgz#48a5cd7d17c0914503c8cd9d9b56b438e49c3410"
|
resolved "http://registry.npm.taobao.org/vue-codemirror-lite/download/vue-codemirror-lite-1.0.4.tgz#48a5cd7d17c0914503c8cd9d9b56b438e49c3410"
|
||||||
@@ -8485,6 +8493,11 @@ vue-template-es2015-compiler@^1.6.0, vue-template-es2015-compiler@^1.8.2:
|
|||||||
version "1.8.2"
|
version "1.8.2"
|
||||||
resolved "http://registry.npm.taobao.org/vue-template-es2015-compiler/download/vue-template-es2015-compiler-1.8.2.tgz#dd73e80ba58bb65dd7a8aa2aeef6089cf6116f2a"
|
resolved "http://registry.npm.taobao.org/vue-template-es2015-compiler/download/vue-template-es2015-compiler-1.8.2.tgz#dd73e80ba58bb65dd7a8aa2aeef6089cf6116f2a"
|
||||||
|
|
||||||
|
vue@^2.3.3:
|
||||||
|
version "2.6.10"
|
||||||
|
resolved "https://registry.npm.taobao.org/vue/download/vue-2.6.10.tgz#a72b1a42a4d82a721ea438d1b6bf55e66195c637"
|
||||||
|
integrity sha1-pysaQqTYKnIepDjRtr9V5mGVxjc=
|
||||||
|
|
||||||
vue@^2.5.17, vue@^2.5.22:
|
vue@^2.5.17, vue@^2.5.22:
|
||||||
version "2.6.6"
|
version "2.6.6"
|
||||||
resolved "https://registry.yarnpkg.com/vue/-/vue-2.6.6.tgz#dde41e483c11c46a7bf523909f4f2f816ab60d25"
|
resolved "https://registry.yarnpkg.com/vue/-/vue-2.6.6.tgz#dde41e483c11c46a7bf523909f4f2f816ab60d25"
|
||||||
|
|||||||
Reference in New Issue
Block a user