Merge branch 'develop' of https://github.com/casperwnb/crawlab into develop

This commit is contained in:
casperwnb
2019-04-19 12:11:25 +08:00
73 changed files with 18717 additions and 695 deletions

View File

@@ -62,6 +62,10 @@ npm run serve
![spider-list](./docs/img/screenshot-task-detail-results.png)
## 使用流程
![user-process](./docs/img/用户使用流程图.png)
## 架构
Crawlab的架构跟Celery非常相似，但是加入了包括前端、爬虫、Flower在内的额外模块，以支持爬虫管理的功能。

View File

@@ -8,7 +8,8 @@ from celery import Celery
from flask import Flask
from flask_cors import CORS
from flask_restful import Api
# from flask_restplus import Api
from utils.log import other
from constants.node import NodeStatus
from db.manager import db_manager
from routes.schedules import ScheduleApi
@@ -78,7 +79,7 @@ def monitor_nodes_status(celery_app):
})
def update_nodes_status_online(event):
print(event)
other.info(f"{event}")
with celery_app.connection() as connection:
recv = celery_app.events.Receiver(connection, handlers={

View File

@@ -6,11 +6,11 @@ import subprocess
file_dir = os.path.dirname(os.path.realpath(__file__))
root_path = os.path.abspath(os.path.join(file_dir, '..'))
sys.path.append(root_path)
from utils.log import other
from config import BROKER_URL
if __name__ == '__main__':
    # Run Flower against the configured broker and forward everything it
    # writes (stderr is merged into stdout) to the application logger.
    p = subprocess.Popen(['celery', 'flower', '-b', BROKER_URL], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # The pipe is opened in binary mode, so readline() returns bytes and
    # signals end-of-file with b''. The sentinel has to be that bytes value;
    # a str sentinel never compares equal and the loop would never end.
    for line in iter(p.stdout.readline, b''):
        text = line.decode('utf-8')
        if text != '':
            # The logger already has a console handler, so a separate
            # print() would only duplicate each line on the terminal.
            other.info(text)

View File

@@ -1,12 +1,17 @@
# project variables
PROJECT_SOURCE_FILE_FOLDER = '/Users/yeqing/projects/crawlab/spiders'
PROJECT_DEPLOY_FILE_FOLDER = '/var/crawlab'
PROJECT_LOGS_FOLDER = '/var/logs/crawlab'
# 爬虫源码路径
PROJECT_SOURCE_FILE_FOLDER = '../spiders'
# 配置python虚拟环境的路径
PYTHON_ENV_PATH = '/Users/chennan/Desktop/2019/env/bin/python'
# 爬虫部署路径
PROJECT_DEPLOY_FILE_FOLDER = '../deployfile'
PROJECT_LOGS_FOLDER = '../deployfile/logs'
PROJECT_TMP_FOLDER = '/tmp'
# celery variables
BROKER_URL = 'redis://192.168.99.100:6379/0'
CELERY_RESULT_BACKEND = 'mongodb://192.168.99.100:27017/'
BROKER_URL = 'redis://127.0.0.1:6379/0'
CELERY_RESULT_BACKEND = 'mongodb://127.0.0.1:27017/'
CELERY_MONGODB_BACKEND_SETTINGS = {
'database': 'crawlab_test',
'taskmeta_collection': 'tasks_celery',
@@ -18,7 +23,7 @@ CELERY_ENABLE_UTC = True
FLOWER_API_ENDPOINT = 'http://localhost:5555/api'
# database variables
MONGO_HOST = '192.168.99.100'
MONGO_HOST = '127.0.0.1'
MONGO_PORT = 27017
MONGO_DB = 'crawlab_test'

View File

@@ -24,7 +24,7 @@ from routes.spiders import SpiderApi, SpiderImportApi, SpiderManageApi
from routes.stats import StatsApi
from routes.tasks import TaskApi
from tasks.celery import celery_app
from utils.log import other
# flask app instance
app = Flask(__name__)
app.config.from_object('config')
@@ -81,7 +81,7 @@ def run_flower():
p = subprocess.Popen(['celery', 'flower', '-b', BROKER_URL], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
for line in iter(p.stdout.readline, 'b'):
if line.decode('utf-8') != '':
print(line.decode('utf-8'))
other.info(line.decode('utf-8'))
def run_worker():

35
crawlab/requirements.txt Normal file
View File

@@ -0,0 +1,35 @@
amqp==2.4.2
aniso8601==6.0.0
APScheduler==3.6.0
attrs==19.1.0
Babel==2.6.0
billiard==3.6.0.0
celery==4.3.0
certifi==2019.3.9
chardet==3.0.4
Click==7.0
coloredlogs==10.0
Flask==1.0.2
Flask-Cors==3.0.7
Flask-RESTful==0.3.7
flask-restplus==0.12.1
flower==0.9.3
humanfriendly==4.18
idna==2.8
itsdangerous==1.1.0
Jinja2==2.10
jsonschema==3.0.1
kombu==4.5.0
MarkupSafe==1.1.1
mongoengine==0.17.0
pymongo==3.7.2
pyrsistent==0.14.11
pytz==2018.9
redis==3.2.1
requests==2.21.0
six==1.12.0
tornado==5.1.1
tzlocal==1.5.1
urllib3==1.24.1
vine==1.3.0
Werkzeug==0.15.2

View File

@@ -1,4 +1,5 @@
from flask_restful import reqparse, Resource
# from flask_restplus import reqparse, Resource
from db.manager import db_manager
from utils import jsonify
@@ -25,6 +26,8 @@ class BaseApi(Resource):
self.parser.add_argument(arg, type=type)
def get(self, id=None, action=None):
import pdb
pdb.set_trace()
args = self.parser.parse_args()
# action by id

View File

@@ -8,6 +8,7 @@ from db.manager import db_manager
from routes.base import BaseApi
from utils import jsonify
from utils.spider import get_spider_col_fields
from utils.log import other
class TaskApi(BaseApi):
@@ -27,6 +28,7 @@ class TaskApi(BaseApi):
'code': 400,
'error': 'action "%s" invalid' % action
}, 400
other.info(f"到这了{action},{id}")
return getattr(self, action)(id)
elif id is not None:

View File

@@ -20,6 +20,7 @@ class Scheduler(object):
scheduler = BackgroundScheduler(jobstores=jobstores)
def execute_spider(self, id: str):
r = requests.get('http://%s:%s/api/spiders/%s/on_crawl' % (
FLASK_HOST,
FLASK_PORT,

View File

@@ -2,15 +2,12 @@ import os
from datetime import datetime
from bson import ObjectId
from celery.utils.log import get_logger
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER
from config import PROJECT_DEPLOY_FILE_FOLDER, PROJECT_LOGS_FOLDER,PYTHON_ENV_PATH
from constants.task import TaskStatus
from db.manager import db_manager
from .celery import celery_app
import subprocess
logger = get_logger(__name__)
from utils.log import other as logger
@celery_app.task(bind=True)
@@ -19,6 +16,8 @@ def execute_spider(self, id: str):
hostname = self.request.hostname
spider = db_manager.get('spiders', id=id)
command = spider.get('cmd')
if command.startswith("env"):
command = PYTHON_ENV_PATH + command.replace("env","")
current_working_directory = os.path.join(PROJECT_DEPLOY_FILE_FOLDER, str(spider.get('_id')))

View File

@@ -1,5 +1,5 @@
import os, zipfile
from utils.log import other
# 打包目录为zip文件未压缩
def zip_file(source_dir, output_filename):
@@ -20,4 +20,4 @@ def unzip_file(zip_src, dst_dir):
for file in fz.namelist():
fz.extract(file, dst_dir)
else:
print('This is not zip')
other.info('This is not zip')

75
crawlab/utils/log.py Normal file
View File

@@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
# @Time : 2019-01-28 15:37
# @Author : cxa
# @File : log.py
# @Software: PyCharm
import os
import logging
import logging.config as log_conf
import datetime
import coloredlogs
# Directory that holds the log files: "<parent of this file's directory>/logs".
# abspath() keeps the result correct when __file__ is a relative path
# (otherwise the two dirname() calls can collapse to '' and yield '/logs').
log_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'logs')
# exist_ok avoids a FileExistsError when several processes import this
# module at the same time and all try to create the directory.
os.makedirs(log_dir, exist_ok=True)
# The date is fixed at import time, so one process keeps writing to the
# file named after the day it started.
today = datetime.datetime.now().strftime("%Y%m%d")
log_path = os.path.join(log_dir, f'app_{today}.log')
# Logging configuration in the dictConfig schema: three formatters, a console
# handler, a rotating file handler and four named loggers.
log_config = {
    'version': 1.0,
    'formatters': {
        'colored_console': {'()': 'coloredlogs.ColoredFormatter',
                            'format': "%(asctime)s - %(name)s - %(levelname)s - %(message)s", 'datefmt': '%H:%M:%S'},
        'detail': {
            'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            'datefmt': "%Y-%m-%d %H:%M:%S"  # without this the timestamp is shown down to milliseconds
        },
        'simple': {
            'format': '%(name)s - %(levelname)s - %(message)s',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',  # handler class that prints log records to the screen
            'level': 'INFO',
            'formatter': 'colored_console'
        },
        'file': {
            'class': 'logging.handlers.RotatingFileHandler',  # handler class that writes log records to a file
            'maxBytes': 1024 * 1024 * 1024,  # maximum size of a single log file (1 GiB)
            'backupCount': 1,  # number of backup files to keep
            'filename': log_path,  # log file name
            'level': 'INFO',  # log level
            'formatter': 'detail',  # which of the formatters above to use
            'encoding': 'utf-8',  # file encoding
        },
    },
    'loggers': {
        'crawler': {
            'handlers': ['console', 'file'],  # emit to both the console and the log file
            'level': 'DEBUG',  # logger accepts DEBUG and above (both handlers still filter at INFO)
        },
        'parser': {
            'handlers': ['file'],
            'level': 'INFO',
        },
        'other': {
            'handlers': ['console', 'file'],
            'level': 'INFO',
        },
        'storage': {
            'handlers': ['console', 'file'],
            'level': 'INFO',
        }
    }
}
# Apply the configuration and expose the named loggers used across the project.
log_conf.dictConfig(log_config)
crawler = logging.getLogger('crawler')
storage = logging.getLogger('storage')
# Bind `other` to the logger configured under the 'other' key; fetching
# 'storage' here would make both names share one logger and tag every
# "other" record with the wrong logger name.
other = logging.getLogger('other')
# Install coloredlogs' console output on each logger at DEBUG level.
coloredlogs.install(level='DEBUG', logger=crawler)
coloredlogs.install(level='DEBUG', logger=storage)
coloredlogs.install(level='DEBUG', logger=other)

View File

@@ -3,19 +3,22 @@ import os
from constants.spider import FILE_SUFFIX_LANG_MAPPING, LangType, SUFFIX_IGNORE, SpiderType
from db.manager import db_manager
def get_lang_by_stats(stats: dict) -> LangType:
    """
    Choose a spider's language from its file-suffix statistics.

    :param stats: stats is generated by utils.file.get_file_suffix_stats;
        it is read here as a mapping of suffix -> sortable value
        (presumably an occurrence count -- confirm against the producer)
    :return: the language mapped to the top-ranked suffix that is not in
        SUFFIX_IGNORE, or LangType.OTHER when that suffix has no mapping or
        when no suffix is left after filtering
    """
    # Filtering before sorting gives the same ranking as sorting first,
    # because sorted() is stable.
    candidates = [item for item in stats.items() if item[0] not in SUFFIX_IGNORE]

    # Nothing to rank: return the declared fallback instead of raising
    # IndexError or silently returning None.
    if not candidates:
        return LangType.OTHER

    # Ascending sort, last element wins (ties resolve to the later entry).
    top_suffix = sorted(candidates, key=lambda item: item[1])[-1][0]

    lang = FILE_SUFFIX_LANG_MAPPING.get(top_suffix)
    if lang is not None:
        return lang
    return LangType.OTHER
def get_spider_type(path: str) -> SpiderType:

View File

@@ -0,0 +1,82 @@
const puppeteer = require('puppeteer');
const MongoClient = require('mongodb').MongoClient;
// Crawl the juejin.im front page with headless Chrome and store the
// collected article links in MongoDB.
(async () => {
  // browser
  const browser = await puppeteer.launch({
    headless: true
  });

  // define start url
  const url = 'https://juejin.im';

  // start a new page
  const page = await browser.newPage();

  // navigate to url
  try {
    await page.goto(url, {waitUntil: 'domcontentloaded'});
    await page.waitFor(2000);
  } catch (e) {
    console.error(e);

    // wait for the browser to shut down so no Chromium process is left behind
    await browser.close();

    // exit code 1 indicating an error happened (public API; does not return)
    process.exit(1);
  }

  try {
    // scroll down to fetch more data
    for (let i = 0; i < 100; i++) {
      console.log('Pressing PageDown...');
      // NOTE(review): Puppeteer documents the second argument as an options
      // object; the bare number is presumably ignored -- confirm whether
      // {delay: 200} was intended.
      await page.keyboard.press('PageDown', 200);
      await page.waitFor(100);
    }

    // scrape data (this callback runs inside the page, not in Node)
    const results = await page.evaluate(() => {
      let results = [];
      document.querySelectorAll('.entry-list > .item').forEach(el => {
        if (!el.querySelector('.title')) return;
        results.push({
          // NOTE(review): the prefix host differs from the crawled host
          // (juejin.im) -- confirm which one is correct.
          url: 'https://juejin.com' + el.querySelector('.title').getAttribute('href'),
          title: el.querySelector('.title').innerText
        });
      });
      return results;
    });

    // open database connection
    const client = await MongoClient.connect('mongodb://192.168.99.100:27017');
    try {
      const db = client.db('crawlab_test');
      const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin';
      const taskId = process.env.CRAWLAB_TASK_ID;
      const col = db.collection(colName);

      // save to database
      for (const item of results) {
        // de-duplication: match on the URL string; querying with the whole
        // result object would never equal a stored url value
        const existing = await col.findOne({url: item.url});
        if (existing) continue;

        // assign taskID
        item.task_id = taskId;
        item.source = 'juejin';

        // insert row
        await col.insertOne(item);
      }

      console.log(`results.length: ${results.length}`);
    } finally {
      // close database connection even if a query failed
      await client.close();
    }
  } finally {
    // shutdown browser even if scraping or storing failed
    await browser.close();
  }
})();

View File

@@ -0,0 +1,82 @@
const puppeteer = require('puppeteer');
const MongoClient = require('mongodb').MongoClient;
// Crawl the juejin.im front page with headless Chrome and store the
// collected article links in MongoDB.
(async () => {
  // browser
  const browser = await puppeteer.launch({
    headless: true
  });

  // define start url
  const url = 'https://juejin.im';

  // start a new page
  const page = await browser.newPage();

  // navigate to url
  try {
    await page.goto(url, {waitUntil: 'domcontentloaded'});
    await page.waitFor(2000);
  } catch (e) {
    console.error(e);

    // wait for the browser to shut down so no Chromium process is left behind
    await browser.close();

    // exit code 1 indicating an error happened (public API; does not return)
    process.exit(1);
  }

  try {
    // scroll down to fetch more data
    for (let i = 0; i < 100; i++) {
      console.log('Pressing PageDown...');
      // NOTE(review): Puppeteer documents the second argument as an options
      // object; the bare number is presumably ignored -- confirm whether
      // {delay: 200} was intended.
      await page.keyboard.press('PageDown', 200);
      await page.waitFor(100);
    }

    // scrape data (this callback runs inside the page, not in Node)
    const results = await page.evaluate(() => {
      let results = [];
      document.querySelectorAll('.entry-list > .item').forEach(el => {
        if (!el.querySelector('.title')) return;
        results.push({
          // NOTE(review): the prefix host differs from the crawled host
          // (juejin.im) -- confirm which one is correct.
          url: 'https://juejin.com' + el.querySelector('.title').getAttribute('href'),
          title: el.querySelector('.title').innerText
        });
      });
      return results;
    });

    // open database connection
    const client = await MongoClient.connect('mongodb://127.0.0.1:27017');
    try {
      const db = client.db('crawlab_test');
      const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin';
      const taskId = process.env.CRAWLAB_TASK_ID;
      const col = db.collection(colName);

      // save to database
      for (const item of results) {
        // de-duplication: match on the URL string; querying with the whole
        // result object would never equal a stored url value
        const existing = await col.findOne({url: item.url});
        if (existing) continue;

        // assign taskID
        item.task_id = taskId;
        item.source = 'juejin';

        // insert row
        await col.insertOne(item);
      }

      console.log(`results.length: ${results.length}`);
    } finally {
      // close database connection even if a query failed
      await client.close();
    }
  } finally {
    // shutdown browser even if scraping or storing failed
    await browser.close();
  }
})();

View File

@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class JuejinItem(scrapy.Item):
    """Scrapy item describing one scraped Juejin entry."""
    # define the fields for your item here like:
    # presumably the MongoDB document id -- confirm against the pipeline
    _id = scrapy.Field()
    # title, link and like are the values the spider extracts per entry
    title = scrapy.Field()
    link = scrapy.Field()
    like = scrapy.Field()
    # identifier of the task that produced the item
    # (presumably filled in by the item pipeline -- confirm)
    task_id = scrapy.Field()

View File

@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class JuejinSpiderMiddleware(object):
    """Pass-through spider middleware for the juejin project.

    Every hook forwards what it receives unchanged; the only side effect is
    a log line written when a spider is opened. Hooks that are not defined
    are treated by Scrapy as "do not modify the passed objects".
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to the spider_opened signal."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_spider_input(self, response, spider):
        """Accept every response on its way into the spider.

        Returning None lets processing continue.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Re-yield everything the spider returned for this response, unchanged."""
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Take no action when the spider (or an input hook) raises; returns None."""
        return None

    def process_start_requests(self, start_requests, spider):
        """Re-yield the spider's start requests, unchanged."""
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that has just been opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)

View File

@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import os
from pymongo import MongoClient
MONGO_HOST = '127.0.0.1'
MONGO_PORT = 27017
MONGO_DB = 'crawlab_test'
class JuejinPipeline(object):
    """Item pipeline that stores every scraped item in MongoDB."""

    # Client, database and collection are class attributes: they are created
    # once, when the class body runs, and shared by all instances.
    mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
    db = mongo[MONGO_DB]
    # Target collection comes from CRAWLAB_COLLECTION, falling back to 'test'.
    col_name = os.environ.get('CRAWLAB_COLLECTION','test')
    col = db[col_name]

    def process_item(self, item, spider):
        """Tag the item with the current task id and persist it.

        :param item: the scraped item; it is modified in place
        :param spider: the spider that produced the item (unused)
        :return: the same item, so later pipelines can keep processing it
        """
        item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
        # Collection.save() is deprecated; these two calls are the
        # documented equivalents: upsert by _id when the item already has
        # one, plain insert otherwise.
        if '_id' in item:
            self.col.replace_one({'_id': item['_id']}, item, upsert=True)
        else:
            self.col.insert_one(item)
        return item

View File

@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
# Scrapy settings for juejin project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'juejin'
SPIDER_MODULES = ['juejin.spiders']
NEWSPIDER_MODULE = 'juejin.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
# COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False
# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
# }
# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
# 'juejin.middlewares.JuejinSpiderMiddleware': 543,
# }
# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# DOWNLOADER_MIDDLEWARES = {
# 'juejin.middlewares.MyCustomDownloaderMiddleware': 543,
# }
# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
# EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
# }
# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'juejin.pipelines.JuejinPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
import scrapy
from juejin.items import JuejinItem
class JuejinSpiderSpider(scrapy.Spider):
    """Spider that scrapes the Juejin search results for the query "celery"."""
    name = 'juejin_spider'
    # Must name the host used in start_urls; listing a different domain
    # would mark any follow-up request to the crawled site as off-site.
    allowed_domains = ['juejin.im']
    start_urls = ['https://juejin.im/search?query=celery']

    def parse(self, response):
        """Yield one JuejinItem for every result row in the response.

        :param response: the downloaded search-results page
        """
        for row in response.css('ul.main-list > li.item'):
            yield JuejinItem(
                # NOTE(review): this selector has no ::text, so the stored
                # title is presumably the <span> markup rather than its
                # text -- confirm the intended value.
                title=row.css('.title span').extract_first(),
                link=row.css('a::attr("href")').extract_first(),
                like=row.css('.like .count::text').extract_first(),
            )

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.org/en/latest/deploy.html
[settings]
default = juejin.settings
[deploy]
#url = http://localhost:6800/
project = juejin

View File

@@ -0,0 +1,2 @@
# Launcher script: start the crawl from Python instead of the shell.
from scrapy import cmdline
# Passes the argv of "scrapy crawl juejin_spider" to Scrapy's command-line entry point.
cmdline.execute(["scrapy","crawl","juejin_spider"])

View File

@@ -0,0 +1,82 @@
const puppeteer = require('puppeteer');
const MongoClient = require('mongodb').MongoClient;
// Crawl the juejin.im front page with headless Chrome and store the
// collected article links in MongoDB.
(async () => {
  // browser
  const browser = await puppeteer.launch({
    headless: true
  });

  // define start url
  const url = 'https://juejin.im';

  // start a new page
  const page = await browser.newPage();

  // navigate to url
  try {
    await page.goto(url, {waitUntil: 'domcontentloaded'});
    await page.waitFor(2000);
  } catch (e) {
    console.error(e);

    // wait for the browser to shut down so no Chromium process is left behind
    await browser.close();

    // exit code 1 indicating an error happened (public API; does not return)
    process.exit(1);
  }

  try {
    // scroll down to fetch more data
    for (let i = 0; i < 100; i++) {
      console.log('Pressing PageDown...');
      // NOTE(review): Puppeteer documents the second argument as an options
      // object; the bare number is presumably ignored -- confirm whether
      // {delay: 200} was intended.
      await page.keyboard.press('PageDown', 200);
      await page.waitFor(100);
    }

    // scrape data (this callback runs inside the page, not in Node)
    const results = await page.evaluate(() => {
      let results = [];
      document.querySelectorAll('.entry-list > .item').forEach(el => {
        if (!el.querySelector('.title')) return;
        results.push({
          // NOTE(review): the prefix host differs from the crawled host
          // (juejin.im) -- confirm which one is correct.
          url: 'https://juejin.com' + el.querySelector('.title').getAttribute('href'),
          title: el.querySelector('.title').innerText
        });
      });
      return results;
    });

    // open database connection
    const client = await MongoClient.connect('mongodb://127.0.0.1:27017');
    try {
      const db = client.db('crawlab_test');
      const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin';
      const taskId = process.env.CRAWLAB_TASK_ID;
      const col = db.collection(colName);

      // save to database
      for (const item of results) {
        // de-duplication: match on the URL string; querying with the whole
        // result object would never equal a stored url value
        const existing = await col.findOne({url: item.url});
        if (existing) continue;

        // assign taskID
        item.task_id = taskId;
        item.source = 'juejin';

        // insert row
        await col.insertOne(item);
      }

      console.log(`results.length: ${results.length}`);
    } finally {
      // close database connection even if a query failed
      await client.close();
    }
  } finally {
    // shutdown browser even if scraping or storing failed
    await browser.close();
  }
})();

View File

@@ -0,0 +1,82 @@
const puppeteer = require('puppeteer');
const MongoClient = require('mongodb').MongoClient;
// Crawl the juejin.im front page with headless Chrome and store the
// collected article links in MongoDB.
(async () => {
  // browser
  const browser = await puppeteer.launch({
    headless: true
  });

  // define start url
  const url = 'https://juejin.im';

  // start a new page
  const page = await browser.newPage();

  // navigate to url
  try {
    await page.goto(url, {waitUntil: 'domcontentloaded'});
    await page.waitFor(2000);
  } catch (e) {
    console.error(e);

    // wait for the browser to shut down so no Chromium process is left behind
    await browser.close();

    // exit code 1 indicating an error happened (public API; does not return)
    process.exit(1);
  }

  try {
    // scroll down to fetch more data
    for (let i = 0; i < 100; i++) {
      console.log('Pressing PageDown...');
      // NOTE(review): Puppeteer documents the second argument as an options
      // object; the bare number is presumably ignored -- confirm whether
      // {delay: 200} was intended.
      await page.keyboard.press('PageDown', 200);
      await page.waitFor(100);
    }

    // scrape data (this callback runs inside the page, not in Node)
    const results = await page.evaluate(() => {
      let results = [];
      document.querySelectorAll('.entry-list > .item').forEach(el => {
        if (!el.querySelector('.title')) return;
        results.push({
          // NOTE(review): the prefix host differs from the crawled host
          // (juejin.im) -- confirm which one is correct.
          url: 'https://juejin.com' + el.querySelector('.title').getAttribute('href'),
          title: el.querySelector('.title').innerText
        });
      });
      return results;
    });

    // open database connection
    const client = await MongoClient.connect('mongodb://127.0.0.1:27017');
    try {
      const db = client.db('crawlab_test');
      const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin';
      const taskId = process.env.CRAWLAB_TASK_ID;
      const col = db.collection(colName);

      // save to database
      for (const item of results) {
        // de-duplication: match on the URL string; querying with the whole
        // result object would never equal a stored url value
        const existing = await col.findOne({url: item.url});
        if (existing) continue;

        // assign taskID
        item.task_id = taskId;
        item.source = 'juejin';

        // insert row
        await col.insertOne(item);
      }

      console.log(`results.length: ${results.length}`);
    } finally {
      // close database connection even if a query failed
      await client.close();
    }
  } finally {
    // shutdown browser even if scraping or storing failed
    await browser.close();
  }
})();

BIN
docs/.DS_Store vendored

Binary file not shown.

View File

@@ -0,0 +1,2 @@
# App

6
docs/Concept/Deploy 2.md Normal file
View File

@@ -0,0 +1,6 @@
# 部署
所有爬虫在运行前需要被部署到相应的节点中。
部署时,爬虫会被打包到相应的目录中,方便环境隔离,开发环境的爬虫和生产环境的爬虫需要打包部署来实现隔离。

View File

@@ -0,0 +1,2 @@
# Examples

View File

@@ -0,0 +1,22 @@
# 安装
最快安装Crawlab的方式是克隆一份代码到本地
```bash
git clone https://github.com/tikazyq/crawlab
```
安装类库
```bash
# 安装后台类库
pip install -r requirements.txt
```
```bash
# 安装前台类库
cd frontend
npm install
```

View File

@@ -0,0 +1,436 @@
<!DOCTYPE HTML>
<html lang="" >
<head>
<meta charset="UTF-8">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<title>App · GitBook</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="description" content="">
<meta name="generator" content="GitBook 3.2.3">
<link rel="stylesheet" href="../gitbook/style.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-fontsettings/website.css">
<meta name="HandheldFriendly" content="true"/>
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
<link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
<link rel="next" href="../Examples/" />
<link rel="prev" href="Celery.html" />
</head>
<body>
<div class="book">
<div class="book-summary">
<div id="book-search-input" role="search">
<input type="text" placeholder="Type to search" />
</div>
<nav role="navigation">
<ul class="summary">
<li class="chapter " data-level="1.1" data-path="../">
<a href="../">
简介
</a>
</li>
<li class="chapter " data-level="1.2" data-path="../QuickStart/">
<a href="../QuickStart/">
快速开始
</a>
<ul class="articles">
<li class="chapter " data-level="1.2.1" data-path="../QuickStart/Installation.html">
<a href="../QuickStart/Installation.html">
安装
</a>
</li>
<li class="chapter " data-level="1.2.2" data-path="../QuickStart/Run.html">
<a href="../QuickStart/Run.html">
运行
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.3" data-path="../Concept/">
<a href="../Concept/">
概念
</a>
<ul class="articles">
<li class="chapter " data-level="1.3.1" data-path="../Concept/Node.html">
<a href="../Concept/Node.html">
节点
</a>
</li>
<li class="chapter " data-level="1.3.2" data-path="../Concept/Spider.html">
<a href="../Concept/Spider.html">
爬虫
</a>
</li>
<li class="chapter " data-level="1.3.3" data-path="../Concept/Task.html">
<a href="../Concept/Task.html">
任务
</a>
</li>
<li class="chapter " data-level="1.3.4" data-path="../Concept/Deploy.html">
<a href="../Concept/Deploy.html">
部署
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.4" data-path="./">
<a href="./">
架构
</a>
<ul class="articles">
<li class="chapter " data-level="1.4.1" data-path="Celery.html">
<a href="Celery.html">
Celery
</a>
</li>
<li class="chapter active" data-level="1.4.2" data-path="App.html">
<a href="App.html">
App
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.5" data-path="../Examples/">
<a href="../Examples/">
Examples
</a>
<ul class="articles">
<li class="chapter " data-level="1.5.1" data-path="../Examples/">
<a href="../Examples/">
与Scrapy集成
</a>
</li>
<li class="chapter " data-level="1.5.2" data-path="../Examples/">
<a href="../Examples/">
与Puppeteer集成
</a>
</li>
</ul>
</li>
<li class="divider"></li>
<li>
<a href="https://www.gitbook.com" target="blank" class="gitbook-link">
Published with GitBook
</a>
</li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<!-- Title -->
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i>
<a href=".." >App</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<div id="book-search-results">
<div class="search-noresults">
<section class="normal markdown-section">
<h1 id="app">App</h1>
</section>
</div>
<div class="search-results">
<div class="has-results">
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
<ul class="search-results-list"></ul>
</div>
<div class="no-results">
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
</div>
</div>
</div>
</div>
</div>
</div>
<a href="Celery.html" class="navigation navigation-prev " aria-label="Previous page: Celery">
<i class="fa fa-angle-left"></i>
</a>
<a href="../Examples/" class="navigation navigation-next " aria-label="Next page: Examples">
<i class="fa fa-angle-right"></i>
</a>
</div>
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
gitbook.page.hasChanged({"page":{"title":"App","level":"1.4.2","depth":2,"next":{"title":"Examples","level":"1.5","depth":1,"path":"Examples/README.md","ref":"Examples/README.md","articles":[{"title":"与Scrapy集成","level":"1.5.1","depth":2,"path":"Examples/README.md","ref":"Examples/README.md","articles":[]},{"title":"与Puppeteer集成","level":"1.5.2","depth":2,"path":"Examples/README.md","ref":"Examples/README.md","articles":[]}]},"previous":{"title":"Celery","level":"1.4.1","depth":2,"path":"Architecture/Celery.md","ref":"Architecture/Celery.md","articles":[]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":[],"pluginsConfig":{"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"Architecture/App.md","mtime":"2019-03-28T11:49:43.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-03-28T12:07:05.349Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
<script src="../gitbook/gitbook.js"></script>
<script src="../gitbook/theme.js"></script>
<script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
<script src="../gitbook/gitbook-plugin-search/search.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
<script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
<script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
</body>
</html>

View File

@@ -0,0 +1,438 @@
<!DOCTYPE HTML>
<html lang="" >
<head>
<meta charset="UTF-8">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<title>部署 · GitBook</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="description" content="">
<meta name="generator" content="GitBook 3.2.3">
<link rel="stylesheet" href="../gitbook/style.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-fontsettings/website.css">
<meta name="HandheldFriendly" content="true"/>
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
<link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
<link rel="next" href="../Architecture/" />
<link rel="prev" href="Task.html" />
</head>
<body>
<div class="book">
<div class="book-summary">
<div id="book-search-input" role="search">
<input type="text" placeholder="Type to search" />
</div>
<nav role="navigation">
<ul class="summary">
<li class="chapter " data-level="1.1" data-path="../">
<a href="../">
简介
</a>
</li>
<li class="chapter " data-level="1.2" data-path="../QuickStart/">
<a href="../QuickStart/">
快速开始
</a>
<ul class="articles">
<li class="chapter " data-level="1.2.1" data-path="../QuickStart/Installation.html">
<a href="../QuickStart/Installation.html">
安装
</a>
</li>
<li class="chapter " data-level="1.2.2" data-path="../QuickStart/Run.html">
<a href="../QuickStart/Run.html">
运行
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.3" data-path="./">
<a href="./">
概念
</a>
<ul class="articles">
<li class="chapter " data-level="1.3.1" data-path="Node.html">
<a href="Node.html">
节点
</a>
</li>
<li class="chapter " data-level="1.3.2" data-path="Spider.html">
<a href="Spider.html">
爬虫
</a>
</li>
<li class="chapter " data-level="1.3.3" data-path="Task.html">
<a href="Task.html">
任务
</a>
</li>
<li class="chapter active" data-level="1.3.4" data-path="Deploy.html">
<a href="Deploy.html">
部署
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.4" data-path="../Architecture/">
<a href="../Architecture/">
架构
</a>
<ul class="articles">
<li class="chapter " data-level="1.4.1" data-path="../Architecture/Celery.html">
<a href="../Architecture/Celery.html">
Celery
</a>
</li>
<li class="chapter " data-level="1.4.2" data-path="../Architecture/App.html">
<a href="../Architecture/App.html">
App
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.5" data-path="../Examples/">
<a href="../Examples/">
Examples
</a>
<ul class="articles">
<li class="chapter " data-level="1.5.1" data-path="../Examples/">
<a href="../Examples/">
与Scrapy集成
</a>
</li>
<li class="chapter " data-level="1.5.2" data-path="../Examples/">
<a href="../Examples/">
与Puppeteer集成
</a>
</li>
</ul>
</li>
<li class="divider"></li>
<li>
<a href="https://www.gitbook.com" target="_blank" class="gitbook-link">
Published with GitBook
</a>
</li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<!-- Title -->
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i>
<a href=".." >部署</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<div id="book-search-results">
<div class="search-noresults">
<section class="normal markdown-section">
<h1 id="&#x90E8;&#x7F72;">&#x90E8;&#x7F72;</h1>
<p>&#x6240;&#x6709;&#x722C;&#x866B;&#x5728;&#x8FD0;&#x884C;&#x524D;&#x9700;&#x8981;&#x88AB;&#x90E8;&#x7F72;&#x5230;&#x76F8;&#x5E94;&#x7684;&#x8282;&#x70B9;&#x4E2D;&#x3002;</p>
<p>&#x90E8;&#x7F72;&#x65F6;&#xFF0C;&#x722C;&#x866B;&#x4F1A;&#x88AB;&#x6253;&#x5305;&#x5230;&#x76F8;&#x5E94;&#x7684;&#x76EE;&#x5F55;&#x4E2D;&#xFF0C;&#x65B9;&#x4FBF;&#x73AF;&#x5883;&#x9694;&#x79BB;&#xFF0C;&#x5F00;&#x53D1;&#x73AF;&#x5883;&#x7684;&#x722C;&#x866B;&#x548C;&#x751F;&#x4EA7;&#x73AF;&#x5883;&#x7684;&#x722C;&#x866B;&#x9700;&#x8981;&#x6253;&#x5305;&#x90E8;&#x7F72;&#x6765;&#x5B9E;&#x73B0;&#x9694;&#x79BB;&#x3002;</p>
</section>
</div>
<div class="search-results">
<div class="has-results">
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
<ul class="search-results-list"></ul>
</div>
<div class="no-results">
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
</div>
</div>
</div>
</div>
</div>
</div>
<a href="Task.html" class="navigation navigation-prev " aria-label="Previous page: 任务">
<i class="fa fa-angle-left"></i>
</a>
<a href="../Architecture/" class="navigation navigation-next " aria-label="Next page: 架构">
<i class="fa fa-angle-right"></i>
</a>
</div>
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
gitbook.page.hasChanged({"page":{"title":"部署","level":"1.3.4","depth":2,"next":{"title":"架构","level":"1.4","depth":1,"path":"Architecture/README.md","ref":"Architecture/README.md","articles":[{"title":"Celery","level":"1.4.1","depth":2,"path":"Architecture/Celery.md","ref":"Architecture/Celery.md","articles":[]},{"title":"App","level":"1.4.2","depth":2,"path":"Architecture/App.md","ref":"Architecture/App.md","articles":[]}]},"previous":{"title":"任务","level":"1.3.3","depth":2,"path":"Concept/Task.md","ref":"Concept/Task.md","articles":[]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":[],"pluginsConfig":{"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"Concept/Deploy.md","mtime":"2019-03-28T12:06:24.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-03-28T12:07:05.349Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
<script src="../gitbook/gitbook.js"></script>
<script src="../gitbook/theme.js"></script>
<script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
<script src="../gitbook/gitbook-plugin-search/search.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
<script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
<script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
</body>
</html>

View File

@@ -0,0 +1,436 @@
<!DOCTYPE HTML>
<html lang="" >
<head>
<meta charset="UTF-8">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<title>Examples · GitBook</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="description" content="">
<meta name="generator" content="GitBook 3.2.3">
<link rel="stylesheet" href="../gitbook/style.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-fontsettings/website.css">
<meta name="HandheldFriendly" content="true"/>
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
<link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
<link rel="next" href="./" />
<link rel="prev" href="../Architecture/App.html" />
</head>
<body>
<div class="book">
<div class="book-summary">
<div id="book-search-input" role="search">
<input type="text" placeholder="Type to search" />
</div>
<nav role="navigation">
<ul class="summary">
<li class="chapter " data-level="1.1" data-path="../">
<a href="../">
简介
</a>
</li>
<li class="chapter " data-level="1.2" data-path="../QuickStart/">
<a href="../QuickStart/">
快速开始
</a>
<ul class="articles">
<li class="chapter " data-level="1.2.1" data-path="../QuickStart/Installation.html">
<a href="../QuickStart/Installation.html">
安装
</a>
</li>
<li class="chapter " data-level="1.2.2" data-path="../QuickStart/Run.html">
<a href="../QuickStart/Run.html">
运行
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.3" data-path="../Concept/">
<a href="../Concept/">
概念
</a>
<ul class="articles">
<li class="chapter " data-level="1.3.1" data-path="../Concept/Node.html">
<a href="../Concept/Node.html">
节点
</a>
</li>
<li class="chapter " data-level="1.3.2" data-path="../Concept/Spider.html">
<a href="../Concept/Spider.html">
爬虫
</a>
</li>
<li class="chapter " data-level="1.3.3" data-path="../Concept/Task.html">
<a href="../Concept/Task.html">
任务
</a>
</li>
<li class="chapter " data-level="1.3.4" data-path="../Concept/Deploy.html">
<a href="../Concept/Deploy.html">
部署
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.4" data-path="../Architecture/">
<a href="../Architecture/">
架构
</a>
<ul class="articles">
<li class="chapter " data-level="1.4.1" data-path="../Architecture/Celery.html">
<a href="../Architecture/Celery.html">
Celery
</a>
</li>
<li class="chapter " data-level="1.4.2" data-path="../Architecture/App.html">
<a href="../Architecture/App.html">
App
</a>
</li>
</ul>
</li>
<li class="chapter active" data-level="1.5" data-path="./">
<a href="./">
Examples
</a>
<ul class="articles">
<li class="chapter active" data-level="1.5.1" data-path="./">
<a href="./">
与Scrapy集成
</a>
</li>
<li class="chapter active" data-level="1.5.2" data-path="./">
<a href="./">
与Puppeteer集成
</a>
</li>
</ul>
</li>
<li class="divider"></li>
<li>
<a href="https://www.gitbook.com" target="_blank" class="gitbook-link">
Published with GitBook
</a>
</li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<!-- Title -->
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i>
<a href=".." >Examples</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<div id="book-search-results">
<div class="search-noresults">
<section class="normal markdown-section">
<h1 id="examples">Examples</h1>
</section>
</div>
<div class="search-results">
<div class="has-results">
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
<ul class="search-results-list"></ul>
</div>
<div class="no-results">
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
</div>
</div>
</div>
</div>
</div>
</div>
<a href="../Architecture/App.html" class="navigation navigation-prev " aria-label="Previous page: App">
<i class="fa fa-angle-left"></i>
</a>
<a href="./" class="navigation navigation-next " aria-label="Next page: 与Scrapy集成">
<i class="fa fa-angle-right"></i>
</a>
</div>
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
gitbook.page.hasChanged({"page":{"title":"Examples","level":"1.5","depth":1,"next":{"title":"与Scrapy集成","level":"1.5.1","depth":2,"path":"Examples/README.md","ref":"Examples/README.md","articles":[]},"previous":{"title":"App","level":"1.4.2","depth":2,"path":"Architecture/App.md","ref":"Architecture/App.md","articles":[]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":[],"pluginsConfig":{"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"Examples/README.md","mtime":"2019-03-28T11:41:28.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-03-28T12:07:05.349Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
<script src="../gitbook/gitbook.js"></script>
<script src="../gitbook/theme.js"></script>
<script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
<script src="../gitbook/gitbook-plugin-search/search.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
<script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
<script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
</body>
</html>

View File

@@ -0,0 +1,447 @@
<!DOCTYPE HTML>
<html lang="" >
<head>
<meta charset="UTF-8">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<title>安装 · GitBook</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="description" content="">
<meta name="generator" content="GitBook 3.2.3">
<link rel="stylesheet" href="../gitbook/style.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-fontsettings/website.css">
<meta name="HandheldFriendly" content="true"/>
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
<link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
<link rel="next" href="Run.html" />
<link rel="prev" href="./" />
</head>
<body>
<div class="book">
<div class="book-summary">
<div id="book-search-input" role="search">
<input type="text" placeholder="Type to search" />
</div>
<nav role="navigation">
<ul class="summary">
<li class="chapter " data-level="1.1" data-path="../">
<a href="../">
简介
</a>
</li>
<li class="chapter " data-level="1.2" data-path="./">
<a href="./">
快速开始
</a>
<ul class="articles">
<li class="chapter active" data-level="1.2.1" data-path="Installation.html">
<a href="Installation.html">
安装
</a>
</li>
<li class="chapter " data-level="1.2.2" data-path="Run.html">
<a href="Run.html">
运行
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.3" data-path="../Concept/">
<a href="../Concept/">
概念
</a>
<ul class="articles">
<li class="chapter " data-level="1.3.1" data-path="../Concept/Node.html">
<a href="../Concept/Node.html">
节点
</a>
</li>
<li class="chapter " data-level="1.3.2" data-path="../Concept/Spider.html">
<a href="../Concept/Spider.html">
爬虫
</a>
</li>
<li class="chapter " data-level="1.3.3" data-path="../Concept/Task.html">
<a href="../Concept/Task.html">
任务
</a>
</li>
<li class="chapter " data-level="1.3.4" data-path="../Concept/Deploy.html">
<a href="../Concept/Deploy.html">
部署
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.4" data-path="../Architecture/">
<a href="../Architecture/">
架构
</a>
<ul class="articles">
<li class="chapter " data-level="1.4.1" data-path="../Architecture/Celery.html">
<a href="../Architecture/Celery.html">
Celery
</a>
</li>
<li class="chapter " data-level="1.4.2" data-path="../Architecture/App.html">
<a href="../Architecture/App.html">
App
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.5" data-path="../Examples/">
<a href="../Examples/">
Examples
</a>
<ul class="articles">
<li class="chapter " data-level="1.5.1" data-path="../Examples/">
<a href="../Examples/">
与Scrapy集成
</a>
</li>
<li class="chapter " data-level="1.5.2" data-path="../Examples/">
<a href="../Examples/">
与Puppeteer集成
</a>
</li>
</ul>
</li>
<li class="divider"></li>
<li>
<a href="https://www.gitbook.com" target="_blank" class="gitbook-link">
Published with GitBook
</a>
</li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<!-- Title -->
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i>
<a href=".." >安装</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<div id="book-search-results">
<div class="search-noresults">
<section class="normal markdown-section">
<h1 id="&#x5B89;&#x88C5;">&#x5B89;&#x88C5;</h1>
<p>&#x6700;&#x5FEB;&#x5B89;&#x88C5;Crawlab&#x7684;&#x65B9;&#x5F0F;&#x662F;&#x514B;&#x9686;&#x4E00;&#x4EFD;&#x4EE3;&#x7801;&#x5230;&#x672C;&#x5730;</p>
<pre><code class="lang-bash">git <span class="hljs-built_in">clone</span> https://github.com/tikazyq/crawlab
</code></pre>
<p>&#x5B89;&#x88C5;&#x7C7B;&#x5E93;</p>
<pre><code class="lang-bash"><span class="hljs-comment"># &#x5B89;&#x88C5;&#x540E;&#x53F0;&#x7C7B;&#x5E93;</span>
pip install -r requirements.txt
</code></pre>
<pre><code class="lang-bash"><span class="hljs-comment"># &#x5B89;&#x88C5;&#x524D;&#x53F0;&#x7C7B;&#x5E93;</span>
<span class="hljs-built_in">cd</span> frontend
npm install
</code></pre>
</section>
</div>
<div class="search-results">
<div class="has-results">
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
<ul class="search-results-list"></ul>
</div>
<div class="no-results">
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
</div>
</div>
</div>
</div>
</div>
</div>
<a href="./" class="navigation navigation-prev " aria-label="Previous page: 快速开始">
<i class="fa fa-angle-left"></i>
</a>
<a href="Run.html" class="navigation navigation-next " aria-label="Next page: 运行">
<i class="fa fa-angle-right"></i>
</a>
</div>
<script>
var gitbook = gitbook || [];
gitbook.push(function() {
gitbook.page.hasChanged({"page":{"title":"安装","level":"1.2.1","depth":2,"next":{"title":"运行","level":"1.2.2","depth":2,"path":"QuickStart/Run.md","ref":"QuickStart/Run.md","articles":[]},"previous":{"title":"快速开始","level":"1.2","depth":1,"path":"QuickStart/README.md","ref":"QuickStart/README.md","articles":[{"title":"安装","level":"1.2.1","depth":2,"path":"QuickStart/Installation.md","ref":"QuickStart/Installation.md","articles":[]},{"title":"运行","level":"1.2.2","depth":2,"path":"QuickStart/Run.md","ref":"QuickStart/Run.md","articles":[]}]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":[],"pluginsConfig":{"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"QuickStart/Installation.md","mtime":"2019-03-28T11:55:48.000Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2019-03-28T12:07:05.349Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
<script src="../gitbook/gitbook.js"></script>
<script src="../gitbook/theme.js"></script>
<script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
<script src="../gitbook/gitbook-plugin-search/search.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
<script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
<script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
</body>
</html>

Binary file not shown.

View File

@@ -0,0 +1,240 @@
require(['gitbook', 'jquery'], function(gitbook, $) {
// Bounds for the font-size step kept in fontState.size
var MAX_SIZE = 4;
var MIN_SIZE = 0;

// Id of the toolbar button created by updateButtons() (unset until then)
var BUTTON_ID;

// Current fontsettings state ({ size, family, theme }), assigned in init()
var fontState;

// Default color themes; `id` is the value stored in fontState.theme
var THEMES = [
    { config: 'white', text: 'White', id: 0 },
    { config: 'sepia', text: 'Sepia', id: 1 },
    { config: 'night', text: 'Night', id: 2 }
];

// Default font families; `id` is the value stored in fontState.family
var FAMILIES = [
    { config: 'serif', text: 'Serif', id: 0 },
    { config: 'sans', text: 'Sans', id: 1 }
];
// Return the configured color themes (the live THEMES array, not a copy)
function getThemes() {
return THEMES;
}
// Replace the configured color themes with `themes`, then rebuild the
// toolbar button so its dropdown lists the new themes
function setThemes(themes) {
THEMES = themes;
updateButtons();
}
// Return the configured font families (the live FAMILIES array, not a copy)
function getFamilies() {
return FAMILIES;
}
// Replace the configured font families with `families`, then rebuild the
// toolbar button so its dropdown lists the new families
function setFamilies(families) {
FAMILIES = families;
updateButtons();
}
// Persist the current font state under the 'fontState' storage key,
// then re-apply it to the page via update()
function saveFontSettings() {
gitbook.storage.set('fontState', fontState);
update();
}
// Increase the font size by one step; does nothing once MAX_SIZE is reached.
// `e` is the optional triggering event: its default action is cancelled when
// one is passed, and the function can also be called without an event
// (it is exposed on gitbook.fontsettings).
function enlargeFontSize(e) {
    if (e && typeof e.preventDefault === 'function') {
        e.preventDefault();
    }
    if (fontState.size >= MAX_SIZE) return;

    fontState.size++;
    saveFontSettings();
}
// Decrease the font size by one step; does nothing once MIN_SIZE is reached.
// `e` is the optional triggering event: its default action is cancelled when
// one is passed, and the function can also be called without an event
// (it is exposed on gitbook.fontsettings).
function reduceFontSize(e) {
    if (e && typeof e.preventDefault === 'function') {
        e.preventDefault();
    }
    if (fontState.size <= MIN_SIZE) return;

    fontState.size--;
    saveFontSettings();
}
// Switch to the font family registered under `configName`
// (getFontFamilyId falls back to id 0 when the name is unknown).
// `e` is the optional triggering event. It is duck-typed rather than tested
// with `instanceof Event`, so wrapped events such as jQuery.Event, which do
// not inherit from the native Event, have their default action cancelled too.
function changeFontFamily(configName, e) {
    if (e && typeof e.preventDefault === 'function') {
        e.preventDefault();
    }

    fontState.family = getFontFamilyId(configName);
    saveFontSettings();
}
// Switch to the color theme registered under `configName`
// (getThemeId falls back to id 0 when the name is unknown).
// `e` is the optional triggering event. It is duck-typed rather than tested
// with `instanceof Event`, so wrapped events such as jQuery.Event, which do
// not inherit from the native Event, have their default action cancelled too.
function changeColorTheme(configName, e) {
    if (e && typeof e.preventDefault === 'function') {
        e.preventDefault();
    }

    var $book = gitbook.state.$book;

    // Remove the currently applied color theme (theme id 0 adds no class)
    if (fontState.theme !== 0)
        $book.removeClass('color-theme-'+fontState.theme);

    // Set the new color theme
    fontState.theme = getThemeId(configName);
    if (fontState.theme !== 0)
        $book.addClass('color-theme-'+fontState.theme);

    saveFontSettings();
}
// Map a font-family config key (e.g. 'sans') to the id of the first
// configured family using that key; returns 0 when none matches
function getFontFamilyId(configName) {
    var matches = $.grep(FAMILIES, function(candidate) {
        return candidate.config == configName;
    });
    var first = matches[0];

    if (first) {
        return first.id;
    }
    return 0;
}
// Map a theme config key (e.g. 'night') to the id of the first
// configured theme using that key; returns 0 when none matches
function getThemeId(configName) {
    var matches = $.grep(THEMES, function(candidate) {
        return candidate.config == configName;
    });
    var first = matches[0];

    if (first) {
        return first.id;
    }
    return 0;
}
// Apply fontState to the page: mark the active entry in the font-family
// list and reset the font-* / color-theme-* classes on the book element
function update() {
    var $book = gitbook.state.$book;
    var bookEl = $book[0];
    var familyItems = '.font-settings .font-family-list li';

    // Highlight the list entry of the selected family (nth-child is 1-based)
    $(familyItems).removeClass('active');
    $(familyItems + ':nth-child(' + (fontState.family + 1) + ')').addClass('active');

    // Drop every previous font-* class before adding the current ones
    bookEl.className = bookEl.className.replace(/\bfont-\S+/g, '');
    $book
        .addClass('font-size-' + fontState.size)
        .addClass('font-family-' + fontState.family);

    // Theme id 0 carries no color-theme class, so nothing more to do
    if (fontState.theme === 0) {
        return;
    }
    bookEl.className = bookEl.className.replace(/\bcolor-theme-\S+/g, '');
    $book.addClass('color-theme-' + fontState.theme);
}
// Initialise fontState from storage, using the plugin configuration
// (`config.size`, `config.family`, `config.theme`) as the default value,
// then apply it to the page.
function init(config) {
    // Tolerate a missing plugin configuration object
    config = config || {};

    // Resolve the configured family / theme keys to their ids
    var configFamily = getFontFamilyId(config.family),
        configTheme = getThemeId(config.theme);

    // A plain `config.size || 2` would discard a configured size of 0,
    // which is a valid value (MIN_SIZE)
    var configSize = (config.size === 0) ? 0 : (config.size || 2);

    // Instantiate font state object
    fontState = gitbook.storage.get('fontState', {
        size: configSize,
        family: configFamily,
        theme: configTheme
    });

    update();
}
// (Re)create the "Font Settings" toolbar button with its three dropdown
// sections: size controls, font families and color themes
function updateButtons() {
    // Drop the button from a previous call, if there is one
    if (BUTTON_ID) {
        gitbook.toolbar.removeButton(BUTTON_ID);
    }

    var sizeSection = [
        { text: 'A', className: 'font-reduce', onClick: reduceFontSize },
        { text: 'A', className: 'font-enlarge', onClick: enlargeFontSize }
    ];

    // The click handler is attached to the family/theme objects themselves
    var familySection = $.map(FAMILIES, function(family) {
        family.onClick = function(e) {
            return changeFontFamily(family.config, e);
        };
        return family;
    });

    var themeSection = $.map(THEMES, function(theme) {
        theme.onClick = function(e) {
            return changeColorTheme(theme.config, e);
        };
        return theme;
    });

    BUTTON_ID = gitbook.toolbar.createButton({
        icon: 'fa fa-font',
        label: 'Font Settings',
        className: 'font-settings',
        dropdown: [sizeSection, familySection, themeSection]
    });
}
// When GitBook fires 'start', build the toolbar button and initialise the
// font state from the plugin options found under config.fontsettings
gitbook.events.bind('start', function(e, config) {
var opts = config.fontsettings;
// Generate buttons at start
updateButtons();
// Init current settings
init(opts);
});
// Expose the plugin API on gitbook.fontsettings: size controls, theme/family
// switching (setTheme / setFamily take a config key) and accessors for the
// configured theme and family lists
gitbook.fontsettings = {
enlargeFontSize: enlargeFontSize,
reduceFontSize: reduceFontSize,
setTheme: changeColorTheme,
setFamily: changeFontFamily,
getThemes: getThemes,
setThemes: setThemes,
getFamilies: getFamilies,
setFamilies: setFamilies
};
});

View File

@@ -0,0 +1,135 @@
/* highlight.js "Tomorrow" color theme, applied to code inside <pre> and <code>. */
pre,
code {
/* Palette reference: http://jmblog.github.io/color-themes-for-highlightjs */
/* Tomorrow Comment: #8e908c */
/* Tomorrow Red: #c82829 */
/* Tomorrow Orange: #f5871f */
/* Tomorrow Yellow: #eab700 */
/* Tomorrow Green: #718c00 */
/* Tomorrow Aqua: #3e999f */
/* Tomorrow Blue: #4271ae */
/* Tomorrow Purple: #8959a8 */
}
/* Tomorrow Comment (grey) */
pre .hljs-comment,
code .hljs-comment,
pre .hljs-title,
code .hljs-title {
color: #8e908c;
}
/* Tomorrow Red */
pre .hljs-variable,
code .hljs-variable,
pre .hljs-attribute,
code .hljs-attribute,
pre .hljs-tag,
code .hljs-tag,
pre .hljs-regexp,
code .hljs-regexp,
pre .hljs-deletion,
code .hljs-deletion,
pre .ruby .hljs-constant,
code .ruby .hljs-constant,
pre .xml .hljs-tag .hljs-title,
code .xml .hljs-tag .hljs-title,
pre .xml .hljs-pi,
code .xml .hljs-pi,
pre .xml .hljs-doctype,
code .xml .hljs-doctype,
pre .html .hljs-doctype,
code .html .hljs-doctype,
pre .css .hljs-id,
code .css .hljs-id,
pre .css .hljs-class,
code .css .hljs-class,
pre .css .hljs-pseudo,
code .css .hljs-pseudo {
color: #c82829;
}
/* Tomorrow Orange */
pre .hljs-number,
code .hljs-number,
pre .hljs-preprocessor,
code .hljs-preprocessor,
pre .hljs-pragma,
code .hljs-pragma,
pre .hljs-built_in,
code .hljs-built_in,
pre .hljs-literal,
code .hljs-literal,
pre .hljs-params,
code .hljs-params,
pre .hljs-constant,
code .hljs-constant {
color: #f5871f;
}
/* Tomorrow Yellow */
pre .ruby .hljs-class .hljs-title,
code .ruby .hljs-class .hljs-title,
pre .css .hljs-rules .hljs-attribute,
code .css .hljs-rules .hljs-attribute {
color: #eab700;
}
/* Tomorrow Green */
pre .hljs-string,
code .hljs-string,
pre .hljs-value,
code .hljs-value,
pre .hljs-inheritance,
code .hljs-inheritance,
pre .hljs-header,
code .hljs-header,
pre .hljs-addition,
code .hljs-addition,
pre .ruby .hljs-symbol,
code .ruby .hljs-symbol,
pre .xml .hljs-cdata,
code .xml .hljs-cdata {
color: #718c00;
}
/* Tomorrow Aqua */
pre .css .hljs-hexcolor,
code .css .hljs-hexcolor {
color: #3e999f;
}
/* Tomorrow Blue */
pre .hljs-function,
code .hljs-function,
pre .python .hljs-decorator,
code .python .hljs-decorator,
pre .python .hljs-title,
code .python .hljs-title,
pre .ruby .hljs-function .hljs-title,
code .ruby .hljs-function .hljs-title,
pre .ruby .hljs-title .hljs-keyword,
code .ruby .hljs-title .hljs-keyword,
pre .perl .hljs-sub,
code .perl .hljs-sub,
pre .javascript .hljs-title,
code .javascript .hljs-title,
pre .coffeescript .hljs-title,
code .coffeescript .hljs-title {
color: #4271ae;
}
/* Tomorrow Purple */
pre .hljs-keyword,
code .hljs-keyword,
pre .javascript .hljs-function,
code .javascript .hljs-function {
color: #8959a8;
}
/* Base box for a highlighted block: white background, dark grey text */
pre .hljs,
code .hljs {
display: block;
background: white;
color: #4d4d4c;
padding: 0.5em;
}
/* Embedded sub-languages (e.g. script inside markup) are rendered at half opacity */
pre .coffeescript .javascript,
code .coffeescript .javascript,
pre .javascript .xml,
code .javascript .xml,
pre .tex .hljs-formula,
code .tex .hljs-formula,
pre .xml .javascript,
code .xml .javascript,
pre .xml .vbscript,
code .xml .vbscript,
pre .xml .css,
code .xml .css,
pre .xml .hljs-cdata,
code .xml .hljs-cdata {
opacity: 0.5;
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,90 @@
require(['gitbook', 'jquery'], function(gitbook, $) {
// Build a click handler that cancels the event's default action and then
// passes the URL returned by `buildUrl` (evaluated at click time) to window.open
function shareHandler(buildUrl) {
    return function(e) {
        e.preventDefault();
        window.open(buildUrl());
    };
}

// Sharing targets keyed by site id; each entry carries the display label,
// the icon class and the click handler opening the site's share URL
var SITES = {
    'facebook': {
        'label': 'Facebook',
        'icon': 'fa fa-facebook',
        'onClick': shareHandler(function() {
            return 'http://www.facebook.com/sharer/sharer.php?s=100&p[url]='+encodeURIComponent(location.href);
        })
    },
    'twitter': {
        'label': 'Twitter',
        'icon': 'fa fa-twitter',
        'onClick': shareHandler(function() {
            return 'http://twitter.com/home?status='+encodeURIComponent(document.title+' '+location.href);
        })
    },
    'google': {
        'label': 'Google+',
        'icon': 'fa fa-google-plus',
        'onClick': shareHandler(function() {
            return 'https://plus.google.com/share?url='+encodeURIComponent(location.href);
        })
    },
    'weibo': {
        'label': 'Weibo',
        'icon': 'fa fa-weibo',
        'onClick': shareHandler(function() {
            return 'http://service.weibo.com/share/share.php?content=utf-8&url='+encodeURIComponent(location.href)+'&title='+encodeURIComponent(document.title);
        })
    },
    'instapaper': {
        'label': 'Instapaper',
        'icon': 'fa fa-instapaper',
        'onClick': shareHandler(function() {
            return 'http://www.instapaper.com/text?u='+encodeURIComponent(location.href);
        })
    },
    'vk': {
        'label': 'VK',
        'icon': 'fa fa-vk',
        'onClick': shareHandler(function() {
            return 'http://vkontakte.ru/share.php?url='+encodeURIComponent(location.href);
        })
    }
};
// On 'start', create the sharing buttons described by config.sharing:
// one "Share" dropdown listing the sites in `opts.all`, plus a direct
// toolbar button for every site whose id is enabled in the options.
gitbook.events.bind('start', function(e, config) {
    var opts = config.sharing;

    // Create dropdown menu. Ids without an entry in SITES are skipped
    // ($.map drops null results) instead of throwing on `site.label`.
    var menu = $.map(opts.all, function(id) {
        var site = SITES[id];
        if (!site) return null;

        return {
            text: site.label,
            onClick: site.onClick
        };
    });

    // Create main button with dropdown
    if (menu.length > 0) {
        gitbook.toolbar.createButton({
            icon: 'fa fa-share-alt',
            label: 'Share',
            position: 'right',
            dropdown: [menu]
        });
    }

    // Direct actions to share
    $.each(SITES, function(siteId, site) {
        if (!opts[siteId]) return;

        gitbook.toolbar.createButton({
            icon: site.icon,
            // SITES entries define `label`; they have no `text` property
            label: site.label,
            position: 'right',
            onClick: site.onClick
        });
    });
});
});

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

15537
frontend/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,27 @@
import request from '@/utils/request'
/**
 * Send the login request: POST /user/login with the credentials as request data.
 *
 * @param {String} username
 * @param {String} password
 * @return whatever `request` returns for this call
 */
export function login (username, password) {
  const credentials = { username, password }
  return request({ url: '/user/login', method: 'post', data: credentials })
}
/**
 * Fetch the user info: GET /user/info with the token passed as a query parameter.
 *
 * @param {String} token
 * @return whatever `request` returns for this call
 */
export function getInfo (token) {
  const query = { token }
  return request({ url: '/user/info', method: 'get', params: query })
}
/**
 * Send the logout request: POST /user/logout with no payload.
 *
 * @return whatever `request` returns for this call
 */
export function logout () {
  const options = { url: '/user/logout', method: 'post' }
  return request(options)
}

View File

@@ -0,0 +1,46 @@
import axios from 'axios'
// Base URL that every API path is appended to; defaults to a backend on localhost:8000.
let baseUrl = 'http://localhost:8000/api'
// When NODE_ENV is 'production' the hard-coded host below is used instead.
if (process.env.NODE_ENV === 'production') {
baseUrl = 'http://139.129.230.98:8000/api'
}
// Disabled alternative that would read the base URL from an environment variable:
// const baseUrl = process.env.API_BASE_URL || 'http://localhost:8000/api'
/**
 * Issue an HTTP request against the API through axios.
 *
 * @param {String} method HTTP verb handed to axios
 * @param {String} path path appended to `baseUrl`
 * @param {Object} [params] passed to axios as `params`
 * @param {Object} [data] passed to axios as `data`
 * @return {Promise} the promise returned by axios
 */
const request = (method, path, params, data) => {
  const url = `${baseUrl}${path}`
  // axios already returns a promise, so it is returned directly rather than
  // being re-wrapped in `new Promise(...)` with forwarded resolve/reject.
  return axios({
    method,
    url,
    params,
    data
  })
}
/**
 * Shorthand for a GET request.
 *
 * @param {String} path path appended to the base URL by `request`
 * @param {Object} [params] forwarded as the `params` argument of `request`
 * @return {Promise} result of `request`
 */
const get = (path, params) => request('GET', path, params)
/**
 * Shorthand for a POST request with empty params.
 *
 * @param {String} path path appended to the base URL by `request`
 * @param {Object} [data] forwarded as the `data` argument of `request`
 * @return {Promise} result of `request`
 */
const post = (path, data) => request('POST', path, {}, data)
/**
 * Shorthand for a PUT request with empty params.
 *
 * @param {String} path path appended to the base URL by `request`
 * @param {Object} [data] forwarded as the `data` argument of `request`
 * @return {Promise} result of `request`
 */
const put = (path, data) => request('PUT', path, {}, data)
/**
 * Shorthand for a DELETE request.
 *
 * @param {String} path path appended to the base URL by `request`
 * @param {Object} [data] forwarded as the `data` argument of `request`
 * @return {Promise} result of `request`
 */
const del = (path, data) => {
  // `data` used to be accepted but silently dropped; forward it the same way
  // `post` and `put` do. Callers that pass nothing still send no data.
  return request('DELETE', path, {}, data)
}
// Default export: the resolved base URL, the generic `request` function and the
// per-verb shorthands. `del` is exposed under the key `delete`.
export default {
baseUrl,
request,
get,
post,
put,
delete: del
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

View File

@@ -0,0 +1,19 @@
/**
 * Convert a base64 data URL into a binary Blob.
 *
 * @param {String} data data URL of the form "data:image/png;base64,****"; everything
 *   up to the first comma is descriptive header text and is discarded. A bare
 *   base64 string without a comma is decoded as-is.
 * @param {String} mime value used as the `type` of the resulting Blob
 * @return {Blob} Blob holding the decoded bytes
 */
export default function(data, mime) {
  // `data.split(',')[1]` was undefined when the input had no header, which made
  // atob fail on the string "undefined"; fall back to the whole input instead.
  const commaIndex = data.indexOf(',')
  const payload = commaIndex === -1 ? data : data.slice(commaIndex + 1)
  const decoded = window.atob(payload)
  // Copy the decoded binary string into a byte array, one char code per byte
  const bytes = new Uint8Array(decoded.length)
  for (let i = 0; i < decoded.length; i++) {
    bytes[i] = decoded.charCodeAt(i)
  }
  // canvas.toDataURL returns image/png by default
  return new Blob([bytes], {
    type: mime
  })
}

View File

@@ -0,0 +1,103 @@
<template>
<div class="upload-container">
<el-button :style="{background:color,borderColor:color}" icon="el-icon-upload" size="mini" type="primary" @click=" dialogVisible=true">上传图片
</el-button>
<el-dialog :visible.sync="dialogVisible">
<el-upload
:multiple="true"
:file-list="fileList"
:show-file-list="true"
:on-remove="handleRemove"
:on-success="handleSuccess"
:before-upload="beforeUpload"
class="editor-slide-upload"
action="https://httpbin.org/post"
list-type="picture-card">
<el-button size="small" type="primary">点击上传</el-button>
</el-upload>
<el-button @click="dialogVisible = false"> </el-button>
<el-button type="primary" @click="handleSubmit"> </el-button>
</el-dialog>
</div>
</template>
<script>
// import { getToken } from 'api/qiniu'
export default {
name: 'EditorSlideUpload',
props: {
color: {
type: String,
default: '#1890ff'
}
},
data() {
return {
dialogVisible: false,
listObj: {},
fileList: []
}
},
methods: {
checkAllSuccess() {
return Object.keys(this.listObj).every(item => this.listObj[item].hasSuccess)
},
handleSubmit() {
const arr = Object.keys(this.listObj).map(v => this.listObj[v])
if (!this.checkAllSuccess()) {
this.$message('请等待所有图片上传成功 或 出现了网络问题,请刷新页面重新上传!')
return
}
this.$emit('successCBK', arr)
this.listObj = {}
this.fileList = []
this.dialogVisible = false
},
handleSuccess(response, file) {
const uid = file.uid
const objKeyArr = Object.keys(this.listObj)
for (let i = 0, len = objKeyArr.length; i < len; i++) {
if (this.listObj[objKeyArr[i]].uid === uid) {
this.listObj[objKeyArr[i]].url = response.files.file
this.listObj[objKeyArr[i]].hasSuccess = true
return
}
}
},
handleRemove(file) {
const uid = file.uid
const objKeyArr = Object.keys(this.listObj)
for (let i = 0, len = objKeyArr.length; i < len; i++) {
if (this.listObj[objKeyArr[i]].uid === uid) {
delete this.listObj[objKeyArr[i]]
return
}
}
},
beforeUpload(file) {
const _self = this
const _URL = window.URL || window.webkitURL
const fileName = file.uid
this.listObj[fileName] = {}
return new Promise((resolve, reject) => {
const img = new Image()
img.src = _URL.createObjectURL(file)
img.onload = function() {
_self.listObj[fileName] = { hasSuccess: false, uid: file.uid, width: this.width, height: this.height }
}
resolve(true)
})
}
}
}
</script>
<style rel="stylesheet/scss" lang="scss" scoped>
.editor-slide-upload {
margin-bottom: 20px;
/deep/ .el-upload--picture-card {
width: 100%;
}
}
</style>

View File

@@ -1,127 +0,0 @@
<template>
<div class="singleImageUpload2 upload-container">
<el-upload
:data="dataObj"
:multiple="false"
:show-file-list="false"
:on-success="handleImageSuccess"
class="image-uploader"
drag
action="https://httpbin.org/post">
<i class="el-icon-upload"/>
<div class="el-upload__text">Drag或<em>点击上传</em></div>
</el-upload>
<div v-show="imageUrl.length>0" class="image-preview">
<div v-show="imageUrl.length>1" class="image-preview-wrapper">
<img :src="imageUrl">
<div class="image-preview-action">
<i class="el-icon-delete" @click="rmImage"/>
</div>
</div>
</div>
</div>
</template>
<script>
import { getToken } from '@/api/qiniu'
export default {
name: 'SingleImageUpload2',
props: {
value: {
type: String,
default: ''
}
},
data() {
return {
tempUrl: '',
dataObj: { token: '', key: '' }
}
},
computed: {
imageUrl() {
return this.value
}
},
methods: {
rmImage() {
this.emitInput('')
},
emitInput(val) {
this.$emit('input', val)
},
handleImageSuccess() {
this.emitInput(this.tempUrl)
},
beforeUpload() {
const _self = this
return new Promise((resolve, reject) => {
getToken().then(response => {
const key = response.data.qiniu_key
const token = response.data.qiniu_token
_self._data.dataObj.token = token
_self._data.dataObj.key = key
this.tempUrl = response.data.qiniu_url
resolve(true)
}).catch(() => {
reject(false)
})
})
}
}
}
</script>
<style rel="stylesheet/scss" lang="scss" scoped>
.upload-container {
width: 100%;
height: 100%;
position: relative;
.image-uploader {
height: 100%;
}
.image-preview {
width: 100%;
height: 100%;
position: absolute;
left: 0px;
top: 0px;
border: 1px dashed #d9d9d9;
.image-preview-wrapper {
position: relative;
width: 100%;
height: 100%;
img {
width: 100%;
height: 100%;
}
}
.image-preview-action {
position: absolute;
width: 100%;
height: 100%;
left: 0;
top: 0;
cursor: default;
text-align: center;
color: #fff;
opacity: 0;
font-size: 20px;
background-color: rgba(0, 0, 0, .5);
transition: opacity .3s;
cursor: pointer;
text-align: center;
line-height: 200px;
.el-icon-delete {
font-size: 36px;
}
}
&:hover {
.image-preview-action {
opacity: 1;
}
}
}
}
</style>

View File

@@ -1,154 +0,0 @@
<template>
<div class="upload-container">
<el-upload
:data="dataObj"
:multiple="false"
:show-file-list="false"
:on-success="handleImageSuccess"
class="image-uploader"
drag
action="https://httpbin.org/post">
<i class="el-icon-upload"/>
<div class="el-upload__text">将文件拖到此处<em>点击上传</em></div>
</el-upload>
<div class="image-preview image-app-preview">
<div v-show="imageUrl.length>1" class="image-preview-wrapper">
<img :src="imageUrl">
<div class="image-preview-action">
<i class="el-icon-delete" @click="rmImage"/>
</div>
</div>
</div>
<div class="image-preview">
<div v-show="imageUrl.length>1" class="image-preview-wrapper">
<img :src="imageUrl">
<div class="image-preview-action">
<i class="el-icon-delete" @click="rmImage"/>
</div>
</div>
</div>
</div>
</template>
<script>
import { getToken } from '@/api/qiniu'
export default {
name: 'SingleImageUpload3',
props: {
value: {
type: String,
default: ''
}
},
data() {
return {
tempUrl: '',
dataObj: { token: '', key: '' }
}
},
computed: {
imageUrl() {
return this.value
}
},
methods: {
rmImage() {
this.emitInput('')
},
emitInput(val) {
this.$emit('input', val)
},
handleImageSuccess(file) {
this.emitInput(file.files.file)
},
beforeUpload() {
const _self = this
return new Promise((resolve, reject) => {
getToken().then(response => {
const key = response.data.qiniu_key
const token = response.data.qiniu_token
_self._data.dataObj.token = token
_self._data.dataObj.key = key
this.tempUrl = response.data.qiniu_url
resolve(true)
}).catch(err => {
console.log(err)
reject(false)
})
})
}
}
}
</script>
<style rel="stylesheet/scss" lang="scss" scoped>
@import "~@/styles/mixin.scss";
.upload-container {
width: 100%;
position: relative;
@include clearfix;
.image-uploader {
width: 35%;
float: left;
}
.image-preview {
width: 200px;
height: 200px;
position: relative;
border: 1px dashed #d9d9d9;
float: left;
margin-left: 50px;
.image-preview-wrapper {
position: relative;
width: 100%;
height: 100%;
img {
width: 100%;
height: 100%;
}
}
.image-preview-action {
position: absolute;
width: 100%;
height: 100%;
left: 0;
top: 0;
cursor: default;
text-align: center;
color: #fff;
opacity: 0;
font-size: 20px;
background-color: rgba(0, 0, 0, .5);
transition: opacity .3s;
cursor: pointer;
text-align: center;
line-height: 200px;
.el-icon-delete {
font-size: 36px;
}
}
&:hover {
.image-preview-action {
opacity: 1;
}
}
}
.image-app-preview {
width: 320px;
height: 180px;
position: relative;
border: 1px dashed #d9d9d9;
float: left;
margin-left: 50px;
.app-fake-conver {
height: 44px;
position: absolute;
width: 100%; // background: rgba(0, 0, 0, .1);
text-align: center;
line-height: 64px;
color: #fff;
}
}
}
</style>

View File

@@ -126,5 +126,5 @@ export default {
'Node info has been saved successfully': '节点信息已成功保存',
'Are you sure to deploy this spider?': '你确定要部署该爬虫?',
'Are you sure to delete this spider?': '你确定要删除该爬虫?',
'Spider info has been saved successfully': '爬虫信息已成功保存',
'Spider info has been saved successfully': '爬虫信息已成功保存'
}

View File

@@ -1,32 +0,0 @@
<template>
<div class="dashboard-container">
<div class="dashboard-text">name:{{ name }}</div>
<div class="dashboard-text">roles:<span v-for="role in roles" :key="role">{{ role }}</span></div>
</div>
</template>
<script>
import { mapGetters } from 'vuex'
export default {
name: 'Dashboard',
computed: {
...mapGetters([
'name',
'roles'
])
}
}
</script>
<style rel="stylesheet/scss" lang="scss" scoped>
.dashboard {
&-container {
margin: 30px;
}
&-text {
font-size: 30px;
line-height: 46px;
}
}
</style>

View File

@@ -1,84 +0,0 @@
<template>
<div class="app-container">
<el-form ref="form" :model="form" label-width="120px">
<el-form-item label="Activity name">
<el-input v-model="form.name"/>
</el-form-item>
<el-form-item label="Activity zone">
<el-select v-model="form.region" placeholder="please select your zone">
<el-option label="Zone one" value="shanghai"/>
<el-option label="Zone two" value="beijing"/>
</el-select>
</el-form-item>
<el-form-item label="Activity time">
<el-col :span="11">
<el-date-picker v-model="form.date1" type="date" placeholder="Pick a date" style="width: 100%;"/>
</el-col>
<el-col :span="2" class="line">-</el-col>
<el-col :span="11">
<el-time-picker v-model="form.date2" type="fixed-time" placeholder="Pick a time" style="width: 100%;"/>
</el-col>
</el-form-item>
<el-form-item label="Instant delivery">
<el-switch v-model="form.delivery"/>
</el-form-item>
<el-form-item label="Activity type">
<el-checkbox-group v-model="form.type">
<el-checkbox label="Online activities" name="type"/>
<el-checkbox label="Promotion activities" name="type"/>
<el-checkbox label="Offline activities" name="type"/>
<el-checkbox label="Simple brand exposure" name="type"/>
</el-checkbox-group>
</el-form-item>
<el-form-item label="Resources">
<el-radio-group v-model="form.resource">
<el-radio label="Sponsor"/>
<el-radio label="Venue"/>
</el-radio-group>
</el-form-item>
<el-form-item label="Activity form">
<el-input v-model="form.desc" type="textarea"/>
</el-form-item>
<el-form-item>
<el-button type="primary" @click="onSubmit">Create</el-button>
<el-button @click="onCancel">Cancel</el-button>
</el-form-item>
</el-form>
</div>
</template>
<script>
export default {
data () {
return {
form: {
name: '',
region: '',
date1: '',
date2: '',
delivery: false,
type: [],
resource: '',
desc: ''
}
}
},
methods: {
onSubmit () {
this.$message('submit!')
},
onCancel () {
this.$message({
message: 'cancel!',
type: 'warning'
})
}
}
}
</script>
<style scoped>
.line{
text-align: center;
}
</style>

View File

@@ -1,7 +0,0 @@
<template >
<div style="padding:30px;">
<el-alert :closable="false" title="menu 1">
<router-view />
</el-alert>
</div>
</template>

View File

@@ -1,7 +0,0 @@
<template >
<div style="padding:30px;">
<el-alert :closable="false" title="menu 1-1" type="success">
<router-view />
</el-alert>
</div>
</template>

View File

@@ -1,7 +0,0 @@
<template>
<div style="padding:30px;">
<el-alert :closable="false" title="menu 1-2" type="success">
<router-view />
</el-alert>
</div>
</template>

View File

@@ -1,5 +0,0 @@
<template functional>
<div style="padding:30px;">
<el-alert :closable="false" title="menu 1-2-1" type="warning" />
</div>
</template>

View File

@@ -1,5 +0,0 @@
<template functional>
<div style="padding:30px;">
<el-alert :closable="false" title="menu 1-2-2" type="warning" />
</div>
</template>

View File

@@ -1,5 +0,0 @@
<template functional>
<div style="padding:30px;">
<el-alert :closable="false" title="menu 1-3" type="success" />
</div>
</template>

View File

@@ -1,5 +0,0 @@
<template>
<div style="padding:30px;">
<el-alert :closable="false" title="menu 2" />
</div>
</template>

View File

@@ -1,78 +0,0 @@
<template>
<div class="app-container">
<el-table
v-loading="listLoading"
:data="list"
element-loading-text="Loading"
border
fit
highlight-current-row>
<el-table-column align="center" label="ID" width="95">
<template slot-scope="scope">
{{ scope.$index }}
</template>
</el-table-column>
<el-table-column label="Title">
<template slot-scope="scope">
{{ scope.row.title }}
</template>
</el-table-column>
<el-table-column label="Author" width="110" align="center">
<template slot-scope="scope">
<span>{{ scope.row.author }}</span>
</template>
</el-table-column>
<el-table-column label="Pageviews" width="110" align="center">
<template slot-scope="scope">
{{ scope.row.pageviews }}
</template>
</el-table-column>
<el-table-column class-name="status-col" label="Status" width="110" align="center">
<template slot-scope="scope">
<el-tag :type="scope.row.status | statusFilter">{{ scope.row.status }}</el-tag>
</template>
</el-table-column>
<el-table-column align="center" prop="created_at" label="Display_time" width="200">
<template slot-scope="scope">
<i class="el-icon-time"/>
<span>{{ scope.row.display_time }}</span>
</template>
</el-table-column>
</el-table>
</div>
</template>
<script>
import { getList } from '@/api/table'
export default {
filters: {
statusFilter (status) {
const statusMap = {
published: 'success',
draft: 'gray',
deleted: 'danger'
}
return statusMap[status]
}
},
data () {
return {
list: null,
listLoading: true
}
},
created () {
this.fetchData()
},
methods: {
fetchData () {
this.listLoading = true
getList(this.listQuery).then(response => {
this.list = response.data.items
this.listLoading = false
})
}
}
}
</script>

View File

@@ -1,77 +0,0 @@
<template>
<div class="app-container">
<el-input v-model="filterText" placeholder="Filter keyword" style="margin-bottom:30px;"/>
<el-tree
ref="tree2"
:data="data2"
:props="defaultProps"
:filter-node-method="filterNode"
class="filter-tree"
default-expand-all
/>
</div>
</template>
<script>
export default {
data () {
return {
filterText: '',
data2: [{
id: 1,
label: 'Level one 1',
children: [{
id: 4,
label: 'Level two 1-1',
children: [{
id: 9,
label: 'Level three 1-1-1'
}, {
id: 10,
label: 'Level three 1-1-2'
}]
}]
}, {
id: 2,
label: 'Level one 2',
children: [{
id: 5,
label: 'Level two 2-1'
}, {
id: 6,
label: 'Level two 2-2'
}]
}, {
id: 3,
label: 'Level one 3',
children: [{
id: 7,
label: 'Level two 3-1'
}, {
id: 8,
label: 'Level two 3-2'
}]
}],
defaultProps: {
children: 'children',
label: 'label'
}
}
},
watch: {
filterText (val) {
this.$refs.tree2.filter(val)
}
},
methods: {
filterNode (value, data) {
if (!value) return true
return data.label.indexOf(value) !== -1
}
}
}
</script>

View File

@@ -1,66 +0,0 @@
amqp==2.4.1
aniso8601==4.1.0
APScheduler==3.5.3
asn1crypto==0.24.0
attrs==18.2.0
Automat==0.7.0
Babel==2.6.0
billiard==3.5.0.5
celery==4.2.1
certifi==2018.11.29
cffi==1.11.5
chardet==3.0.4
Click==7.0
constantly==15.1.0
cryptography==2.5
cssselect==1.0.3
Django==2.1.7
django-cors-headers==2.4.0
dnspython==1.16.0
docopt==0.6.2
eventlet==0.24.1
Flask==1.0.2
Flask-Cors==3.0.7
Flask-RESTful==0.3.7
Flask-Uploads==0.2.1
flower==0.9.2
gerapy==0.8.5
greenlet==0.4.15
gunicorn==19.9.0
hyperlink==18.0.0
idna==2.8
incremental==17.5.0
itsdangerous==1.1.0
Jinja2==2.10
kombu==4.3.0
lxml==4.3.1
MarkupSafe==1.1.0
mongoengine==0.16.3
monotonic==1.5
parsel==1.5.1
pyasn1==0.4.5
pyasn1-modules==0.2.4
pycparser==2.19
PyDispatcher==2.0.5
PyHamcrest==1.9.0
pymongo==3.7.2
PyMySQL==0.9.3
pyOpenSSL==19.0.0
python-scrapyd-api==2.1.2
pytz==2018.9
queuelib==1.5.0
redis==3.1.0
requests==2.21.0
Scrapy==1.6.0
scrapy-redis==0.6.8
scrapy-splash==0.7.2
service-identity==18.1.0
six==1.12.0
tornado==5.1.1
Twisted==18.9.0
tzlocal==1.5.1
urllib3==1.24.1
vine==1.2.0
w3lib==1.20.0
Werkzeug==0.14.1
zope.interface==4.6.0

View File

@@ -11,7 +11,7 @@ const MongoClient = require('mongodb').MongoClient;
const page = await browser.newPage();
// open database connection
const client = await MongoClient.connect('mongodb://192.168.99.100:27017');
const client = await MongoClient.connect('mongodb://127.0.0.1:27017');
let db = await client.db('crawlab_test');
const colName = process.env.CRAWLAB_COLLECTION || 'results';
const col = db.collection(colName);

View File

@@ -53,7 +53,7 @@ const MongoClient = require('mongodb').MongoClient;
});
// open database connection
const client = await MongoClient.connect('mongodb://192.168.99.100:27017');
const client = await MongoClient.connect('mongodb://127.0.0.1:27017');
let db = await client.db('crawlab_test');
const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin';
const taskId = process.env.CRAWLAB_TASK_ID;

View File

@@ -8,7 +8,7 @@ import os
from pymongo import MongoClient
MONGO_HOST = '192.168.99.100'
MONGO_HOST = '127.0.0.1'
MONGO_PORT = 27017
MONGO_DB = 'crawlab_test'
@@ -16,9 +16,7 @@ MONGO_DB = 'crawlab_test'
class JuejinPipeline(object):
mongo = MongoClient(host=MONGO_HOST, port=MONGO_PORT)
db = mongo[MONGO_DB]
col_name = os.environ.get('CRAWLAB_COLLECTION')
if not col_name:
col_name = 'test'
col_name = os.environ.get('CRAWLAB_COLLECTION','test')
col = db[col_name]
def process_item(self, item, spider):

View File

@@ -0,0 +1,2 @@
"""Launcher script: runs ``scrapy crawl juejin_spider`` through Scrapy's command-line entry point."""
from scrapy.cmdline import execute

execute(["scrapy", "crawl", "juejin_spider"])

View File

@@ -52,7 +52,7 @@ const MongoClient = require('mongodb').MongoClient;
});
// open database connection
const client = await MongoClient.connect('mongodb://192.168.99.100:27017');
const client = await MongoClient.connect('mongodb://127.0.0.1:27017');
let db = await client.db('crawlab_test');
const colName = process.env.CRAWLAB_COLLECTION || 'results_juejin';
const taskId = process.env.CRAWLAB_TASK_ID;

View File

@@ -51,7 +51,7 @@ const MongoClient = require('mongodb').MongoClient;
});
// open database connection
const client = await MongoClient.connect('mongodb://192.168.99.100:27017');
const client = await MongoClient.connect('mongodb://127.0.0.1:27017');
let db = await client.db('crawlab_test');
const colName = process.env.CRAWLAB_COLLECTION || 'results_segmentfault';
const taskId = process.env.CRAWLAB_TASK_ID;