mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
updated README
This commit is contained in:
26
README-zh.md
26
README-zh.md
@@ -30,25 +30,11 @@
|
||||
|
||||
基于Golang的分布式爬虫管理平台,支持Python、NodeJS、Go、Java、PHP等多种编程语言以及多种爬虫框架。
|
||||
|
||||
[查看演示 Demo](https://demo-pro.crawlab.cn) | [文档](https://docs.crawlab.cn) | [文档 (v0.6-beta)](https://docs-next.crawlab.cn)
|
||||
[查看演示 Demo](https://demo-pro.crawlab.cn) | [文档](https://docs.crawlab.cn/zh/)
|
||||
|
||||
## 安装
|
||||
|
||||
三种方式:
|
||||
1. [Docker](http://docs.crawlab.cn/zh/Installation/Docker.html)(推荐)
|
||||
2. [直接部署](http://docs.crawlab.cn/zh/Installation/Direct.html)(了解内核)
|
||||
3. [Kubernetes](http://docs.crawlab.cn/zh/Installation/Kubernetes.html) (多节点部署)
|
||||
|
||||
### 要求(Docker)
|
||||
- Docker 18.03+
|
||||
- MongoDB 3.6+
|
||||
- Docker Compose 1.24+ (可选,但推荐)
|
||||
|
||||
### 要求(直接部署)
|
||||
- Go 1.15+
|
||||
- Node 12.20+
|
||||
- MongoDB 3.6+
|
||||
- [SeaweedFS](https://github.com/chrislusf/seaweedfs) 2.59+
|
||||
您可以参考这个[安装指南](https://docs.crawlab.cn/zh/guide/installation)。
|
||||
|
||||
## 快速开始
|
||||
|
||||
@@ -109,7 +95,7 @@ services:
|
||||
- master
|
||||
|
||||
mongo:
|
||||
image: mongo:latest
|
||||
image: mongo:4.2
|
||||
container_name: crawlab_example_mongo
|
||||
restart: always
|
||||
```
|
||||
@@ -120,11 +106,7 @@ services:
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
Docker部署的详情,请见[相关文档](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html)。
|
||||
|
||||
### 直接部署
|
||||
|
||||
请参考[相关文档](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html)。
|
||||
Docker部署的详情,请见[相关文档](https://docs.crawlab.cn/zh/guide/installation/docker.html)。
|
||||
|
||||
## 截图
|
||||
|
||||
|
||||
24
README.md
24
README.md
@@ -30,25 +30,11 @@
|
||||
|
||||
Golang-based distributed web crawler management platform, supporting various languages including Python, NodeJS, Go, Java, PHP and various web crawler frameworks including Scrapy, Puppeteer, Selenium.
|
||||
|
||||
[Demo](https://demo-pro.crawlab.cn) | [Documentation](https://docs.crawlab.cn) | [Documentation (v0.6-beta)](http://docs-next.crawlab.cn)
|
||||
[Demo](https://demo-pro.crawlab.cn) | [Documentation](https://docs.crawlab.cn/en/)
|
||||
|
||||
## Installation
|
||||
|
||||
Three methods:
|
||||
1. [Docker](http://docs.crawlab.cn/en/Installation/Docker.html) (Recommended)
|
||||
2. [Direct Deploy](http://docs.crawlab.cn/en/Installation/Direct.html) (Check Internal Kernel)
|
||||
3. [Kubernetes](http://docs.crawlab.cn/en/Installation/Kubernetes.html) (Multi-Node Deployment)
|
||||
|
||||
### Pre-requisite (Docker)
|
||||
- Docker 18.03+
|
||||
- MongoDB 3.6+
|
||||
- Docker Compose 1.24+ (optional but recommended)
|
||||
|
||||
### Pre-requisite (Direct Deploy)
|
||||
- Go 1.15+
|
||||
- Node 12.20+
|
||||
- MongoDB 3.6+
|
||||
- [SeaweedFS](https://github.com/chrislusf/seaweedfs) 2.59+
|
||||
You can follow the [installation guide](https://docs.crawlab.cn/en/guide/installation/).
|
||||
|
||||
## Quick Start
|
||||
|
||||
@@ -60,7 +46,7 @@ cd examples/docker/basic
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
Next, you can look into the `docker-compose.yml` (with detailed config params) and the [Documentation (Chinese)](http://docs.crawlab.cn) for further information.
|
||||
Next, you can look into the `docker-compose.yml` (with detailed config params) and the [Documentation](http://docs.crawlab.cn/en/) for further information.
|
||||
|
||||
## Run
|
||||
|
||||
@@ -110,7 +96,7 @@ services:
|
||||
- master
|
||||
|
||||
mongo:
|
||||
image: mongo:latest
|
||||
image: mongo:4.2
|
||||
container_name: crawlab_example_mongo
|
||||
restart: always
|
||||
```
|
||||
@@ -121,7 +107,7 @@ Then execute the command below, and Crawlab Master and Worker Nodes + MongoDB wi
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
For Docker Deployment details, please refer to [relevant documentation](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html).
|
||||
For Docker Deployment details, please refer to [relevant documentation](https://docs.crawlab.cn/en/guide/installation/docker.html).
|
||||
|
||||
|
||||
## Screenshot
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
version: '3.3'
|
||||
services:
|
||||
master:
|
||||
build:
|
||||
context: ./
|
||||
dockerfile: Dockerfile.local
|
||||
container_name: local_master
|
||||
environment:
|
||||
# CRAWLAB_API_ADDRESS: "https://<your_api_ip>:<your_api_port>" # backend API address 后端 API 地址. 适用于 https 或者源码部署
|
||||
CRAWLAB_SERVER_MASTER: "Y" # whether to be master node 是否为主节点,主节点为 Y,工作节点为 N
|
||||
CRAWLAB_MONGO_HOST: "mongo" # MongoDB host address MongoDB 的地址,在 docker compose 网络中,直接引用服务名称
|
||||
# CRAWLAB_MONGO_PORT: "27017" # MongoDB port MongoDB 的端口
|
||||
# CRAWLAB_MONGO_DB: "crawlab_test" # MongoDB database MongoDB 的数据库
|
||||
# CRAWLAB_MONGO_USERNAME: "username" # MongoDB username MongoDB 的用户名
|
||||
# CRAWLAB_MONGO_PASSWORD: "password" # MongoDB password MongoDB 的密码
|
||||
# CRAWLAB_MONGO_AUTHSOURCE: "admin" # MongoDB auth source MongoDB 的验证源
|
||||
CRAWLAB_REDIS_ADDRESS: "redis" # Redis host address Redis 的地址,在 docker compose 网络中,直接引用服务名称
|
||||
# CRAWLAB_REDIS_PORT: "6379" # Redis port Redis 的端口
|
||||
# CRAWLAB_REDIS_DATABASE: "1" # Redis database Redis 的数据库
|
||||
# CRAWLAB_REDIS_PASSWORD: "password" # Redis password Redis 的密码
|
||||
# CRAWLAB_LOG_LEVEL: "info" # log level 日志级别. 默认为 info
|
||||
# CRAWLAB_LOG_ISDELETEPERIODICALLY: "N" # whether to periodically delete log files 是否周期性删除日志文件. 默认不删除
|
||||
# CRAWLAB_LOG_DELETEFREQUENCY: "@hourly" # frequency of deleting log files 删除日志文件的频率. 默认为每小时
|
||||
# CRAWLAB_TASK_WORKERS: 8 # number of task executors 任务执行器个数(并行执行任务数)
|
||||
# CRAWLAB_SERVER_REGISTER_TYPE: "mac" # node register type 节点注册方式. 默认为 mac 地址,也可设置为 ip(防止 mac 地址冲突)
|
||||
# CRAWLAB_SERVER_REGISTER_IP: "127.0.0.1" # node register ip 节点注册IP. 节点唯一识别号,只有当 CRAWLAB_SERVER_REGISTER_TYPE 为 "ip" 时才生效
|
||||
# CRAWLAB_SERVER_LANG_NODE: "Y" # whether to pre-install Node.js 预安装 Node.js 语言环境
|
||||
# CRAWLAB_SERVER_LANG_JAVA: "Y" # whether to pre-install Java 预安装 Java 语言环境
|
||||
# CRAWLAB_SERVER_LANG_DOTNET: "Y" # whether to pre-install .Net core 预安装 .Net Core 语言环境
|
||||
# CRAWLAB_SERVER_LANG_PHP: "Y" # whether to pre-install PHP 预安装 PHP 语言环境
|
||||
# CRAWLAB_SETTING_ALLOWREGISTER: "N" # whether to allow user registration 是否允许用户注册
|
||||
# CRAWLAB_SETTING_ENABLETUTORIAL: "N" # whether to enable tutorial 是否启用教程
|
||||
# CRAWLAB_SETTING_RUNONMASTER: "N" # whether to run on master node 是否在主节点上运行任务
|
||||
# CRAWLAB_SETTING_DEMOSPIDERS: "Y" # whether to init demo spiders 是否使用Demo爬虫
|
||||
# CRAWLAB_SETTING_CHECKSCRAPY: "Y" # whether to automatically check if the spider is scrapy 是否自动检测爬虫为scrapy
|
||||
# CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.example.com # SMTP server address SMTP 服务器地址
|
||||
# CRAWLAB_NOTIFICATION_MAIL_PORT: 465 # SMTP server port SMTP 服务器端口
|
||||
# CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@example.com # sender email 发送者邮箱
|
||||
# CRAWLAB_NOTIFICATION_MAIL_SENDERIDENTITY: admin@example.com # sender ID 发送者 ID
|
||||
# CRAWLAB_NOTIFICATION_MAIL_SMTP_USER: username # SMTP username SMTP 用户名
|
||||
# CRAWLAB_NOTIFICATION_MAIL_SMTP_PASSWORD: password # SMTP password SMTP 密码
|
||||
ports:
|
||||
- "8080:8080" # frontend port mapping 前端端口映射
|
||||
depends_on:
|
||||
- mongo
|
||||
- redis
|
||||
# volumes:
|
||||
# - "/var/crawlab/log:/var/logs/crawlab" # log persistent 日志持久化
|
||||
worker:
|
||||
build:
|
||||
context: ./
|
||||
dockerfile: Dockerfile.local
|
||||
container_name: local_master
|
||||
environment:
|
||||
CRAWLAB_SERVER_MASTER: "N"
|
||||
CRAWLAB_MONGO_HOST: "mongo"
|
||||
CRAWLAB_REDIS_ADDRESS: "redis"
|
||||
depends_on:
|
||||
- mongo
|
||||
- redis
|
||||
# environment:
|
||||
# MONGO_INITDB_ROOT_USERNAME: username
|
||||
# MONGO_INITDB_ROOT_PASSWORD: password
|
||||
# volumes:
|
||||
# - "/var/crawlab/log:/var/logs/crawlab" # log persistent 日志持久化
|
||||
mongo:
|
||||
image: mongo:latest
|
||||
restart: always
|
||||
# volumes:
|
||||
# - "/opt/crawlab/mongo/data/db:/data/db" # make data persistent 持久化
|
||||
# ports:
|
||||
# - "27017:27017" # expose port to host machine 暴露接口到宿主机
|
||||
redis:
|
||||
image: redis:latest
|
||||
restart: always
|
||||
# command: redis-server --requirepass "password" # set redis password 设置 Redis 密码
|
||||
# volumes:
|
||||
# - "/opt/crawlab/redis/data:/data" # make data persistent 持久化
|
||||
# ports:
|
||||
# - "6379:6379" # expose port to host machine 暴露接口到宿主机
|
||||
# splash: # use Splash to run spiders on dynamic pages
|
||||
# image: scrapinghub/splash
|
||||
# container_name: splash
|
||||
# ports:
|
||||
# - "8050:8050"
|
||||
@@ -4,33 +4,31 @@ services:
|
||||
image: crawlabteam/crawlab:latest
|
||||
container_name: crawlab_master
|
||||
environment:
|
||||
CRAWLAB_BASE_URL: crawlab
|
||||
CRAWLAB_SERVER_MASTER: Y
|
||||
CRAWLAB_NODE_MASTER: Y
|
||||
CRAWLAB_MONGO_HOST: mongo
|
||||
ports:
|
||||
- "8080:8080" # frontend port mapping 前端端口映射
|
||||
depends_on:
|
||||
- mongo
|
||||
# volumes:
|
||||
# - "/var/crawlab/log:/var/logs/crawlab" # log persistent 日志持久化
|
||||
# - "/opt/crawlab/master:/data" # data persistent 持久化数据
|
||||
worker:
|
||||
image: crawlabteam/crawlab:latest
|
||||
container_name: worker
|
||||
container_name: crawlab_worker
|
||||
environment:
|
||||
CRAWLAB_SERVER_MASTER: "N"
|
||||
CRAWLAB_NODE_MASTER: "N"
|
||||
CRAWLAB_MONGO_HOST: "mongo"
|
||||
# CRAWLAB_REDIS_ADDRESS: "redis"
|
||||
depends_on:
|
||||
- mongo
|
||||
# volumes:
|
||||
# - "/var/crawlab/log:/var/logs/crawlab" # log persistent 日志持久化
|
||||
# - "/opt/crawlab/worker:/data" # data persistent 持久化数据
|
||||
mongo:
|
||||
image: mongo:latest
|
||||
image: mongo:4.2
|
||||
#restart: always
|
||||
# environment:
|
||||
# MONGO_INITDB_ROOT_USERNAME: username
|
||||
# MONGO_INITDB_ROOT_PASSWORD: password
|
||||
# volumes:
|
||||
# - "/opt/crawlab/mongo/data/db:/data/db" # make data persistent 持久化
|
||||
# - "/opt/crawlab/mongo/data/db:/data/db" # data persistent 持久化数据
|
||||
# ports:
|
||||
# - "27017:27017" # expose port to host machine 暴露接口到宿主机
|
||||
|
||||
Reference in New Issue
Block a user