mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
updated dockerfile
This commit is contained in:
@@ -36,6 +36,10 @@ RUN chmod 777 /tmp \
|
|||||||
&& ln -s /usr/bin/pip3 /usr/local/bin/pip \
|
&& ln -s /usr/bin/pip3 /usr/local/bin/pip \
|
||||||
&& ln -s /usr/bin/python3 /usr/local/bin/python
|
&& ln -s /usr/bin/python3 /usr/local/bin/python
|
||||||
|
|
||||||
|
# install seaweedfs
|
||||||
|
RUN wget https://github.com/chrislusf/seaweedfs/releases/download/2.48/linux_amd64.tar.gz \
|
||||||
|
&& tar -zxf linux_amd64.tar.gz \
|
||||||
|
&& cp weed /usr/local/bin
|
||||||
|
|
||||||
# install backend
|
# install backend
|
||||||
RUN pip install scrapy pymongo bs4 requests crawlab-sdk
|
RUN pip install scrapy pymongo bs4 requests crawlab-sdk
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
# Crawlab
|
# Crawlab
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
<a href="https://hub.docker.com/r/tikazyq/crawlab/builds" target="_blank">
|
<a href="https://github.com/crawlab-team/crawlab/actions/workflows/dockerpush.yml" target="_blank">
|
||||||
<img src="https://github.com/crawlab-team/crawlab/workflows/Docker/badge.svg">
|
<img src="https://github.com/crawlab-team/crawlab/actions/workflows/dockerpush.yml/badge.svg">
|
||||||
</a>
|
</a>
|
||||||
<a href="https://hub.docker.com/r/tikazyq/crawlab" target="_blank">
|
<a href="https://hub.docker.com/r/tikazyq/crawlab" target="_blank">
|
||||||
<img src="https://img.shields.io/docker/pulls/tikazyq/crawlab?label=pulls&logo=docker">
|
<img src="https://img.shields.io/docker/pulls/tikazyq/crawlab?label=pulls&logo=docker">
|
||||||
|
|||||||
@@ -1,65 +1,30 @@
|
|||||||
version: '3.3'
|
version: '3.3'
|
||||||
services:
|
services:
|
||||||
master:
|
master:
|
||||||
image: tikazyq/crawlab:latest
|
image: crawlabteam/crawlab:latest
|
||||||
container_name: master
|
container_name: crawlab_master
|
||||||
environment:
|
environment:
|
||||||
# CRAWLAB_API_ADDRESS: "https://<your_api_ip>:<your_api_port>" # backend API address 后端 API 地址. 适用于 https 或者源码部署
|
CRAWLAB_MONGO_HOST: 'mongo'
|
||||||
CRAWLAB_SERVER_MASTER: "Y" # whether to be master node 是否为主节点,主节点为 Y,工作节点为 N
|
|
||||||
CRAWLAB_MONGO_HOST: "mongo" # MongoDB host address MongoDB 的地址,在 docker compose 网络中,直接引用服务名称
|
|
||||||
# CRAWLAB_MONGO_PORT: "27017" # MongoDB port MongoDB 的端口
|
|
||||||
# CRAWLAB_MONGO_DB: "crawlab_test" # MongoDB database MongoDB 的数据库
|
|
||||||
# CRAWLAB_MONGO_USERNAME: "username" # MongoDB username MongoDB 的用户名
|
|
||||||
# CRAWLAB_MONGO_PASSWORD: "password" # MongoDB password MongoDB 的密码
|
|
||||||
# CRAWLAB_MONGO_AUTHSOURCE: "admin" # MongoDB auth source MongoDB 的验证源
|
|
||||||
CRAWLAB_REDIS_ADDRESS: "redis" # Redis host address Redis 的地址,在 docker compose 网络中,直接引用服务名称
|
|
||||||
# CRAWLAB_REDIS_PORT: "6379" # Redis port Redis 的端口
|
|
||||||
# CRAWLAB_REDIS_DATABASE: "1" # Redis database Redis 的数据库
|
|
||||||
# CRAWLAB_REDIS_PASSWORD: "password" # Redis password Redis 的密码
|
|
||||||
# CRAWLAB_LOG_LEVEL: "info" # log level 日志级别. 默认为 info
|
|
||||||
# CRAWLAB_LOG_ISDELETEPERIODICALLY: "N" # whether to periodically delete log files 是否周期性删除日志文件. 默认不删除
|
|
||||||
# CRAWLAB_LOG_DELETEFREQUENCY: "@hourly" # frequency of deleting log files 删除日志文件的频率. 默认为每小时
|
|
||||||
# CRAWLAB_TASK_WORKERS: 8 # number of task executors 任务执行器个数(并行执行任务数)
|
|
||||||
# CRAWLAB_SERVER_REGISTER_TYPE: "mac" # node register type 节点注册方式. 默认为 mac 地址,也可设置为 ip(防止 mac 地址冲突)
|
|
||||||
# CRAWLAB_SERVER_REGISTER_IP: "127.0.0.1" # node register ip 节点注册IP. 节点唯一识别号,只有当 CRAWLAB_SERVER_REGISTER_TYPE 为 "ip" 时才生效
|
|
||||||
# CRAWLAB_SERVER_LANG_NODE: "Y" # whether to pre-install Node.js 预安装 Node.js 语言环境
|
|
||||||
# CRAWLAB_SERVER_LANG_JAVA: "Y" # whether to pre-install Java 预安装 Java 语言环境
|
|
||||||
# CRAWLAB_SERVER_LANG_DOTNET: "Y" # whether to pre-install .Net core 预安装 .Net Core 语言环境
|
|
||||||
# CRAWLAB_SERVER_LANG_PHP: "Y" # whether to pre-install PHP 预安装 PHP 语言环境
|
|
||||||
# CRAWLAB_SERVER_LANG_GO: "Y" # whether to pre-install Golang 预安装 Golang 语言环境
|
|
||||||
# CRAWLAB_SETTING_ALLOWREGISTER: "N" # whether to allow user registration 是否允许用户注册
|
|
||||||
# CRAWLAB_SETTING_ENABLETUTORIAL: "N" # whether to enable tutorial 是否启用教程
|
|
||||||
# CRAWLAB_SETTING_RUNONMASTER: "N" # whether to run on master node 是否在主节点上运行任务
|
|
||||||
# CRAWLAB_SETTING_DEMOSPIDERS: "Y" # whether to init demo spiders 是否使用Demo爬虫
|
|
||||||
# CRAWLAB_SETTING_CHECKSCRAPY: "Y" # whether to automatically check if the spider is scrapy 是否自动检测爬虫为scrapy
|
|
||||||
# CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.exmaple.com # STMP server address STMP 服务器地址
|
|
||||||
# CRAWLAB_NOTIFICATION_MAIL_PORT: 465 # STMP server port STMP 服务器端口
|
|
||||||
# CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@exmaple.com # sender email 发送者邮箱
|
|
||||||
# CRAWLAB_NOTIFICATION_MAIL_SENDERIDENTITY: admin@exmaple.com # sender ID 发送者 ID
|
|
||||||
# CRAWLAB_NOTIFICATION_MAIL_SMTP_USER: username # SMTP username SMTP 用户名
|
|
||||||
# CRAWLAB_NOTIFICATION_MAIL_SMTP_PASSWORD: password # SMTP password SMTP 密码
|
|
||||||
ports:
|
ports:
|
||||||
- "8080:8080" # frontend port mapping 前端端口映射
|
- "8080:8080" # frontend port mapping 前端端口映射
|
||||||
depends_on:
|
depends_on:
|
||||||
- mongo
|
- mongo
|
||||||
- redis
|
|
||||||
# volumes:
|
# volumes:
|
||||||
# - "/var/crawlab/log:/var/logs/crawlab" # log persistent 日志持久化
|
# - "/var/crawlab/log:/var/logs/crawlab" # log persistent 日志持久化
|
||||||
worker:
|
#worker:
|
||||||
image: tikazyq/crawlab:latest
|
# image: tikazyq/crawlab:latest
|
||||||
container_name: worker
|
# container_name: worker
|
||||||
environment:
|
# environment:
|
||||||
CRAWLAB_SERVER_MASTER: "N"
|
# CRAWLAB_SERVER_MASTER: "N"
|
||||||
CRAWLAB_MONGO_HOST: "mongo"
|
# CRAWLAB_MONGO_HOST: "mongo"
|
||||||
CRAWLAB_REDIS_ADDRESS: "redis"
|
# CRAWLAB_REDIS_ADDRESS: "redis"
|
||||||
depends_on:
|
# depends_on:
|
||||||
- mongo
|
# - mongo
|
||||||
- redis
|
|
||||||
# volumes:
|
# volumes:
|
||||||
# - "/var/crawlab/log:/var/logs/crawlab" # log persistent 日志持久化
|
# - "/var/crawlab/log:/var/logs/crawlab" # log persistent 日志持久化
|
||||||
mongo:
|
mongo:
|
||||||
image: mongo:latest
|
image: mongo:latest
|
||||||
restart: always
|
#restart: always
|
||||||
# environment:
|
# environment:
|
||||||
# MONGO_INITDB_ROOT_USERNAME: username
|
# MONGO_INITDB_ROOT_USERNAME: username
|
||||||
# MONGO_INITDB_ROOT_PASSWORD: password
|
# MONGO_INITDB_ROOT_PASSWORD: password
|
||||||
@@ -67,16 +32,3 @@ services:
|
|||||||
# - "/opt/crawlab/mongo/data/db:/data/db" # make data persistent 持久化
|
# - "/opt/crawlab/mongo/data/db:/data/db" # make data persistent 持久化
|
||||||
# ports:
|
# ports:
|
||||||
# - "27017:27017" # expose port to host machine 暴露接口到宿主机
|
# - "27017:27017" # expose port to host machine 暴露接口到宿主机
|
||||||
redis:
|
|
||||||
image: redis:latest
|
|
||||||
restart: always
|
|
||||||
# command: redis-server --requirepass "password" # set redis password 设置 Redis 密码
|
|
||||||
# volumes:
|
|
||||||
# - "/opt/crawlab/redis/data:/data" # make data persistent 持久化
|
|
||||||
# ports:
|
|
||||||
# - "6379:6379" # expose port to host machine 暴露接口到宿主机
|
|
||||||
# splash: # use Splash to run spiders on dynamic pages
|
|
||||||
# image: scrapinghub/splash
|
|
||||||
# container_name: splash
|
|
||||||
# ports:
|
|
||||||
# - "8050:8050"
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# replace absolute api url to relative
|
# replace absolute api url to relative
|
||||||
jspath=`ls /app/dist/js/app.*.js`
|
jspath=`ls /app/dist/js/app.*.js`
|
||||||
sed -i "s?/http:\/\/localhost:8000/\/api/?g" ${jspath}
|
sed -i "s?http:\/\/localhost:8000?\/api?g" ${jspath}
|
||||||
|
|
||||||
# replace default api path to new one
|
# replace default api path to new one
|
||||||
# if [ "${CRAWLAB_API_ADDRESS}" = "" ];
|
# if [ "${CRAWLAB_API_ADDRESS}" = "" ];
|
||||||
@@ -26,6 +26,9 @@ sed -i "s?/http:\/\/localhost:8000/\/api/?g" ${jspath}
|
|||||||
# start nginx
|
# start nginx
|
||||||
service nginx start
|
service nginx start
|
||||||
|
|
||||||
|
# start seaweedfs server
|
||||||
|
weed server -dir /data -master.dir /data -volume.dir.idx /data -ip localhost -ip.bind 0.0.0.0 -filer >> /var/log/weed.log 2>&1 &
|
||||||
|
|
||||||
#grant script
|
#grant script
|
||||||
# chmod +x /app/backend/scripts/*.sh
|
# chmod +x /app/backend/scripts/*.sh
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user