From 418dd67a892e0875d533c858783c0214fbeccb24 Mon Sep 17 00:00:00 2001
From: marvzhang
Date: Fri, 16 Jul 2021 16:02:38 +0800
Subject: [PATCH] updated dockerfile

---
 Dockerfile         |  4 +++
 README.md          |  4 +--
 docker-compose.yml | 74 ++++++++--------------------------------
 docker_init.sh     |  5 +++-
 4 files changed, 23 insertions(+), 64 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0c5ddc65..70198461 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -36,6 +36,10 @@ RUN chmod 777 /tmp \
     && ln -s /usr/bin/pip3 /usr/local/bin/pip \
     && ln -s /usr/bin/python3 /usr/local/bin/python
 
+# install seaweedfs
+RUN wget https://github.com/chrislusf/seaweedfs/releases/download/2.48/linux_amd64.tar.gz \
+    && tar -zxf linux_amd64.tar.gz \
+    && cp weed /usr/local/bin
 
 # install backend
 RUN pip install scrapy pymongo bs4 requests crawlab-sdk
diff --git a/README.md b/README.md
index edd3b6d7..ddc435f6 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
 # Crawlab
 
-
-
+
+
diff --git a/docker-compose.yml b/docker-compose.yml
index 1c06f422..3ec2ce38 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,65 +1,30 @@
 version: '3.3'
 services:
   master:
-    image: tikazyq/crawlab:latest
-    container_name: master
+    image: crawlabteam/crawlab:latest
+    container_name: crawlab_master
     environment:
-      # CRAWLAB_API_ADDRESS: "https://<api_ip>:<api_port>"  # backend API address; for https or source-code deployments
-      CRAWLAB_SERVER_MASTER: "Y"  # whether to be master node: Y for master, N for worker
-      CRAWLAB_MONGO_HOST: "mongo"  # MongoDB host address; within the docker compose network, reference the service name directly
-      # CRAWLAB_MONGO_PORT: "27017"  # MongoDB port
-      # CRAWLAB_MONGO_DB: "crawlab_test"  # MongoDB database
-      # CRAWLAB_MONGO_USERNAME: "username"  # MongoDB username
-      # CRAWLAB_MONGO_PASSWORD: "password"  # MongoDB password
-      # CRAWLAB_MONGO_AUTHSOURCE: "admin"  # MongoDB auth source
-      CRAWLAB_REDIS_ADDRESS: "redis"  # Redis host address; within the docker compose network, reference the service name directly
-      # CRAWLAB_REDIS_PORT: "6379"  # Redis port
-      # CRAWLAB_REDIS_DATABASE: "1"  # Redis database
-      # CRAWLAB_REDIS_PASSWORD: "password"  # Redis password
-      # CRAWLAB_LOG_LEVEL: "info"  # log level, defaults to info
-      # CRAWLAB_LOG_ISDELETEPERIODICALLY: "N"  # whether to periodically delete log files, disabled by default
-      # CRAWLAB_LOG_DELETEFREQUENCY: "@hourly"  # frequency of deleting log files, defaults to hourly
-      # CRAWLAB_TASK_WORKERS: 8  # number of task executors (tasks run in parallel)
-      # CRAWLAB_SERVER_REGISTER_TYPE: "mac"  # node register type, defaults to mac address; can be set to ip to avoid mac address conflicts
-      # CRAWLAB_SERVER_REGISTER_IP: "127.0.0.1"  # node register IP, the node's unique identifier; only effective when CRAWLAB_SERVER_REGISTER_TYPE is "ip"
-      # CRAWLAB_SERVER_LANG_NODE: "Y"  # whether to pre-install Node.js
-      # CRAWLAB_SERVER_LANG_JAVA: "Y"  # whether to pre-install Java
-      # CRAWLAB_SERVER_LANG_DOTNET: "Y"  # whether to pre-install .Net Core
-      # CRAWLAB_SERVER_LANG_PHP: "Y"  # whether to pre-install PHP
-      # CRAWLAB_SERVER_LANG_GO: "Y"  # whether to pre-install Golang
-      # CRAWLAB_SETTING_ALLOWREGISTER: "N"  # whether to allow user registration
-      # CRAWLAB_SETTING_ENABLETUTORIAL: "N"  # whether to enable tutorial
-      # CRAWLAB_SETTING_RUNONMASTER: "N"  # whether to run tasks on the master node
-      # CRAWLAB_SETTING_DEMOSPIDERS: "Y"  # whether to init demo spiders
-      # CRAWLAB_SETTING_CHECKSCRAPY: "Y"  # whether to automatically check if the spider is scrapy
-      # CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.example.com  # SMTP server address
-      # CRAWLAB_NOTIFICATION_MAIL_PORT: 465  # SMTP server port
-      # CRAWLAB_NOTIFICATION_MAIL_SENDEREMAIL: admin@example.com  # sender email
-      # CRAWLAB_NOTIFICATION_MAIL_SENDERIDENTITY: admin@example.com  # sender ID
-      # CRAWLAB_NOTIFICATION_MAIL_SMTP_USER: username  # SMTP username
-      # CRAWLAB_NOTIFICATION_MAIL_SMTP_PASSWORD: password  # SMTP password
+      CRAWLAB_MONGO_HOST: 'mongo'
     ports:
       - "8080:8080"  # frontend port mapping
     depends_on:
       - mongo
-      - redis
     # volumes:
     #   - "/var/crawlab/log:/var/logs/crawlab"  # log persistence
-  worker:
-    image: tikazyq/crawlab:latest
-    container_name: worker
-    environment:
-      CRAWLAB_SERVER_MASTER: "N"
-      CRAWLAB_MONGO_HOST: "mongo"
-      CRAWLAB_REDIS_ADDRESS: "redis"
-    depends_on:
-      - mongo
-      - redis
+  #worker:
+  #  image: tikazyq/crawlab:latest
+  #  container_name: worker
+  #  environment:
+  #    CRAWLAB_SERVER_MASTER: "N"
+  #    CRAWLAB_MONGO_HOST: "mongo"
+  #    CRAWLAB_REDIS_ADDRESS: "redis"
+  #  depends_on:
+  #    - mongo
     # volumes:
     #   - "/var/crawlab/log:/var/logs/crawlab"  # log persistence
   mongo:
     image: mongo:latest
-    restart: always
+    #restart: always
     # environment:
     #   MONGO_INITDB_ROOT_USERNAME: username
     #   MONGO_INITDB_ROOT_PASSWORD: password
@@ -67,16 +32,3 @@ services:
     #   - "/opt/crawlab/mongo/data/db:/data/db"  # make data persistent
     # ports:
     #   - "27017:27017"  # expose port to host machine
-  redis:
-    image: redis:latest
-    restart: always
-    # command: redis-server --requirepass "password"  # set redis password
-    # volumes:
-    #   - "/opt/crawlab/redis/data:/data"  # make data persistent
-    # ports:
-    #   - "6379:6379"  # expose port to host machine
-  # splash:  # use Splash to run spiders on dynamic pages
-  #   image: scrapinghub/splash
-  #   container_name: splash
-  #   ports:
-  #     - "8050:8050"
diff --git a/docker_init.sh b/docker_init.sh
index ef66ef3e..2f13b567 100755
--- a/docker_init.sh
+++ b/docker_init.sh
@@ -2,7 +2,7 @@
 
 # replace absolute api url to relative
 jspath=`ls /app/dist/js/app.*.js`
-sed -i "s?/http:\/\/localhost:8000/\/api/?g" ${jspath}
+sed -i "s?http:\/\/localhost:8000?\/api?g" ${jspath}
 
 # replace default api path to new one
 # if [ "${CRAWLAB_API_ADDRESS}" = "" ];
@@ -26,6 +26,9 @@ sed -i "s?/http:\/\/localhost:8000/\/api/?g" ${jspath}
 # start nginx
 service nginx start
 
+# start seaweedfs server
+weed server -dir /data -master.dir /data -volume.dir.idx /data -ip localhost -ip.bind 0.0.0.0 -filer >> /var/log/weed.log 2>&1 &
+
 #grant script
 # chmod +x /app/backend/scripts/*.sh
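
A quick smoke test for an image built with this patch applied (a sketch only: the image tag crawlab-local and container name crawlab-test are placeholders, and it assumes docker_init.sh is the image's startup script, which this diff does not show):

    # build the image from the patched Dockerfile and start a throwaway container
    docker build -t crawlab-local .
    docker run -d --name crawlab-test crawlab-local
    # the weed binary copied to /usr/local/bin by the new Dockerfile step should be on PATH
    docker exec crawlab-test weed version
    # docker_init.sh now launches "weed server ... -filer" in the background and logs to /var/log/weed.log
    docker exec crawlab-test cat /var/log/weed.log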