code cleanup

marvzhang
2020-03-04 11:32:29 +08:00
parent 1bd6dee70b
commit 2af88a99ac
26 changed files with 179 additions and 493 deletions


@@ -0,0 +1,45 @@
apiVersion: v1
kind: Service
metadata:
  name: crawlab
  namespace: crawlab-release
spec:
  ports:
  - port: 8080
    targetPort: 8080
    nodePort: 30098
  selector:
    app: crawlab-master
  type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: crawlab-master
  namespace: crawlab-release
spec:
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: crawlab-master
  template:
    metadata:
      labels:
        app: crawlab-master
    spec:
      containers:
      - image: tikazyq/crawlab:release
        name: crawlab
        env:
        - name: CRAWLAB_SERVER_MASTER
          value: "Y"
        - name: CRAWLAB_MONGO_HOST
          value: "mongo"
        - name: CRAWLAB_REDIS_ADDRESS
          value: "redis"
        - name: CRAWLAB_SETTING_ALLOWREGISTER
          value: "Y"
        ports:
        - containerPort: 8080
          name: crawlab
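
The Service above exposes the master UI on NodePort 30098. A quick smoke test once the manifests are applied, assuming `kubectl` already points at the target cluster (the node address is a placeholder):

```
kubectl -n crawlab-release get svc crawlab   # confirm the NodePort assignment
kubectl get nodes -o wide                    # pick any node's address
curl -I http://<node-ip>:30098               # substitute a real node address
```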


@@ -0,0 +1,27 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: crawlab-worker
  namespace: crawlab-release
spec:
  replicas: 2
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: crawlab-worker
  template:
    metadata:
      labels:
        app: crawlab-worker
    spec:
      containers:
      - image: tikazyq/crawlab:release
        name: crawlab
        env:
        - name: CRAWLAB_SERVER_MASTER
          value: "N"
        - name: CRAWLAB_MONGO_HOST
          value: "mongo"
        - name: CRAWLAB_REDIS_ADDRESS
          value: "redis"


@@ -0,0 +1,28 @@
apiVersion: v1
kind: PersistentVolume
metadata:
  name: mongo-pv-volume-release
  namespace: crawlab-release
  labels:
    type: local
spec:
  storageClassName: manual
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  hostPath:
    path: "/data/crawlab-release/mongodb/data"
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: mongo-pv-claim-release
  namespace: crawlab-release
spec:
  storageClassName: manual
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi

devops/release/mongo.yaml Normal file

@@ -0,0 +1,41 @@
apiVersion: v1
kind: Service
metadata:
  name: mongo
  namespace: crawlab-release
spec:
  ports:
  - port: 27017
  selector:
    app: mongo
  clusterIP: None
---
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
  name: mongo
  namespace: crawlab-release
spec:
  selector:
    matchLabels:
      app: mongo
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: mongo
    spec:
      containers:
      - image: mongo:4
        name: mongo
        ports:
        - containerPort: 27017
          name: mongo
        volumeMounts:
        - name: mongo-persistent-storage
          mountPath: /data/db
      volumes:
      - name: mongo-persistent-storage
        persistentVolumeClaim:
          claimName: mongo-pv-claim-release

devops/release/ns.yaml Normal file

@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
  name: crawlab-release

devops/release/redis.yaml Normal file

@@ -0,0 +1,34 @@
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: crawlab-release
spec:
  ports:
  - port: 6379
  selector:
    app: redis
  clusterIP: None
---
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
  name: redis
  namespace: crawlab-release
spec:
  selector:
    matchLabels:
      app: redis
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
      - image: redis
        name: redis
        ports:
        - containerPort: 6379
          name: redis
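
Taken together, the manifests above form the complete release stack. A plausible apply order, assuming the unnamed files sit next to the three named ones under `devops/release/` (the guessed filenames are hypothetical):

```
kubectl apply -f devops/release/ns.yaml        # the namespace must exist first
kubectl apply -f devops/release/mongo-pv.yaml  # hypothetical name for the PV/PVC manifest
kubectl apply -f devops/release/mongo.yaml
kubectl apply -f devops/release/redis.yaml
kubectl apply -f devops/release/master.yaml    # hypothetical name for the master Service/Deployment
kubectl apply -f devops/release/worker.yaml    # hypothetical name for the worker Deployment
kubectl -n crawlab-release get pods            # watch the stack come up
```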


@@ -1,27 +0,0 @@
FROM node:8.16.0 AS frontend-build
ADD ./frontend /app
WORKDIR /app
# install frontend
RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org
RUN npm run build:prod
FROM alpine:latest
# install nginx
RUN mkdir /run/nginx && apk add nginx
# copy built frontend assets
COPY --from=frontend-build /app/dist /app/dist
# copy nginx proxy config
COPY crawlab.conf /etc/nginx/conf.d
# copy the startup script
COPY docker_init.sh /app/docker_init.sh
EXPOSE 8080
CMD ["/bin/sh", "/app/docker_init.sh"]


@@ -1,86 +0,0 @@
FROM golang:1.12-alpine AS backend-build
WORKDIR /go/src/app
COPY ./backend .
ENV GO111MODULE on
ENV GOPROXY https://mirrors.aliyun.com/goproxy/
RUN go install -v ./...
FROM node:lts-alpine AS frontend-build
ARG NPM_DISABLE_SAFE_PERM=false
RUN if [ ${NPM_DISABLE_SAFE_PERM} = true ]; then \
    # enable npm unsafe-perm mode
    echo "info: use npm unsafe-perm mode" \
    && npm config set unsafe-perm true \
    ;fi
ADD ./frontend /app
WORKDIR /app
# install frontend
RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org
RUN npm run build:prod
# images
FROM python:alpine
ADD . /app
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories
# install packages
RUN apk update && apk add --no-cache --virtual .build-deps \
    gcc \
    linux-headers \
    musl-dev \
    libffi-dev \
    libxml2-dev \
    libxslt-dev \
    openssl-dev
# install backend
RUN pip install scrapy pymongo bs4 requests -i https://pypi.tuna.tsinghua.edu.cn/simple
# copy backend files
COPY --from=backend-build /go/src/app/conf ./conf
COPY --from=backend-build /go/bin/crawlab /usr/local/bin
# install nginx
RUN apk add --no-cache nginx openrc
RUN apk del .build-deps
# copy frontend files
COPY --from=frontend-build /app/dist /app/dist
COPY --from=frontend-build /app/conf/crawlab.conf /etc/nginx/conf.d
VOLUME [ "/sys/fs/cgroup" ]
RUN sed -i 's/#rc_sys=""/rc_sys="lxc"/g' /etc/rc.conf && \
    echo 'rc_provide="loopback net"' >> /etc/rc.conf && \
    sed -i 's/^#\(rc_logger="YES"\)$/\1/' /etc/rc.conf && \
    sed -i '/tty/d' /etc/inittab && \
    sed -i 's/hostname $opts/# hostname $opts/g' /etc/init.d/hostname && \
    sed -i 's/mount -t tmpfs/# mount -t tmpfs/g' /lib/rc/sh/init.sh && \
    sed -i 's/cgroup_add_service /# cgroup_add_service /g' /lib/rc/sh/openrc-run.sh && \
    rm -rf /var/cache/apk/* && \
    mkdir -p /run/openrc && \
    touch /run/openrc/softlevel && \
    /sbin/openrc
# working directory
WORKDIR /app/backend
ENV PYTHONIOENCODING utf-8
# frontend port
EXPOSE 8080
# backend port
EXPOSE 8000
# start backend
CMD ["/bin/sh", "/app/docker_init.sh"]


@@ -1,43 +0,0 @@
FROM golang:1.12-alpine AS backend-build
WORKDIR /go/src/app
COPY ./backend .
ENV GO111MODULE on
ENV GOPROXY https://mirrors.aliyun.com/goproxy/
RUN go install -v ./...
# images
FROM python:alpine
ADD . /app
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories
RUN apk update && apk add --no-cache --virtual .build-deps \
    gcc \
    linux-headers \
    musl-dev \
    libffi-dev \
    libxml2-dev \
    libxslt-dev \
    openssl-dev
# install backend
RUN pip install scrapy pymongo bs4 requests -i https://pypi.tuna.tsinghua.edu.cn/simple
# copy backend files
COPY --from=backend-build /go/src/app/conf ./conf
COPY --from=backend-build /go/bin/crawlab /usr/local/bin
RUN apk del .build-deps
# working directory
WORKDIR /app/backend
ENV PYTHONIOENCODING utf-8
# backend port
EXPOSE 8000
# start backend
CMD crawlab


@@ -1,7 +0,0 @@
# Examples
* frontend: builds the frontend image
* master: builds the Master node image
* worker: builds the Worker node image
* run_docker_master.sh: sample script for running a Master node
* run_docker_worker.sh: sample script for running a Worker node


@@ -1,30 +0,0 @@
FROM node:8.16.0-alpine AS build
ADD ./frontend /app
WORKDIR /app
# install frontend
RUN npm install -g yarn && yarn install
RUN npm run build:prod
FROM alpine:latest
# install nginx
RUN mkdir /run/nginx && apk add nginx
# copy built frontend assets
COPY --from=build /app/dist /app/dist
# copy nginx proxy config
COPY crawlab.conf /etc/nginx/conf.d
# copy the startup script
COPY docker_init.sh /app/docker_init.sh
# default backend API address
ENV CRAWLAB_API_ADDRESS http://localhost:8000
EXPOSE 8080
CMD ["/bin/sh", "/app/docker_init.sh"]


@@ -1,5 +0,0 @@
# Building the frontend image
The frontend must be built manually; put the compiled output in this directory before building the image.
When running the container, set the `CRAWLAB_API_ADDRESS` environment variable so the frontend can reach the backend API; an example run follows.
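
A minimal run sketch, assuming the image was tagged `crawlab:frontend` (the tag and backend address are illustrative):

```
docker build -t crawlab:frontend .
docker run -d -p 8080:8080 \
  -e CRAWLAB_API_ADDRESS=http://192.168.99.100:8000 \
  crawlab:frontend
```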


@@ -1,13 +0,0 @@
server {
    gzip on;
    gzip_min_length 1k;
    gzip_buffers 4 16k;
    #gzip_http_version 1.0;
    gzip_comp_level 2;
    gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript application/x-httpd-php image/jpeg image/gif image/png;
    gzip_vary off;
    gzip_disable "MSIE [1-6]\.";
    listen 8080;
    root /app/dist;
    index index.html;
}


@@ -1,23 +0,0 @@
#!/bin/sh
# point the frontend at the configured api address instead of the default
if [ -n "${CRAWLAB_API_ADDRESS}" ]; then
    jspath=$(ls /app/dist/js/app.*.js)
    sed -i "s?localhost:8000?${CRAWLAB_API_ADDRESS}?g" ${jspath}
fi
# rewrite asset paths when a base url is configured
if [ -n "${CRAWLAB_BASE_URL}" ]; then
    indexpath=/app/dist/index.html
    sed -i "s?/js/?${CRAWLAB_BASE_URL}/js/?g" ${indexpath}
    sed -i "s?/css/?${CRAWLAB_BASE_URL}/css/?g" ${indexpath}
fi
# start nginx in the foreground
nginx -g 'daemon off;'


@@ -1,20 +0,0 @@
FROM alpine:latest
# set the working directory
WORKDIR /opt/crawlab
# copy the config file
COPY config.yml /opt/crawlab/conf/config.yml
# copy the executable
COPY crawlab /usr/local/bin
# create the spiders directory and make the binary executable
RUN mkdir -p /opt/crawlab/spiders && chmod +x /usr/local/bin/crawlab
# mark this node as the Master
ENV CRAWLAB_SERVER_MASTER Y
EXPOSE 8000
CMD ["crawlab"]


@@ -1,8 +0,0 @@
# Building the Master node image
The crawlab binary referenced in the Dockerfile must be built manually in the source directory and copied here first.
## Building the Linux binary
```
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o crawlab main.go
```
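
With the binary in place, a plausible build-and-run sequence (the `crawlab:master` tag is illustrative; `CRAWLAB_SERVER_MASTER=Y` is already baked into the image):

```
docker build -t crawlab:master .
docker run -d -p 8000:8000 crawlab:master
```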


@@ -1,32 +0,0 @@
api:
  address: "localhost:8000"
mongo:
  host: "192.168.235.26"
  port: 27017
  db: crawlab_local
  username: "root"
  password: "example"
  authSource: "admin"
redis:
  address: 192.168.235.0
  password: redis-1.0
  database: 29
  port: 16379
log:
  level: info
  path: "/logs/crawlab"
server:
  host: 0.0.0.0
  port: 8000
  master: "Y"
  secret: "crawlab"
  register:
    # mac or ip; with ip, the address below must be set manually
    type: "mac"
    ip: "192.168.0.104"
spider:
  path: "/spiders"
task:
  workers: 4
other:
  tmppath: "/tmp"
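
The environment variables used elsewhere in this commit appear to mirror this config tree (`CRAWLAB_` plus the upper-cased key path); the mapping below is inferred from the files in this diff, not from documentation:

```
# inferred mapping, not a documented contract:
#   mongo.host           -> CRAWLAB_MONGO_HOST
#   redis.address        -> CRAWLAB_REDIS_ADDRESS
#   server.master        -> CRAWLAB_SERVER_MASTER
#   server.register.type -> CRAWLAB_SERVER_REGISTER_TYPE
docker run -d \
  -e CRAWLAB_MONGO_HOST=192.168.99.1 \
  -e CRAWLAB_REDIS_ADDRESS=192.168.99.1:6379 \
  -e CRAWLAB_SERVER_MASTER=Y \
  tikazyq/crawlab:0.3.0
```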


@@ -1,10 +0,0 @@
docker run -d --restart always --name crawlab \
  -e CRAWLAB_REDIS_ADDRESS=192.168.99.1:6379 \
  -e CRAWLAB_MONGO_HOST=192.168.99.1 \
  -e CRAWLAB_SERVER_MASTER=Y \
  -e CRAWLAB_API_ADDRESS=192.168.99.100:8000 \
  -e CRAWLAB_SPIDER_PATH=/app/spiders \
  -p 8080:8080 \
  -p 8000:8000 \
  -v /var/logs/crawlab:/var/logs/crawlab \
  tikazyq/crawlab:0.3.0


@@ -1,6 +0,0 @@
docker run --restart always --name crawlab \
  -e CRAWLAB_REDIS_ADDRESS=192.168.99.1:6379 \
  -e CRAWLAB_MONGO_HOST=192.168.99.1 \
  -e CRAWLAB_SERVER_MASTER=N \
  -v /var/logs/crawlab:/var/logs/crawlab \
  tikazyq/crawlab:0.3.0


@@ -1,24 +0,0 @@
FROM ubuntu:latest
ENV DEBIAN_FRONTEND=noninteractive
# add the dependency manifest
ADD requirements.txt /opt/crawlab/
# add the crawlab binary
ADD crawlab /usr/local/bin/
RUN chmod +x /usr/local/bin/crawlab
# install the base environment
RUN apt-get update \
    && apt-get install -y curl git net-tools iputils-ping ntp python3 python3-pip \
    && apt-get clean \
    && ln -s /usr/bin/pip3 /usr/local/bin/pip \
    && ln -s /usr/bin/python3 /usr/local/bin/python
# install python dependencies
RUN pip install -r /opt/crawlab/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
WORKDIR /opt/crawlab
ENTRYPOINT ["crawlab"]


@@ -1,26 +0,0 @@
# Building a worker node for local development
Master and worker nodes store their registration info in Redis, keyed by the node's MAC address, so running both a master and a worker on one development machine is awkward.
This is an example of running a standalone worker node.
The basic idea: bake the worker's dependencies into an image, compile crawlab into a binary, then mount the config file and the binary into the container as volumes.
The worker then runs normally, and under container orchestration this image can serve as the base image for worker nodes.
### Building the binary
Run the following in the `backend` directory to produce the binary:
```
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o crawlab main.go
```
### Building the worker image
```
docker build -t crawlab:worker .
```
### Running the worker node
```
docker-compose up -d
```
When orchestrating across multiple servers with `docker-compose.yml`, nodes may fail to register because their MAC addresses collide.
Define each node's IP range with `networks` so it registers with Redis correctly; a sketch follows below.
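
A minimal sketch of pinning each worker to its own address on a user-defined network so registrations stop colliding (network name and addresses are illustrative):

```
docker network create --subnet 172.30.0.0/16 crawlabnet
docker run -d --name crawlab-worker-1 \
  --network crawlabnet --ip 172.30.0.10 \
  -v $PWD/conf/config.yml:/opt/crawlab/conf/config.yml \
  -v $PWD/crawlab:/usr/local/bin/crawlab \
  crawlab:worker
```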


@@ -1,25 +0,0 @@
mongo:
  host: 127.0.0.1
  port: 27017
  db: crawlab_test
  username: ""
  password: ""
redis:
  address: 127.0.0.1
  password: ""
  database: 1
  port: 6379
log:
  level: info
  path: "/opt/crawlab/logs"
server:
  host: 0.0.0.0
  port: 8000
  master: "N"
  secret: "crawlab"
spider:
  path: "/opt/crawlab/spiders"
task:
  workers: 4
other:
  tmppath: "/tmp"


@@ -1,18 +0,0 @@
version: '3'
services:
  worker:
    image: crawlab:worker
    container_name: crawlab-worker
    volumes:
      - $PWD/conf/config.yml:/opt/crawlab/conf/config.yml
      # the binary is built from source
      - $PWD/crawlab:/usr/local/bin/crawlab
    networks:
      - crawlabnet
networks:
  crawlabnet:
    ipam:
      driver: default
      config:
        - subnet: 172.30.0.0/16


@@ -1,7 +0,0 @@
geohash2==1.1
Scrapy==1.5.0
APScheduler==3.5.1
fonttools==3.34.2
elasticsearch==5.5.3
requests==2.22.0
pymysql==0.9.3


@@ -1,32 +0,0 @@
version: '3.3'
services:
  master:
    image: "tikazyq/crawlab:develop"
    environment:
      CRAWLAB_API_ADDRESS: "crawlab.cn/dev/api"
      CRAWLAB_BASE_URL: "/dev"
      CRAWLAB_SERVER_MASTER: "Y"
      CRAWLAB_MONGO_HOST: "mongo"
      CRAWLAB_REDIS_ADDRESS: "redis"
      CRAWLAB_LOG_PATH: "/var/logs/crawlab"
    ports:
      - "8090:8080" # frontend
      - "8010:8000" # backend
    depends_on:
      - mongo
      - redis
  worker:
    image: "tikazyq/crawlab:develop"
    environment:
      CRAWLAB_SERVER_MASTER: "N"
      CRAWLAB_MONGO_HOST: "mongo"
      CRAWLAB_REDIS_ADDRESS: "redis"
    depends_on:
      - mongo
      - redis
  mongo:
    image: mongo:latest
    restart: always
  redis:
    image: redis:latest
    restart: always

@@ -1,51 +0,0 @@
version: '3.3'
services:
  master:
    image: "tikazyq/crawlab:master"
    environment:
      CRAWLAB_API_ADDRESS: "https://crawlab.cn/api"
      CRAWLAB_BASE_URL: "/demo"
      CRAWLAB_SERVER_MASTER: "Y"
      CRAWLAB_SERVER_REGISTER_TYPE: "ip"
      CRAWLAB_SERVER_REGISTER_IP: "172.19.0.1"
      CRAWLAB_MONGO_HOST: "mongo"
      CRAWLAB_REDIS_ADDRESS: "redis"
      CRAWLAB_LOG_PATH: "/var/logs/crawlab"
      CRAWLAB_SETTING_ALLOWREGISTER: "Y"
      CRAWLAB_SERVER_LANG_NODE: "Y"
    ports:
      - "8080:8080" # frontend
    depends_on:
      - mongo
      - redis
    volumes:
      - "/opt/crawlab/log:/var/logs/crawlab" # log persistence
  worker:
    image: "tikazyq/crawlab:master"
    environment:
      CRAWLAB_SERVER_MASTER: "N"
      CRAWLAB_SERVER_REGISTER_TYPE: "ip"
      CRAWLAB_SERVER_REGISTER_IP: "172.19.0.2"
      CRAWLAB_MONGO_HOST: "mongo"
      CRAWLAB_REDIS_ADDRESS: "redis"
      CRAWLAB_SERVER_LANG_NODE: "Y"
    depends_on:
      - mongo
      - redis
    volumes:
      - "/opt/crawlab/log:/var/logs/crawlab" # log persistence
  mongo:
    image: mongo:latest
    restart: always
    volumes:
      - "/opt/crawlab/mongo/data/db:/data/db"
      - "/opt/crawlab/mongo/tmp:/tmp"
  redis:
    image: redis:latest
    restart: always
    volumes:
      - "/opt/crawlab/redis/data:/data"
  splash: # use Splash to run spiders on dynamic pages
    image: scrapinghub/splash
    # ports:
    #   - "8050:8050"