diff --git a/Dockerfile.master.apline b/Dockerfile.master.apline
new file mode 100644
index 00000000..d6fc4ba2
--- /dev/null
+++ b/Dockerfile.master.apline
@@ -0,0 +1,78 @@
+FROM golang:1.12-alpine AS backend-build
+
+WORKDIR /go/src/app
+COPY ./backend .
+
+ENV GO111MODULE on
+ENV GOPROXY https://mirrors.aliyun.com/goproxy/
+
+RUN go install -v ./...
+
+FROM node:8.16.0-alpine AS frontend-build
+
+ADD ./frontend /app
+WORKDIR /app
+
+# install frontend
+RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org
+
+RUN npm run build:prod
+
+# images
+FROM python:alpine
+
+ADD . /app
+
+
+RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories
+
+# install packages
+RUN apk update && apk add --no-cache --virtual .build-deps \
+    gcc \
+    linux-headers \
+    musl-dev \
+    libffi-dev \
+    libxml2-dev \
+    libxslt-dev \
+    openssl-dev
+
+# install backend
+RUN pip install scrapy pymongo bs4 requests -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+# copy backend files
+COPY --from=backend-build /go/src/app/conf ./conf
+COPY --from=backend-build /go/bin/crawlab /usr/local/bin
+
+# install nginx
+RUN apk add --no-cache nginx openrc
+
+RUN apk del .build-deps
+# copy frontend files
+COPY --from=frontend-build /app/dist /app/dist
+COPY --from=frontend-build /app/conf/crawlab.conf /etc/nginx/conf.d
+
+VOLUME [ "/sys/fs/cgroup" ]
+
+RUN sed -i 's/#rc_sys=""/rc_sys="lxc"/g' /etc/rc.conf && \
+    echo 'rc_provide="loopback net"' >> /etc/rc.conf && \
+    sed -i 's/^#\(rc_logger="YES"\)$/\1/' /etc/rc.conf && \
+    sed -i '/tty/d' /etc/inittab && \
+    sed -i 's/hostname $opts/# hostname $opts/g' /etc/init.d/hostname && \
+    sed -i 's/mount -t tmpfs/# mount -t tmpfs/g' /lib/rc/sh/init.sh && \
+    sed -i 's/cgroup_add_service /# cgroup_add_service /g' /lib/rc/sh/openrc-run.sh && \
+    rm -rf /var/cache/apk/* && \
+    mkdir -p /run/openrc && \
+    touch /run/openrc/softlevel && \
+    /sbin/openrc
+
+# working directory
+WORKDIR /app/backend
+
+# frontend port
+EXPOSE 8080
+
+# backend port
+EXPOSE 8000
+
+# start backend
+CMD ["/bin/sh", "/app/docker_init.sh"]
\ No newline at end of file
diff --git a/Dockerfile.worker.apline b/Dockerfile.worker.apline
new file mode 100644
index 00000000..e7a66776
--- /dev/null
+++ b/Dockerfile.worker.apline
@@ -0,0 +1,43 @@
+FROM golang:1.12-alpine AS backend-build
+
+WORKDIR /go/src/app
+COPY ./backend .
+
+ENV GO111MODULE on
+ENV GOPROXY https://mirrors.aliyun.com/goproxy/
+
+RUN go install -v ./...
+
+# images
+FROM python:alpine
+
+ADD . /app
+
+RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories
+
+RUN apk update && apk add --no-cache --virtual .build-deps \
+    gcc \
+    linux-headers \
+    musl-dev \
+    libffi-dev \
+    libxml2-dev \
+    libxslt-dev \
+    openssl-dev
+
+# install backend
+RUN pip install scrapy pymongo bs4 requests -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+# copy backend files
+COPY --from=backend-build /go/src/app/conf ./conf
+COPY --from=backend-build /go/bin/crawlab /usr/local/bin
+
+RUN apk del .build-deps
+
+# working directory
+WORKDIR /app/backend
+
+# backend port
+EXPOSE 8000
+
+# start backend
+CMD ["crawlab"]
\ No newline at end of file
diff --git a/backend/conf/config.yml b/backend/conf/config.yml
index 9387a94a..f1042ca6 100644
--- a/backend/conf/config.yml
+++ b/backend/conf/config.yml
@@ -20,6 +20,10 @@ server:
   port: 8000
   master: "N"
   secret: "crawlab"
+  register:
+    # node key type: "mac" or "ip"; with "ip" the address must be set explicitly in `ip`
+    type: "mac"
+    ip: ""
 spider:
   path: "/app/spiders"
 task:
diff --git a/backend/services/node.go b/backend/services/node.go
index 90f941e4..e5c8062a 100644
--- a/backend/services/node.go
+++ b/backend/services/node.go
@@ -5,12 +5,12 @@ import (
 	"crawlab/database"
 	"crawlab/lib/cron"
 	"crawlab/model"
+	"crawlab/services/register"
 	"encoding/json"
 	"fmt"
 	"github.com/apex/log"
 	"github.com/globalsign/mgo/bson"
 	"github.com/spf13/viper"
-	"net"
 	"runtime/debug"
 	"time"
 )
@@ -49,43 +49,10 @@ const (
 	No = "N"
 )
 
-// 获取本机的IP地址
-// TODO: 考虑多个IP地址的情况
-func GetIp() (string, error) {
-	addrList, err := net.InterfaceAddrs()
-	if err != nil {
-		return "", err
-	}
-	for _, value := range addrList {
-		if ipNet, ok := value.(*net.IPNet); ok && !ipNet.IP.IsLoopback() {
-			if ipNet.IP.To4() != nil {
-				return ipNet.IP.String(), nil
-			}
-		}
-	}
-	return "", nil
-}
-
-// 获取本机的MAC地址
-func GetMac() (string, error) {
-	interfaces, err := net.Interfaces()
-	if err != nil {
-		debug.PrintStack()
-		return "", err
-	}
-	for _, inter := range interfaces {
-		if inter.HardwareAddr != nil {
-			mac := inter.HardwareAddr.String()
-			return mac, nil
-		}
-	}
-	return "", nil
-}
-
 // 获取本机节点
 func GetCurrentNode() (model.Node, error) {
 	// 获取本机MAC地址
-	mac, err := GetMac()
+	value, err := register.GetRegister().GetValue()
 	if err != nil {
 		debug.PrintStack()
 		return model.Node{}, err
@@ -101,14 +68,14 @@ func GetCurrentNode() (model.Node, error) {
 	}
 
 	// 尝试获取节点
-	node, err = model.GetNodeByMac(mac)
+	node, err = model.GetNodeByMac(value)
 
 	// 如果获取失败
 	if err != nil {
 		// 如果为主节点,表示为第一次注册,插入节点信息
 		if IsMaster() {
 			// 获取本机IP地址
-			ip, err := GetIp()
+			ip, err := register.GetRegister().GetIp()
 			if err != nil {
 				debug.PrintStack()
 				return model.Node{}, err
@@ -117,8 +84,8 @@ func GetCurrentNode() (model.Node, error) {
 			node = model.Node{
 				Id: bson.NewObjectId(),
 				Ip: ip,
-				Name: mac,
-				Mac: mac,
+				Name: value,
+				Mac: value,
 				IsMaster: true,
 			}
 			if err := node.Add(); err != nil {
@@ -155,12 +122,12 @@ func IsMasterNode(id string) bool {
 
 // 获取节点数据
 func GetNodeData() (Data, error) {
-	mac, err := GetMac()
+	val, err := register.GetRegister().GetValue()
 	if err != nil {
 		return Data{}, err
 	}
 
-	value, err := database.RedisClient.HGet("nodes", mac)
+	value, err := database.RedisClient.HGet("nodes", val)
 	data := Data{}
 	if err := json.Unmarshal([]byte(value), &data); err != nil {
 		return data, err
@@ -269,14 +236,14 @@ func UpdateNodeStatus() {
 // 更新节点数据
 func UpdateNodeData() {
 	// 获取MAC地址
-	mac, err := GetMac()
+	val, err := register.GetRegister().GetValue()
 	if err != nil {
 		log.Errorf(err.Error())
 		return
 	}
 
 	// 获取IP地址
-	ip, err := GetIp()
+	ip, err := register.GetRegister().GetIp()
 	if err != nil {
 		log.Errorf(err.Error())
 		return
@@ -284,7 +251,7 @@ func UpdateNodeData() {
 
 	// 构造节点数据
 	data := Data{
-		Mac: mac,
+		Mac: val,
 		Ip: ip,
 		Master: IsMaster(),
 		UpdateTs: time.Now(),
@@ -298,7 +265,7 @@ func UpdateNodeData() {
 		debug.PrintStack()
 		return
 	}
-	if err := database.RedisClient.HSet("nodes", mac, string(dataBytes)); err != nil {
+	if err := database.RedisClient.HSet("nodes", val, string(dataBytes)); err != nil {
 		log.Errorf(err.Error())
 		return
 	}
diff --git a/backend/services/register/register.go b/backend/services/register/register.go
new file mode 100644
index 00000000..8f0d169e
--- /dev/null
+++ b/backend/services/register/register.go
@@ -0,0 +1,126 @@
+package register
+
+import (
+	"errors"
+	"github.com/apex/log"
+	"github.com/spf13/viper"
+	"net"
+	"reflect"
+	"runtime/debug"
+)
+
+// Register describes how a node identifies itself when it registers.
+type Register interface {
+	// GetKey returns the kind of key used for registration.
+	GetKey() string
+	// GetValue returns the key value that uniquely identifies the node.
+	GetValue() (string, error)
+	// GetIp returns the IP address the node registers with.
+	GetIp() (string, error)
+}
+
+// MacRegister identifies a node by the hardware (MAC) address of a local network interface.
+type MacRegister struct{}
+
+// GetKey reports that the node is keyed by MAC address.
+func (mac *MacRegister) GetKey() string {
+	return "mac"
+}
+
+// GetValue returns the hardware address of the first interface that has one.
+// It returns an error when the interfaces cannot be listed or when none of
+// them has a hardware address, because an empty key cannot identify a node.
+func (mac *MacRegister) GetValue() (string, error) {
+	interfaces, err := net.Interfaces()
+	if err != nil {
+		log.Errorf("get interfaces error: %v", err)
+		debug.PrintStack()
+		return "", err
+	}
+	for _, inter := range interfaces {
+		// len() also rejects an empty, non-nil address.
+		if len(inter.HardwareAddr) > 0 {
+			return inter.HardwareAddr.String(), nil
+		}
+	}
+	return "", errors.New("no network interface with a hardware address found")
+}
+
+// GetIp returns the first non-loopback IPv4 address of this machine.
+func (mac *MacRegister) GetIp() (string, error) {
+	return getIp()
+}
+
+// IpRegister identifies a node by an explicitly configured IP address.
+type IpRegister struct {
+	Ip string
+}
+
+// GetKey reports that the node is keyed by IP address.
+func (ip *IpRegister) GetKey() string {
+	return "ip"
+}
+
+// GetValue returns the configured IP, which doubles as the node key.
+// It returns an error when no IP is configured.
+func (ip *IpRegister) GetValue() (string, error) {
+	return ip.GetIp()
+}
+
+// GetIp returns the configured IP, or an error when it is empty.
+func (ip *IpRegister) GetIp() (string, error) {
+	if ip.Ip == "" {
+		return "", errors.New("server.register.ip is empty")
+	}
+	return ip.Ip, nil
+}
+
+// getIp returns the first non-loopback IPv4 address found on the local
+// interfaces, or an empty string when there is none.
+// TODO: handle hosts that have more than one IP address.
+func getIp() (string, error) {
+	addrList, err := net.InterfaceAddrs()
+	if err != nil {
+		return "", err
+	}
+	for _, value := range addrList {
+		if ipNet, ok := value.(*net.IPNet); ok && !ipNet.IP.IsLoopback() {
+			if ipNet.IP.To4() != nil {
+				return ipNet.IP.String(), nil
+			}
+		}
+	}
+	return "", nil
+}
+
+// register caches the Register built by GetRegister.
+var register Register
+
+// GetRegister returns the Register selected by the server.register.type
+// setting: MacRegister for "mac", IpRegister (built from server.register.ip)
+// for any other value. The result is never nil. When the IP is required but
+// not configured, the problem is logged and an uncached IpRegister with an
+// empty address is returned, so callers receive an error from GetValue/GetIp
+// instead of dereferencing nil, and a later call reads the configuration again.
+func GetRegister() Register {
+	if register != nil {
+		return register
+	}
+
+	registerType := viper.GetString("server.register.type")
+	if registerType == "mac" {
+		register = &MacRegister{}
+	} else {
+		ip := viper.GetString("server.register.ip")
+		if ip == "" {
+			log.Error("server.register.ip is empty")
+			debug.PrintStack()
+			return &IpRegister{}
+		}
+		register = &IpRegister{
+			Ip: ip,
+		}
+	}
+	log.Info("register type is :" + reflect.TypeOf(register).String())
+	return register
+}
diff --git a/backend/utils/file.go b/backend/utils/file.go
index 6d4bcd9f..9a4300a1 100644
--- a/backend/utils/file.go
+++ b/backend/utils/file.go
@@ -106,7 +106,7 @@ func DeCompress(srcFile *os.File, dstPath string) error {
 		}
 
 		// 创建新文件
-		newFile, err := os.Create(filepath.Join(dstPath, innerFile.Name))
+		newFile, err := os.OpenFile(filepath.Join(dstPath, innerFile.Name), os.O_RDWR|os.O_CREATE|os.O_TRUNC, info.Mode())
 		if err != nil {
 			log.Errorf("Unzip File Error : " + err.Error())
 			debug.PrintStack()
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 00000000..a42197c5
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,7 @@
+# Examples
+
+* frontend 前端镜像制作
+* master Master节点镜像制作
+* worker Worker节点镜像制作
+* run_docker_master.sh 运行Master节点示例脚本
+* run_docker_worker.sh 运行Worker节点示例脚本
\ No newline at end of file
diff --git a/examples/frontend/Dockerfile b/examples/frontend/Dockerfile
new file mode 100644
index 00000000..d313cdcb
--- /dev/null
+++ b/examples/frontend/Dockerfile
@@ -0,0 +1,20 @@
+FROM alpine:latest
+
+# install nginx
+RUN mkdir /run/nginx && apk add --no-cache nginx
+
+# copy the compiled frontend files
+COPY dist /app/dist
+
+# copy the nginx site configuration
+COPY crawlab.conf /etc/nginx/conf.d
+
+# copy the startup script
+COPY docker_init.sh /app/docker_init.sh
+
+# address of the backend API
+ENV CRAWLAB_API_ADDRESS http://localhost:8000
+
+EXPOSE 8080
+
+CMD ["/bin/sh", "/app/docker_init.sh"]
\ No newline at end of file
diff --git a/examples/frontend/README.md b/examples/frontend/README.md
new file mode 100644
index 00000000..efe9493f
--- /dev/null
+++ b/examples/frontend/README.md
@@ -0,0 +1,5 @@
+# 前端镜像制作
+
+前端需要手动build拿到编译后的文件,然后放入此目录进行镜像构建。
+
+容器运行的时候需要指定`CRAWLAB_API_ADDRESS`环境变量,用于访问后端API接口
\ No newline at end of file
diff --git a/examples/frontend/crawlab.conf b/examples/frontend/crawlab.conf
new file mode 100644
index 00000000..bf8270e5
--- /dev/null
+++ b/examples/frontend/crawlab.conf
@@ -0,0 +1,13 @@
+server {
+    gzip on;
+    gzip_min_length 1k;
+    gzip_buffers 4 16k;
+    #gzip_http_version 1.0;
+    gzip_comp_level 2;
+    gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript application/x-httpd-php image/jpeg image/gif image/png;
+    gzip_vary off;
+    gzip_disable "MSIE [1-6]\.";
+    listen 8080;
+    root /app/dist;
+    index index.html;
+}
diff --git a/examples/frontend/docker_init.sh b/examples/frontend/docker_init.sh
new file mode 100755
index 00000000..cfd53442
--- /dev/null
+++ b/examples/frontend/docker_init.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+# replace default api path to new one
+if [ "${CRAWLAB_API_ADDRESS}" = "" ];
+then
+    :
+else
+    jspath=`ls /app/dist/js/app.*.js`
+    sed -i "s?localhost:8000?${CRAWLAB_API_ADDRESS}?g" ${jspath}
+fi
+
+# replace base url
+if [ "${CRAWLAB_BASE_URL}" = "" ];
+then
+    :
+else
+    indexpath=/app/dist/index.html
+    sed -i "s?/js/?${CRAWLAB_BASE_URL}/js/?g" ${indexpath}
+    sed -i "s?/css/?${CRAWLAB_BASE_URL}/css/?g" ${indexpath}
+fi
+
+# start nginx
+nginx -g 'daemon off;'
\ No newline at end of file
diff --git a/examples/master/Dockerfile b/examples/master/Dockerfile
new file mode 100644
index 00000000..7f12c733
--- /dev/null
+++ b/examples/master/Dockerfile
@@ -0,0 +1,20 @@
+FROM alpine:latest
+
+# set the working directory
+WORKDIR /opt/crawlab
+
+# copy the configuration file
+COPY config.yml /opt/crawlab/conf/config.yml
+
+# copy the executable
+COPY crawlab /usr/local/bin
+
+# create the spiders directory that holds the spiders, make the binary executable
+RUN mkdir -p /opt/crawlab/spiders && chmod +x /usr/local/bin/crawlab
+
+# run as the Master node
+ENV CRAWLAB_SERVER_MASTER Y
+
+EXPOSE 8000
+
+CMD ["crawlab"]
\ No newline at end of file
diff --git a/examples/master/README.md b/examples/master/README.md
new file mode 100644
index 00000000..df68e725
--- /dev/null
+++ b/examples/master/README.md
@@ -0,0 +1,8 @@
+# Master 节点镜像制作
+
+在Dockerfile里面的二进制包,需要手动在源码目录下进行构建然后再放进来。
+
+## Linux 二进制包构建
+```
+CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o crawlab main.go
+```
\ No newline at end of file
diff --git a/examples/master/config.yml b/examples/master/config.yml
new file mode 100644
index 00000000..3e692679
--- /dev/null
+++ b/examples/master/config.yml
@@ -0,0 +1,32 @@
+api:
+  address: "localhost:8000"
+mongo:
+  host: "192.168.235.26"
+  port: 27017
+  db: crawlab_local
+  username: "root"
+  password: "example"
+  authSource: "admin"
+redis:
+  address: 192.168.235.0
+  password: redis-1.0
+  database: 29
+  port: 16379
+log:
+  level: info
+  path: "/logs/crawlab"
+server:
+  host: 0.0.0.0
+  port: 8000
+  master: "Y"
+  secret: "crawlab"
+  register:
+    # "mac" or "ip"; with "ip" the address must be set explicitly in `ip`
+    type: "mac"
+    ip: "192.168.0.104"
+spider:
+  path: "/spiders"
+task:
+  workers: 4
+other:
+  tmppath: "/tmp"
diff --git a/examples/worker/Dockerfile b/examples/worker/python/Dockerfile
similarity index 100%
rename from examples/worker/Dockerfile
rename to examples/worker/python/Dockerfile
diff --git a/examples/worker/README.md b/examples/worker/python/README.md
similarity index 94%
rename from examples/worker/README.md
rename to examples/worker/python/README.md
index e8638ccd..a0c2b6ed 100644
--- a/examples/worker/README.md
+++ b/examples/worker/python/README.md
@@ -1,4 +1,4 @@
-# worker节点
+# 本地开发环境worker节点制作
 
 由于master和worker节点的存储信息是在redis上,并且使用节点所在的mac地址作为key,所以在开发本地需要启动master和worker节点会比较麻烦。
 这里是一个运行worker节点的一个例子。
diff --git a/examples/worker/conf/config.yml b/examples/worker/python/conf/config.yml
similarity index 100%
rename from examples/worker/conf/config.yml
rename to examples/worker/python/conf/config.yml
diff --git a/examples/worker/docker-compose.yml b/examples/worker/python/docker-compose.yml
similarity index 100%
rename from examples/worker/docker-compose.yml
rename to examples/worker/python/docker-compose.yml
diff --git a/examples/worker/requirements.txt b/examples/worker/python/requirements.txt
similarity index 100%
rename from examples/worker/requirements.txt
rename to examples/worker/python/requirements.txt