mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
78
Dockerfile.master.apline
Normal file
78
Dockerfile.master.apline
Normal file
@@ -0,0 +1,78 @@
|
||||
# Build the Go backend in a dedicated stage.
FROM golang:1.12-alpine AS backend-build

WORKDIR /go/src/app
COPY ./backend .

# Go modules enabled, with a CN mirror proxy for reliable downloads.
ENV GO111MODULE=on
ENV GOPROXY=https://mirrors.aliyun.com/goproxy/

RUN go install -v ./...

# Build the frontend bundle in a separate stage.
FROM node:8.16.0-alpine AS frontend-build

COPY ./frontend /app
WORKDIR /app

# install frontend dependencies (CN npm registry mirror)
RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org

RUN npm run build:prod

# Final runtime image.
# NOTE(review): python:alpine is an unpinned tag; pin a specific version
# (e.g. python:3.7-alpine) for reproducible builds.
FROM python:alpine

COPY . /app

# Use the Aliyun apk mirror for faster package installs.
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories

# install build-only packages; removed later via `apk del .build-deps`
RUN apk update && apk add --no-cache --virtual .build-deps \
    gcc \
    linux-headers \
    musl-dev \
    libffi-dev \
    libxml2-dev \
    libxslt-dev \
    openssl-dev

# install Python packages used by spiders (CN PyPI mirror);
# --no-cache-dir keeps the pip cache out of the image layer
RUN pip install --no-cache-dir scrapy pymongo bs4 requests -i https://pypi.tuna.tsinghua.edu.cn/simple

# copy backend files from the build stage
COPY --from=backend-build /go/src/app/conf ./conf
COPY --from=backend-build /go/bin/crawlab /usr/local/bin

# install nginx (serves the frontend) and openrc
RUN apk add --no-cache nginx openrc

RUN apk del .build-deps

# copy frontend files from the build stage
COPY --from=frontend-build /app/dist /app/dist
COPY --from=frontend-build /app/conf/crawlab.conf /etc/nginx/conf.d

# FIX: the original used Unicode curly quotes, which breaks the JSON-array
# form and creates a literally-quoted volume path.
VOLUME [ "/sys/fs/cgroup" ]

# Configure openrc to run inside a container (no ttys, no cgroup/tmpfs mounts).
RUN sed -i 's/#rc_sys=""/rc_sys="lxc"/g' /etc/rc.conf && \
    echo 'rc_provide="loopback net"' >> /etc/rc.conf && \
    sed -i 's/^#\(rc_logger="YES"\)$/\1/' /etc/rc.conf && \
    sed -i '/tty/d' /etc/inittab && \
    sed -i 's/hostname $opts/# hostname $opts/g' /etc/init.d/hostname && \
    sed -i 's/mount -t tmpfs/# mount -t tmpfs/g' /lib/rc/sh/init.sh && \
    sed -i 's/cgroup_add_service /# cgroup_add_service /g' /lib/rc/sh/openrc-run.sh && \
    rm -rf /var/cache/apk/* && \
    mkdir -p /run/openrc && \
    touch /run/openrc/softlevel && \
    /sbin/openrc

# working directory
WORKDIR /app/backend

# frontend port
EXPOSE 8080

# backend port
EXPOSE 8000

# start backend + nginx via the init script
CMD ["/bin/sh", "/app/docker_init.sh"]
|
||||
43
Dockerfile.worker.apline
Normal file
43
Dockerfile.worker.apline
Normal file
@@ -0,0 +1,43 @@
|
||||
# Build the Go backend in a dedicated stage.
FROM golang:1.12-alpine AS backend-build

WORKDIR /go/src/app
COPY ./backend .

# Go modules enabled, with a CN mirror proxy for reliable downloads.
ENV GO111MODULE=on
ENV GOPROXY=https://mirrors.aliyun.com/goproxy/

RUN go install -v ./...

# Final runtime image.
# NOTE(review): python:alpine is an unpinned tag; pin a specific version
# (e.g. python:3.7-alpine) for reproducible builds.
FROM python:alpine

COPY . /app

# Use the Aliyun apk mirror for faster package installs.
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories

# install build-only packages; removed later via `apk del .build-deps`
RUN apk update && apk add --no-cache --virtual .build-deps \
    gcc \
    linux-headers \
    musl-dev \
    libffi-dev \
    libxml2-dev \
    libxslt-dev \
    openssl-dev

# install Python packages used by spiders (CN PyPI mirror);
# --no-cache-dir keeps the pip cache out of the image layer
RUN pip install --no-cache-dir scrapy pymongo bs4 requests -i https://pypi.tuna.tsinghua.edu.cn/simple

# copy backend files from the build stage
COPY --from=backend-build /go/src/app/conf ./conf
COPY --from=backend-build /go/bin/crawlab /usr/local/bin

RUN apk del .build-deps

# working directory
WORKDIR /app/backend

# backend port
EXPOSE 8000

# start backend — exec form so crawlab runs as PID 1 and receives
# SIGTERM on `docker stop` (shell form `CMD crawlab` would not)
CMD ["crawlab"]
|
||||
@@ -17,8 +17,8 @@
|
||||
## 安装
|
||||
|
||||
三种方式:
|
||||
1. [Docker](https://tikazyq.github.io/crawlab/Installation/Docker.html)(推荐)
|
||||
2. [直接部署](https://tikazyq.github.io/crawlab/Installation/Direct.html)(了解内核)
|
||||
1. [Docker](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html)(推荐)
|
||||
2. [直接部署](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html)(了解内核)
|
||||
|
||||
### 要求(Docker)
|
||||
- Docker 18.03+
|
||||
@@ -87,11 +87,11 @@ services:
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
Docker部署的详情,请见[相关文档](https://tikazyq.github.io/crawlab/Installation/Docker.html)。
|
||||
Docker部署的详情,请见[相关文档](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html)。
|
||||
|
||||
### 直接部署
|
||||
|
||||
请参考[相关文档](https://tikazyq.github.io/crawlab/Installation/Direct.html)。
|
||||
请参考[相关文档](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html)。
|
||||
|
||||
## 截图
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@ Golang-based distributed web crawler management platform, supporting various lan
|
||||
## Installation
|
||||
|
||||
Two methods:
|
||||
1. [Docker](https://tikazyq.github.io/crawlab/Installation/Docker.html) (Recommended)
|
||||
2. [Direct Deploy](https://tikazyq.github.io/crawlab/Installation/Direct.html) (Check Internal Kernel)
|
||||
1. [Docker](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html) (Recommended)
|
||||
2. [Direct Deploy](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html) (Check Internal Kernel)
|
||||
|
||||
### Pre-requisite (Docker)
|
||||
- Docker 18.03+
|
||||
@@ -88,7 +88,7 @@ Then execute the command below, and Crawlab Master Node + MongoDB + Redis will s
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
For Docker Deployment details, please refer to [relevant documentation](https://tikazyq.github.io/crawlab/Installation/Docker.html).
|
||||
For Docker Deployment details, please refer to [relevant documentation](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html).
|
||||
|
||||
|
||||
## Screenshot
|
||||
|
||||
@@ -20,6 +20,10 @@ server:
|
||||
port: 8000
|
||||
master: "N"
|
||||
secret: "crawlab"
|
||||
register:
|
||||
# mac地址 或者 ip地址,如果是ip,则需要手动指定IP
|
||||
type: "mac"
|
||||
ip: ""
|
||||
spider:
|
||||
path: "/app/spiders"
|
||||
task:
|
||||
|
||||
@@ -5,12 +5,12 @@ import (
|
||||
"crawlab/database"
|
||||
"crawlab/lib/cron"
|
||||
"crawlab/model"
|
||||
"crawlab/services/register"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/apex/log"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"github.com/spf13/viper"
|
||||
"net"
|
||||
"runtime/debug"
|
||||
"time"
|
||||
)
|
||||
@@ -49,43 +49,10 @@ const (
|
||||
No = "N"
|
||||
)
|
||||
|
||||
// 获取本机的IP地址
|
||||
// TODO: 考虑多个IP地址的情况
|
||||
func GetIp() (string, error) {
|
||||
addrList, err := net.InterfaceAddrs()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
for _, value := range addrList {
|
||||
if ipNet, ok := value.(*net.IPNet); ok && !ipNet.IP.IsLoopback() {
|
||||
if ipNet.IP.To4() != nil {
|
||||
return ipNet.IP.String(), nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// 获取本机的MAC地址
|
||||
func GetMac() (string, error) {
|
||||
interfaces, err := net.Interfaces()
|
||||
if err != nil {
|
||||
debug.PrintStack()
|
||||
return "", err
|
||||
}
|
||||
for _, inter := range interfaces {
|
||||
if inter.HardwareAddr != nil {
|
||||
mac := inter.HardwareAddr.String()
|
||||
return mac, nil
|
||||
}
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// 获取本机节点
|
||||
func GetCurrentNode() (model.Node, error) {
|
||||
// 获取本机MAC地址
|
||||
mac, err := GetMac()
|
||||
value, err := register.GetRegister().GetValue()
|
||||
if err != nil {
|
||||
debug.PrintStack()
|
||||
return model.Node{}, err
|
||||
@@ -101,14 +68,14 @@ func GetCurrentNode() (model.Node, error) {
|
||||
}
|
||||
|
||||
// 尝试获取节点
|
||||
node, err = model.GetNodeByMac(mac)
|
||||
node, err = model.GetNodeByMac(value)
|
||||
|
||||
// 如果获取失败
|
||||
if err != nil {
|
||||
// 如果为主节点,表示为第一次注册,插入节点信息
|
||||
if IsMaster() {
|
||||
// 获取本机IP地址
|
||||
ip, err := GetIp()
|
||||
ip, err := register.GetRegister().GetIp()
|
||||
if err != nil {
|
||||
debug.PrintStack()
|
||||
return model.Node{}, err
|
||||
@@ -117,8 +84,8 @@ func GetCurrentNode() (model.Node, error) {
|
||||
node = model.Node{
|
||||
Id: bson.NewObjectId(),
|
||||
Ip: ip,
|
||||
Name: mac,
|
||||
Mac: mac,
|
||||
Name: value,
|
||||
Mac: value,
|
||||
IsMaster: true,
|
||||
}
|
||||
if err := node.Add(); err != nil {
|
||||
@@ -155,12 +122,12 @@ func IsMasterNode(id string) bool {
|
||||
|
||||
// 获取节点数据
|
||||
func GetNodeData() (Data, error) {
|
||||
mac, err := GetMac()
|
||||
val, err := register.GetRegister().GetValue()
|
||||
if err != nil {
|
||||
return Data{}, err
|
||||
}
|
||||
|
||||
value, err := database.RedisClient.HGet("nodes", mac)
|
||||
value, err := database.RedisClient.HGet("nodes", val)
|
||||
data := Data{}
|
||||
if err := json.Unmarshal([]byte(value), &data); err != nil {
|
||||
return data, err
|
||||
@@ -269,14 +236,14 @@ func UpdateNodeStatus() {
|
||||
// 更新节点数据
|
||||
func UpdateNodeData() {
|
||||
// 获取MAC地址
|
||||
mac, err := GetMac()
|
||||
val, err := register.GetRegister().GetValue()
|
||||
if err != nil {
|
||||
log.Errorf(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
// 获取IP地址
|
||||
ip, err := GetIp()
|
||||
ip, err := register.GetRegister().GetIp()
|
||||
if err != nil {
|
||||
log.Errorf(err.Error())
|
||||
return
|
||||
@@ -284,7 +251,7 @@ func UpdateNodeData() {
|
||||
|
||||
// 构造节点数据
|
||||
data := Data{
|
||||
Mac: mac,
|
||||
Mac: val,
|
||||
Ip: ip,
|
||||
Master: IsMaster(),
|
||||
UpdateTs: time.Now(),
|
||||
@@ -298,7 +265,7 @@ func UpdateNodeData() {
|
||||
debug.PrintStack()
|
||||
return
|
||||
}
|
||||
if err := database.RedisClient.HSet("nodes", mac, string(dataBytes)); err != nil {
|
||||
if err := database.RedisClient.HSet("nodes", val, string(dataBytes)); err != nil {
|
||||
log.Errorf(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
105
backend/services/register/register.go
Normal file
105
backend/services/register/register.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package register
|
||||
|
||||
import (
|
||||
"github.com/apex/log"
|
||||
"github.com/spf13/viper"
|
||||
"net"
|
||||
"reflect"
|
||||
"runtime/debug"
|
||||
)
|
||||
|
||||
// Register identifies this node to the cluster; nodes are keyed in Redis
// by the value it yields.
type Register interface {
	// GetKey reports the registration key type ("mac" or "ip").
	GetKey() string
	// GetValue returns the value uniquely identifying this node.
	GetValue() (string, error)
	// GetIp returns the IP address to register for this node.
	GetIp() (string, error)
}
|
||||
|
||||
// MacRegister identifies the node by its MAC address.
type MacRegister struct{}

// GetKey reports the registration key type for MAC-based registration.
func (mac *MacRegister) GetKey() string {
	return "mac"
}
|
||||
|
||||
// GetValue returns the MAC address of the first network interface that has
// a hardware address, used as this node's unique identifier.
// NOTE(review): returns ("", nil) — no error — when no interface exposes a
// hardware address, so callers would silently register an empty key;
// confirm this fallback is intended.
func (mac *MacRegister) GetValue() (string, error) {
	interfaces, err := net.Interfaces()
	if err != nil {
		log.Errorf("get interfaces error:" + err.Error())
		debug.PrintStack()
		return "", err
	}
	for _, inter := range interfaces {
		// first interface with a hardware address wins — may pick a
		// virtual interface depending on enumeration order
		if inter.HardwareAddr != nil {
			mac := inter.HardwareAddr.String()
			return mac, nil
		}
	}
	return "", nil
}
|
||||
|
||||
// GetIp resolves this node's IP address via the shared getIp helper.
func (mac *MacRegister) GetIp() (string, error) {
	return getIp()
}
|
||||
|
||||
// IpRegister identifies the node by a statically configured IP address
// (read from server.register.ip by GetRegister).
type IpRegister struct {
	Ip string
}

// GetKey reports the registration key type for IP-based registration.
func (ip *IpRegister) GetKey() string {
	return "ip"
}

// GetValue returns the configured IP as the node's unique identifier.
func (ip *IpRegister) GetValue() (string, error) {
	return ip.Ip, nil
}

// GetIp returns the configured IP address.
func (ip *IpRegister) GetIp() (string, error) {
	return ip.Ip, nil
}
|
||||
|
||||
// getIp returns this host's first non-loopback IPv4 address, or ("", nil)
// when none is found.
// TODO: handle hosts with multiple candidate IP addresses.
func getIp() (string, error) {
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return "", err
	}
	for _, addr := range addrs {
		ipNet, ok := addr.(*net.IPNet)
		if !ok || ipNet.IP.IsLoopback() {
			continue
		}
		if v4 := ipNet.IP.To4(); v4 != nil {
			return v4.String(), nil
		}
	}
	return "", nil
}
|
||||
|
||||
// Singleton register instance, lazily created by GetRegister.
// NOTE(review): access is not synchronized — concurrent first calls could
// race on this variable; confirm initialization happens on one goroutine.
var register Register

// GetRegister returns the node register selected by the
// "server.register.type" config key: a MacRegister for "mac", otherwise an
// IpRegister built from "server.register.ip".
// Returns nil when the type is not "mac" and no IP is configured —
// callers must handle a nil result.
func GetRegister() Register {
	if register != nil {
		return register
	}

	registerType := viper.GetString("server.register.type")
	if registerType == "mac" {
		register = &MacRegister{}
	} else {
		ip := viper.GetString("server.register.ip")
		if ip == "" {
			log.Error("server.register.ip is empty")
			debug.PrintStack()
			return nil
		}
		register = &IpRegister{
			Ip: ip,
		}
	}
	log.Info("register type is :" + reflect.TypeOf(register).String())
	return register
}
|
||||
@@ -106,7 +106,7 @@ func DeCompress(srcFile *os.File, dstPath string) error {
|
||||
}
|
||||
|
||||
// 创建新文件
|
||||
newFile, err := os.Create(filepath.Join(dstPath, innerFile.Name))
|
||||
newFile, err := os.OpenFile(filepath.Join(dstPath, innerFile.Name), os.O_RDWR|os.O_CREATE|os.O_TRUNC, info.Mode())
|
||||
if err != nil {
|
||||
log.Errorf("Unzip File Error : " + err.Error())
|
||||
debug.PrintStack()
|
||||
|
||||
7
examples/README.md
Normal file
7
examples/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Examples
|
||||
|
||||
* frontend 前端镜像制作
|
||||
* master Master节点镜像制作
|
||||
* worker Worker节点镜像制作
|
||||
* run_docker_master.sh 运行Master节点示例脚本
|
||||
* run_docker_worker.sh 运行Worker节点示例脚本
|
||||
20
examples/frontend/Dockerfile
Normal file
20
examples/frontend/Dockerfile
Normal file
@@ -0,0 +1,20 @@
|
||||
# NOTE(review): alpine:latest is an unpinned tag; pin a version
# (e.g. alpine:3.10) for reproducible builds.
FROM alpine:latest

# install nginx (--no-cache keeps the apk index out of the layer)
# and create its runtime pid/socket directory
RUN apk add --no-cache nginx && mkdir -p /run/nginx

# copy the pre-built frontend bundle (build `dist` manually first)
COPY dist /app/dist

# nginx server config serving /app/dist on 8080
COPY crawlab.conf /etc/nginx/conf.d

# startup script that rewrites the API address, then launches nginx
COPY docker_init.sh /app/docker_init.sh

# backend API address substituted into the built JS at container start
ENV CRAWLAB_API_ADDRESS=http://localhost:8000

EXPOSE 8080

CMD ["/bin/sh", "/app/docker_init.sh"]
|
||||
5
examples/frontend/README.md
Normal file
5
examples/frontend/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# 前端镜像制作
|
||||
|
||||
前端需要手动build拿到编译后的文件,然后放入此目录进行镜像构建。
|
||||
|
||||
容器运行的时候需要指定`CRAWLAB_API_ADDRESS`环境变量,用于访问后端API接口
|
||||
13
examples/frontend/crawlab.conf
Normal file
13
examples/frontend/crawlab.conf
Normal file
@@ -0,0 +1,13 @@
|
||||
# Serves the built Crawlab frontend (static files from /app/dist) on 8080.
server {
    # gzip compression for text/JS/CSS assets
    gzip on;
    gzip_min_length 1k;
    gzip_buffers 4 16k;
    #gzip_http_version 1.0;
    gzip_comp_level 2;
    gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript application/x-httpd-php image/jpeg image/gif image/png;
    gzip_vary off;
    # legacy IE 1-6 cannot handle gzip responses
    gzip_disable "MSIE [1-6]\.";
    listen 8080;
    root /app/dist;
    index index.html;
}
|
||||
23
examples/frontend/docker_init.sh
Executable file
23
examples/frontend/docker_init.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/sh

# Point the built frontend at the real backend API: the bundle is compiled
# with "localhost:8000" baked in, so substitute the configured address.
if [ -n "${CRAWLAB_API_ADDRESS}" ]; then
    jspath=$(ls /app/dist/js/app.*.js)
    sed -i "s?localhost:8000?${CRAWLAB_API_ADDRESS}?g" ${jspath}
fi

# Prefix asset paths in index.html when the app is served under a sub-path.
if [ -n "${CRAWLAB_BASE_URL}" ]; then
    indexpath=/app/dist/index.html
    sed -i "s?/js/?${CRAWLAB_BASE_URL}/js/?g" ${indexpath}
    sed -i "s?/css/?${CRAWLAB_BASE_URL}/css/?g" ${indexpath}
fi

# Run nginx in the foreground so it remains the container's main process.
nginx -g 'daemon off;'
|
||||
20
examples/master/Dockerfile
Normal file
20
examples/master/Dockerfile
Normal file
@@ -0,0 +1,20 @@
|
||||
# NOTE(review): alpine:latest is an unpinned tag; pin a version
# (e.g. alpine:3.10) for reproducible builds.
FROM alpine:latest

# working directory
WORKDIR /opt/crawlab

# copy the config file
COPY config.yml /opt/crawlab/conf/config.yml

# copy the pre-built crawlab binary (build it for Linux first — see README)
COPY crawlab /usr/local/bin

# create the spiders directory and make the binary executable
RUN mkdir -p /opt/crawlab/spiders && chmod +x /usr/local/bin/crawlab

# mark this container as the master node
ENV CRAWLAB_SERVER_MASTER=Y

EXPOSE 8000

CMD ["crawlab"]
|
||||
8
examples/master/README.md
Normal file
8
examples/master/README.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# Master 节点镜像制作
|
||||
|
||||
在Dockerfile里面的二进制包,需要手动在源码目录下进行构建然后再放进来。
|
||||
|
||||
## Linux 二进制包构建
|
||||
```
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o crawlab main.go
|
||||
```
|
||||
32
examples/master/config.yml
Normal file
32
examples/master/config.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
# Example Crawlab master-node configuration.
# NOTE(review): hosts/passwords below are sample values — replace them for
# any real deployment; do not commit real credentials.
api:
  address: "localhost:8000"
mongo:
  host: "192.168.235.26"
  port: 27017
  db: crawlab_local
  username: "root"
  password: "example"
  authSource: "admin"
redis:
  address: 192.168.235.0
  password: redis-1.0
  database: 29
  port: 16379
log:
  level: info
  path: "/logs/crawlab"
server:
  host: 0.0.0.0
  port: 8000
  master: "Y"      # "Y" marks this node as the master
  secret: "crawlab"
  register:
    # "mac" or "ip"; with "ip" the address must be set manually below
    type: "mac"
    ip: "192.168.0.104"
spider:
  path: "/spiders"
task:
  workers: 4
other:
  tmppath: "/tmp"
||||
@@ -1,4 +1,4 @@
|
||||
# worker节点
|
||||
# 本地开发环境worker节点制作
|
||||
由于master和worker节点的存储信息是在redis上,并且使用节点所在的mac地址作为key,所以在本地开发环境中同时启动master和worker节点会比较麻烦。
|
||||
这里是一个运行worker节点的一个例子。
|
||||
|
||||
Reference in New Issue
Block a user