Merge pull request #618 from crawlab-team/release

Release
Authored by Marvin Zhang on 2020-03-05 11:03:22 +08:00, committed by GitHub.
62 changed files with 566 additions and 542 deletions


@@ -6,6 +6,7 @@ on:
branches:
- master
- release
- develop
# Publish `v1.2.3` tags as releases.
tags:
@@ -60,3 +61,6 @@ jobs:
if [ "$VERSION" == "release" ]; then
curl ${{ secrets.JENKINS_RELEASE_URL }}
fi
if [ "$VERSION" == "develop" ]; then
curl ${{ secrets.JENKINS_DEVELOP_URL }}
fi
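`VERSION` is set earlier in this job, outside the hunk. In workflows based on GitHub's Docker publish template it is typically the branch or tag name stripped from the Git ref; a hedged sketch of that derivation (assumed, not shown in this diff):

```
# strip "refs/heads/" or "refs/tags/" down to the last path segment
VERSION=$(echo "${GITHUB_REF}" | sed -e 's,.*/\(.*\),\1,')
```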


@@ -51,7 +51,9 @@ RUN cp /opt/bin/crawlab /usr/local/bin/crawlab-server
# copy frontend files
COPY --from=frontend-build /app/dist /app/dist
COPY --from=frontend-build /app/conf/crawlab.conf /etc/nginx/conf.d
# copy nginx config files
COPY ./nginx/crawlab.conf /etc/nginx/conf.d
# working directory
WORKDIR /app/backend


@@ -49,7 +49,9 @@ RUN cp /opt/bin/crawlab /usr/local/bin/crawlab-server
# copy frontend files
COPY --from=frontend-build /app/dist /app/dist
COPY --from=frontend-build /app/conf/crawlab.conf /etc/nginx/conf.d
# copy nginx config files
COPY ./nginx/crawlab.conf /etc/nginx/conf.d
# working directory
WORKDIR /app/backend


@@ -23,7 +23,7 @@ server:
master: "Y"
secret: "crawlab"
register:
# MAC address or IP address; if "ip", the IP must be specified manually
# MAC address / IP address / hostname; if "ip", the IP must be specified manually
type: "mac"
ip: ""
lang: # language environments to install; "Y" to install, "N" to skip (only effective in Docker)


@@ -0,0 +1,7 @@
package constants
const (
RegisterTypeMac = "mac"
RegisterTypeIp = "ip"
RegisterTypeHostname = "hostname"
)


@@ -15,11 +15,11 @@ type Executable struct {
}
type Lang struct {
Name string `json:"name"`
ExecutableName string `json:"executable_name"`
ExecutablePath string `json:"executable_path"`
DepExecutablePath string `json:"dep_executable_path"`
Installed bool `json:"installed"`
Name string `json:"name"`
ExecutableName string `json:"executable_name"`
ExecutablePaths []string `json:"executable_paths"`
DepExecutablePath string `json:"dep_executable_path"`
Installed bool `json:"installed"`
}
type Dependency struct {


@@ -20,6 +20,7 @@ type Node struct {
Ip string `json:"ip" bson:"ip"`
Port string `json:"port" bson:"port"`
Mac string `json:"mac" bson:"mac"`
Hostname string `json:"hostname" bson:"hostname"`
Description string `json:"description" bson:"description"`
// used to uniquely identify the node; may be a MAC address, an IP address, or a hostname
Key string `json:"key" bson:"key"`
@@ -42,6 +43,7 @@ func IsMaster() bool {
}
// get the current (local) node
// TODO: this function has more than one responsibility and needs refactoring
func GetCurrentNode() (Node, error) {
// get the registered key
key, err := register.GetRegister().GetKey()
@@ -67,7 +69,7 @@ func GetCurrentNode() (Node, error) {
// only check whether the master node's info exists when running as the master node
if IsMaster() && err == mgo.ErrNotFound {
// get local machine info
ip, mac, key, err := GetNodeBaseInfo()
ip, mac, hostname, key, err := GetNodeBaseInfo()
if err != nil {
debug.PrintStack()
return node, err
@@ -80,6 +82,7 @@ func GetCurrentNode() (Node, error) {
Ip: ip,
Name: ip,
Mac: mac,
Hostname: hostname,
IsMaster: true,
}
if err := node.Add(); err != nil {
@@ -239,25 +242,31 @@ func GetNodeCount(query interface{}) (int, error) {
}
// basic node info
func GetNodeBaseInfo() (ip string, mac string, key string, error error) {
func GetNodeBaseInfo() (ip string, mac string, hostname string, key string, error error) {
ip, err := register.GetRegister().GetIp()
if err != nil {
debug.PrintStack()
return "", "", "", err
return "", "", "", "", err
}
mac, err = register.GetRegister().GetMac()
if err != nil {
debug.PrintStack()
return "", "", "", err
return "", "", "", "", err
}
hostname, err = register.GetRegister().GetHostname()
if err != nil {
debug.PrintStack()
return "", "", "", "", err
}
key, err = register.GetRegister().GetKey()
if err != nil {
debug.PrintStack()
return "", "", "", err
return "", "", "", "", err
}
return ip, mac, key, nil
return ip, mac, hostname, key, nil
}
// reset nodes to offline according to their Redis keys


@@ -399,7 +399,7 @@ func UploadSpider(c *gin.Context) {
}
// update file_id
spider.FileId = fid
if err := spider.Add(); err != nil {
if err := spider.Save(); err != nil {
log.Error("save spider error: " + err.Error())
debug.PrintStack()
HandleError(http.StatusInternalServerError, c, err)

backend/scripts/install-java.sh Executable file

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
# lock
touch /tmp/install-java.lock
# install java
apt-get update && apt-get install -y default-jdk --fix-missing
ln -s /usr/bin/java /usr/local/bin/java
# unlock
rm /tmp/install-java.lock
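The lock file doubles as a signal that an install is in flight. A hypothetical consumer (not part of this commit) could poll it before invoking Java:

```
# wait for any in-flight Java install to finish, then verify
while [ -f /tmp/install-java.lock ]; do sleep 1; done
java -version
```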


@@ -1,5 +1,8 @@
#!/usr/bin/env bash
# lock
touch /tmp/install-nodejs.lock
# install nvm
BASE_DIR=`dirname $0`
/bin/bash ${BASE_DIR}/install-nvm.sh
@@ -33,3 +36,6 @@ apt-get update && apt-get install -y --no-install-recommends gconf-service libas
PUPPETEER_DOWNLOAD_HOST=https://npm.taobao.org/mirrors
npm config set puppeteer_download_host=https://npm.taobao.org/mirrors
npm install puppeteer-chromium-resolver crawlab-sdk -g --unsafe-perm=true --registry=https://registry.npm.taobao.org
# unlock
rm /tmp/install-nodejs.lock


@@ -23,6 +23,7 @@ type Data struct {
Key string `json:"key"`
Mac string `json:"mac"`
Ip string `json:"ip"`
Hostname string `json:"hostname"`
Master bool `json:"master"`
UpdateTs time.Time `json:"update_ts"`
UpdateTsUnix int64 `json:"update_ts_unix"`
@@ -161,6 +162,14 @@ func UpdateNodeData() {
log.Errorf(err.Error())
return
}
// get the hostname
hostname, err := register.GetRegister().GetHostname()
if err != nil {
log.Errorf(err.Error())
return
}
// get the Redis key
key, err := register.GetRegister().GetKey()
if err != nil {
@@ -174,6 +183,7 @@ func UpdateNodeData() {
Key: key,
Mac: mac,
Ip: ip,
Hostname: hostname,
Master: model.IsMaster(),
UpdateTs: time.Now(),
UpdateTsUnix: time.Now().Unix(),
@@ -191,7 +201,6 @@ func UpdateNodeData() {
log.Errorf(err.Error())
return
}
}
func MasterNodeCallback(message redis.Message) (err error) {


@@ -1,9 +1,13 @@
package register
import (
"bytes"
"crawlab/constants"
"fmt"
"github.com/apex/log"
"github.com/spf13/viper"
"net"
"os/exec"
"reflect"
"runtime/debug"
"sync"
@@ -18,6 +22,8 @@ type Register interface {
GetIp() (string, error)
// MAC address of the registering node
GetMac() (string, error)
// hostname of the registering node
GetHostname() (string, error)
}
// ===================== MAC address registration =====================
@@ -39,6 +45,10 @@ func (mac *MacRegister) GetIp() (string, error) {
return getIp()
}
func (mac *MacRegister) GetHostname() (string, error) {
return getHostname()
}
// ===================== IP address registration =====================
type IpRegister struct {
Ip string
@@ -60,6 +70,33 @@ func (ip *IpRegister) GetMac() (string, error) {
return getMac()
}
func (ip *IpRegister) GetHostname() (string, error) {
return getHostname()
}
// ===================== hostname registration =====================
type HostnameRegister struct{}
func (h *HostnameRegister) GetType() string {
return "hostname"
}
func (h *HostnameRegister) GetKey() (string, error) {
return h.GetHostname()
}
func (h *HostnameRegister) GetMac() (string, error) {
return getMac()
}
func (h *HostnameRegister) GetIp() (string, error) {
return getIp()
}
func (h *HostnameRegister) GetHostname() (string, error) {
return getHostname()
}
// ===================== shared helpers =====================
// get the local IP address
// TODO: handle the case of multiple IP addresses
@@ -94,6 +131,23 @@ func getMac() (string, error) {
return "", nil
}
func getHostname() (string, error) {
var stdout bytes.Buffer
var stderr bytes.Buffer
cmd := exec.Command("hostname")
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
log.Errorf(err.Error())
log.Errorf(fmt.Sprintf("error: %s", stderr.String()))
debug.PrintStack()
return "", err
}
// trim the trailing newline emitted by the hostname command
return string(bytes.TrimSpace(stdout.Bytes())), nil
}
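As an aside, Go's standard library exposes `os.Hostname()`, which would avoid spawning a process; a minimal alternative sketch (not what this commit does, and it would require importing "os"):

```
// alternative: resolve the hostname via the standard library instead of exec
func getHostnameStd() (string, error) {
	return os.Hostname()
}
```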
// ===================== register simple factory =====================
var register Register
@@ -108,9 +162,9 @@ func GetRegister() Register {
}
registerType := viper.GetString("server.register.type")
if registerType == "mac" {
if registerType == constants.RegisterTypeMac {
register = &MacRegister{}
} else {
} else if registerType == constants.RegisterTypeIp {
ip := viper.GetString("server.register.ip")
if ip == "" {
log.Error("server.register.ip is empty")
@@ -120,6 +174,8 @@ func GetRegister() Register {
register = &IpRegister{
Ip: ip,
}
} else if registerType == constants.RegisterTypeHostname {
register = &HostnameRegister{}
}
log.Info("register type is: " + reflect.TypeOf(register).String())
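With this factory, hostname registration is selected via `server.register.type` in config.yml, or via the `CRAWLAB_SERVER_REGISTER_TYPE` environment variable in Docker; a minimal config excerpt:

```
server:
  register:
    # "mac" / "ip" / "hostname"; if "ip", the IP must be set manually
    type: "hostname"
    ip: ""
```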


@@ -65,9 +65,9 @@ func GetSystemInfo(nodeId string) (sysInfo entity.SystemInfo, err error) {
// get the list of languages
func GetLangList(nodeId string) []entity.Lang {
list := []entity.Lang{
{Name: "Python", ExecutableName: "python", ExecutablePath: "/usr/local/bin/python", DepExecutablePath: "/usr/local/bin/pip"},
{Name: "Node.js", ExecutableName: "node", ExecutablePath: "/usr/local/bin/node", DepExecutablePath: "/usr/local/bin/npm"},
//{Name: "Java", ExecutableName: "java", ExecutablePath: "/usr/local/bin/java"},
{Name: "Python", ExecutableName: "python", ExecutablePaths: []string{"/usr/bin/python", "/usr/local/bin/python"}, DepExecutablePath: "/usr/local/bin/pip"},
{Name: "Node.js", ExecutableName: "node", ExecutablePaths: []string{"/usr/bin/node", "/usr/local/bin/node"}, DepExecutablePath: "/usr/local/bin/npm"},
//{Name: "Java", ExecutableName: "java", ExecutablePaths: []string{"/usr/bin/java", "/usr/local/bin/java"}},
}
for i, lang := range list {
list[i].Installed = IsInstalledLang(nodeId, lang)
@@ -93,8 +93,10 @@ func IsInstalledLang(nodeId string, lang entity.Lang) bool {
return false
}
for _, exec := range sysInfo.Executables {
if exec.Path == lang.ExecutablePath {
return true
for _, path := range lang.ExecutablePaths {
if exec.Path == path {
return true
}
}
}
return false
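For reference, a `Lang` entry with the new `executable_paths` field serializes roughly as follows (illustrative values taken from the Python defaults above):

```
{
  "name": "Python",
  "executable_name": "python",
  "executable_paths": ["/usr/bin/python", "/usr/local/bin/python"],
  "dep_executable_path": "/usr/local/bin/pip",
  "installed": true
}
```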


@@ -1,5 +0,0 @@
server {
listen 8080;
root /opt/crawlab/frontend/dist;
index index.html;
}


@@ -0,0 +1,50 @@
apiVersion: v1
kind: Service
metadata:
name: crawlab
namespace: crawlab-develop
spec:
ports:
- port: 8080
targetPort: 8080
nodePort: 30108
selector:
app: crawlab-master
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: crawlab-master
namespace: crawlab-develop
spec:
strategy:
type: Recreate
selector:
matchLabels:
app: crawlab-master
template:
metadata:
labels:
app: crawlab-master
spec:
containers:
- image: tikazyq/crawlab:develop
imagePullPolicy: Always
name: crawlab
env:
- name: CRAWLAB_SERVER_MASTER
value: "Y"
- name: CRAWLAB_MONGO_HOST
value: "mongo"
- name: CRAWLAB_REDIS_ADDRESS
value: "redis"
- name: CRAWLAB_SETTING_ALLOWREGISTER
value: "Y"
- name: CRAWLAB_SERVER_LANG_NODE
value: "Y"
- name: CRAWLAB_SERVER_LANG_JAVA
value: "Y"
ports:
- containerPort: 8080
name: crawlab


@@ -0,0 +1,33 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: crawlab-worker
namespace: crawlab-develop
spec:
replicas: 2
strategy:
type: Recreate
selector:
matchLabels:
app: crawlab-worker
template:
metadata:
labels:
app: crawlab-worker
spec:
containers:
- image: tikazyq/crawlab:develop
imagePullPolicy: Always
name: crawlab
env:
- name: CRAWLAB_SERVER_MASTER
value: "N"
- name: CRAWLAB_MONGO_HOST
value: "mongo"
- name: CRAWLAB_REDIS_ADDRESS
value: "redis"
- name: CRAWLAB_SERVER_LANG_NODE
value: "Y"
- name: CRAWLAB_SERVER_LANG_JAVA
value: "Y"


@@ -0,0 +1,28 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: mongo-pv-volume-develop
namespace: crawlab-develop
labels:
type: local
spec:
storageClassName: manual
capacity:
storage: 2Gi
accessModes:
- ReadWriteOnce
hostPath:
path: "/data/crawlab-develop/mongodb/data"
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: mongo-pv-claim-develop
namespace: crawlab-develop
spec:
storageClassName: manual
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 2Gi

devops/develop/mongo.yaml Normal file

@@ -0,0 +1,41 @@
apiVersion: v1
kind: Service
metadata:
name: mongo
namespace: crawlab-develop
spec:
ports:
- port: 27017
selector:
app: mongo
clusterIP: None
---
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
name: mongo
namespace: crawlab-develop
spec:
selector:
matchLabels:
app: mongo
strategy:
type: Recreate
template:
metadata:
labels:
app: mongo
spec:
containers:
- image: mongo:4
name: mongo
ports:
- containerPort: 27017
name: mongo
volumeMounts:
- name: mongo-persistent-storage
mountPath: /data/db
volumes:
- name: mongo-persistent-storage
persistentVolumeClaim:
claimName: mongo-pv-claim-develop

devops/develop/ns.yaml Normal file

@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: crawlab-develop

devops/develop/redis.yaml Normal file

@@ -0,0 +1,34 @@
apiVersion: v1
kind: Service
metadata:
name: redis
namespace: crawlab-develop
spec:
ports:
- port: 6379
selector:
app: redis
clusterIP: None
---
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
name: redis
namespace: crawlab-develop
spec:
selector:
matchLabels:
app: redis
strategy:
type: Recreate
template:
metadata:
labels:
app: redis
spec:
containers:
- image: redis
name: redis
ports:
- containerPort: 6379
name: redis
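Assuming `kubectl` is pointed at the target cluster, the develop stack can be brought up by creating the namespace first and then applying the remaining manifests:

```
kubectl apply -f devops/develop/ns.yaml
kubectl apply -f devops/develop/
kubectl -n crawlab-develop get pods
```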


@@ -0,0 +1,50 @@
apiVersion: v1
kind: Service
metadata:
name: crawlab
namespace: crawlab-release
spec:
ports:
- port: 8080
targetPort: 8080
nodePort: 30098
selector:
app: crawlab-master
type: NodePort
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: crawlab-master
namespace: crawlab-release
spec:
strategy:
type: Recreate
selector:
matchLabels:
app: crawlab-master
template:
metadata:
labels:
app: crawlab-master
spec:
containers:
- image: tikazyq/crawlab:release
imagePullPolicy: Always
name: crawlab
env:
- name: CRAWLAB_SERVER_MASTER
value: "Y"
- name: CRAWLAB_MONGO_HOST
value: "mongo"
- name: CRAWLAB_REDIS_ADDRESS
value: "redis"
- name: CRAWLAB_SETTING_ALLOWREGISTER
value: "Y"
- name: CRAWLAB_SERVER_LANG_NODE
value: "Y"
- name: CRAWLAB_SERVER_LANG_JAVA
value: "Y"
ports:
- containerPort: 8080
name: crawlab


@@ -0,0 +1,32 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: crawlab-worker
namespace: crawlab-release
spec:
replicas: 2
strategy:
type: Recreate
selector:
matchLabels:
app: crawlab-worker
template:
metadata:
labels:
app: crawlab-worker
spec:
containers:
- image: tikazyq/crawlab:release
imagePullPolicy: Always
name: crawlab
env:
- name: CRAWLAB_SERVER_MASTER
value: "N"
- name: CRAWLAB_MONGO_HOST
value: "mongo"
- name: CRAWLAB_REDIS_ADDRESS
value: "redis"
- name: CRAWLAB_SERVER_LANG_NODE
value: "Y"
- name: CRAWLAB_SERVER_LANG_JAVA
value: "Y"


@@ -0,0 +1,28 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: mongo-pv-volume-release
namespace: crawlab-release
labels:
type: local
spec:
storageClassName: manual
capacity:
storage: 5Gi
accessModes:
- ReadWriteOnce
hostPath:
path: "/data/crawlab-release/mongodb/data"
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: mongo-pv-claim-release
namespace: crawlab-release
spec:
storageClassName: manual
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi

devops/release/mongo.yaml Normal file

@@ -0,0 +1,41 @@
apiVersion: v1
kind: Service
metadata:
name: mongo
namespace: crawlab-release
spec:
ports:
- port: 27017
selector:
app: mongo
clusterIP: None
---
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
name: mongo
namespace: crawlab-release
spec:
selector:
matchLabels:
app: mongo
strategy:
type: Recreate
template:
metadata:
labels:
app: mongo
spec:
containers:
- image: mongo:4
name: mongo
ports:
- containerPort: 27017
name: mongo
volumeMounts:
- name: mongo-persistent-storage
mountPath: /data/db
volumes:
- name: mongo-persistent-storage
persistentVolumeClaim:
claimName: mongo-pv-claim-release

devops/release/ns.yaml Normal file

@@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: crawlab-release

devops/release/redis.yaml Normal file

@@ -0,0 +1,34 @@
apiVersion: v1
kind: Service
metadata:
name: redis
namespace: crawlab-release
spec:
ports:
- port: 6379
selector:
app: redis
clusterIP: None
---
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2
kind: Deployment
metadata:
name: redis
namespace: crawlab-release
spec:
selector:
matchLabels:
app: redis
strategy:
type: Recreate
template:
metadata:
labels:
app: redis
spec:
containers:
- image: redis
name: redis
ports:
- containerPort: 6379
name: redis


@@ -23,6 +23,7 @@ services:
# CRAWLAB_SERVER_REGISTER_IP: "127.0.0.1" # node register IP (the node's unique identifier); only takes effect when CRAWLAB_SERVER_REGISTER_TYPE is "ip"
# CRAWLAB_TASK_WORKERS: 4 # number of task executors (tasks executed in parallel)
# CRAWLAB_SERVER_LANG_NODE: "Y" # whether to pre-install Node.js
# CRAWLAB_SERVER_LANG_JAVA: "Y" # whether to pre-install Java
# CRAWLAB_SETTING_ALLOWREGISTER: "N" # whether to allow user registration
# CRAWLAB_SETTING_ENABLETUTORIAL: "N" # whether to enable the tutorial
# CRAWLAB_NOTIFICATION_MAIL_SERVER: smtp.example.com # SMTP server address


@@ -1,27 +0,0 @@
FROM node:8.16.0 AS frontend-build
ADD ./frontend /app
WORKDIR /app
# install frontend
RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org
RUN npm run build:prod
FROM alpine:latest
# install nginx
RUN mkdir /run/nginx && apk add nginx
# copy built frontend files
COPY --from=frontend-build /app/dist /app/dist
# copy the nginx proxy config
COPY crawlab.conf /etc/nginx/conf.d
# copy the init script
COPY docker_init.sh /app/docker_init.sh
EXPOSE 8080
CMD ["/bin/sh", "/app/docker_init.sh"]


@@ -1,86 +0,0 @@
FROM golang:1.12-alpine AS backend-build
WORKDIR /go/src/app
COPY ./backend .
ENV GO111MODULE on
ENV GOPROXY https://mirrors.aliyun.com/goproxy/
RUN go install -v ./...
FROM node:lts-alpine AS frontend-build
ARG NPM_DISABLE_SAFE_PERM=false
RUN if [ ${NPM_DISABLE_SAFE_PERM} = true ]; then \
# run the install
echo "info: use npm unsafe-perm mode" \
&& npm config set unsafe-perm true \
;fi
ADD ./frontend /app
WORKDIR /app
# install frontend
RUN npm install -g yarn && yarn install --registry=https://registry.npm.taobao.org
RUN npm run build:prod
# images
FROM python:alpine
ADD . /app
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories
# install packages
RUN apk update && apk add --no-cache --virtual .build-deps \
gcc \
linux-headers \
musl-dev \
libffi-dev \
libxml2-dev \
libxslt-dev \
openssl-dev
# install backend
RUN pip install scrapy pymongo bs4 requests -i https://pypi.tuna.tsinghua.edu.cn/simple
# copy backend files
COPY --from=backend-build /go/src/app/conf ./conf
COPY --from=backend-build /go/bin/crawlab /usr/local/bin
# install nginx
RUN apk add --no-cache nginx openrc
RUN apk del .build-deps
# copy frontend files
COPY --from=frontend-build /app/dist /app/dist
COPY --from=frontend-build /app/conf/crawlab.conf /etc/nginx/conf.d
VOLUME [ /sys/fs/cgroup ]
RUN sed -i 's/#rc_sys=""/rc_sys="lxc"/g' /etc/rc.conf && \
echo 'rc_provide="loopback net"' >> /etc/rc.conf && \
sed -i 's/^#\(rc_logger="YES"\)$/\1/' /etc/rc.conf && \
sed -i '/tty/d' /etc/inittab && \
sed -i 's/hostname $opts/# hostname $opts/g' /etc/init.d/hostname && \
sed -i 's/mount -t tmpfs/# mount -t tmpfs/g' /lib/rc/sh/init.sh && \
sed -i 's/cgroup_add_service /# cgroup_add_service /g' /lib/rc/sh/openrc-run.sh && \
rm -rf /var/cache/apk/* && \
mkdir -p /run/openrc && \
touch /run/openrc/softlevel && \
/sbin/openrc
# working directory
WORKDIR /app/backend
ENV PYTHONIOENCODING utf-8
# frontend port
EXPOSE 8080
# backend port
EXPOSE 8000
# start backend
CMD ["/bin/sh", "/app/docker_init.sh"]


@@ -1,43 +0,0 @@
FROM golang:1.12-alpine AS backend-build
WORKDIR /go/src/app
COPY ./backend .
ENV GO111MODULE on
ENV GOPROXY https://mirrors.aliyun.com/goproxy/
RUN go install -v ./...
# images
FROM python:alpine
ADD . /app
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories
RUN apk update && apk add --no-cache --virtual .build-deps \
gcc \
linux-headers \
musl-dev \
libffi-dev \
libxml2-dev \
libxslt-dev \
openssl-dev
# install backend
RUN pip install scrapy pymongo bs4 requests -i https://pypi.tuna.tsinghua.edu.cn/simple
# copy backend files
COPY --from=backend-build /go/src/app/conf ./conf
COPY --from=backend-build /go/bin/crawlab /usr/local/bin
RUN apk del .build-deps
# working directory
WORKDIR /app/backend
ENV PYTHONIOENCODING utf-8
# backend port
EXPOSE 8000
# start backend
CMD crawlab


@@ -26,7 +26,14 @@ service nginx start
if [ "${CRAWLAB_SERVER_LANG_NODE}" = "Y" ];
then
echo "installing node.js"
/bin/sh /app/backend/scripts/install-nodejs.sh
/bin/sh /app/backend/scripts/install-nodejs.sh >> /var/log/install-nodejs.sh.log 2>&1 &
fi
# install languages: Java
if [ "${CRAWLAB_SERVER_LANG_JAVA}" = "Y" ];
then
echo "installing java"
/bin/sh /app/backend/scripts/install-java.sh >> /var/log/install-java.sh.log 2>&1 &
fi
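Because both installers now run in the background, their progress can be followed through the redirected logs, e.g.:

```
tail -f /var/log/install-nodejs.sh.log /var/log/install-java.sh.log
```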
# generate ssh


@@ -1,7 +0,0 @@
# Examples
* frontend: building the frontend image
* master: building the Master node image
* worker: building the Worker node image
* run_docker_master.sh: example script for running a Master node
* run_docker_worker.sh: example script for running a Worker node


@@ -1,30 +0,0 @@
FROM node:8.16.0-alpine AS build
ADD ./frontend /app
WORKDIR /app
# install frontend
RUN npm install -g yarn && yarn install
RUN npm run build:prod
FROM alpine:latest
# install nginx
RUN mkdir /run/nginx && apk add nginx
# copy built frontend files
COPY --from=build /app/dist /app/dist
# copy the nginx proxy config
COPY crawlab.conf /etc/nginx/conf.d
# copy the init script
COPY docker_init.sh /app/docker_init.sh
# backend API address
ENV CRAWLAB_API_ADDRESS http://localhost:8000
EXPOSE 8080
CMD ["/bin/sh", "/app/docker_init.sh"]


@@ -1,5 +0,0 @@
# Building the frontend image
The frontend must be built manually; the compiled files are then placed in this directory to build the image.
When running the container, set the `CRAWLAB_API_ADDRESS` environment variable so the frontend can reach the backend API.
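A hypothetical run of the resulting image (image tag assumed), pointing the frontend at a backend API:

```
docker run -d -p 8080:8080 \
  -e CRAWLAB_API_ADDRESS=http://192.168.99.100:8000 \
  crawlab:frontend
```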


@@ -1,23 +0,0 @@
#!/bin/sh
# replace default api path to new one
if [ "${CRAWLAB_API_ADDRESS}" = "" ];
then
:
else
jspath=`ls /app/dist/js/app.*.js`
sed -i "s?localhost:8000?${CRAWLAB_API_ADDRESS}?g" ${jspath}
fi
# replace base url
if [ "${CRAWLAB_BASE_URL}" = "" ];
then
:
else
indexpath=/app/dist/index.html
sed -i "s?/js/?${CRAWLAB_BASE_URL}/js/?g" ${indexpath}
sed -i "s?/css/?${CRAWLAB_BASE_URL}/css/?g" ${indexpath}
fi
# start nginx
nginx -g 'daemon off;'


@@ -1,20 +0,0 @@
FROM alpine:latest
# set the working directory
WORKDIR /opt/crawlab
# copy the config file
COPY config.yml /opt/crawlab/conf/config.yml
# copy the executable
COPY crawlab /usr/local/bin
# create the spiders directory (for storing spiders) and make the binary executable
RUN mkdir -p /opt/crawlab/spiders && chmod +x /usr/local/bin/crawlab
# designate this as a Master node
ENV CRAWLAB_SERVER_MASTER Y
EXPOSE 8000
CMD ["crawlab"]


@@ -1,8 +0,0 @@
# Building the Master node image
The binary referenced in the Dockerfile must be built manually in the source directory and then copied here.
## Building the Linux binary
```
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o crawlab main.go
```
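With the binary in place, building and running the image might look like this (image tag assumed):

```
docker build -t crawlab:master .
docker run -d -p 8000:8000 crawlab:master
```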


@@ -1,32 +0,0 @@
api:
address: "localhost:8000"
mongo:
host: "192.168.235.26"
port: 27017
db: crawlab_local
username: "root"
password: "example"
authSource: "admin"
redis:
address: 192.168.235.0
password: redis-1.0
database: 29
port: 16379
log:
level: info
path: "/logs/crawlab"
server:
host: 0.0.0.0
port: 8000
master: "Y"
secret: "crawlab"
register:
# "mac" or "ip"; if "ip", the IP must be specified manually
type: "mac"
ip: "192.168.0.104"
spider:
path: "/spiders"
task:
workers: 4
other:
tmppath: "/tmp"


@@ -1,10 +0,0 @@
docker run -d --restart always --name crawlab \
-e CRAWLAB_REDIS_ADDRESS=192.168.99.1:6379 \
-e CRAWLAB_MONGO_HOST=192.168.99.1 \
-e CRAWLAB_SERVER_MASTER=Y \
-e CRAWLAB_API_ADDRESS=192.168.99.100:8000 \
-e CRAWLAB_SPIDER_PATH=/app/spiders \
-p 8080:8080 \
-p 8000:8000 \
-v /var/logs/crawlab:/var/logs/crawlab \
tikazyq/crawlab:0.3.0


@@ -1,6 +0,0 @@
docker run --restart always --name crawlab \
-e CRAWLAB_REDIS_ADDRESS=192.168.99.1:6379 \
-e CRAWLAB_MONGO_HOST=192.168.99.1 \
-e CRAWLAB_SERVER_MASTER=N \
-v /var/logs/crawlab:/var/logs/crawlab \
tikazyq/crawlab:0.3.0


@@ -1,24 +0,0 @@
FROM ubuntu:latest
ENV DEBIAN_FRONTEND=noninteractive
# add the requirements file
ADD requirements.txt /opt/crawlab/
# add the binary
ADD crawlab /usr/local/bin/
RUN chmod +x /usr/local/bin/crawlab
# install the base environment
RUN apt-get update \
&& apt-get install -y curl git net-tools iputils-ping ntp python3 python3-pip \
&& apt-get clean \
&& ln -s /usr/bin/pip3 /usr/local/bin/pip \
&& ln -s /usr/bin/python3 /usr/local/bin/python
# install dependencies
RUN pip install -r /opt/crawlab/requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
WORKDIR /opt/crawlab
ENTRYPOINT ["crawlab"]


@@ -1,26 +0,0 @@
# Building a worker node for local development
Master and worker node state is stored in Redis, keyed by the node's MAC address, so starting both a master and a worker on a single development machine is awkward.
This is an example of running a worker node.
The basic idea: bake the worker node's dependencies into an image, compile crawlab into a binary, then mount the config file and the binary into the container via volumes.
The worker node then runs normally; for container orchestration, this image can be used directly as the base image for worker nodes.
### Building the binary
Run the following command in the `backend` directory to produce the binary:
```
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o crawlab main.go
```
### Building the worker image
```
docker build -t crawlab:worker .
```
### Running the worker node
```
docker-compose up -d
```
When orchestrating across multiple servers with `docker-compose.yml`, nodes may fail to register because their MAC addresses conflict.
Use `networks` to define the node's IP range so it can register with Redis normally.


@@ -1,25 +0,0 @@
mongo:
host: 127.0.0.1
port: 27017
db: crawlab_test
username: ""
password: ""
redis:
address: 127.0.0.1
password: ""
database: 1
port: 6379
log:
level: info
path: "/opt/crawlab/logs"
server:
host: 0.0.0.0
port: 8000
master: "N"
secret: "crawlab"
spider:
path: "/opt/crawlab/spiders"
task:
workers: 4
other:
tmppath: "/tmp"


@@ -1,18 +0,0 @@
version: '3'
services:
worker:
image: crawlab:worker
container_name: crawlab-worker
volumes:
- $PWD/conf/config.yml:/opt/crawlab/conf/config.yml
# binary built from source
- $PWD/crawlab:/usr/local/bin/crawlab
networks:
- crawlabnet
networks:
crawlabnet:
ipam:
driver: default
config:
- subnet: 172.30.0.0/16


@@ -1,7 +0,0 @@
geohash2==1.1
Scrapy==1.5.0
APScheduler==3.5.1
fonttools==3.34.2
elasticsearch==5.5.3
requests==2.22.0
pymysql==0.9.3


@@ -1,18 +0,0 @@
server {
gzip on;
gzip_min_length 1k;
gzip_buffers 4 16k;
#gzip_http_version 1.0;
gzip_comp_level 2;
gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript application/x-httpd-php image/jpeg image/gif image/png;
gzip_vary off;
gzip_disable "MSIE [1-6]\.";
listen 8080;
root /app/dist;
index index.html;
location /api/ {
rewrite /api/(.*) /$1 break;
proxy_pass http://localhost:8000/;
}
}


@@ -152,6 +152,7 @@ export default {
methods: {
async getDepList () {
this.loading = true
this.depList = []
const res = await this.$request.get(`/nodes/${this.nodeForm._id}/deps`, {
lang: this.activeLang.executable_name,
dep_name: this.depName
@@ -175,6 +176,7 @@ export default {
},
async getInstalledDepList () {
this.loading = true
this.installedDepList = []
const res = await this.$request.get(`/nodes/${this.nodeForm._id}/deps/installed`, {
lang: this.activeLang.executable_name
})


@@ -0,0 +1,20 @@
<template>
<div class="node-installation-matrix">
</div>
</template>
<script>
export default {
name: 'NodeInstallationMatrix',
props: {
activeTab: {
type: String,
default: ''
}
}
}
</script>
<style scoped>
</style>


@@ -21,6 +21,7 @@ export default {
startTour: (vm, tourName) => {
if (localStorage.getItem('enableTutorial') === '0') return
vm.$tours[tourName].start()
vm.$st.sendEv('教程', '开始', tourName)
},
finishTour: (tourName) => {
let data


@@ -125,7 +125,6 @@ export default {
mounted () {
if (!this.$utils.tour.isFinishedTour('node-detail')) {
this.$utils.tour.startTour(this, 'node-detail')
this.$st.sendEv('教程', '开始', 'node-detail')
}
}
}


@@ -142,6 +142,9 @@
<el-tab-pane :label="$t('Network')">
<node-network :active-tab="activeTab"/>
</el-tab-pane>
<el-tab-pane :label="$t('Installation')">
<node-installation-matrix :active-tab="activeTab"/>
</el-tab-pane>
</el-tabs>
</div>
</template>
@@ -153,10 +156,11 @@ import {
} from 'vuex'
import 'github-markdown-css/github-markdown.css'
import NodeNetwork from '../../components/Node/NodeNetwork'
import NodeInstallationMatrix from '../../components/Node/NodeInstallationMatrix'
export default {
name: 'NodeList',
components: { NodeNetwork },
components: { NodeInstallationMatrix, NodeNetwork },
data () {
return {
pagination: {


@@ -468,7 +468,6 @@ export default {
if (!this.$utils.tour.isFinishedTour('schedule-list-add')) {
setTimeout(() => {
this.$utils.tour.startTour(this, 'schedule-list-add')
this.$st.sendEv('教程', '开始', 'schedule-list-add')
}, 500)
}
},
@@ -618,7 +617,6 @@ export default {
if (!this.isDisabledSpiderSchedule) {
if (!this.$utils.tour.isFinishedTour('schedule-list')) {
this.$utils.tour.startTour(this, 'schedule-list')
this.$st.sendEv('教程', '开始', 'schedule-list')
}
}
}


@@ -315,7 +315,6 @@ export default {
mounted () {
if (!this.$utils.tour.isFinishedTour('setting')) {
this.$utils.tour.startTour(this, 'setting')
this.$st.sendEv('教程', '开始', 'setting')
}
}
}


@@ -209,7 +209,6 @@ export default {
if (!this.$utils.tour.isFinishedTour('spider-detail-config')) {
setTimeout(() => {
this.$utils.tour.startTour(this, 'spider-detail-config')
this.$st.sendEv('教程', '开始', 'spider-detail-config')
}, 100)
}
} else if (this.activeTabName === 'scrapy-settings') {
@@ -261,7 +260,6 @@ export default {
mounted () {
if (!this.$utils.tour.isFinishedTour('spider-detail')) {
this.$utils.tour.startTour(this, 'spider-detail')
this.$st.sendEv('教程', '开始', 'spider-detail')
}
}
}


@@ -888,7 +888,6 @@ export default {
setTimeout(() => {
if (!this.$utils.tour.isFinishedTour('spider-list-add')) {
this.$utils.tour.startTour(this, 'spider-list-add')
this.$st.sendEv('教程', '开始', 'spider-list-add')
}
}, 300)
},
@@ -1242,7 +1241,6 @@ export default {
if (!this.$utils.tour.isFinishedTour('spider-list')) {
this.$utils.tour.startTour(this, 'spider-list')
this.$st.sendEv('教程', '开始', 'spider-list')
}
},
destroyed () {


@@ -203,7 +203,6 @@ export default {
mounted () {
if (!this.$utils.tour.isFinishedTour('task-detail')) {
this.$utils.tour.startTour(this, 'task-detail')
this.$st.sendEv('教程', '开始', 'task-detail')
}
},
destroyed () {


@@ -413,7 +413,6 @@ export default {
if (!this.$utils.tour.isFinishedTour('task-list')) {
this.$utils.tour.startTour(this, 'task-list')
this.$st.sendEv('教程', '开始', 'task-list')
}
},
destroyed () {


@@ -1,32 +0,0 @@
version: '3.3'
services:
master:
image: "tikazyq/crawlab:develop"
environment:
CRAWLAB_API_ADDRESS: "crawlab.cn/dev/api"
CRAWLAB_BASE_URL: "/dev"
CRAWLAB_SERVER_MASTER: "Y"
CRAWLAB_MONGO_HOST: "mongo"
CRAWLAB_REDIS_ADDRESS: "redis"
CRAWLAB_LOG_PATH: "/var/logs/crawlab"
ports:
- "8090:8080" # frontend
- "8010:8000" # backend
depends_on:
- mongo
- redis
worker:
image: "tikazyq/crawlab:develop"
environment:
CRAWLAB_SERVER_MASTER: "N"
CRAWLAB_MONGO_HOST: "mongo"
CRAWLAB_REDIS_ADDRESS: "redis"
depends_on:
- mongo
- redis
mongo:
image: mongo:latest
restart: always
redis:
image: redis:latest
restart: always


@@ -1,51 +0,0 @@
version: '3.3'
services:
master:
image: "tikazyq/crawlab:master"
environment:
CRAWLAB_API_ADDRESS: "https://crawlab.cn/api"
CRAWLAB_BASE_URL: "/demo"
CRAWLAB_SERVER_MASTER: "Y"
CRAWLAB_SERVER_REGISTER_TYPE: "ip"
CRAWLAB_SERVER_REGISTER_IP: "172.19.0.1"
CRAWLAB_MONGO_HOST: "mongo"
CRAWLAB_REDIS_ADDRESS: "redis"
CRAWLAB_LOG_PATH: "/var/logs/crawlab"
CRAWLAB_SETTING_ALLOWREGISTER: "Y"
CRAWLAB_SERVER_LANG_NODE: "Y"
ports:
- "8080:8080" # frontend
depends_on:
- mongo
- redis
volumes:
- "/opt/crawlab/log:/var/logs/crawlab" # log persistence
worker:
image: "tikazyq/crawlab:master"
environment:
CRAWLAB_SERVER_MASTER: "N"
CRAWLAB_SERVER_REGISTER_TYPE: "ip"
CRAWLAB_SERVER_REGISTER_IP: "172.19.0.2"
CRAWLAB_MONGO_HOST: "mongo"
CRAWLAB_REDIS_ADDRESS: "redis"
CRAWLAB_SERVER_LANG_NODE: "Y"
depends_on:
- mongo
- redis
volumes:
- "/opt/crawlab/log:/var/logs/crawlab" # log persistence
mongo:
image: mongo:latest
restart: always
volumes:
- "/opt/crawlab/mongo/data/db:/data/db"
- "/opt/crawlab/mongo/tmp:/tmp"
redis:
image: redis:latest
restart: always
volumes:
- "/opt/crawlab/redis/data:/data"
splash: # use Splash to run spiders on dynamic pages
image: scrapinghub/splash
# ports:
# - "8050:8050"


@@ -30,6 +30,7 @@ spec:
spec:
containers:
- image: tikazyq/crawlab:latest
imagePullPolicy: Always
name: crawlab
env:
- name: CRAWLAB_SERVER_MASTER
@@ -38,6 +39,10 @@ spec:
value: "mongo"
- name: CRAWLAB_REDIS_ADDRESS
value: "redis"
# - name: CRAWLAB_SERVER_LANG_NODE
# value: "Y"
# - name: CRAWLAB_SERVER_LANG_JAVA
# value: "Y"
ports:
- containerPort: 8080
name: crawlab


@@ -17,6 +17,7 @@ spec:
spec:
containers:
- image: tikazyq/crawlab:latest
imagePullPolicy: Always
name: crawlab
env:
- name: CRAWLAB_SERVER_MASTER
@@ -25,3 +26,7 @@ spec:
value: "mongo"
- name: CRAWLAB_REDIS_ADDRESS
value: "redis"
# - name: CRAWLAB_SERVER_LANG_NODE
# value: "Y"
# - name: CRAWLAB_SERVER_LANG_JAVA
# value: "Y"


@@ -2,12 +2,17 @@ server {
gzip on;
gzip_min_length 1k;
gzip_buffers 4 16k;
#gzip_http_version 1.0;
gzip_comp_level 2;
gzip_types text/plain application/javascript application/x-javascript text/css application/xml text/javascript application/x-httpd-php image/jpeg image/gif image/png;
gzip_vary off;
gzip_disable "MSIE [1-6]\.";
listen 8080;
root /app/dist;
index index.html;
client_max_body_size 50m;
listen 8080;
root /app/dist;
index index.html;
location /api/ {
rewrite /api/(.*) /$1 break;
proxy_pass http://localhost:8000/;
}
}