diff --git a/CHANGELOG.md b/CHANGELOG.md index 6dd14e2c..0973aa8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.4.2 (unknown) +### Features / Enhancement +- **Disclaimer**. Added page for Disclaimer. + # 0.4.1 (2019-12-13) ### Features / Enhancement - **Spiderfile Optimization**. Stages changed from dictionary to array. [#358](https://github.com/crawlab-team/crawlab/issues/358) diff --git a/Dockerfile b/Dockerfile index ddb4d47e..390ece66 100644 --- a/Dockerfile +++ b/Dockerfile @@ -56,4 +56,5 @@ EXPOSE 8080 EXPOSE 8000 # start backend -CMD ["/bin/sh", "/app/docker_init.sh"] +# wait for mongo:27017 first; wait-for-it.sh needs bash (bash shebang, [[ ]], arrays), so it is not run with sh +CMD ["/bin/bash", "./wait-for-it.sh", "mongo:27017", "--", "/bin/sh", "/app/docker_init.sh"] diff --git a/backend/model/node.go b/backend/model/node.go index a24b36e3..d662ab6d 100644 --- a/backend/model/node.go +++ b/backend/model/node.go @@ -143,6 +143,7 @@ func (n *Node) GetTasks() ([]Task, error) { return tasks, nil } +// GetNodeList returns the list of nodes. func GetNodeList(filter interface{}) ([]Node, error) { s, c := database.GetCol("nodes") defer s.Close() @@ -156,6 +157,7 @@ func GetNodeList(filter interface{}) ([]Node, error) { return results, nil } +// GetNode returns the node info for id. func GetNode(id bson.ObjectId) (Node, error) { var node Node @@ -176,6 +178,7 @@ func GetNode(id bson.ObjectId) (Node, error) { return node, nil } +// GetNodeByKey returns the node info for key. func GetNodeByKey(key string) (Node, error) { s, c := database.GetCol("nodes") defer s.Close() @@ -191,6 +194,7 @@ func GetNodeByKey(key string) (Node, error) { return node, nil } +// UpdateNode updates a node. func UpdateNode(id bson.ObjectId, item Node) error { s, c := database.GetCol("nodes") defer s.Close() @@ -206,6 +210,7 @@ func UpdateNode(id bson.ObjectId, item Node) error { return nil } +// GetNodeTaskList returns the task list of a node. func GetNodeTaskList(id bson.ObjectId) ([]Task, error) { node, err := GetNode(id) if err != nil { @@ -218,6 +223,7 @@ func GetNodeTaskList(id bson.ObjectId) ([]Task, error) { return tasks, nil } +// GetNodeCount returns the number of nodes. func GetNodeCount(query interface{}) (int, error) { s, c := database.GetCol("nodes") defer s.Close() diff 
--git a/backend/services/log.go b/backend/services/log.go index 60909c61..2034794d 100644 --- a/backend/services/log.go +++ b/backend/services/log.go @@ -49,10 +49,8 @@ func GetRemoteLog(task model.Task) (logStr string, err error) { select { case logStr = <-ch: log.Infof("get remote log") - break case <-time.After(30 * time.Second): logStr = "get remote log timeout" - break } return logStr, nil diff --git a/backend/services/node.go b/backend/services/node.go index 36934746..e6c2ac08 100644 --- a/backend/services/node.go +++ b/backend/services/node.go @@ -50,36 +50,45 @@ func GetNodeData() (Data, error) { return data, err } +// GetRedisNode reads the value stored under key in the Redis "nodes" hash and decodes it from JSON into a Data; it returns nil and the error if the lookup or the decoding fails. +func GetRedisNode(key string) (*Data, error) { + // Fetch the raw node data from Redis. + value, err := database.RedisClient.HGet("nodes", key) + if err != nil { + log.Errorf("get redis node error: %s, key: %s", err.Error(), key) + return nil, err + } + + // Decode the JSON payload into Data. + var data Data + if err := json.Unmarshal([]byte(value), &data); err != nil { + log.Errorf("unmarshal redis node error: %s, key: %s", err.Error(), key) + return nil, err + } + return &data, nil +} + // 更新所有节点状态 func UpdateNodeStatus() { // 从Redis获取节点keys list, err := database.RedisClient.HKeys("nodes") if err != nil { - log.Errorf(err.Error()) + log.Errorf("get redis node keys error: %s", err.Error()) return } // 遍历节点keys for _, key := range list { - // 获取节点数据 - value, err := database.RedisClient.HGet("nodes", key) + + data, err := GetRedisNode(key) if err != nil { - log.Errorf(err.Error()) - return + continue } - - // 解析节点列表数据 - var data Data - if err := json.Unmarshal([]byte(value), &data); err != nil { - log.Errorf(err.Error()) - return - } - // 如果记录的更新时间超过60秒,该节点被认为离线 if time.Now().Unix()-data.UpdateTsUnix > 60 { // 在Redis中删除该节点 if err := database.RedisClient.HDel("nodes", data.Key); err != nil { - log.Errorf(err.Error()) + log.Errorf("delete redis node key error:%s, key:%s", err.Error(), data.Key) } continue } @@ -94,7 +103,8 @@ func UpdateNodeStatus() { model.ResetNodeStatusToOffline(list) } -func handleNodeInfo(key string, data Data) { +// handleNodeInfo handles received node info. +func handleNodeInfo(key string, data *Data) 
{ // 添加同步锁 v, err := database.RedisClient.Lock(key) if err != nil { diff --git a/docker-compose.yml b/docker-compose.yml index 270c986c..3ed6c76a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,13 +4,13 @@ services: image: tikazyq/crawlab:latest container_name: master environment: - CRAWLAB_API_ADDRESS: "http://localhost:8000" - CRAWLAB_SERVER_MASTER: "Y" - CRAWLAB_MONGO_HOST: "mongo" - CRAWLAB_REDIS_ADDRESS: "redis" + CRAWLAB_API_ADDRESS: "http://localhost:8000" # backend API address 后端 API 地址,设置为 http://<宿主机IP>:<端口>,端口为映射出来的端口 + CRAWLAB_SERVER_MASTER: "Y" # whether to be master node 是否为主节点,主节点为 Y,工作节点为 N + CRAWLAB_MONGO_HOST: "mongo" # MongoDB host address MongoDB 的地址,在 docker compose 网络中,直接引用服务名称 + CRAWLAB_REDIS_ADDRESS: "redis" # Redis host address Redis 的地址,在 docker compose 网络中,直接引用服务名称 ports: - - "8080:8080" # frontend - - "8000:8000" # backend + - "8080:8080" # frontend port mapping 前端端口映射 + - "8000:8000" # backend port mapping 后端端口映射 depends_on: - mongo - redis @@ -27,14 +27,14 @@ services: mongo: image: mongo:latest restart: always - volumes: - - "/opt/crawlab/mongo/data/db:/data/db" - ports: - - "27017:27017" + # volumes: + # - "/opt/crawlab/mongo/data/db:/data/db" # make data persistent 持久化 + # ports: + # - "27017:27017" # expose port to host machine 暴露接口到宿主机 redis: image: redis:latest restart: always - volumes: - - "/opt/crawlab/redis/data:/data" - ports: - - "6379:6379" \ No newline at end of file + # volumes: + # - "/opt/crawlab/redis/data:/data" # make data persistent 持久化 + # ports: + # - "6379:6379" # expose port to host machine 暴露接口到宿主机 \ No newline at end of file diff --git a/wait-for-it.sh b/wait-for-it.sh new file mode 100755 index 00000000..607a7d67 --- /dev/null +++ b/wait-for-it.sh @@ -0,0 +1,178 @@ +#!/usr/bin/env bash +# Use this script to test if a given TCP host/port are available + +WAITFORIT_cmdname=${0##*/} + +echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } + +usage() +{ + cat << USAGE >&2 +Usage: 
+ $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args] + -h HOST | --host=HOST Host or IP under test + -p PORT | --port=PORT TCP port under test + Alternatively, you specify the host and port as host:port + -s | --strict Only execute subcommand if the test succeeds + -q | --quiet Don't output any status messages + -t TIMEOUT | --timeout=TIMEOUT + Timeout in seconds, zero for no timeout + -- COMMAND ARGS Execute command with args after the test finishes +USAGE + exit 1 +} + +wait_for() +{ + if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then + echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" + else + echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout" + fi + WAITFORIT_start_ts=$(date +%s) + while : + do + if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then + nc -z $WAITFORIT_HOST $WAITFORIT_PORT + WAITFORIT_result=$? + else + (echo > /dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1 + WAITFORIT_result=$? + fi + if [[ $WAITFORIT_result -eq 0 ]]; then + WAITFORIT_end_ts=$(date +%s) + echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds" + break + fi + sleep 1 + done + return $WAITFORIT_result +} + +wait_for_wrapper() +{ + # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 + if [[ $WAITFORIT_QUIET -eq 1 ]]; then + timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & + else + timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & + fi + WAITFORIT_PID=$! + trap "kill -INT -$WAITFORIT_PID" INT + wait $WAITFORIT_PID + WAITFORIT_RESULT=$? 
+ if [[ $WAITFORIT_RESULT -ne 0 ]]; then + echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" + fi + return $WAITFORIT_RESULT +} + +# process arguments +while [[ $# -gt 0 ]] +do + case "$1" in + *:* ) + WAITFORIT_hostport=(${1//:/ }) + WAITFORIT_HOST=${WAITFORIT_hostport[0]} + WAITFORIT_PORT=${WAITFORIT_hostport[1]} + shift 1 + ;; + --child) + WAITFORIT_CHILD=1 + shift 1 + ;; + -q | --quiet) + WAITFORIT_QUIET=1 + shift 1 + ;; + -s | --strict) + WAITFORIT_STRICT=1 + shift 1 + ;; + -h) + WAITFORIT_HOST="$2" + if [[ $WAITFORIT_HOST == "" ]]; then break; fi + shift 2 + ;; + --host=*) + WAITFORIT_HOST="${1#*=}" + shift 1 + ;; + -p) + WAITFORIT_PORT="$2" + if [[ $WAITFORIT_PORT == "" ]]; then break; fi + shift 2 + ;; + --port=*) + WAITFORIT_PORT="${1#*=}" + shift 1 + ;; + -t) + WAITFORIT_TIMEOUT="$2" + if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi + shift 2 + ;; + --timeout=*) + WAITFORIT_TIMEOUT="${1#*=}" + shift 1 + ;; + --) + shift + WAITFORIT_CLI=("$@") + break + ;; + --help) + usage + ;; + *) + echoerr "Unknown argument: $1" + usage + ;; + esac +done + +if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then + echoerr "Error: you need to provide a host and port to test." + usage +fi + +WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} +WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} +WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} +WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} + +# check to see if timeout is from busybox? +WAITFORIT_TIMEOUT_PATH=$(type -p timeout) +WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) +if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then + WAITFORIT_ISBUSY=1 + WAITFORIT_BUSYTIMEFLAG="-t" + +else + WAITFORIT_ISBUSY=0 + WAITFORIT_BUSYTIMEFLAG="" +fi + +if [[ $WAITFORIT_CHILD -gt 0 ]]; then + wait_for + WAITFORIT_RESULT=$? 
+ exit $WAITFORIT_RESULT +else + if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then + wait_for_wrapper + WAITFORIT_RESULT=$? + else + wait_for + WAITFORIT_RESULT=$? + fi +fi + +if [[ $WAITFORIT_CLI != "" ]]; then + if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then + echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" + exit $WAITFORIT_RESULT + fi + exec "${WAITFORIT_CLI[@]}" +else + exit $WAITFORIT_RESULT +fi \ No newline at end of file