added system tasks

This commit is contained in:
marvzhang
2020-07-31 13:13:49 +08:00
parent 0205dc5f11
commit 74f7f59036
13 changed files with 118 additions and 66 deletions

View File

@@ -5,6 +5,7 @@ import (
"crawlab/database"
"crawlab/utils"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"runtime/debug"
"time"
@@ -515,3 +516,19 @@ func UpdateTaskErrorLogs(taskId string, errorRegexPattern string) error {
return nil
}
func GetTaskByFilter(filter bson.M) (t Task, err error) {
s, c := database.GetCol("tasks")
defer s.Close()
if err := c.Find(filter).One(&t); err != nil {
if err != mgo.ErrNotFound {
log.Errorf("find task by filter error: " + err.Error())
debug.PrintStack()
return t, err
}
return t, err
}
return t, nil
}

View File

@@ -1,26 +1,28 @@
#!/bin/bash
# fail immediately if error
set -e
# lock global
touch /tmp/install.lock
# lock
touch /tmp/install-chromedriver.lock
export DEBIAN_FRONTEND=noninteractive \
&& apt-get update \
&& apt-get install \
unzip \
&& \
DL=https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
&& curl -sL "$DL" > /tmp/chrome.deb \
&& apt install --no-install-recommends --no-install-suggests -y \
/tmp/chrome.deb \
&& CHROMIUM_FLAGS='--no-sandbox --disable-dev-shm-usage' \
&& sed -i '${s/$/'" $CHROMIUM_FLAGS"'/}' /opt/google/chrome/google-chrome \
&& BASE_URL=https://chromedriver.storage.googleapis.com \
&& VERSION=$(curl -sL "$BASE_URL/LATEST_RELEASE") \
&& curl -sL "$BASE_URL/$VERSION/chromedriver_linux64.zip" -o /tmp/driver.zip \
&& unzip /tmp/driver.zip \
&& chmod 755 chromedriver \
&& mv chromedriver /usr/local/bin
export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get install unzip
DL=https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
curl -sL "$DL" > /tmp/chrome.deb
apt install --no-install-recommends --no-install-suggests -y /tmp/chrome.deb
CHROMIUM_FLAGS='--no-sandbox --disable-dev-shm-usage'
sed -i '${s/$/'" $CHROMIUM_FLAGS"'/}' /opt/google/chrome/google-chrome
BASE_URL=https://chromedriver.storage.googleapis.com
VERSION=$(curl -sL "$BASE_URL/LATEST_RELEASE")
curl -sL "$BASE_URL/$VERSION/chromedriver_linux64.zip" -o /tmp/driver.zip
unzip /tmp/driver.zip
chmod 755 chromedriver
mv chromedriver /usr/local/bin
# unlock global
rm /tmp/install.lock

View File

@@ -1,3 +1,8 @@
#!/bin/bash
# fail immediately if error
set -e
# lock global
touch /tmp/install.lock

View File

@@ -1,3 +1,8 @@
#!/bin/bash
# fail immediately if error
set -e
# lock global
touch /tmp/install.lock

View File

@@ -1,4 +1,8 @@
#!/bin/bash
# fail immediately if error
set -e
# lock global
touch /tmp/install.lock

View File

@@ -1,5 +1,8 @@
#!/bin/bash
# fail immediately if error
set -e
# lock global
touch /tmp/install.lock
@@ -7,9 +10,9 @@ touch /tmp/install.lock
touch /tmp/install-java.lock
# install java
apt-get clean && \
apt-get update --fix-missing && \
apt-get install -y --fix-missing default-jdk
apt-get clean
apt-get update --fix-missing
apt-get install -y --fix-missing default-jdk
ln -s /usr/bin/java /usr/local/bin/java
# unlock

View File

@@ -1,5 +1,8 @@
#!/bin/bash
# fail immediately if error
set -e
# lock global
touch /tmp/install.lock
@@ -20,7 +23,7 @@ apt-get update && apt-get install -yq libgconf-2-4
# Note: this installs the necessary libs to make the bundled version
# of Chromium that Puppeteer
# installs, work.
RUN apt-get update \
apt-get update \
&& apt-get install -y wget gnupg \
&& wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
&& sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \

View File

@@ -1,4 +1,7 @@
#!/usr/bin/env bash
#!/bin/bash
# fail immediately if error
set -e
{ # this ensures the entire script is downloaded #

View File

@@ -1,3 +1,8 @@
#!/bin/bash
# fail immediately if error
set -e
# lock global
touch /tmp/install.lock

View File

@@ -1,5 +1,8 @@
#!/bin/bash
# fail immediately if error
set -e
# install node.js
if [ "${CRAWLAB_SERVER_LANG_NODE}" = "Y" ];
then

View File

@@ -48,17 +48,17 @@ func GetLangLocal(lang entity.Lang) entity.Lang {
}
}
// 检查是否正在安装
if utils.Exists(lang.LockPath) {
lang.InstallStatus = constants.InstallStatusInstalling
return lang
}
// 检查其他语言是否在安装
if utils.Exists("/tmp/install.lock") {
lang.InstallStatus = constants.InstallStatusInstallingOther
return lang
}
//// 检查是否正在安装
//if utils.Exists(lang.LockPath) {
// lang.InstallStatus = constants.InstallStatusInstalling
// return lang
//}
//
//// 检查其他语言是否在安装
//if utils.Exists("/tmp/install.lock") {
// lang.InstallStatus = constants.InstallStatusInstallingOther
// return lang
//}
lang.InstallStatus = constants.InstallStatusNotInstalled
return lang

View File

@@ -5,12 +5,15 @@ import (
"crawlab/database"
"crawlab/entity"
"crawlab/lib/cron"
"crawlab/model"
"crawlab/services/rpc"
"crawlab/utils"
"encoding/json"
"errors"
"fmt"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"github.com/imroc/req"
"os/exec"
"regexp"
@@ -73,6 +76,22 @@ func GetLangList(nodeId string) []entity.Lang {
// 获取语言安装状态
func GetLangInstallStatus(nodeId string, lang entity.Lang) (string, error) {
_, err := model.GetTaskByFilter(bson.M{
"node_id": nodeId,
"cmd": fmt.Sprintf("sh %s", utils.GetSystemScriptPath(lang.InstallScript)),
"status": bson.M{
"$in": []string{constants.StatusPending, constants.StatusRunning},
},
})
if err == nil {
// 任务正在运行,正在安装
return constants.InstallStatusInstalling, nil
}
if err != mgo.ErrNotFound {
// 发生错误
return "", err
}
// 获取状态
if IsMasterNode(nodeId) {
lang := rpc.GetLangLocal(lang)
return lang.InstallStatus, nil

View File

@@ -408,37 +408,17 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider, u
if err := WaitTaskProcess(cmd, t, s); err != nil {
return err
}
ch <- constants.TaskFinish
return nil
}
// 生成日志目录
func MakeLogDir(t model.Task) (fileDir string, err error) {
// 日志目录
fileDir = filepath.Join(viper.GetString("log.path"), t.SpiderId.Hex())
// 如果日志目录不存在,生成该目录
if !utils.Exists(fileDir) {
if err := os.MkdirAll(fileDir, 0777); err != nil {
log.Errorf("execute task, make log dir error: %s", err.Error())
debug.PrintStack()
return "", err
}
// 如果返回值不为0返回错误
returnCode := cmd.ProcessState.ExitCode()
if returnCode != 0 {
log.Errorf(fmt.Sprintf("task returned code not zero: %d", returnCode))
debug.PrintStack()
return errors.New(fmt.Sprintf("task returned code not zero: %d", returnCode))
}
return fileDir, nil
}
// 获取日志文件路径
func GetLogFilePaths(fileDir string, t model.Task) (filePath string) {
// 时间戳
ts := time.Now()
tsStr := ts.Format("20060102150405")
// stdout日志文件
filePath = filepath.Join(fileDir, t.Id+"_"+tsStr+".log")
return filePath
ch <- constants.TaskFinish
return nil
}
// 生成执行任务方法
@@ -695,11 +675,13 @@ func ExecuteTask(id int) {
func FinishUpTask(s model.Spider, t model.Task) {
// 更新任务结果数
go func() {
if err := model.UpdateTaskResultCount(t.Id); err != nil {
return
}
}()
if t.Type == constants.TaskTypeSpider {
go func() {
if err := model.UpdateTaskResultCount(t.Id); err != nil {
return
}
}()
}
// 更新任务错误日志
go func() {
@@ -814,6 +796,7 @@ func RestartTask(id string, uid bson.ObjectId) (err error) {
newTask := model.Task{
SpiderId: oldTask.SpiderId,
NodeId: oldTask.NodeId,
Cmd: oldTask.Cmd,
Param: oldTask.Param,
UserId: uid,
RunType: oldTask.RunType,