mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
added system tasks
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"crawlab/database"
|
||||
"crawlab/utils"
|
||||
"github.com/apex/log"
|
||||
"github.com/globalsign/mgo"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"runtime/debug"
|
||||
"time"
|
||||
@@ -515,3 +516,19 @@ func UpdateTaskErrorLogs(taskId string, errorRegexPattern string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetTaskByFilter(filter bson.M) (t Task, err error) {
|
||||
s, c := database.GetCol("tasks")
|
||||
defer s.Close()
|
||||
|
||||
if err := c.Find(filter).One(&t); err != nil {
|
||||
if err != mgo.ErrNotFound {
|
||||
log.Errorf("find task by filter error: " + err.Error())
|
||||
debug.PrintStack()
|
||||
return t, err
|
||||
}
|
||||
return t, err
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
@@ -1,26 +1,28 @@
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
# lock global
|
||||
touch /tmp/install.lock
|
||||
|
||||
# lock
|
||||
touch /tmp/install-chromedriver.lock
|
||||
|
||||
export DEBIAN_FRONTEND=noninteractive \
|
||||
&& apt-get update \
|
||||
&& apt-get install \
|
||||
unzip \
|
||||
&& \
|
||||
DL=https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
|
||||
&& curl -sL "$DL" > /tmp/chrome.deb \
|
||||
&& apt install --no-install-recommends --no-install-suggests -y \
|
||||
/tmp/chrome.deb \
|
||||
&& CHROMIUM_FLAGS='--no-sandbox --disable-dev-shm-usage' \
|
||||
&& sed -i '${s/$/'" $CHROMIUM_FLAGS"'/}' /opt/google/chrome/google-chrome \
|
||||
&& BASE_URL=https://chromedriver.storage.googleapis.com \
|
||||
&& VERSION=$(curl -sL "$BASE_URL/LATEST_RELEASE") \
|
||||
&& curl -sL "$BASE_URL/$VERSION/chromedriver_linux64.zip" -o /tmp/driver.zip \
|
||||
&& unzip /tmp/driver.zip \
|
||||
&& chmod 755 chromedriver \
|
||||
&& mv chromedriver /usr/local/bin
|
||||
export DEBIAN_FRONTEND=noninteractive
|
||||
apt-get update
|
||||
apt-get install unzip
|
||||
DL=https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
|
||||
curl -sL "$DL" > /tmp/chrome.deb
|
||||
apt install --no-install-recommends --no-install-suggests -y /tmp/chrome.deb
|
||||
CHROMIUM_FLAGS='--no-sandbox --disable-dev-shm-usage'
|
||||
sed -i '${s/$/'" $CHROMIUM_FLAGS"'/}' /opt/google/chrome/google-chrome
|
||||
BASE_URL=https://chromedriver.storage.googleapis.com
|
||||
VERSION=$(curl -sL "$BASE_URL/LATEST_RELEASE")
|
||||
curl -sL "$BASE_URL/$VERSION/chromedriver_linux64.zip" -o /tmp/driver.zip
|
||||
unzip /tmp/driver.zip
|
||||
chmod 755 chromedriver
|
||||
mv chromedriver /usr/local/bin
|
||||
|
||||
# unlock global
|
||||
rm /tmp/install.lock
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
# lock global
|
||||
touch /tmp/install.lock
|
||||
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
# lock global
|
||||
touch /tmp/install.lock
|
||||
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
# lock global
|
||||
touch /tmp/install.lock
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
# lock global
|
||||
touch /tmp/install.lock
|
||||
|
||||
@@ -7,9 +10,9 @@ touch /tmp/install.lock
|
||||
touch /tmp/install-java.lock
|
||||
|
||||
# install java
|
||||
apt-get clean && \
|
||||
apt-get update --fix-missing && \
|
||||
apt-get install -y --fix-missing default-jdk
|
||||
apt-get clean
|
||||
apt-get update --fix-missing
|
||||
apt-get install -y --fix-missing default-jdk
|
||||
ln -s /usr/bin/java /usr/local/bin/java
|
||||
|
||||
# unlock
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
# lock global
|
||||
touch /tmp/install.lock
|
||||
|
||||
@@ -20,7 +23,7 @@ apt-get update && apt-get install -yq libgconf-2-4
|
||||
# Note: this installs the necessary libs to make the bundled version
|
||||
# of Chromium that Puppeteer
|
||||
# installs, work.
|
||||
RUN apt-get update \
|
||||
apt-get update \
|
||||
&& apt-get install -y wget gnupg \
|
||||
&& wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
|
||||
&& sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
{ # this ensures the entire script is downloaded #
|
||||
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
# lock global
|
||||
touch /tmp/install.lock
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# fail immediately if error
|
||||
set -e
|
||||
|
||||
# install node.js
|
||||
if [ "${CRAWLAB_SERVER_LANG_NODE}" = "Y" ];
|
||||
then
|
||||
|
||||
@@ -48,17 +48,17 @@ func GetLangLocal(lang entity.Lang) entity.Lang {
|
||||
}
|
||||
}
|
||||
|
||||
// 检查是否正在安装
|
||||
if utils.Exists(lang.LockPath) {
|
||||
lang.InstallStatus = constants.InstallStatusInstalling
|
||||
return lang
|
||||
}
|
||||
|
||||
// 检查其他语言是否在安装
|
||||
if utils.Exists("/tmp/install.lock") {
|
||||
lang.InstallStatus = constants.InstallStatusInstallingOther
|
||||
return lang
|
||||
}
|
||||
//// 检查是否正在安装
|
||||
//if utils.Exists(lang.LockPath) {
|
||||
// lang.InstallStatus = constants.InstallStatusInstalling
|
||||
// return lang
|
||||
//}
|
||||
//
|
||||
//// 检查其他语言是否在安装
|
||||
//if utils.Exists("/tmp/install.lock") {
|
||||
// lang.InstallStatus = constants.InstallStatusInstallingOther
|
||||
// return lang
|
||||
//}
|
||||
|
||||
lang.InstallStatus = constants.InstallStatusNotInstalled
|
||||
return lang
|
||||
|
||||
@@ -5,12 +5,15 @@ import (
|
||||
"crawlab/database"
|
||||
"crawlab/entity"
|
||||
"crawlab/lib/cron"
|
||||
"crawlab/model"
|
||||
"crawlab/services/rpc"
|
||||
"crawlab/utils"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/apex/log"
|
||||
"github.com/globalsign/mgo"
|
||||
"github.com/globalsign/mgo/bson"
|
||||
"github.com/imroc/req"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
@@ -73,6 +76,22 @@ func GetLangList(nodeId string) []entity.Lang {
|
||||
|
||||
// 获取语言安装状态
|
||||
func GetLangInstallStatus(nodeId string, lang entity.Lang) (string, error) {
|
||||
_, err := model.GetTaskByFilter(bson.M{
|
||||
"node_id": nodeId,
|
||||
"cmd": fmt.Sprintf("sh %s", utils.GetSystemScriptPath(lang.InstallScript)),
|
||||
"status": bson.M{
|
||||
"$in": []string{constants.StatusPending, constants.StatusRunning},
|
||||
},
|
||||
})
|
||||
if err == nil {
|
||||
// 任务正在运行,正在安装
|
||||
return constants.InstallStatusInstalling, nil
|
||||
}
|
||||
if err != mgo.ErrNotFound {
|
||||
// 发生错误
|
||||
return "", err
|
||||
}
|
||||
// 获取状态
|
||||
if IsMasterNode(nodeId) {
|
||||
lang := rpc.GetLangLocal(lang)
|
||||
return lang.InstallStatus, nil
|
||||
|
||||
@@ -408,37 +408,17 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider, u
|
||||
if err := WaitTaskProcess(cmd, t, s); err != nil {
|
||||
return err
|
||||
}
|
||||
ch <- constants.TaskFinish
|
||||
return nil
|
||||
}
|
||||
|
||||
// 生成日志目录
|
||||
func MakeLogDir(t model.Task) (fileDir string, err error) {
|
||||
// 日志目录
|
||||
fileDir = filepath.Join(viper.GetString("log.path"), t.SpiderId.Hex())
|
||||
|
||||
// 如果日志目录不存在,生成该目录
|
||||
if !utils.Exists(fileDir) {
|
||||
if err := os.MkdirAll(fileDir, 0777); err != nil {
|
||||
log.Errorf("execute task, make log dir error: %s", err.Error())
|
||||
debug.PrintStack()
|
||||
return "", err
|
||||
}
|
||||
// 如果返回值不为0,返回错误
|
||||
returnCode := cmd.ProcessState.ExitCode()
|
||||
if returnCode != 0 {
|
||||
log.Errorf(fmt.Sprintf("task returned code not zero: %d", returnCode))
|
||||
debug.PrintStack()
|
||||
return errors.New(fmt.Sprintf("task returned code not zero: %d", returnCode))
|
||||
}
|
||||
|
||||
return fileDir, nil
|
||||
}
|
||||
|
||||
// 获取日志文件路径
|
||||
func GetLogFilePaths(fileDir string, t model.Task) (filePath string) {
|
||||
// 时间戳
|
||||
ts := time.Now()
|
||||
tsStr := ts.Format("20060102150405")
|
||||
|
||||
// stdout日志文件
|
||||
filePath = filepath.Join(fileDir, t.Id+"_"+tsStr+".log")
|
||||
|
||||
return filePath
|
||||
ch <- constants.TaskFinish
|
||||
return nil
|
||||
}
|
||||
|
||||
// 生成执行任务方法
|
||||
@@ -695,11 +675,13 @@ func ExecuteTask(id int) {
|
||||
|
||||
func FinishUpTask(s model.Spider, t model.Task) {
|
||||
// 更新任务结果数
|
||||
go func() {
|
||||
if err := model.UpdateTaskResultCount(t.Id); err != nil {
|
||||
return
|
||||
}
|
||||
}()
|
||||
if t.Type == constants.TaskTypeSpider {
|
||||
go func() {
|
||||
if err := model.UpdateTaskResultCount(t.Id); err != nil {
|
||||
return
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// 更新任务错误日志
|
||||
go func() {
|
||||
@@ -814,6 +796,7 @@ func RestartTask(id string, uid bson.ObjectId) (err error) {
|
||||
newTask := model.Task{
|
||||
SpiderId: oldTask.SpiderId,
|
||||
NodeId: oldTask.NodeId,
|
||||
Cmd: oldTask.Cmd,
|
||||
Param: oldTask.Param,
|
||||
UserId: uid,
|
||||
RunType: oldTask.RunType,
|
||||
|
||||
Reference in New Issue
Block a user