mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-25 17:42:25 +01:00
feat: implement task management service operations, stream manager, and worker pool
- Added service_operations.go for task management including run, cancel, and execution logic. - Introduced stream_manager.go to handle task streams and manage cancellation signals. - Created worker_pool.go to manage a bounded pool of workers for executing tasks concurrently. - Implemented graceful shutdown and cleanup mechanisms for task runners and streams. - Enhanced error handling and logging throughout the task management process.
This commit is contained in:
179
core/task/handler/runner_config.go
Normal file
179
core/task/handler/runner_config.go
Normal file
@@ -0,0 +1,179 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/crawlab-team/crawlab/core/entity"
|
||||
"github.com/crawlab-team/crawlab/core/models/client"
|
||||
"github.com/crawlab-team/crawlab/core/models/models"
|
||||
"github.com/crawlab-team/crawlab/core/utils"
|
||||
)
|
||||
|
||||
// configurePythonPath sets up the Python environment paths, handling both pyenv and default installations
|
||||
func (r *Runner) configurePythonPath() {
|
||||
// Configure global node_modules path
|
||||
pyenvRoot := utils.GetPyenvPath()
|
||||
pyenvShimsPath := pyenvRoot + "/shims"
|
||||
pyenvBinPath := pyenvRoot + "/bin"
|
||||
|
||||
// Configure global pyenv path
|
||||
_ = os.Setenv("PYENV_ROOT", pyenvRoot)
|
||||
_ = os.Setenv("PATH", pyenvShimsPath+":"+os.Getenv("PATH"))
|
||||
_ = os.Setenv("PATH", pyenvBinPath+":"+os.Getenv("PATH"))
|
||||
}
|
||||
|
||||
// configureNodePath sets up the Node.js environment paths, handling both nvm and default installations
|
||||
func (r *Runner) configureNodePath() {
|
||||
// Configure nvm-based Node.js paths
|
||||
envPath := os.Getenv("PATH")
|
||||
|
||||
// Configure global node_modules path
|
||||
nodePath := utils.GetNodeModulesPath()
|
||||
if !strings.Contains(envPath, nodePath) {
|
||||
_ = os.Setenv("PATH", nodePath+":"+envPath)
|
||||
}
|
||||
_ = os.Setenv("NODE_PATH", nodePath)
|
||||
|
||||
// Configure global node_bin path
|
||||
nodeBinPath := utils.GetNodeBinPath()
|
||||
if !strings.Contains(envPath, nodeBinPath) {
|
||||
_ = os.Setenv("PATH", nodeBinPath+":"+os.Getenv("PATH"))
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Runner) configureGoPath() {
|
||||
// Configure global go path
|
||||
goPath := utils.GetGoPath()
|
||||
if goPath != "" {
|
||||
_ = os.Setenv("GOPATH", goPath)
|
||||
}
|
||||
}
|
||||
|
||||
// configureEnv sets up the environment variables for the task process, including:
|
||||
// - Node.js paths
|
||||
// - Crawlab-specific variables
|
||||
// - Global environment variables from the system
|
||||
func (r *Runner) configureEnv() {
|
||||
// Configure Python path
|
||||
r.configurePythonPath()
|
||||
|
||||
// Configure Node.js path
|
||||
r.configureNodePath()
|
||||
|
||||
// Configure Go path
|
||||
r.configureGoPath()
|
||||
|
||||
// Default envs
|
||||
r.cmd.Env = os.Environ()
|
||||
|
||||
// Remove CRAWLAB_ prefixed environment variables
|
||||
for i := 0; i < len(r.cmd.Env); i++ {
|
||||
env := r.cmd.Env[i]
|
||||
if strings.HasPrefix(env, "CRAWLAB_") {
|
||||
r.cmd.Env = append(r.cmd.Env[:i], r.cmd.Env[i+1:]...)
|
||||
i--
|
||||
}
|
||||
}
|
||||
|
||||
// Task-specific environment variables
|
||||
r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex())
|
||||
|
||||
// Global environment variables
|
||||
envs, err := client.NewModelService[models.Environment]().GetMany(nil, nil)
|
||||
if err != nil {
|
||||
r.Errorf("failed to get environments: %v", err)
|
||||
}
|
||||
for _, env := range envs {
|
||||
r.cmd.Env = append(r.cmd.Env, env.Key+"="+env.Value)
|
||||
}
|
||||
|
||||
// Add environment variable for child processes to identify they're running under Crawlab
|
||||
r.cmd.Env = append(r.cmd.Env, "CRAWLAB_PARENT_PID="+fmt.Sprint(os.Getpid()))
|
||||
}
|
||||
|
||||
// configureCwd sets the working directory for the task based on the spider's configuration
|
||||
func (r *Runner) configureCwd() {
|
||||
workspacePath := utils.GetWorkspace()
|
||||
if r.s.GitId.IsZero() {
|
||||
// not git
|
||||
r.cwd = filepath.Join(workspacePath, r.s.Id.Hex())
|
||||
} else {
|
||||
// git
|
||||
r.cwd = filepath.Join(workspacePath, r.s.GitId.Hex(), r.s.GitRootPath)
|
||||
}
|
||||
}
|
||||
|
||||
// configureCmd builds and configures the command to be executed, including setting up IPC pipes
|
||||
// and processing command parameters
|
||||
func (r *Runner) configureCmd() (err error) {
|
||||
var cmdStr string
|
||||
|
||||
// command
|
||||
if r.t.Cmd == "" {
|
||||
cmdStr = r.s.Cmd
|
||||
} else {
|
||||
cmdStr = r.t.Cmd
|
||||
}
|
||||
|
||||
// parameters
|
||||
if r.t.Param != "" {
|
||||
cmdStr += " " + r.t.Param
|
||||
} else if r.s.Param != "" {
|
||||
cmdStr += " " + r.s.Param
|
||||
}
|
||||
|
||||
// get cmd instance
|
||||
r.cmd, err = utils.BuildCmd(cmdStr)
|
||||
if err != nil {
|
||||
r.Errorf("error building command: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// set working directory
|
||||
r.cmd.Dir = r.cwd
|
||||
|
||||
// ZOMBIE PREVENTION: Set process group to enable proper cleanup of child processes
|
||||
if runtime.GOOS != "windows" {
|
||||
// Create new process group on Unix systems to ensure child processes can be killed together
|
||||
r.cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Setpgid: true, // Create new process group
|
||||
Pgid: 0, // Use process ID as process group ID
|
||||
}
|
||||
}
|
||||
|
||||
// Configure pipes for IPC and logs
|
||||
r.stdinPipe, err = r.cmd.StdinPipe()
|
||||
if err != nil {
|
||||
r.Errorf("error creating stdin pipe: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Add stdout pipe for IPC and logs
|
||||
r.stdoutPipe, err = r.cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
r.Errorf("error creating stdout pipe: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Add stderr pipe for error logs
|
||||
stderrPipe, err := r.cmd.StderrPipe()
|
||||
if err != nil {
|
||||
r.Errorf("error creating stderr pipe: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Create buffered readers
|
||||
r.readerStdout = bufio.NewReader(r.stdoutPipe)
|
||||
r.readerStderr = bufio.NewReader(stderrPipe)
|
||||
|
||||
// Initialize IPC channel
|
||||
r.ipcChan = make(chan entity.IPCMessage)
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user