From 7caa0127dcb60e2f6ad8bc9b1fa908c44c8cbfef Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Thu, 10 Apr 2025 17:27:52 +0800 Subject: [PATCH 1/8] fix(config): ensure workspace directory is created if it does not exist --- core/utils/config.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/core/utils/config.go b/core/utils/config.go index b35ae9e0..c0455681 100644 --- a/core/utils/config.go +++ b/core/utils/config.go @@ -5,6 +5,7 @@ import ( "github.com/gin-gonic/gin" "github.com/mitchellh/go-homedir" "github.com/spf13/viper" + "os" "path/filepath" "strings" ) @@ -92,6 +93,12 @@ func GetWorkspace() string { if res := viper.GetString("workspace"); res != "" { return res } + if !Exists(filepath.Join(homedirPath, DefaultWorkspace)) { + err := os.MkdirAll(filepath.Join(homedirPath, DefaultWorkspace), os.ModePerm) + if err != nil { + logger.Warnf("cannot create workspace directory: %v", err) + } + } return filepath.Join(homedirPath, DefaultWorkspace) } From e534a12a11bc587f17775ecd270df31cc1bba538 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Tue, 15 Apr 2025 21:00:53 +0800 Subject: [PATCH 2/8] feat: enhance Node.js environment configuration in task runner - Added a new utility function GetNodeBinPath to retrieve the global node_bin path from configuration, with a default fallback. - Updated configureNodePath method to set the PATH environment variable to include the node_bin path if it's not already present, improving the task runner's environment setup for Node.js development. 
--- core/task/handler/runner.go | 6 ++++++ core/utils/config.go | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index ef61a2e0..9baf7574 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -364,6 +364,12 @@ func (r *Runner) configureNodePath() { _ = os.Setenv("PATH", nodePath+":"+envPath) } _ = os.Setenv("NODE_PATH", nodePath) + + // Configure global node_bin path + nodeBinPath := utils.GetNodeBinPath() + if !strings.Contains(envPath, nodeBinPath) { + _ = os.Setenv("PATH", nodeBinPath+":"+envPath) + } } func (r *Runner) configureGoPath() { diff --git a/core/utils/config.go b/core/utils/config.go index c0455681..2ad35b83 100644 --- a/core/utils/config.go +++ b/core/utils/config.go @@ -33,6 +33,7 @@ const ( MetadataConfigName = "config.json" DefaultPyenvPath = "/root/.pyenv" DefaultNodeModulesPath = "/usr/lib/node_modules" + DefaultNodeBinPath = "/usr/lib/bin" DefaultGoPath = "/root/go" ) @@ -271,6 +272,13 @@ func GetNodeModulesPath() string { return DefaultNodeModulesPath } +func GetNodeBinPath() string { + if res := viper.GetString("install.node.bin"); res != "" { + return res + } + return DefaultNodeBinPath +} + func GetGoPath() string { if res := viper.GetString("install.go.path"); res != "" { return res From cfc5723c2027291684827844e802ca76f24081ff Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Tue, 15 Apr 2025 22:24:52 +0800 Subject: [PATCH 3/8] fix: correct PATH environment variable configuration in task runner - Updated the configureNodePath method to properly set the PATH environment variable by concatenating node_bin and node_modules paths, ensuring the correct environment setup for Node.js development. 
--- core/task/handler/runner.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index 9baf7574..b6ca0850 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -361,15 +361,17 @@ func (r *Runner) configureNodePath() { // Configure global node_modules path nodePath := utils.GetNodeModulesPath() if !strings.Contains(envPath, nodePath) { - _ = os.Setenv("PATH", nodePath+":"+envPath) + envPath = nodePath + ":" + envPath } _ = os.Setenv("NODE_PATH", nodePath) // Configure global node_bin path nodeBinPath := utils.GetNodeBinPath() if !strings.Contains(envPath, nodeBinPath) { - _ = os.Setenv("PATH", nodeBinPath+":"+envPath) + envPath = nodeBinPath + ":" + envPath } + + _ = os.Setenv("PATH", envPath) } func (r *Runner) configureGoPath() { From 7e3378c7ef9f2aa6779e99f2d9bdff0d25adb940 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Tue, 15 Apr 2025 22:52:33 +0800 Subject: [PATCH 4/8] refactor: improve environment variable configuration in task runner - Updated the configurePythonPath and configureNodePath methods to append environment variables directly to r.cmd.Env instead of using os.Setenv, enhancing the clarity and maintainability of the environment setup. - Ensured the PATH variable is updated correctly by checking for existing paths before appending, preventing duplicates. - Moved default environment variable initialization to the configureEnv method for better organization. 
--- core/task/handler/runner.go | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index b6ca0850..bfe97a55 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -342,20 +342,30 @@ func (r *Runner) startHealthCheck() { // configurePythonPath sets up the Python environment paths, handling both pyenv and default installations func (r *Runner) configurePythonPath() { + // Get current PATH + envPath := os.Getenv("PATH") + // Configure global node_modules path pyenvRoot := utils.GetPyenvPath() pyenvShimsPath := pyenvRoot + "/shims" pyenvBinPath := pyenvRoot + "/bin" // Configure global pyenv path - _ = os.Setenv("PYENV_ROOT", pyenvRoot) - _ = os.Setenv("PATH", pyenvShimsPath+":"+os.Getenv("PATH")) - _ = os.Setenv("PATH", pyenvBinPath+":"+os.Getenv("PATH")) + r.cmd.Env = append(r.cmd.Env, "PYENV_ROOT="+pyenvRoot) + if !strings.Contains(envPath, pyenvShimsPath) { + envPath = pyenvShimsPath + ":" + envPath + } + if !strings.Contains(envPath, pyenvBinPath) { + envPath = pyenvBinPath + ":" + envPath + } + + // Update PATH environment variable + r.cmd.Env = append(r.cmd.Env, "PATH="+envPath) } // configureNodePath sets up the Node.js environment paths, handling both nvm and default installations func (r *Runner) configureNodePath() { - // Configure nvm-based Node.js paths + // Get current PATH envPath := os.Getenv("PATH") // Configure global node_modules path @@ -363,7 +373,7 @@ func (r *Runner) configureNodePath() { if !strings.Contains(envPath, nodePath) { envPath = nodePath + ":" + envPath } - _ = os.Setenv("NODE_PATH", nodePath) + r.cmd.Env = append(r.cmd.Env, "NODE_PATH="+nodePath) // Configure global node_bin path nodeBinPath := utils.GetNodeBinPath() @@ -371,14 +381,15 @@ func (r *Runner) configureNodePath() { envPath = nodeBinPath + ":" + envPath } - _ = os.Setenv("PATH", envPath) + // Update PATH environment variable + r.cmd.Env = 
append(r.cmd.Env, "PATH="+envPath) } func (r *Runner) configureGoPath() { // Configure global go path goPath := utils.GetGoPath() if goPath != "" { - _ = os.Setenv("GOPATH", goPath) + r.cmd.Env = append(r.cmd.Env, "GOPATH="+goPath) } } @@ -387,6 +398,10 @@ func (r *Runner) configureGoPath() { // - Crawlab-specific variables // - Global environment variables from the system func (r *Runner) configureEnv() { + // Default envs + r.cmd.Env = os.Environ() + r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex()) + // Configure Python path r.configurePythonPath() @@ -396,10 +411,6 @@ func (r *Runner) configureEnv() { // Configure Go path r.configureGoPath() - // Default envs - r.cmd.Env = os.Environ() - r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex()) - // Global environment variables envs, err := client.NewModelService[models.Environment]().GetMany(nil, nil) if err != nil { From 94194445d83c923ba8f30f51e6d3cffd08470958 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Tue, 15 Apr 2025 23:11:53 +0800 Subject: [PATCH 5/8] refactor: improve environment variable handling in task runner - Replaced direct manipulation of r.cmd.Env with os.Setenv for setting PYENV_ROOT, PATH, NODE_PATH, and GOPATH, enhancing error handling and clarity. - Ensured proper initialization of default environment variables in the configureEnv method, maintaining organization and readability in the environment setup process. 
--- core/task/handler/runner.go | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index bfe97a55..ad0ff51b 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -351,7 +351,10 @@ func (r *Runner) configurePythonPath() { pyenvBinPath := pyenvRoot + "/bin" // Configure global pyenv path - r.cmd.Env = append(r.cmd.Env, "PYENV_ROOT="+pyenvRoot) + err := os.Setenv("PYENV_ROOT", pyenvRoot) + if err != nil { + r.Errorf("error setting PYENV_ROOT environment variable: %v", err) + } if !strings.Contains(envPath, pyenvShimsPath) { envPath = pyenvShimsPath + ":" + envPath } @@ -360,7 +363,10 @@ func (r *Runner) configurePythonPath() { } // Update PATH environment variable - r.cmd.Env = append(r.cmd.Env, "PATH="+envPath) + err = os.Setenv("PATH", envPath) + if err != nil { + r.Errorf("error setting PATH environment variable: %v", err) + } } // configureNodePath sets up the Node.js environment paths, handling both nvm and default installations @@ -373,7 +379,10 @@ func (r *Runner) configureNodePath() { if !strings.Contains(envPath, nodePath) { envPath = nodePath + ":" + envPath } - r.cmd.Env = append(r.cmd.Env, "NODE_PATH="+nodePath) + err := os.Setenv("NODE_PATH", nodePath) + if err != nil { + r.Errorf("error setting NODE_PATH environment variable: %v", err) + } // Configure global node_bin path nodeBinPath := utils.GetNodeBinPath() @@ -382,14 +391,20 @@ func (r *Runner) configureNodePath() { } // Update PATH environment variable - r.cmd.Env = append(r.cmd.Env, "PATH="+envPath) + err = os.Setenv("PATH", envPath) + if err != nil { + r.Errorf("error setting PATH environment variable: %v", err) + } } func (r *Runner) configureGoPath() { // Configure global go path goPath := utils.GetGoPath() if goPath != "" { - r.cmd.Env = append(r.cmd.Env, "GOPATH="+goPath) + err := os.Setenv("GOPATH", goPath) + if err != nil { + r.Errorf("error setting GOPATH 
environment variable: %v", err) + } } } @@ -398,10 +413,6 @@ func (r *Runner) configureGoPath() { // - Crawlab-specific variables // - Global environment variables from the system func (r *Runner) configureEnv() { - // Default envs - r.cmd.Env = os.Environ() - r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex()) - // Configure Python path r.configurePythonPath() @@ -411,6 +422,10 @@ func (r *Runner) configureEnv() { // Configure Go path r.configureGoPath() + // Default envs + r.cmd.Env = os.Environ() + r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex()) + // Global environment variables envs, err := client.NewModelService[models.Environment]().GetMany(nil, nil) if err != nil { From c01b300a95ea6cb8721da9d65c1e652cc0fd3714 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Tue, 15 Apr 2025 23:22:03 +0800 Subject: [PATCH 6/8] refactor: consolidate environment variable configuration in task runner - Merged the configurePythonPath, configureNodePath, and configureGoPath methods into the single configureEnv method for improved organization and clarity. - Streamlined the handling of environment variables by using a map to avoid duplicates and facilitate easier manipulation. - Enhanced logging for environment variable settings, providing better visibility during configuration. - Ensured all necessary environment variables are set correctly, including Crawlab-specific variables and global system variables. 
--- core/task/handler/runner.go | 136 ++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 77 deletions(-) diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index ad0ff51b..32c8798f 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -340,91 +340,67 @@ func (r *Runner) startHealthCheck() { } } -// configurePythonPath sets up the Python environment paths, handling both pyenv and default installations -func (r *Runner) configurePythonPath() { - // Get current PATH - envPath := os.Getenv("PATH") - - // Configure global node_modules path - pyenvRoot := utils.GetPyenvPath() - pyenvShimsPath := pyenvRoot + "/shims" - pyenvBinPath := pyenvRoot + "/bin" - - // Configure global pyenv path - err := os.Setenv("PYENV_ROOT", pyenvRoot) - if err != nil { - r.Errorf("error setting PYENV_ROOT environment variable: %v", err) - } - if !strings.Contains(envPath, pyenvShimsPath) { - envPath = pyenvShimsPath + ":" + envPath - } - if !strings.Contains(envPath, pyenvBinPath) { - envPath = pyenvBinPath + ":" + envPath - } - - // Update PATH environment variable - err = os.Setenv("PATH", envPath) - if err != nil { - r.Errorf("error setting PATH environment variable: %v", err) - } -} - -// configureNodePath sets up the Node.js environment paths, handling both nvm and default installations -func (r *Runner) configureNodePath() { - // Get current PATH - envPath := os.Getenv("PATH") - - // Configure global node_modules path - nodePath := utils.GetNodeModulesPath() - if !strings.Contains(envPath, nodePath) { - envPath = nodePath + ":" + envPath - } - err := os.Setenv("NODE_PATH", nodePath) - if err != nil { - r.Errorf("error setting NODE_PATH environment variable: %v", err) - } - - // Configure global node_bin path - nodeBinPath := utils.GetNodeBinPath() - if !strings.Contains(envPath, nodeBinPath) { - envPath = nodeBinPath + ":" + envPath - } - - // Update PATH environment variable - err = os.Setenv("PATH", envPath) - if err != 
nil { - r.Errorf("error setting PATH environment variable: %v", err) - } -} - -func (r *Runner) configureGoPath() { - // Configure global go path - goPath := utils.GetGoPath() - if goPath != "" { - err := os.Setenv("GOPATH", goPath) - if err != nil { - r.Errorf("error setting GOPATH environment variable: %v", err) - } - } -} - // configureEnv sets up the environment variables for the task process, including: +// - Python paths // - Node.js paths +// - Go paths // - Crawlab-specific variables // - Global environment variables from the system func (r *Runner) configureEnv() { + // Start with the current environment + env := os.Environ() + + // Create a map for easier manipulation and to avoid duplicates + envMap := make(map[string]string) + for _, e := range env { + parts := strings.SplitN(e, "=", 2) + if len(parts) == 2 { + envMap[parts[0]] = parts[1] + } + } + + // Handle PATH non-existence + if _, exists := envMap["PATH"]; !exists { + envMap["PATH"] = "" + } + // Configure Python path - r.configurePythonPath() + pyenvRoot := utils.GetPyenvPath() + pyenvShimsPath := pyenvRoot + "/shims" + pyenvBinPath := pyenvRoot + "/bin" + envMap["PYENV_ROOT"] = pyenvRoot + if !strings.Contains(envMap["PATH"], pyenvShimsPath) { + envMap["PATH"] = pyenvShimsPath + ":" + envMap["PATH"] + r.Debugf("added pyenv shims path to PATH: %s", pyenvShimsPath) + } + if !strings.Contains(envMap["PATH"], pyenvBinPath) { + envMap["PATH"] = pyenvBinPath + ":" + envMap["PATH"] + r.Debugf("added pyenv bin path to PATH: %s", pyenvBinPath) + } // Configure Node.js path - r.configureNodePath() + nodePath := utils.GetNodeModulesPath() + nodeBinPath := utils.GetNodeBinPath() + envMap["NODE_PATH"] = nodePath + if !strings.Contains(envMap["PATH"], nodePath) { + envMap["PATH"] = nodePath + ":" + envMap["PATH"] + r.Debugf("added node modules path to PATH: %s", nodePath) + } + if !strings.Contains(envMap["PATH"], nodeBinPath) { + envMap["PATH"] = nodeBinPath + ":" + envMap["PATH"] + r.Debugf("added node bin 
path to PATH: %s", nodeBinPath) + } // Configure Go path - r.configureGoPath() + goPath := utils.GetGoPath() + if goPath != "" { + envMap["GOPATH"] = goPath + r.Debugf("set GOPATH: %s", goPath) + } - // Default envs - r.cmd.Env = os.Environ() - r.cmd.Env = append(r.cmd.Env, "CRAWLAB_TASK_ID="+r.tid.Hex()) + // Crawlab-specific variables + envMap["CRAWLAB_TASK_ID"] = r.tid.Hex() + envMap["CRAWLAB_PARENT_PID"] = fmt.Sprint(os.Getpid()) // Global environment variables envs, err := client.NewModelService[models.Environment]().GetMany(nil, nil) @@ -433,11 +409,17 @@ func (r *Runner) configureEnv() { return } for _, env := range envs { - r.cmd.Env = append(r.cmd.Env, env.Key+"="+env.Value) + envMap[env.Key] = env.Value + r.Debugf("set environment variable: %s", env.Key) } - // Add environment variable for child processes to identify they're running under Crawlab - r.cmd.Env = append(r.cmd.Env, "CRAWLAB_PARENT_PID="+fmt.Sprint(os.Getpid())) + // Convert the map back to the []string format for r.cmd.Env + r.cmd.Env = make([]string, 0, len(envMap)) + for key, value := range envMap { + r.cmd.Env = append(r.cmd.Env, key+"="+value) + } + + r.Debugf("environment configuration completed with %d variables", len(r.cmd.Env)) } func (r *Runner) createHttpRequest(method, path string) (*http.Response, error) { From 99414a4ddeb74c3861e15194f7753ce6e37acffc Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Wed, 16 Apr 2025 10:15:10 +0800 Subject: [PATCH 7/8] fix: update default Node.js binary path in configuration - Changed the default Node.js binary path from /usr/lib/bin to /usr/lib/node_bin to reflect the correct directory structure, ensuring proper environment setup for Node.js development. 
--- core/utils/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/utils/config.go b/core/utils/config.go index 2ad35b83..ca1f115d 100644 --- a/core/utils/config.go +++ b/core/utils/config.go @@ -33,7 +33,7 @@ const ( MetadataConfigName = "config.json" DefaultPyenvPath = "/root/.pyenv" DefaultNodeModulesPath = "/usr/lib/node_modules" - DefaultNodeBinPath = "/usr/lib/bin" + DefaultNodeBinPath = "/usr/lib/node_bin" DefaultGoPath = "/root/go" ) From 8bc0464aec985c165c44b5adae62c1a6d437fd47 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Thu, 24 Apr 2025 21:56:50 +0800 Subject: [PATCH 8/8] fix: improve error handling during file synchronization in task runner - Updated the error handling in the Run method to log a warning message when file synchronization fails, enhancing visibility into potential issues during task execution. --- core/task/handler/runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index 32c8798f..c7477bb5 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -159,7 +159,7 @@ func (r *Runner) Run() (err error) { // sync files worker nodes if !utils.IsMaster() { if err := r.syncFiles(); err != nil { - return r.updateTask(constants.TaskStatusError, err) + r.Warnf("error synchronizing files: %v", err) } }