mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
fix: runner cancel issue
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
package sys_exec
|
||||
|
||||
import (
|
||||
"github.com/crawlab-team/crawlab/trace"
|
||||
"github.com/apex/log"
|
||||
"github.com/shirou/gopsutil/process"
|
||||
"os/exec"
|
||||
)
|
||||
@@ -17,19 +17,15 @@ func KillProcess(cmd *exec.Cmd, opts *KillProcessOptions) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// kill function
|
||||
killFunc := func(p *process.Process) error {
|
||||
return killProcessRecursive(p, opts.Force)
|
||||
}
|
||||
|
||||
// without timeout
|
||||
return killFunc(p)
|
||||
// kill process
|
||||
return killProcessRecursive(p, opts.Force)
|
||||
}
|
||||
|
||||
func killProcessRecursive(p *process.Process, force bool) (err error) {
|
||||
// children processes
|
||||
cps, err := p.Children()
|
||||
if err != nil {
|
||||
log.Errorf("failed to get children processes: %v", err)
|
||||
return killProcess(p, force)
|
||||
}
|
||||
|
||||
@@ -40,7 +36,7 @@ func killProcessRecursive(p *process.Process, force bool) (err error) {
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return killProcess(p, force)
|
||||
}
|
||||
|
||||
func killProcess(p *process.Process, force bool) (err error) {
|
||||
@@ -50,7 +46,8 @@ func killProcess(p *process.Process, force bool) (err error) {
|
||||
err = p.Terminate()
|
||||
}
|
||||
if err != nil {
|
||||
return trace.TraceError(err)
|
||||
log.Errorf("failed to kill process (force: %v): %v", force, err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -6,8 +6,6 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/crawlab-team/crawlab/core/fs"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -17,6 +15,9 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/crawlab-team/crawlab/core/fs"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
|
||||
"github.com/crawlab-team/crawlab/core/models/models"
|
||||
|
||||
"github.com/apex/log"
|
||||
@@ -187,19 +188,22 @@ func (r *Runner) Cancel(force bool) (err error) {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait for process to be killed and goroutines to stop
|
||||
ticker := time.NewTicker(time.Second)
|
||||
// Create a context with timeout
|
||||
ctx, cancel := context.WithTimeout(context.Background(), r.svc.GetCancelTimeout())
|
||||
defer cancel()
|
||||
|
||||
// Wait for process to be killed with context
|
||||
ticker := time.NewTicker(100 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
if utils.ProcessIdExists(r.pid) {
|
||||
continue
|
||||
}
|
||||
return nil
|
||||
case <-time.After(r.svc.GetCancelTimeout()):
|
||||
// timeout
|
||||
case <-ctx.Done():
|
||||
return fmt.Errorf("timeout waiting for task to stop")
|
||||
case <-ticker.C:
|
||||
if !utils.ProcessIdExists(r.pid) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/apex/log"
|
||||
"github.com/crawlab-team/crawlab/core/utils"
|
||||
"io"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/apex/log"
|
||||
"github.com/crawlab-team/crawlab/core/utils"
|
||||
|
||||
"github.com/crawlab-team/crawlab/core/constants"
|
||||
"github.com/crawlab-team/crawlab/core/models/models"
|
||||
"github.com/crawlab-team/crawlab/core/models/service"
|
||||
@@ -38,7 +41,12 @@ func setupTest(t *testing.T) *Runner {
|
||||
Type: "test",
|
||||
Mode: "test",
|
||||
NodeId: primitive.NewObjectID(),
|
||||
Cmd: "python script.py",
|
||||
}
|
||||
switch runtime.GOOS {
|
||||
case "windows":
|
||||
task.Cmd = "ping -n 10 127.0.0.1"
|
||||
default: // linux and darwin (macOS)
|
||||
task.Cmd = "sleep 10"
|
||||
}
|
||||
taskId, err := service.NewModelService[models.Task]().InsertOne(*task)
|
||||
require.NoError(t, err)
|
||||
@@ -119,21 +127,50 @@ func TestRunner_Cancel(t *testing.T) {
|
||||
// Setup
|
||||
runner := setupTest(t)
|
||||
|
||||
// Start a long-running command
|
||||
runner.t.Cmd = "sleep 10"
|
||||
// Create pipes for stdout
|
||||
pr, pw := io.Pipe()
|
||||
runner.cmd.Stdout = pw
|
||||
runner.cmd.Stderr = pw
|
||||
|
||||
// Start the command
|
||||
err := runner.cmd.Start()
|
||||
assert.NoError(t, err)
|
||||
log.Infof("started process with PID: %d", runner.cmd.Process.Pid)
|
||||
runner.pid = runner.cmd.Process.Pid
|
||||
|
||||
// Read and print command output
|
||||
go func() {
|
||||
scanner := bufio.NewScanner(pr)
|
||||
for scanner.Scan() {
|
||||
log.Info(scanner.Text())
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait a bit longer on Windows for the process to start properly
|
||||
waitTime := 100 * time.Millisecond
|
||||
if runtime.GOOS == "windows" {
|
||||
waitTime = 1 * time.Second
|
||||
}
|
||||
time.Sleep(waitTime)
|
||||
|
||||
// Verify process exists before attempting to cancel
|
||||
if !utils.ProcessIdExists(runner.pid) {
|
||||
t.Fatalf("Process with PID %d was not started successfully", runner.pid)
|
||||
}
|
||||
|
||||
// Test cancel
|
||||
go func() {
|
||||
err = runner.Cancel(true)
|
||||
assert.NoError(t, err)
|
||||
}()
|
||||
|
||||
// Verify process was killed
|
||||
// Wait a short time for the process to be killed
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
exists := utils.ProcessIdExists(runner.pid)
|
||||
assert.False(t, exists)
|
||||
// Wait for process to be killed, with shorter timeout
|
||||
deadline := time.Now().Add(5 * time.Second)
|
||||
for time.Now().Before(deadline) {
|
||||
if !utils.ProcessIdExists(runner.pid) {
|
||||
return // Process was killed
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
t.Errorf("Process with PID %d was not killed within timeout", runner.pid)
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"github.com/apex/log"
|
||||
"github.com/crawlab-team/crawlab/trace"
|
||||
"github.com/shirou/gopsutil/process"
|
||||
"os/exec"
|
||||
@@ -9,8 +11,42 @@ import (
|
||||
)
|
||||
|
||||
func ProcessIdExists(pid int) (ok bool) {
|
||||
ok, _ = process.PidExists(int32(pid))
|
||||
//// Find process by pid
|
||||
//p, err := os.FindProcess(pid)
|
||||
//if err != nil {
|
||||
// // Process not found
|
||||
// return false
|
||||
//}
|
||||
//
|
||||
//// Check if process exists
|
||||
//err = p.Signal(syscall.Signal(0))
|
||||
//if err == nil {
|
||||
// // Process exists
|
||||
// return true
|
||||
//}
|
||||
//
|
||||
//// Process not found
|
||||
//return false
|
||||
|
||||
ok, err := process.PidExists(int32(pid))
|
||||
if err != nil {
|
||||
log.Errorf("error checking if process exists: %v", err)
|
||||
}
|
||||
return ok
|
||||
|
||||
//processIds, err := process.Pids()
|
||||
//if err != nil {
|
||||
// log.Errorf("error getting process pids: %v", err)
|
||||
// return false
|
||||
//}
|
||||
//
|
||||
//for _, _pid := range processIds {
|
||||
// if int(_pid) == pid {
|
||||
// return true
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//return false
|
||||
}
|
||||
|
||||
func ListProcess(text string) (lines []string, err error) {
|
||||
@@ -24,7 +60,8 @@ func ListProcess(text string) (lines []string, err error) {
|
||||
func listProcessWindow(text string) (lines []string, err error) {
|
||||
cmd := exec.Command("tasklist", "/fi", text)
|
||||
out, err := cmd.CombinedOutput()
|
||||
_, ok := err.(*exec.ExitError)
|
||||
var exitError *exec.ExitError
|
||||
ok := errors.As(err, &exitError)
|
||||
if !ok {
|
||||
return nil, trace.TraceError(err)
|
||||
}
|
||||
@@ -35,7 +72,8 @@ func listProcessWindow(text string) (lines []string, err error) {
|
||||
func listProcessLinuxMac(text string) (lines []string, err error) {
|
||||
cmd := exec.Command("ps", "aux")
|
||||
out, err := cmd.CombinedOutput()
|
||||
_, ok := err.(*exec.ExitError)
|
||||
var exitError *exec.ExitError
|
||||
ok := errors.As(err, &exitError)
|
||||
if !ok {
|
||||
return nil, trace.TraceError(err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user