fix: runner cancel issue

2026-01-22 17:31:03 +01:00 · 2024-11-22 13:32:27 +08:00
parent eee10ea08c
commit 8357dc6f30
4 changed files with 110 additions and 34 deletions
--- a/core/task/handler/runner.go
+++ b/core/task/handler/runner.go
@@ -6,8 +6,6 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"github.com/crawlab-team/crawlab/core/fs"
-	"github.com/hashicorp/go-multierror"
 	"io"
 	"net/http"
 	"os"
@@ -17,6 +15,9 @@ import (
 	"sync"
 	"time"

+	"github.com/crawlab-team/crawlab/core/fs"
+	"github.com/hashicorp/go-multierror"
+
 	"github.com/crawlab-team/crawlab/core/models/models"

 	"github.com/apex/log"
@@ -187,19 +188,22 @@ func (r *Runner) Cancel(force bool) (err error) {
 		return err
 	}

-	// Wait for process to be killed and goroutines to stop
-	ticker := time.NewTicker(time.Second)
+	// Create a context with timeout
+	ctx, cancel := context.WithTimeout(context.Background(), r.svc.GetCancelTimeout())
+	defer cancel()
+
+	// Poll until the process is gone or the timeout context expires
+	ticker := time.NewTicker(100 * time.Millisecond)
 	defer ticker.Stop()
+
 	for {
 		select {
-		case <-ticker.C:
-			if utils.ProcessIdExists(r.pid) {
-				continue
-			}
-			return nil
-		case <-time.After(r.svc.GetCancelTimeout()):
-			// timeout
+		case <-ctx.Done():
 			return fmt.Errorf("timeout waiting for task to stop")
+		case <-ticker.C:
+			if !utils.ProcessIdExists(r.pid) {
+				return nil
+			}
 		}
 	}
 }
--- a/core/task/handler/runner_test.go
+++ b/core/task/handler/runner_test.go
@@ -1,14 +1,17 @@
 package handler

 import (
+	"bufio"
 	"encoding/json"
 	"fmt"
-	"github.com/apex/log"
-	"github.com/crawlab-team/crawlab/core/utils"
 	"io"
+	"runtime"
 	"testing"
 	"time"

+	"github.com/apex/log"
+	"github.com/crawlab-team/crawlab/core/utils"
+
 	"github.com/crawlab-team/crawlab/core/constants"
 	"github.com/crawlab-team/crawlab/core/models/models"
 	"github.com/crawlab-team/crawlab/core/models/service"
@@ -38,7 +41,12 @@ func setupTest(t *testing.T) *Runner {
 		Type:     "test",
 		Mode:     "test",
 		NodeId:   primitive.NewObjectID(),
-		Cmd:      "python script.py",
+	}
+	switch runtime.GOOS {
+	case "windows":
+		task.Cmd = "ping -n 10 127.0.0.1"
+	default: // linux and darwin (macOS)
+		task.Cmd = "sleep 10"
 	}
 	taskId, err := service.NewModelService[models.Task]().InsertOne(*task)
 	require.NoError(t, err)
@@ -119,21 +127,50 @@ func TestRunner_Cancel(t *testing.T) {
 	// Setup
 	runner := setupTest(t)

-	// Start a long-running command
-	runner.t.Cmd = "sleep 10"
+	// Create a pipe that receives both stdout and stderr of the command
+	pr, pw := io.Pipe()
+	runner.cmd.Stdout = pw
+	runner.cmd.Stderr = pw
+
+	// Start the command
 	err := runner.cmd.Start()
 	assert.NoError(t, err)
+	log.Infof("started process with PID: %d", runner.cmd.Process.Pid)
 	runner.pid = runner.cmd.Process.Pid

+	// Read and print command output
+	go func() {
+		scanner := bufio.NewScanner(pr)
+		for scanner.Scan() {
+			log.Info(scanner.Text())
+		}
+	}()
+
+	// Wait a bit longer on Windows for the process to start properly
+	waitTime := 100 * time.Millisecond
+	if runtime.GOOS == "windows" {
+		waitTime = 1 * time.Second
+	}
+	time.Sleep(waitTime)
+
+	// Verify process exists before attempting to cancel
+	if !utils.ProcessIdExists(runner.pid) {
+		t.Fatalf("Process with PID %d was not started successfully", runner.pid)
+	}
+
 	// Test cancel
 	go func() {
 		err = runner.Cancel(true)
 		assert.NoError(t, err)
 	}()

-	// Verify process was killed
-	// Wait a short time for the process to be killed
-	time.Sleep(100 * time.Millisecond)
-	exists := utils.ProcessIdExists(runner.pid)
-	assert.False(t, exists)
+	// Poll until the process is gone, giving up after 5 seconds
+	deadline := time.Now().Add(5 * time.Second)
+	for time.Now().Before(deadline) {
+		if !utils.ProcessIdExists(runner.pid) {
+			return // Process was killed
+		}
+		time.Sleep(100 * time.Millisecond)
+	}
+	t.Errorf("Process with PID %d was not killed within timeout", runner.pid)
 }