- Added service_operations.go for task management including run, cancel, and execution logic.
- Introduced stream_manager.go to handle task streams and manage cancellation signals.
- Created worker_pool.go to manage a bounded pool of workers for executing tasks concurrently.
- Implemented graceful shutdown and cleanup mechanisms for task runners and streams.
- Enhanced error handling and logging throughout the task management process.
113 lines
2.9 KiB
Go
package handler

import (
	"context"
	"fmt"
	"sync"

	"go.mongodb.org/mongo-driver/bson/primitive"
)

// taskRequest represents a task execution request
type taskRequest struct {
	taskId primitive.ObjectID
}

// TaskWorkerPool manages a bounded pool of workers for task execution
type TaskWorkerPool struct {
	workers   int
	ctx       context.Context
	cancel    context.CancelFunc
	wg        sync.WaitGroup
	taskQueue chan taskRequest
	service   *Service
}

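// NewTaskWorkerPool creates a pool with the given number of workers and a
// buffered task queue sized at workers * 5 (minimum 50) to absorb
// submission bursts.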
func NewTaskWorkerPool(workers int, service *Service) *TaskWorkerPool {
	ctx, cancel := context.WithCancel(context.Background())

	// Use a more generous queue size to handle task bursts.
	// Queue size is workers * 5 to allow for better buffering.
	queueSize := workers * 5
	if queueSize < 50 {
		queueSize = 50 // Minimum queue size
	}

	return &TaskWorkerPool{
		workers:   workers,
		ctx:       ctx,
		cancel:    cancel,
		taskQueue: make(chan taskRequest, queueSize),
		service:   service,
	}
}

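// Start launches the configured number of worker goroutines.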
func (pool *TaskWorkerPool) Start() {
	for i := 0; i < pool.workers; i++ {
		pool.wg.Add(1)
		go pool.worker(i)
	}
}

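// Stop cancels the pool context, closes the task queue, and waits for all
// workers to exit. Callers must ensure no goroutine calls SubmitTask after
// Stop has begun, since a send on the closed queue would panic.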
func (pool *TaskWorkerPool) Stop() {
	pool.cancel()
	close(pool.taskQueue)
	pool.wg.Wait()
}

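// SubmitTask enqueues a task for execution and returns immediately. It fails
// fast with an error when the pool is shutting down or the queue is full,
// rather than blocking the caller.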
func (pool *TaskWorkerPool) SubmitTask(taskId primitive.ObjectID) error {
	req := taskRequest{
		taskId: taskId,
	}

	select {
	case pool.taskQueue <- req:
		pool.service.Debugf("task[%s] queued for parallel execution, queue usage: %d/%d",
			taskId.Hex(), len(pool.taskQueue), cap(pool.taskQueue))
		return nil // Return immediately - task will execute in parallel
	case <-pool.ctx.Done():
		return fmt.Errorf("worker pool is shutting down")
	default:
		queueLen := len(pool.taskQueue)
		queueCap := cap(pool.taskQueue)
		pool.service.Warnf("task queue is full (%d/%d), consider increasing task.workers configuration",
			queueLen, queueCap)
		return fmt.Errorf("task queue is full (%d/%d), consider increasing task.workers configuration",
			queueLen, queueCap)
	}
}

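// worker is the main loop of a single pool goroutine: it pulls requests off
// the queue and executes them until the context is cancelled or the queue is
// closed. The deferred recover keeps a panicking task from crashing the
// process; the affected worker goroutine exits after logging the panic.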
func (pool *TaskWorkerPool) worker(workerID int) {
	defer pool.wg.Done()
	defer func() {
		if r := recover(); r != nil {
			pool.service.Errorf("worker %d panic recovered: %v", workerID, r)
		}
	}()

	pool.service.Debugf("worker %d started", workerID)

	for {
		select {
		case <-pool.ctx.Done():
			pool.service.Debugf("worker %d shutting down", workerID)
			return
		case req, ok := <-pool.taskQueue:
			if !ok {
				pool.service.Debugf("worker %d: task queue closed", workerID)
				return
			}

			// Each worker executes one task at a time, synchronously; parallelism
			// comes from multiple workers processing different tasks simultaneously.
			pool.service.Debugf("worker %d processing task[%s]", workerID, req.taskId.Hex())
			err := pool.service.executeTask(req.taskId)
			if err != nil {
				pool.service.Errorf("worker %d failed to execute task[%s]: %v",
					workerID, req.taskId.Hex(), err)
			} else {
				pool.service.Debugf("worker %d completed task[%s]", workerID, req.taskId.Hex())
			}
		}
	}
}

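// Example wiring (a minimal sketch, not part of the original file): assumes a
// *Service value svc and a task ID taskId obtained elsewhere; the worker count
// of 8 is illustrative and would normally come from the task.workers
// configuration referenced above.
//
//	pool := NewTaskWorkerPool(8, svc)
//	pool.Start()
//	defer pool.Stop()
//	if err := pool.SubmitTask(taskId); err != nil {
//		svc.Errorf("failed to submit task[%s]: %v", taskId.Hex(), err)
//	}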