Files
crawlab/core/node/service/worker_service.go
2024-11-19 18:32:50 +08:00

236 lines
5.5 KiB
Go

package service
import (
"context"
"fmt"
"net/http"
"sync"
"time"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/apex/log"
"github.com/cenkalti/backoff/v4"
"github.com/crawlab-team/crawlab/core/grpc/client"
"github.com/crawlab-team/crawlab/core/interfaces"
client2 "github.com/crawlab-team/crawlab/core/models/client"
nodeconfig "github.com/crawlab-team/crawlab/core/node/config"
"github.com/crawlab-team/crawlab/core/task/handler"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/crawlab/trace"
"go.mongodb.org/mongo-driver/bson"
)
// WorkerService runs a worker node: it registers the node with the master,
// keeps a subscription stream and a periodic heartbeat going over gRPC, runs
// the task handler, and exposes an HTTP health endpoint.
//
// NOTE(review): stopped and isReady are plain bools that this file writes from
// Start/Stop and reads from the subscribe goroutine and the HTTP health
// handler without any synchronization.
type WorkerService struct {
// dependencies
// cfgSvc supplies the node key, node name and max runners.
cfgSvc interfaces.NodeConfigService
// client is the gRPC client used to register, subscribe and send heartbeats.
client *client.GrpcClient
// handlerSvc is the task handler started by Start and stopped by Stop.
handlerSvc *handler.Service
// settings
// address is not referenced by any method visible in this file.
address interfaces.Address
// heartbeatInterval is both the heartbeat period and the per-heartbeat timeout.
heartbeatInterval time.Duration
// internals
// stopped is set to true by Stop.
stopped bool
// n is the node model loaded during registration.
n *models.Node
// s is not referenced by any method visible in this file.
s grpc.NodeService_SubscribeClient
// isReady is set to true by Start once registration has completed.
isReady bool
// healthPort is the TCP port the health check server listens on.
healthPort int
}
// Start brings the worker up and blocks until Wait returns, after which it
// calls Stop. It panics if the gRPC client cannot be started.
//
// Order of operations: gRPC client, health server, registration with the
// master, then the background goroutines (subscription, heartbeat, task
// handler).
func (svc *WorkerService) Start() {
	// nothing else can work without the gRPC client
	err := svc.client.Start()
	if err != nil {
		panic(err)
	}

	// the health endpoint is served before registration and reports
	// "not ready" until isReady is flipped below
	svc.startHealthServer()

	// registration must finish before the node is considered ready
	svc.register()
	svc.isReady = true

	// background work: subscription stream, heartbeats, task handler
	go svc.subscribe()
	go svc.reportStatus()
	go svc.handlerSvc.Start()

	// block here, then shut everything down
	svc.Wait()
	svc.Stop()
}
// Wait blocks by delegating to utils.DefaultWait. Start calls it right before
// Stop.
// NOTE(review): presumably returns once a quit signal is received (per the
// comment in Start) — confirm against utils.DefaultWait.
func (svc *WorkerService) Wait() {
utils.DefaultWait()
}
// Stop marks the service as stopped, shuts down the gRPC client and the task
// handler, and logs that the worker has stopped.
//
// Shutdown is best effort: a failure to stop the gRPC client is logged rather
// than silently dropped, and the remaining steps still run.
func (svc *WorkerService) Stop() {
	// flag first so the subscribe loop and the health endpoint see the change
	svc.stopped = true

	if err := svc.client.Stop(); err != nil {
		log.Errorf("failed to stop grpc client of worker[%s]: %v", svc.cfgSvc.GetNodeKey(), err)
	}

	svc.handlerSvc.Stop()
	log.Infof("worker[%s] service has stopped", svc.cfgSvc.GetNodeKey())
}
// register announces this worker to the master over gRPC and then loads the
// node's model (looked up by node key) into svc.n.
//
// Either failure is fatal: log.Fatalf (apex/log) logs the message and exits
// the process, so no further error handling is needed after it.
func (svc *WorkerService) register() {
	nodeKey := svc.cfgSvc.GetNodeKey()

	ctx, cancel := svc.client.Context()
	defer cancel()

	_, err := svc.client.NodeClient.Register(ctx, &grpc.NodeServiceRegisterRequest{
		NodeKey:    nodeKey,
		NodeName:   svc.cfgSvc.GetNodeName(),
		MaxRunners: int32(svc.cfgSvc.GetMaxRunners()),
	})
	if err != nil {
		log.Fatalf("failed to register worker[%s] to master: %v", nodeKey, err)
	}

	// fetch the node model for this key
	svc.n, err = client2.NewModelService[models.Node]().GetOne(bson.M{"key": nodeKey}, nil)
	if err != nil {
		log.Fatalf("failed to get node: %v", err)
	}

	log.Infof("worker[%s] registered to master. id: %s", nodeKey, svc.n.Id.Hex())
}
// reportStatus sends a heartbeat to the master immediately and then once per
// heartbeatInterval. It returns (releasing the ticker) as soon as it observes
// that the gRPC client is closed; that check happens before every heartbeat.
func (svc *WorkerService) reportStatus() {
	heartbeat := time.NewTicker(svc.heartbeatInterval)
	defer heartbeat.Stop()

	for !svc.client.IsClosed() {
		svc.sendHeartbeat()

		// wait for the next tick
		<-heartbeat.C
	}
}
// GetConfigService returns the node configuration service this worker uses.
func (svc *WorkerService) GetConfigService() interfaces.NodeConfigService {
	return svc.cfgSvc
}
// subscribe keeps a subscription stream to the master open until the service
// is stopped, reconnecting with exponential backoff when it fails.
//
// It returns when the service is stopped, or when backoff.Retry gives up
// (the backoff's MaxElapsedTime budget is exhausted).
//
// The receive loop runs inside the retried operation, and ExponentialBackOff
// measures MaxElapsedTime from its last Reset. Without an explicit Reset, a
// stream that had been healthy for longer than MaxElapsedTime would make Retry
// give up on its very first error and the worker would never resubscribe.
// The backoff is therefore reset every time a message is received.
func (svc *WorkerService) subscribe() {
	// Configure exponential backoff
	b := backoff.NewExponentialBackOff()
	b.InitialInterval = 1 * time.Second
	b.MaxInterval = 1 * time.Minute
	b.MaxElapsedTime = 10 * time.Minute
	b.Multiplier = 2.0

	for {
		if svc.stopped {
			return
		}

		operation := func() error {
			// Once the service is stopped, end the retry cycle instead of
			// re-subscribing through a client that is being shut down.
			if svc.stopped {
				return nil
			}

			stream, err := svc.client.NodeClient.Subscribe(context.Background(), &grpc.NodeServiceSubscribeRequest{
				NodeKey: svc.cfgSvc.GetNodeKey(),
			})
			if err != nil {
				log.Errorf("failed to subscribe to master: %v", err)
				return err
			}

			// Handle messages
			for {
				if svc.stopped {
					return nil
				}

				msg, err := stream.Recv()
				if err != nil {
					if svc.client.IsClosed() {
						log.Errorf("connection to master is closed: %v", err)
					} else {
						log.Errorf("failed to receive message from master: %v", err)
					}
					return err
				}

				// A received message proves the stream is healthy: restart the
				// backoff clock so a later failure gets the full retry budget.
				// Safe to call here because Retry runs the operation
				// synchronously on this goroutine.
				b.Reset()

				switch msg.Code {
				case grpc.NodeServiceSubscribeCode_PING:
					// keep-alive, nothing to do
				}
			}
		}

		// Execute with backoff
		if err := backoff.Retry(operation, b); err != nil {
			log.Errorf("subscription failed after max retries: %v", err)
			return
		}

		// Wait before attempting to reconnect
		time.Sleep(time.Second)
	}
}
// sendHeartbeat sends one heartbeat for this node to the master. The call is
// bounded by heartbeatInterval; a failure is reported via trace.PrintError and
// otherwise ignored.
func (svc *WorkerService) sendHeartbeat() {
	ctx, cancel := context.WithTimeout(context.Background(), svc.heartbeatInterval)
	defer cancel()

	req := &grpc.NodeServiceSendHeartbeatRequest{
		NodeKey: svc.cfgSvc.GetNodeKey(),
	}
	if _, err := svc.client.NodeClient.SendHeartbeat(ctx, req); err != nil {
		trace.PrintError(err)
	}
}
// startHealthServer registers a /health handler on http.DefaultServeMux and
// serves it in a background goroutine on svc.healthPort.
//
// /health answers 200 "ok" when the worker has registered, has not been
// stopped and its gRPC client is open; otherwise 503 "not ready".
//
// The server is an explicit http.Server rather than http.ListenAndServe so
// that header-read and idle timeouts can be set; without them a slow or idle
// client can hold a connection open indefinitely. No handler-duration timeout
// is set, so other handlers on the default mux are unaffected.
func (svc *WorkerService) startHealthServer() {
	http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		status, body := http.StatusServiceUnavailable, "not ready"
		if svc.isReady && !svc.stopped && svc.client != nil && !svc.client.IsClosed() {
			status, body = http.StatusOK, "ok"
		}
		w.WriteHeader(status)
		// A failed write means the caller went away; nothing useful to do.
		_, _ = w.Write([]byte(body))
	})

	srv := &http.Server{
		Addr:              fmt.Sprintf(":%d", svc.healthPort),
		ReadHeaderTimeout: 5 * time.Second,
		IdleTimeout:       60 * time.Second,
	}

	go func() {
		// ListenAndServe always returns a non-nil error.
		if err := srv.ListenAndServe(); err != nil {
			log.Errorf("health check server failed: %v", err)
		}
	}()

	log.Infof("health check server started on port %d", svc.healthPort)
}
// newWorkerService builds a WorkerService wired to the package-level node
// config service, gRPC client and task handler, with a 15-second heartbeat
// interval and the health server on port 8000. The service starts out not
// ready.
func newWorkerService() *WorkerService {
	svc := new(WorkerService)

	svc.heartbeatInterval = 15 * time.Second

	// dependencies
	svc.cfgSvc = nodeconfig.GetNodeConfigService()
	svc.client = client.GetGrpcClient()
	svc.handlerSvc = handler.GetTaskHandlerService()

	// health endpoint; the port is fixed here but could be made configurable
	// (e.g. through an environment variable)
	svc.isReady = false
	svc.healthPort = 8000

	return svc
}
var (
	// workerService is the process-wide WorkerService instance, created on
	// first use by GetWorkerService.
	workerService *WorkerService
	// workerServiceOnce guards the one-time creation of workerService.
	workerServiceOnce sync.Once
)
// GetWorkerService returns the shared WorkerService, constructing it on the
// first call. Every call returns the same instance.
func GetWorkerService() *WorkerService {
	build := func() {
		workerService = newWorkerService()
	}
	workerServiceOnce.Do(build)
	return workerService
}