mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
236 lines
5.5 KiB
Go
236 lines
5.5 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/crawlab-team/crawlab/core/models/models"
|
|
|
|
"github.com/apex/log"
|
|
"github.com/cenkalti/backoff/v4"
|
|
"github.com/crawlab-team/crawlab/core/grpc/client"
|
|
"github.com/crawlab-team/crawlab/core/interfaces"
|
|
client2 "github.com/crawlab-team/crawlab/core/models/client"
|
|
nodeconfig "github.com/crawlab-team/crawlab/core/node/config"
|
|
"github.com/crawlab-team/crawlab/core/task/handler"
|
|
"github.com/crawlab-team/crawlab/core/utils"
|
|
"github.com/crawlab-team/crawlab/grpc"
|
|
"github.com/crawlab-team/crawlab/trace"
|
|
"go.mongodb.org/mongo-driver/bson"
|
|
)
|
|
|
|
type WorkerService struct {
|
|
// dependencies
|
|
cfgSvc interfaces.NodeConfigService
|
|
client *client.GrpcClient
|
|
handlerSvc *handler.Service
|
|
|
|
// settings
|
|
address interfaces.Address
|
|
heartbeatInterval time.Duration
|
|
|
|
// internals
|
|
stopped bool
|
|
n *models.Node
|
|
s grpc.NodeService_SubscribeClient
|
|
isReady bool
|
|
healthPort int
|
|
}
|
|
|
|
func (svc *WorkerService) Start() {
|
|
// start grpc client
|
|
if err := svc.client.Start(); err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
// start health check server
|
|
svc.startHealthServer()
|
|
|
|
// register to master
|
|
svc.register()
|
|
|
|
// mark as ready after registration
|
|
svc.isReady = true
|
|
|
|
// subscribe
|
|
go svc.subscribe()
|
|
|
|
// start sending heartbeat to master
|
|
go svc.reportStatus()
|
|
|
|
// start handler
|
|
go svc.handlerSvc.Start()
|
|
|
|
// wait for quit signal
|
|
svc.Wait()
|
|
|
|
// stop
|
|
svc.Stop()
|
|
}
|
|
|
|
func (svc *WorkerService) Wait() {
|
|
utils.DefaultWait()
|
|
}
|
|
|
|
func (svc *WorkerService) Stop() {
|
|
svc.stopped = true
|
|
_ = svc.client.Stop()
|
|
svc.handlerSvc.Stop()
|
|
log.Infof("worker[%s] service has stopped", svc.cfgSvc.GetNodeKey())
|
|
}
|
|
|
|
func (svc *WorkerService) register() {
|
|
ctx, cancel := svc.client.Context()
|
|
defer cancel()
|
|
_, err := svc.client.NodeClient.Register(ctx, &grpc.NodeServiceRegisterRequest{
|
|
NodeKey: svc.cfgSvc.GetNodeKey(),
|
|
NodeName: svc.cfgSvc.GetNodeName(),
|
|
MaxRunners: int32(svc.cfgSvc.GetMaxRunners()),
|
|
})
|
|
if err != nil {
|
|
log.Fatalf("failed to register worker[%s] to master: %v", svc.cfgSvc.GetNodeKey(), err)
|
|
panic(err)
|
|
}
|
|
svc.n, err = client2.NewModelService[models.Node]().GetOne(bson.M{"key": svc.GetConfigService().GetNodeKey()}, nil)
|
|
if err != nil {
|
|
log.Fatalf("failed to get node: %v", err)
|
|
panic(err)
|
|
}
|
|
log.Infof("worker[%s] registered to master. id: %s", svc.GetConfigService().GetNodeKey(), svc.n.Id.Hex())
|
|
return
|
|
}
|
|
|
|
func (svc *WorkerService) reportStatus() {
|
|
ticker := time.NewTicker(svc.heartbeatInterval)
|
|
for {
|
|
// return if client is closed
|
|
if svc.client.IsClosed() {
|
|
ticker.Stop()
|
|
return
|
|
}
|
|
|
|
// send heartbeat
|
|
svc.sendHeartbeat()
|
|
|
|
// sleep
|
|
<-ticker.C
|
|
}
|
|
}
|
|
|
|
func (svc *WorkerService) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
|
|
return svc.cfgSvc
|
|
}
|
|
|
|
func (svc *WorkerService) subscribe() {
|
|
// Configure exponential backoff
|
|
b := backoff.NewExponentialBackOff()
|
|
b.InitialInterval = 1 * time.Second
|
|
b.MaxInterval = 1 * time.Minute
|
|
b.MaxElapsedTime = 10 * time.Minute
|
|
b.Multiplier = 2.0
|
|
|
|
for {
|
|
if svc.stopped {
|
|
return
|
|
}
|
|
|
|
// Use backoff for connection attempts
|
|
operation := func() error {
|
|
stream, err := svc.client.NodeClient.Subscribe(context.Background(), &grpc.NodeServiceSubscribeRequest{
|
|
NodeKey: svc.cfgSvc.GetNodeKey(),
|
|
})
|
|
if err != nil {
|
|
log.Errorf("failed to subscribe to master: %v", err)
|
|
return err
|
|
}
|
|
|
|
// Handle messages
|
|
for {
|
|
if svc.stopped {
|
|
return nil
|
|
}
|
|
|
|
msg, err := stream.Recv()
|
|
if err != nil {
|
|
if svc.client.IsClosed() {
|
|
log.Errorf("connection to master is closed: %v", err)
|
|
return err
|
|
}
|
|
log.Errorf("failed to receive message from master: %v", err)
|
|
return err
|
|
}
|
|
|
|
switch msg.Code {
|
|
case grpc.NodeServiceSubscribeCode_PING:
|
|
// do nothing
|
|
}
|
|
}
|
|
}
|
|
|
|
// Execute with backoff
|
|
err := backoff.Retry(operation, b)
|
|
if err != nil {
|
|
log.Errorf("subscription failed after max retries: %v", err)
|
|
return
|
|
}
|
|
|
|
// Wait before attempting to reconnect
|
|
time.Sleep(time.Second)
|
|
}
|
|
}
|
|
|
|
func (svc *WorkerService) sendHeartbeat() {
|
|
ctx, cancel := context.WithTimeout(context.Background(), svc.heartbeatInterval)
|
|
defer cancel()
|
|
_, err := svc.client.NodeClient.SendHeartbeat(ctx, &grpc.NodeServiceSendHeartbeatRequest{
|
|
NodeKey: svc.cfgSvc.GetNodeKey(),
|
|
})
|
|
if err != nil {
|
|
trace.PrintError(err)
|
|
}
|
|
}
|
|
|
|
func (svc *WorkerService) startHealthServer() {
|
|
http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
|
|
if svc.isReady && !svc.stopped && svc.client != nil && !svc.client.IsClosed() {
|
|
w.WriteHeader(http.StatusOK)
|
|
w.Write([]byte("ok"))
|
|
return
|
|
}
|
|
w.WriteHeader(http.StatusServiceUnavailable)
|
|
w.Write([]byte("not ready"))
|
|
})
|
|
|
|
addr := fmt.Sprintf(":%d", svc.healthPort)
|
|
go func() {
|
|
if err := http.ListenAndServe(addr, nil); err != nil {
|
|
log.Errorf("health check server failed: %v", err)
|
|
}
|
|
}()
|
|
log.Infof("health check server started on port %d", svc.healthPort)
|
|
}
|
|
|
|
func newWorkerService() *WorkerService {
|
|
return &WorkerService{
|
|
heartbeatInterval: 15 * time.Second,
|
|
cfgSvc: nodeconfig.GetNodeConfigService(),
|
|
client: client.GetGrpcClient(),
|
|
handlerSvc: handler.GetTaskHandlerService(),
|
|
isReady: false,
|
|
healthPort: 8000, // You can make this configurable through environment variables
|
|
}
|
|
}
|
|
|
|
var workerService *WorkerService
|
|
var workerServiceOnce sync.Once
|
|
|
|
func GetWorkerService() *WorkerService {
|
|
workerServiceOnce.Do(func() {
|
|
workerService = newWorkerService()
|
|
})
|
|
return workerService
|
|
}
|