From be93f9d17db840f3f69c9b57d31b77fc13ac94cb Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Fri, 20 Dec 2024 11:40:21 +0800 Subject: [PATCH] feat: added retry for worker node start --- core/node/service/worker_service.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/core/node/service/worker_service.go b/core/node/service/worker_service.go index 7b5fe933..edc558e2 100644 --- a/core/node/service/worker_service.go +++ b/core/node/service/worker_service.go @@ -43,8 +43,23 @@ type WorkerService struct { } func (svc *WorkerService) Start() { - // start grpc client - if err := svc.client.Start(); err != nil { + // start grpc client (retry if failed) + err := backoff.RetryNotify( + func() error { + return svc.client.Start() + }, + backoff.NewExponentialBackOff( + backoff.WithInitialInterval(1*time.Second), + backoff.WithMaxInterval(1*time.Minute), + backoff.WithMaxElapsedTime(10*time.Minute), + ), + func(err error, duration time.Duration) { + log.Errorf("failed to start grpc client: %v", err) + log.Infof("retrying in %s", duration) + }, + ) + if err != nil { + log.Fatalf("failed to start grpc client: %v", err) panic(err) } @@ -63,7 +78,7 @@ func (svc *WorkerService) Start() { // start sending heartbeat to master go svc.reportStatus() - // start handler + // start task handler go svc.handlerSvc.Start() // wait for quit signal