fix: unable to sync directories to work nodes

This commit is contained in:
Marvin Zhang
2024-07-01 15:59:20 +08:00
parent 7f35e1b2ee
commit 840100dbc3
40 changed files with 768 additions and 1540 deletions

View File

@@ -9,6 +9,7 @@ import (
"github.com/crawlab-team/crawlab/trace"
"os"
"path/filepath"
"sync"
)
type Service struct {
@@ -93,7 +94,7 @@ func (svc *Service) SetConfigPath(path string) {
svc.path = path
}
func NewNodeConfigService() (svc2 interfaces.NodeConfigService, err error) {
func newNodeConfigService() (svc2 interfaces.NodeConfigService, err error) {
// cfg
cfg := NewConfig(nil)
@@ -115,17 +116,18 @@ func NewNodeConfigService() (svc2 interfaces.NodeConfigService, err error) {
}
var _service interfaces.NodeConfigService
var _serviceOnce = new(sync.Once)
func GetNodeConfigService() interfaces.NodeConfigService {
if _service != nil {
return _service
}
var err error
_service, err = NewNodeConfigService()
if err != nil {
panic(err)
}
_serviceOnce.Do(func() {
var err error
_service, err = newNodeConfigService()
if err != nil {
panic(err)
}
})
return _service
}

View File

@@ -6,7 +6,6 @@ import (
"github.com/cenkalti/backoff/v4"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/server"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/common"
@@ -20,7 +19,7 @@ import (
"github.com/crawlab-team/crawlab/core/task/handler"
"github.com/crawlab-team/crawlab/core/task/scheduler"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/crawlab/trace"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
@@ -320,7 +319,7 @@ func (svc *MasterServiceV2) updateNodeAvailableRunners(node *models.NodeV2) (err
return nil
}
func NewMasterServiceV2() (res interfaces.NodeMasterService, err error) {
func newMasterServiceV2() (res *MasterServiceV2, err error) {
// master service
svc := &MasterServiceV2{
cfgPath: config2.GetConfigPath(),
@@ -334,14 +333,8 @@ func NewMasterServiceV2() (res interfaces.NodeMasterService, err error) {
serverOpts = append(serverOpts, server.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
) {
svc.cfgSvc = cfgSvc
}); err != nil {
return nil, err
}
// node config service
svc.cfgSvc = config.GetNodeConfigService()
// grpc server
svc.server, err = server.GetGrpcServerV2()
@@ -368,7 +361,7 @@ func NewMasterServiceV2() (res interfaces.NodeMasterService, err error) {
}
// notification service
svc.notificationSvc = notification.GetServiceV2()
svc.notificationSvc = notification.GetNotificationServiceV2()
// spider admin service
svc.spiderAdminSvc, err = admin.GetSpiderAdminServiceV2()
@@ -377,7 +370,7 @@ func NewMasterServiceV2() (res interfaces.NodeMasterService, err error) {
}
// system service
svc.systemSvc = system.GetServiceV2()
svc.systemSvc = system.GetSystemServiceV2()
// init
if err := svc.Init(); err != nil {
@@ -386,3 +379,17 @@ func NewMasterServiceV2() (res interfaces.NodeMasterService, err error) {
return svc, nil
}
var (
	// masterServiceV2 is the lazily created singleton returned by GetMasterServiceV2.
	masterServiceV2 *MasterServiceV2
	// masterServiceV2Err remembers the construction error so that every call,
	// not only the first one, reports a failed initialization.
	masterServiceV2Err error
	// masterServiceV2Once guards the one-time construction.
	masterServiceV2Once = new(sync.Once)
)

// GetMasterServiceV2 returns the process-wide master service, constructing it
// with newMasterServiceV2 on the first call.
//
// The construction result (service and error) is cached: if the first call
// failed, later calls return the same error instead of a nil service paired
// with a nil error.
func GetMasterServiceV2() (res *MasterServiceV2, err error) {
	masterServiceV2Once.Do(func() {
		masterServiceV2, masterServiceV2Err = newMasterServiceV2()
		if masterServiceV2Err != nil {
			log.Errorf("failed to get master service: %v", masterServiceV2Err)
		}
	})
	return masterServiceV2, masterServiceV2Err
}

View File

@@ -1,238 +0,0 @@
package service
import (
"context"
"encoding/json"
"github.com/apex/log"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/client"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/crawlab/trace"
"github.com/spf13/viper"
"time"
)
// WorkerService holds the collaborators, settings and state used by the
// worker-node methods below: registering with the master, consuming stream
// messages, and sending heartbeats.
type WorkerService struct {
// dependencies
// cfgSvc supplies this node's key and basic node info.
cfgSvc interfaces.NodeConfigService
// client is the gRPC client used for every call to the master.
client interfaces.GrpcClient
// handlerSvc is started by Start and runs/cancels tasks requested over the stream.
handlerSvc interfaces.TaskHandlerService
// settings
// cfgPath is the node config path; NewWorkerService seeds it from config2.GetConfigPath().
cfgPath string
// address is the optional address setting; NewWorkerService turns a non-nil
// value into a client.WithAddress option.
address interfaces.Address
// heartbeatInterval is both the pause between status reports and the
// timeout applied to each heartbeat call in reportStatus.
heartbeatInterval time.Duration
// internals
// n receives the node decoded from the master's Register response.
n interfaces.Node
// s is not referenced by any method shown in this file.
s grpc.NodeService_SubscribeClient
}
// Init is a no-op for the worker service and always returns a nil error.
func (svc *WorkerService) Init() (err error) {
	return err
}
// Start runs the worker: it starts the gRPC client (panicking if that fails),
// registers with the master, launches the receive loop, the status reporter
// and the task handler in their own goroutines, then waits via Wait and
// finally calls Stop.
func (svc *WorkerService) Start() {
	// the gRPC client has to be up before anything talks to the master
	startErr := svc.client.Start()
	if startErr != nil {
		panic(startErr)
	}

	// announce this node to the master
	svc.Register()

	// background loops: stream messages, heartbeats, task handler
	go svc.Recv()
	go svc.ReportStatus()
	go svc.handlerSvc.Start()

	// park until Wait returns, then shut down
	svc.Wait()
	svc.Stop()
}
// Wait delegates to utils.DefaultWait (presumably blocking until a quit
// signal arrives — confirm in the utils package).
func (svc *WorkerService) Wait() {
	utils.DefaultWait()
}
// Stop stops the gRPC client and logs that the worker has stopped.
// The error returned by the client's Stop is deliberately discarded.
func (svc *WorkerService) Stop() {
	_ = svc.client.Stop()

	nodeKey := svc.cfgSvc.GetNodeKey()
	log.Infof("worker[%s] service has stopped", nodeKey)
}
// Register sends this node's basic info to the master and decodes the
// response payload into svc.n. It panics if the call or the decoding fails.
func (svc *WorkerService) Register() {
	ctx, cancel := svc.client.Context()
	defer cancel()

	// build the request from the node's basic info
	nodeInfo := svc.cfgSvc.GetBasicNodeInfo()
	req := svc.client.NewRequest(nodeInfo)

	res, err := svc.client.GetNodeClient().Register(ctx, req)
	if err != nil {
		panic(err)
	}

	// the response data is the JSON-encoded node
	err = json.Unmarshal(res.Data, svc.n)
	if err != nil {
		panic(err)
	}

	log.Infof("worker[%s] registered to master. id: %s", svc.cfgSvc.GetNodeKey(), svc.n.GetId().Hex())
}
// Recv consumes messages from the client's message channel and hands each one
// to handleStreamMessage. The loop ends when the client reports it is closed
// or when the message channel itself is closed.
func (svc *WorkerService) Recv() {
	msgCh := svc.client.GetMessageChannel()
	for {
		// return if client is closed
		if svc.client.IsClosed() {
			return
		}

		// receive message from channel; a closed channel would otherwise
		// yield nil messages forever and crash the handler on msg.Code
		msg, ok := <-msgCh
		if !ok {
			return
		}
		if msg == nil {
			continue
		}

		// A failed message must not stop the loop, so the error is dropped here.
		// NOTE(review): handleStreamMessage wraps errors with trace.TraceError,
		// which presumably reports them — confirm before adding logging here.
		_ = svc.handleStreamMessage(msg)
	}
}
// handleStreamMessage dispatches one stream message by its code:
//   - PING: sends a heartbeat carrying the node's basic info;
//   - RUN_TASK: decodes a task from msg.Data and runs it by id;
//   - CANCEL_TASK: decodes a task from msg.Data and cancels it by id.
//
// Any other code is ignored. Errors are returned wrapped by trace.TraceError.
func (svc *WorkerService) handleStreamMessage(msg *grpc.StreamMessage) (err error) {
	log.Debugf("[WorkerService] handle msg: %v", msg)

	switch code := msg.Code; code {
	case grpc.StreamMessageCode_PING:
		nodeClient := svc.client.GetNodeClient()
		req := svc.client.NewRequest(svc.cfgSvc.GetBasicNodeInfo())
		if _, hbErr := nodeClient.SendHeartbeat(context.Background(), req); hbErr != nil {
			return trace.TraceError(hbErr)
		}

	case grpc.StreamMessageCode_RUN_TASK:
		var task models.Task
		decodeErr := json.Unmarshal(msg.Data, &task)
		if decodeErr != nil {
			return trace.TraceError(decodeErr)
		}
		runErr := svc.handlerSvc.Run(task.Id)
		if runErr != nil {
			return trace.TraceError(runErr)
		}

	case grpc.StreamMessageCode_CANCEL_TASK:
		var task models.Task
		decodeErr := json.Unmarshal(msg.Data, &task)
		if decodeErr != nil {
			return trace.TraceError(decodeErr)
		}
		cancelErr := svc.handlerSvc.Cancel(task.Id)
		if cancelErr != nil {
			return trace.TraceError(cancelErr)
		}
	}

	return nil
}
// ReportStatus sends a status report and then sleeps for heartbeatInterval,
// repeating until the client reports it is closed.
func (svc *WorkerService) ReportStatus() {
	for !svc.client.IsClosed() {
		svc.reportStatus()
		time.Sleep(svc.heartbeatInterval)
	}
}
// GetConfigService returns the node config service held by the worker.
func (svc *WorkerService) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
	cfgSvc = svc.cfgSvc
	return cfgSvc
}
// GetConfigPath returns the configured node config path.
func (svc *WorkerService) GetConfigPath() (path string) {
	path = svc.cfgPath
	return path
}
// SetConfigPath replaces the node config path stored on the worker.
func (svc *WorkerService) SetConfigPath(path string) {
	svc.cfgPath = path
}
// GetAddress returns the address setting stored on the worker (may be nil).
func (svc *WorkerService) GetAddress() (address interfaces.Address) {
	address = svc.address
	return address
}
// SetAddress replaces the address setting stored on the worker.
func (svc *WorkerService) SetAddress(address interfaces.Address) {
	svc.address = address
}
// SetHeartbeatInterval sets the interval used between status reports and as
// the timeout of each heartbeat call.
func (svc *WorkerService) SetHeartbeatInterval(duration time.Duration) {
	svc.heartbeatInterval = duration
}
// reportStatus sends a single heartbeat carrying the node key to the master.
// The call is bounded by a timeout equal to heartbeatInterval; a failure is
// printed via trace.PrintError and otherwise ignored.
func (svc *WorkerService) reportStatus() {
	ctx, cancel := context.WithTimeout(context.Background(), svc.heartbeatInterval)
	defer cancel()

	nodeClient := svc.client.GetNodeClient()
	req := &grpc.Request{
		NodeKey: svc.cfgSvc.GetNodeKey(),
	}
	if _, hbErr := nodeClient.SendHeartbeat(ctx, req); hbErr != nil {
		trace.PrintError(hbErr)
	}
}
// NewWorkerService builds a WorkerService with default settings (config path
// from config2.GetConfigPath, 15s heartbeat interval, empty node model),
// applies opts in order, resolves its dependencies from the DI container and
// calls Init. It returns a nil service together with the error if dependency
// resolution or Init fails.
func NewWorkerService(opts ...Option) (res *WorkerService, err error) {
	svc := &WorkerService{
		cfgPath:           config2.GetConfigPath(),
		heartbeatInterval: 15 * time.Second,
		n:                 &models.Node{},
	}

	// caller-supplied options override the defaults above
	for i := range opts {
		opts[i](svc)
	}

	// NOTE(review): clientOpts is assembled here but never passed on within
	// this function — confirm whether it was meant to configure the client.
	var clientOpts []client.Option
	if svc.address != nil {
		clientOpts = append(clientOpts, client.WithAddress(svc.address))
	}

	// pull the collaborators out of the dependency-injection container
	inject := func(
		cfgSvc interfaces.NodeConfigService,
		grpcClient interfaces.GrpcClient,
		taskHandlerSvc interfaces.TaskHandlerService,
	) {
		svc.cfgSvc = cfgSvc
		svc.client = grpcClient
		svc.handlerSvc = taskHandlerSvc
	}
	if err = container.GetContainer().Invoke(inject); err != nil {
		return nil, err
	}

	if err = svc.Init(); err != nil {
		return nil, err
	}

	return svc, nil
}
// ProvideWorkerService returns a constructor suitable for a DI container.
// When path is empty or equals config2.GetConfigPath(), the viper setting
// "config.path" is used if it is non-empty, otherwise config2.GetConfigPath().
// The resolved path is appended to opts as WithConfigPath, and the returned
// function builds the service with NewWorkerService(opts...).
func ProvideWorkerService(path string, opts ...Option) func() (interfaces.NodeWorkerService, error) {
	useDefault := path == "" || path == config2.GetConfigPath()
	if useDefault {
		if configured := viper.GetString("config.path"); configured != "" {
			path = configured
		} else {
			path = config2.GetConfigPath()
		}
	}

	opts = append(opts, WithConfigPath(path))

	provider := func() (interfaces.NodeWorkerService, error) {
		return NewWorkerService(opts...)
	}
	return provider
}

View File

@@ -5,14 +5,17 @@ import (
"encoding/json"
"github.com/apex/log"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/client"
"github.com/crawlab-team/crawlab/core/interfaces"
client2 "github.com/crawlab-team/crawlab/core/models/client"
"github.com/crawlab-team/crawlab/core/models/models"
nodeconfig "github.com/crawlab-team/crawlab/core/node/config"
"github.com/crawlab-team/crawlab/core/task/handler"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/crawlab/trace"
"go.mongodb.org/mongo-driver/bson"
"sync"
"time"
)
@@ -28,7 +31,7 @@ type WorkerServiceV2 struct {
heartbeatInterval time.Duration
// internals
n interfaces.Node
n *models.NodeV2
s grpc.NodeService_SubscribeClient
}
@@ -74,15 +77,21 @@ func (svc *WorkerServiceV2) Stop() {
func (svc *WorkerServiceV2) Register() {
ctx, cancel := svc.client.Context()
defer cancel()
req := svc.client.NewRequest(svc.GetConfigService().GetBasicNodeInfo())
res, err := svc.client.NodeClient.Register(ctx, req)
_, err := svc.client.NodeClient.Register(ctx, &grpc.NodeServiceRegisterRequest{
Key: svc.cfgSvc.GetNodeKey(),
Name: svc.cfgSvc.GetNodeName(),
IsMaster: svc.cfgSvc.IsMaster(),
AuthKey: svc.cfgSvc.GetAuthKey(),
MaxRunners: int32(svc.cfgSvc.GetMaxRunners()),
})
if err != nil {
panic(err)
}
if err := json.Unmarshal(res.Data, svc.n); err != nil {
svc.n, err = client2.NewModelServiceV2[models.NodeV2]().GetOne(bson.M{"key": svc.GetConfigService().GetNodeKey()}, nil)
if err != nil {
panic(err)
}
log.Infof("worker[%s] registered to master. id: %s", svc.GetConfigService().GetNodeKey(), svc.n.GetId().Hex())
log.Infof("worker[%s] registered to master. id: %s", svc.GetConfigService().GetNodeKey(), svc.n.Id.Hex())
return
}
@@ -108,7 +117,10 @@ func (svc *WorkerServiceV2) handleStreamMessage(msg *grpc.StreamMessage) (err er
log.Debugf("[WorkerServiceV2] handle msg: %v", msg)
switch msg.Code {
case grpc.StreamMessageCode_PING:
if _, err := svc.client.NodeClient.SendHeartbeat(context.Background(), svc.client.NewRequest(svc.cfgSvc.GetBasicNodeInfo())); err != nil {
_, err := svc.client.NodeClient.SendHeartbeat(context.Background(), &grpc.NodeServiceSendHeartbeatRequest{
Key: svc.cfgSvc.GetNodeKey(),
})
if err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_RUN_TASK:
@@ -133,9 +145,11 @@ func (svc *WorkerServiceV2) handleStreamMessage(msg *grpc.StreamMessage) (err er
}
func (svc *WorkerServiceV2) ReportStatus() {
ticker := time.NewTicker(svc.heartbeatInterval)
for {
// return if client is closed
if svc.client.IsClosed() {
ticker.Stop()
return
}
@@ -143,7 +157,7 @@ func (svc *WorkerServiceV2) ReportStatus() {
svc.reportStatus()
// sleep
time.Sleep(svc.heartbeatInterval)
<-ticker.C
}
}
@@ -174,19 +188,21 @@ func (svc *WorkerServiceV2) SetHeartbeatInterval(duration time.Duration) {
func (svc *WorkerServiceV2) reportStatus() {
ctx, cancel := context.WithTimeout(context.Background(), svc.heartbeatInterval)
defer cancel()
_, err := svc.client.NodeClient.SendHeartbeat(ctx, &grpc.Request{
NodeKey: svc.cfgSvc.GetNodeKey(),
_, err := svc.client.NodeClient.SendHeartbeat(ctx, &grpc.NodeServiceSendHeartbeatRequest{
Key: svc.cfgSvc.GetNodeKey(),
})
if err != nil {
trace.PrintError(err)
}
}
func NewWorkerServiceV2() (res *WorkerServiceV2, err error) {
var workerServiceV2 *WorkerServiceV2
var workerServiceV2Once = new(sync.Once)
func newWorkerServiceV2() (res *WorkerServiceV2, err error) {
svc := &WorkerServiceV2{
cfgPath: config2.GetConfigPath(),
heartbeatInterval: 15 * time.Second,
n: &models.Node{},
}
// dependency options
@@ -195,14 +211,8 @@ func NewWorkerServiceV2() (res *WorkerServiceV2, err error) {
clientOpts = append(clientOpts, client.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
) {
svc.cfgSvc = cfgSvc
}); err != nil {
return nil, err
}
// node config service
svc.cfgSvc = nodeconfig.GetNodeConfigService()
// grpc client
svc.client = client.GetGrpcClientV2()
@@ -214,9 +224,20 @@ func NewWorkerServiceV2() (res *WorkerServiceV2, err error) {
}
// init
if err := svc.Init(); err != nil {
err = svc.Init()
if err != nil {
return nil, err
}
return svc, nil
}
// workerServiceV2Err remembers the construction error so that every call to
// GetWorkerServiceV2, not only the first one, reports a failed initialization.
var workerServiceV2Err error

// GetWorkerServiceV2 returns the process-wide worker service, constructing it
// with newWorkerServiceV2 on the first call.
//
// The construction result (service and error) is cached: if the first call
// failed, later calls return the same error instead of a nil service paired
// with a nil error.
func GetWorkerServiceV2() (res *WorkerServiceV2, err error) {
	workerServiceV2Once.Do(func() {
		workerServiceV2, workerServiceV2Err = newWorkerServiceV2()
		if workerServiceV2Err != nil {
			log.Errorf("failed to get worker service: %v", workerServiceV2Err)
		}
	})
	return workerServiceV2, workerServiceV2Err
}

View File

@@ -1,206 +0,0 @@
package test
import (
"github.com/crawlab-team/crawlab/core/entity"
"github.com/crawlab-team/crawlab/core/interfaces"
service2 "github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/node/service"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/spf13/viper"
"go.uber.org/dig"
"io/ioutil"
"os"
"path"
"testing"
"time"
)
// init builds the shared fixture T when the package is loaded and panics if
// the fixture cannot be created.
func init() {
	fixture, err := NewTest()
	if err != nil {
		panic(err)
	}
	T = fixture
}
// T is the package-level fixture; init assigns it from NewTest.
var T *Test
// Test bundles the services created by NewTest for the node service tests.
type Test struct {
// DefaultSvc is the master service built by service.NewMasterService() with no options.
DefaultSvc interfaces.NodeMasterService
// MasterSvc is the master service resolved from the dig container.
MasterSvc interfaces.NodeMasterService
// WorkerSvc is the worker service resolved from the dig container.
WorkerSvc interfaces.NodeWorkerService
// MasterSvcMonitor is the master built with the monitor config, a 3s monitor interval and WithStopOnError.
MasterSvcMonitor interfaces.NodeMasterService
// WorkerSvcMonitor is the worker built with the monitor config, a 1s heartbeat interval and WithStopOnError.
WorkerSvcMonitor interfaces.NodeWorkerService
// ModelSvc is the model service resolved from the dig container; the fixture uses it to drop all data.
ModelSvc service2.ModelService
}
// NewTest creates the fixture for the node service tests.
//
// It recreates the directory named by the viper key "metadata", writes four
// node config files into it (master, worker, master-monitor, worker-monitor),
// wires a master, a worker and the model service through a dig container,
// builds the default and the monitor services directly, drops all data via
// the model service and visualizes the container. Any failing step other than
// the best-effort directory reset and data drop aborts with (nil, err).
func NewTest() (res *Test, err error) {
	fixture := &Test{}

	// start from an empty metadata directory (errors are ignored on purpose)
	metadataDir := viper.GetString("metadata")
	_ = os.RemoveAll(metadataDir)
	_ = os.MkdirAll(metadataDir, os.FileMode(0766))

	// writeNodeConfig stores one node config file inside the metadata
	// directory and returns the path it was written to.
	writeNodeConfig := func(fileName string, content string) (string, error) {
		configPath := path.Join(metadataDir, fileName)
		return configPath, ioutil.WriteFile(configPath, []byte(content), os.FileMode(0766))
	}

	// network settings: each worker dials the port its master listens on
	const (
		masterHost        = "0.0.0.0"
		masterPort        = "9667"
		workerHost        = "localhost"
		workerPort        = masterPort
		masterMonitorHost = masterHost
		masterMonitorPort = "9668"
		workerMonitorHost = workerHost
		workerMonitorPort = masterMonitorPort
	)

	// master config
	masterNodeConfigPath, err := writeNodeConfig("config-master.json", "{\"key\":\"master\",\"is_master\":true}")
	if err != nil {
		return nil, err
	}

	// worker config
	workerNodeConfigPath, err := writeNodeConfig("config-worker.json", "{\"key\":\"worker\",\"is_master\":false}")
	if err != nil {
		return nil, err
	}

	// master-for-monitor config
	masterNodeMonitorConfigPath, err := writeNodeConfig("config-master-monitor.json", "{\"key\":\"master-monitor\",\"is_master\":true}")
	if err != nil {
		return nil, err
	}

	// worker-for-monitor config
	workerNodeMonitorConfigPath, err := writeNodeConfig("config-worker-monitor.json", "{\"key\":\"worker-monitor\",\"is_master\":false}")
	if err != nil {
		return nil, err
	}

	// dependency injection: master, worker and model service
	c := dig.New()
	err = c.Provide(service.ProvideMasterService(
		masterNodeConfigPath,
		service.WithMonitorInterval(3*time.Second),
		service.WithAddress(entity.NewAddress(&entity.AddressOptions{
			Host: masterHost,
			Port: masterPort,
		})),
	))
	if err != nil {
		return nil, err
	}
	err = c.Provide(service.ProvideWorkerService(
		workerNodeConfigPath,
		service.WithHeartbeatInterval(1*time.Second),
		service.WithAddress(entity.NewAddress(&entity.AddressOptions{
			Host: workerHost,
			Port: workerPort,
		})),
	))
	if err != nil {
		return nil, err
	}
	if err = c.Provide(service2.NewService); err != nil {
		return nil, err
	}
	err = c.Invoke(func(masterSvc interfaces.NodeMasterService, workerSvc interfaces.NodeWorkerService, modelSvc service2.ModelService) {
		fixture.MasterSvc = masterSvc
		fixture.WorkerSvc = workerSvc
		fixture.ModelSvc = modelSvc
	})
	if err != nil {
		return nil, err
	}

	// default master service, built without any option
	fixture.DefaultSvc, err = service.NewMasterService()
	if err != nil {
		return nil, err
	}

	// master used by the monitor test
	fixture.MasterSvcMonitor, err = service.NewMasterService(
		service.WithConfigPath(masterNodeMonitorConfigPath),
		service.WithAddress(entity.NewAddress(&entity.AddressOptions{
			Host: masterMonitorHost,
			Port: masterMonitorPort,
		})),
		service.WithMonitorInterval(3*time.Second),
		service.WithStopOnError(),
	)
	if err != nil {
		return nil, err
	}

	// worker used by the monitor test
	fixture.WorkerSvcMonitor, err = service.NewWorkerService(
		service.WithConfigPath(workerNodeMonitorConfigPath),
		service.WithAddress(entity.NewAddress(&entity.AddressOptions{
			Host: workerMonitorHost,
			Port: workerMonitorPort,
		})),
		service.WithHeartbeatInterval(1*time.Second),
		service.WithStopOnError(),
	)
	if err != nil {
		return nil, err
	}

	// wipe the database; a failure here is ignored
	_ = fixture.ModelSvc.DropAll()

	// visualize dependencies
	if err = utils.VisualizeContainer(c); err != nil {
		return nil, err
	}

	return fixture, nil
}
// Setup resets the environment before a test — it drops all data (panicking
// on failure) and removes the metadata directory — and registers the same
// reset to run again when the test finishes.
func (t *Test) Setup(t2 *testing.T) {
	// the pre-test reset is exactly what Cleanup performs
	t.Cleanup()
	t2.Cleanup(t.Cleanup)
}
// Cleanup drops all data through the model service, panicking if that fails,
// and then removes the directory named by the viper key "metadata" (errors
// from the removal are ignored).
func (t *Test) Cleanup() {
	dropErr := t.ModelSvc.DropAll()
	if dropErr != nil {
		panic(dropErr)
	}

	metadataDir := viper.GetString("metadata")
	_ = os.RemoveAll(metadataDir)
}
// StartMasterWorker is the exported wrapper around startMasterWorker, which
// operates on the package-level fixture T rather than on the receiver.
func (t *Test) StartMasterWorker() {
	startMasterWorker()
}
// StopMasterWorker is the exported wrapper around stopMasterWorker, which
// operates on the package-level fixture T rather than on the receiver.
func (t *Test) StopMasterWorker() {
	stopMasterWorker()
}
// startMasterWorker starts T.MasterSvc and then T.WorkerSvc in background
// goroutines, pausing one second after each launch.
func startMasterWorker() {
	const settle = 1 * time.Second

	go T.MasterSvc.Start()
	time.Sleep(settle)

	go T.WorkerSvc.Start()
	time.Sleep(settle)
}
// stopMasterWorker stops T.MasterSvc and then T.WorkerSvc in background
// goroutines, pausing one second after each call.
func stopMasterWorker() {
	const settle = 1 * time.Second

	go T.MasterSvc.Stop()
	time.Sleep(settle)

	go T.WorkerSvc.Stop()
	time.Sleep(settle)
}
// startMasterWorkerMonitor starts T.MasterSvcMonitor and then
// T.WorkerSvcMonitor in background goroutines, pausing one second after each
// launch.
func startMasterWorkerMonitor() {
	const settle = 1 * time.Second

	go T.MasterSvcMonitor.Start()
	time.Sleep(settle)

	go T.WorkerSvcMonitor.Start()
	time.Sleep(settle)
}
// stopMasterWorkerMonitor stops T.MasterSvcMonitor and then
// T.WorkerSvcMonitor in background goroutines, pausing one second after each
// call.
func stopMasterWorkerMonitor() {
	const settle = 1 * time.Second

	go T.MasterSvcMonitor.Stop()
	time.Sleep(settle)

	go T.WorkerSvcMonitor.Stop()
	time.Sleep(settle)
}

View File

@@ -1,67 +0,0 @@
package test
import (
"github.com/crawlab-team/crawlab/core/constants"
"github.com/stretchr/testify/require"
"testing"
"time"
)
// TestNodeServices_Master_Worker starts a master and a worker and checks that
// both are stored as online nodes with the expected key and master flag.
func TestNodeServices_Master_Worker(t *testing.T) {
	// rebuild the fixture; fail the test instead of dereferencing a nil T
	// when the fixture cannot be created
	var err error
	T, err = NewTest()
	require.NoError(t, err)
	T.Setup(t)
	startMasterWorker()

	// validate master
	masterNodeKey := T.MasterSvc.GetConfigService().GetNodeKey()
	masterNode, err := T.ModelSvc.GetNodeByKey(masterNodeKey, nil)
	require.Nil(t, err)
	require.Equal(t, constants.NodeStatusOnline, masterNode.Status)
	require.Equal(t, masterNodeKey, masterNode.Key)
	require.True(t, masterNode.IsMaster)

	// validate worker
	workerNodeKey := T.WorkerSvc.GetConfigService().GetNodeKey()
	workerNode, err := T.ModelSvc.GetNodeByKey(workerNodeKey, nil)
	require.Nil(t, err)
	require.Equal(t, constants.NodeStatusOnline, workerNode.Status)
	require.Equal(t, workerNodeKey, workerNode.Key)
	require.False(t, workerNode.IsMaster)

	stopMasterWorker()
}
// TestNodeServices_Default starts the default master service and checks that
// it is stored as an online master node with the expected key.
func TestNodeServices_Default(t *testing.T) {
	// rebuild the fixture; fail the test instead of dereferencing a nil T
	// when the fixture cannot be created
	var err error
	T, err = NewTest()
	require.NoError(t, err)
	T.Setup(t)

	// give the service a second to come up
	go T.DefaultSvc.Start()
	time.Sleep(1 * time.Second)

	// validate default
	defaultNodeKey := T.DefaultSvc.GetConfigService().GetNodeKey()
	defaultNode, err := T.ModelSvc.GetNodeByKey(defaultNodeKey, nil)
	require.Nil(t, err)
	require.Equal(t, constants.NodeStatusOnline, defaultNode.Status)
	require.Equal(t, defaultNodeKey, defaultNode.Key)
	require.True(t, defaultNode.IsMaster)

	T.DefaultSvc.Stop()
	time.Sleep(1 * time.Second)
}
// TestNodeServices_Monitor starts the monitor master/worker pair, stops the
// worker, and checks that the master's server reports itself stopped
// afterwards.
func TestNodeServices_Monitor(t *testing.T) {
	// rebuild the fixture; fail the test instead of dereferencing a nil T
	// when the fixture cannot be created
	var err error
	T, err = NewTest()
	require.NoError(t, err)
	T.Setup(t)
	startMasterWorkerMonitor()
	time.Sleep(3 * time.Second)

	// stop worker and wait before checking the master
	T.WorkerSvcMonitor.Stop()
	time.Sleep(5 * time.Second)

	// validate
	require.True(t, T.MasterSvcMonitor.GetServer().IsStopped())

	stopMasterWorkerMonitor()
}