feat: added modules

This commit is contained in:
Marvin Zhang
2024-06-14 15:42:50 +08:00
parent 4d0adcb6f0
commit c4d795f47f
626 changed files with 60104 additions and 0 deletions

View File

@@ -0,0 +1,368 @@
package service
import (
"github.com/apex/log"
"github.com/crawlab-team/crawlab-db/mongo"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/errors"
"github.com/crawlab-team/crawlab/core/grpc/server"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/common"
"github.com/crawlab-team/crawlab/core/models/delegate"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/node/config"
"github.com/crawlab-team/crawlab/core/notification"
"github.com/crawlab-team/crawlab/core/system"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/go-trace"
"github.com/spf13/viper"
"go.mongodb.org/mongo-driver/bson"
mongo2 "go.mongodb.org/mongo-driver/mongo"
"time"
)
type MasterService struct {
// dependencies
modelSvc service.ModelService
cfgSvc interfaces.NodeConfigService
server interfaces.GrpcServer
schedulerSvc interfaces.TaskSchedulerService
handlerSvc interfaces.TaskHandlerService
scheduleSvc interfaces.ScheduleService
notificationSvc *notification.Service
spiderAdminSvc interfaces.SpiderAdminService
systemSvc *system.Service
// settings
cfgPath string
address interfaces.Address
monitorInterval time.Duration
stopOnError bool
}
func (svc *MasterService) Init() (err error) {
// do nothing
return nil
}
func (svc *MasterService) Start() {
// create indexes
common.CreateIndexes()
// start grpc server
if err := svc.server.Start(); err != nil {
panic(err)
}
// register to db
if err := svc.Register(); err != nil {
panic(err)
}
// start monitoring worker nodes
go svc.Monitor()
// start task handler
go svc.handlerSvc.Start()
// start task scheduler
go svc.schedulerSvc.Start()
// start schedule service
go svc.scheduleSvc.Start()
// start notification service
go svc.notificationSvc.Start()
// start spider admin service
go svc.spiderAdminSvc.Start()
// wait for quit signal
svc.Wait()
// stop
svc.Stop()
}
func (svc *MasterService) Wait() {
utils.DefaultWait()
}
func (svc *MasterService) Stop() {
_ = svc.server.Stop()
log.Infof("master[%s] service has stopped", svc.GetConfigService().GetNodeKey())
}
func (svc *MasterService) Monitor() {
log.Infof("master[%s] monitoring started", svc.GetConfigService().GetNodeKey())
for {
if err := svc.monitor(); err != nil {
trace.PrintError(err)
if svc.stopOnError {
log.Errorf("master[%s] monitor error, now stopping...", svc.GetConfigService().GetNodeKey())
svc.Stop()
return
}
}
time.Sleep(svc.monitorInterval)
}
}
func (svc *MasterService) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
return svc.cfgSvc
}
func (svc *MasterService) GetConfigPath() (path string) {
return svc.cfgPath
}
func (svc *MasterService) SetConfigPath(path string) {
svc.cfgPath = path
}
func (svc *MasterService) GetAddress() (address interfaces.Address) {
return svc.address
}
func (svc *MasterService) SetAddress(address interfaces.Address) {
svc.address = address
}
func (svc *MasterService) SetMonitorInterval(duration time.Duration) {
svc.monitorInterval = duration
}
func (svc *MasterService) Register() (err error) {
nodeKey := svc.GetConfigService().GetNodeKey()
nodeName := svc.GetConfigService().GetNodeName()
node, err := svc.modelSvc.GetNodeByKey(nodeKey, nil)
if err != nil && err.Error() == mongo2.ErrNoDocuments.Error() {
// not exists
log.Infof("master[%s] does not exist in db", nodeKey)
node := &models.Node{
Key: nodeKey,
Name: nodeName,
MaxRunners: config.DefaultConfigOptions.MaxRunners,
IsMaster: true,
Status: constants.NodeStatusOnline,
Enabled: true,
Active: true,
ActiveTs: time.Now(),
}
if viper.GetInt("task.handler.maxRunners") > 0 {
node.MaxRunners = viper.GetInt("task.handler.maxRunners")
}
nodeD := delegate.NewModelNodeDelegate(node)
if err := nodeD.Add(); err != nil {
return err
}
log.Infof("added master[%s] in db. id: %s", nodeKey, nodeD.GetModel().GetId().Hex())
return nil
} else if err == nil {
// exists
log.Infof("master[%s] exists in db", nodeKey)
nodeD := delegate.NewModelNodeDelegate(node)
if err := nodeD.UpdateStatusOnline(); err != nil {
return err
}
log.Infof("updated master[%s] in db. id: %s", nodeKey, nodeD.GetModel().GetId().Hex())
return nil
} else {
// error
return err
}
}
func (svc *MasterService) StopOnError() {
svc.stopOnError = true
}
func (svc *MasterService) GetServer() (svr interfaces.GrpcServer) {
return svc.server
}
func (svc *MasterService) monitor() (err error) {
// update master node status in db
if err := svc.updateMasterNodeStatus(); err != nil {
if err.Error() == mongo2.ErrNoDocuments.Error() {
return nil
}
return err
}
// all worker nodes
nodes, err := svc.getAllWorkerNodes()
if err != nil {
return err
}
// error flag
isErr := false
// iterate all nodes
for _, n := range nodes {
// subscribe
if err := svc.subscribeNode(&n); err != nil {
isErr = true
continue
}
// ping client
if err := svc.pingNodeClient(&n); err != nil {
isErr = true
continue
}
// update node available runners
if err := svc.updateNodeAvailableRunners(&n); err != nil {
isErr = true
continue
}
}
if isErr {
return trace.TraceError(errors.ErrorNodeMonitorError)
}
return nil
}
func (svc *MasterService) getAllWorkerNodes() (nodes []models.Node, err error) {
query := bson.M{
"key": bson.M{"$ne": svc.cfgSvc.GetNodeKey()}, // not self
"active": true, // active
}
nodes, err = svc.modelSvc.GetNodeList(query, nil)
if err != nil {
if err == mongo2.ErrNoDocuments {
return nil, nil
}
return nil, trace.TraceError(err)
}
return nodes, nil
}
func (svc *MasterService) updateMasterNodeStatus() (err error) {
nodeKey := svc.GetConfigService().GetNodeKey()
node, err := svc.modelSvc.GetNodeByKey(nodeKey, nil)
if err != nil {
return err
}
nodeD := delegate.NewModelNodeDelegate(node)
return nodeD.UpdateStatusOnline()
}
func (svc *MasterService) setWorkerNodeOffline(n interfaces.Node) (err error) {
return delegate.NewModelNodeDelegate(n).UpdateStatusOffline()
}
func (svc *MasterService) subscribeNode(n interfaces.Node) (err error) {
_, err = svc.server.GetSubscribe("node:" + n.GetKey())
if err != nil {
log.Errorf("cannot subscribe worker node[%s]: %v", n.GetKey(), err)
if err := svc.setWorkerNodeOffline(n); err != nil {
return trace.TraceError(err)
}
return trace.TraceError(err)
}
return nil
}
func (svc *MasterService) pingNodeClient(n interfaces.Node) (err error) {
if err := svc.server.SendStreamMessage("node:"+n.GetKey(), grpc.StreamMessageCode_PING); err != nil {
log.Errorf("cannot ping worker node client[%s]: %v", n.GetKey(), err)
if err := svc.setWorkerNodeOffline(n); err != nil {
return trace.TraceError(err)
}
return trace.TraceError(err)
}
return nil
}
func (svc *MasterService) updateNodeAvailableRunners(n interfaces.Node) (err error) {
query := bson.M{
"node_id": n.GetId(),
"status": constants.TaskStatusRunning,
}
runningTasksCount, err := mongo.GetMongoCol(interfaces.ModelColNameTask).Count(query)
if err != nil {
return trace.TraceError(err)
}
n.SetAvailableRunners(n.GetMaxRunners() - runningTasksCount)
return delegate.NewModelDelegate(n).Save()
}
func NewMasterService(opts ...Option) (res interfaces.NodeMasterService, err error) {
// master service
svc := &MasterService{
cfgPath: config2.GetConfigPath(),
monitorInterval: 15 * time.Second,
stopOnError: false,
}
// apply options
for _, opt := range opts {
opt(svc)
}
// server options
var serverOpts []server.Option
if svc.address != nil {
serverOpts = append(serverOpts, server.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
modelSvc service.ModelService,
server interfaces.GrpcServer,
schedulerSvc interfaces.TaskSchedulerService,
handlerSvc interfaces.TaskHandlerService,
scheduleSvc interfaces.ScheduleService,
spiderAdminSvc interfaces.SpiderAdminService,
) {
svc.cfgSvc = cfgSvc
svc.modelSvc = modelSvc
svc.server = server
svc.schedulerSvc = schedulerSvc
svc.handlerSvc = handlerSvc
svc.scheduleSvc = scheduleSvc
svc.spiderAdminSvc = spiderAdminSvc
}); err != nil {
return nil, err
}
// notification service
svc.notificationSvc = notification.GetService()
// system service
svc.systemSvc = system.GetService()
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}
func ProvideMasterService(path string, opts ...Option) func() (interfaces.NodeMasterService, error) {
// path
if path == "" || path == config2.GetConfigPath() {
if viper.GetString("config.path") != "" {
path = viper.GetString("config.path")
} else {
path = config2.GetConfigPath()
}
}
opts = append(opts, WithConfigPath(path))
return func() (interfaces.NodeMasterService, error) {
return NewMasterService(opts...)
}
}

View File

@@ -0,0 +1,388 @@
package service
import (
"errors"
"github.com/apex/log"
"github.com/cenkalti/backoff/v4"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/server"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/common"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/node/config"
"github.com/crawlab-team/crawlab/core/notification"
"github.com/crawlab-team/crawlab/core/schedule"
"github.com/crawlab-team/crawlab/core/spider/admin"
"github.com/crawlab-team/crawlab/core/system"
"github.com/crawlab-team/crawlab/core/task/handler"
"github.com/crawlab-team/crawlab/core/task/scheduler"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/go-trace"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
mongo2 "go.mongodb.org/mongo-driver/mongo"
"sync"
"time"
)
type MasterServiceV2 struct {
// dependencies
cfgSvc interfaces.NodeConfigService
server *server.GrpcServerV2
schedulerSvc *scheduler.ServiceV2
handlerSvc *handler.ServiceV2
scheduleSvc *schedule.ServiceV2
notificationSvc *notification.Service
spiderAdminSvc *admin.ServiceV2
systemSvc *system.Service
// settings
cfgPath string
address interfaces.Address
monitorInterval time.Duration
stopOnError bool
}
func (svc *MasterServiceV2) Init() (err error) {
// do nothing
return nil
}
func (svc *MasterServiceV2) Start() {
// create indexes
common.CreateIndexes()
// start grpc server
if err := svc.server.Start(); err != nil {
panic(err)
}
// register to db
if err := svc.Register(); err != nil {
panic(err)
}
// start monitoring worker nodes
go svc.Monitor()
// start task handler
go svc.handlerSvc.Start()
// start task scheduler
go svc.schedulerSvc.Start()
// start schedule service
go svc.scheduleSvc.Start()
// start notification service
go svc.notificationSvc.Start()
// start spider admin service
go svc.spiderAdminSvc.Start()
// wait for quit signal
svc.Wait()
// stop
svc.Stop()
}
func (svc *MasterServiceV2) Wait() {
utils.DefaultWait()
}
func (svc *MasterServiceV2) Stop() {
_ = svc.server.Stop()
log.Infof("master[%s] service has stopped", svc.GetConfigService().GetNodeKey())
}
func (svc *MasterServiceV2) Monitor() {
log.Infof("master[%s] monitoring started", svc.GetConfigService().GetNodeKey())
for {
if err := svc.monitor(); err != nil {
trace.PrintError(err)
if svc.stopOnError {
log.Errorf("master[%s] monitor error, now stopping...", svc.GetConfigService().GetNodeKey())
svc.Stop()
return
}
}
time.Sleep(svc.monitorInterval)
}
}
func (svc *MasterServiceV2) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
return svc.cfgSvc
}
func (svc *MasterServiceV2) GetConfigPath() (path string) {
return svc.cfgPath
}
func (svc *MasterServiceV2) SetConfigPath(path string) {
svc.cfgPath = path
}
func (svc *MasterServiceV2) GetAddress() (address interfaces.Address) {
return svc.address
}
func (svc *MasterServiceV2) SetAddress(address interfaces.Address) {
svc.address = address
}
func (svc *MasterServiceV2) SetMonitorInterval(duration time.Duration) {
svc.monitorInterval = duration
}
func (svc *MasterServiceV2) Register() (err error) {
nodeKey := svc.GetConfigService().GetNodeKey()
nodeName := svc.GetConfigService().GetNodeName()
node, err := service.NewModelServiceV2[models.NodeV2]().GetOne(bson.M{"key": nodeKey}, nil)
if err != nil && err.Error() == mongo2.ErrNoDocuments.Error() {
// not exists
log.Infof("master[%s] does not exist in db", nodeKey)
node := models.NodeV2{
Key: nodeKey,
Name: nodeName,
MaxRunners: config.DefaultConfigOptions.MaxRunners,
IsMaster: true,
Status: constants.NodeStatusOnline,
Enabled: true,
Active: true,
ActiveAt: time.Now(),
}
node.SetCreated(primitive.NilObjectID)
node.SetUpdated(primitive.NilObjectID)
id, err := service.NewModelServiceV2[models.NodeV2]().InsertOne(node)
if err != nil {
return err
}
log.Infof("added master[%s] in db. id: %s", nodeKey, id.Hex())
return nil
} else if err == nil {
// exists
log.Infof("master[%s] exists in db", nodeKey)
node.Status = constants.NodeStatusOnline
node.Active = true
node.ActiveAt = time.Now()
err = service.NewModelServiceV2[models.NodeV2]().ReplaceById(node.Id, *node)
if err != nil {
return err
}
log.Infof("updated master[%s] in db. id: %s", nodeKey, node.Id.Hex())
return nil
} else {
// error
return err
}
}
func (svc *MasterServiceV2) StopOnError() {
svc.stopOnError = true
}
func (svc *MasterServiceV2) GetServer() (svr interfaces.GrpcServer) {
return svc.server
}
func (svc *MasterServiceV2) monitor() (err error) {
// update master node status in db
if err := svc.updateMasterNodeStatus(); err != nil {
if err.Error() == mongo2.ErrNoDocuments.Error() {
return nil
}
return err
}
// all worker nodes
workerNodes, err := svc.getAllWorkerNodes()
if err != nil {
return err
}
// iterate all worker nodes
wg := sync.WaitGroup{}
wg.Add(len(workerNodes))
for _, n := range workerNodes {
go func(n *models.NodeV2) {
// subscribe
ok := svc.subscribeNode(n)
if !ok {
go svc.setWorkerNodeOffline(n)
wg.Done()
return
}
// ping client
ok = svc.pingNodeClient(n)
if !ok {
go svc.setWorkerNodeOffline(n)
wg.Done()
return
}
// update node available runners
if err := svc.updateNodeAvailableRunners(n); err != nil {
trace.PrintError(err)
wg.Done()
return
}
// done
wg.Done()
}(&n)
}
wg.Wait()
return nil
}
func (svc *MasterServiceV2) getAllWorkerNodes() (nodes []models.NodeV2, err error) {
query := bson.M{
"key": bson.M{"$ne": svc.cfgSvc.GetNodeKey()}, // not self
"active": true, // active
}
nodes, err = service.NewModelServiceV2[models.NodeV2]().GetMany(query, nil)
if err != nil {
if errors.Is(err, mongo2.ErrNoDocuments) {
return nil, nil
}
return nil, trace.TraceError(err)
}
return nodes, nil
}
func (svc *MasterServiceV2) updateMasterNodeStatus() (err error) {
nodeKey := svc.GetConfigService().GetNodeKey()
node, err := service.NewModelServiceV2[models.NodeV2]().GetOne(bson.M{"key": nodeKey}, nil)
if err != nil {
return err
}
node.Status = constants.NodeStatusOnline
node.Active = true
node.ActiveAt = time.Now()
err = service.NewModelServiceV2[models.NodeV2]().ReplaceById(node.Id, *node)
if err != nil {
return err
}
return nil
}
func (svc *MasterServiceV2) setWorkerNodeOffline(node *models.NodeV2) {
node.Status = constants.NodeStatusOffline
node.Active = false
err := backoff.Retry(func() error {
return service.NewModelServiceV2[models.NodeV2]().ReplaceById(node.Id, *node)
}, backoff.WithMaxRetries(backoff.NewConstantBackOff(1*time.Second), 3))
if err != nil {
trace.PrintError(err)
}
}
func (svc *MasterServiceV2) subscribeNode(n *models.NodeV2) (ok bool) {
_, err := svc.server.GetSubscribe("node:" + n.Key)
if err != nil {
log.Errorf("cannot subscribe worker node[%s]: %v", n.Key, err)
return false
}
return true
}
func (svc *MasterServiceV2) pingNodeClient(n *models.NodeV2) (ok bool) {
if err := svc.server.SendStreamMessage("node:"+n.Key, grpc.StreamMessageCode_PING); err != nil {
log.Errorf("cannot ping worker node client[%s]: %v", n.Key, err)
return false
}
return true
}
func (svc *MasterServiceV2) updateNodeAvailableRunners(node *models.NodeV2) (err error) {
query := bson.M{
"node_id": node.Id,
"status": constants.TaskStatusRunning,
}
runningTasksCount, err := service.NewModelServiceV2[models.TaskV2]().Count(query)
if err != nil {
return trace.TraceError(err)
}
node.AvailableRunners = node.MaxRunners - runningTasksCount
err = service.NewModelServiceV2[models.NodeV2]().ReplaceById(node.Id, *node)
if err != nil {
return err
}
return nil
}
func NewMasterServiceV2() (res interfaces.NodeMasterService, err error) {
// master service
svc := &MasterServiceV2{
cfgPath: config2.GetConfigPath(),
monitorInterval: 15 * time.Second,
stopOnError: false,
}
// server options
var serverOpts []server.Option
if svc.address != nil {
serverOpts = append(serverOpts, server.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
) {
svc.cfgSvc = cfgSvc
}); err != nil {
return nil, err
}
// grpc server
svc.server, err = server.GetGrpcServerV2()
if err != nil {
return nil, err
}
// scheduler service
svc.schedulerSvc, err = scheduler.GetTaskSchedulerServiceV2()
if err != nil {
return nil, err
}
// handler service
svc.handlerSvc, err = handler.GetTaskHandlerServiceV2()
if err != nil {
return nil, err
}
// schedule service
svc.scheduleSvc, err = schedule.GetScheduleServiceV2()
if err != nil {
return nil, err
}
// notification service
svc.notificationSvc = notification.GetService()
// spider admin service
svc.spiderAdminSvc, err = admin.GetSpiderAdminServiceV2()
if err != nil {
return nil, err
}
// system service
svc.systemSvc = system.GetService()
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}

View File

@@ -0,0 +1,47 @@
package service
import (
"github.com/crawlab-team/crawlab/core/interfaces"
"time"
)
type Option func(svc interfaces.NodeService)
func WithConfigPath(path string) Option {
return func(svc interfaces.NodeService) {
svc.SetConfigPath(path)
}
}
func WithAddress(address interfaces.Address) Option {
return func(svc interfaces.NodeService) {
svc.SetAddress(address)
}
}
func WithMonitorInterval(duration time.Duration) Option {
return func(svc interfaces.NodeService) {
svc2, ok := svc.(interfaces.NodeMasterService)
if ok {
svc2.SetMonitorInterval(duration)
}
}
}
func WithStopOnError() Option {
return func(svc interfaces.NodeService) {
svc2, ok := svc.(interfaces.NodeMasterService)
if ok {
svc2.StopOnError()
}
}
}
func WithHeartbeatInterval(duration time.Duration) Option {
return func(svc interfaces.NodeService) {
svc2, ok := svc.(interfaces.NodeWorkerService)
if ok {
svc2.SetHeartbeatInterval(duration)
}
}
}

View File

@@ -0,0 +1,238 @@
package service
import (
"context"
"encoding/json"
"github.com/apex/log"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/client"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/go-trace"
"github.com/spf13/viper"
"time"
)
type WorkerService struct {
// dependencies
cfgSvc interfaces.NodeConfigService
client interfaces.GrpcClient
handlerSvc interfaces.TaskHandlerService
// settings
cfgPath string
address interfaces.Address
heartbeatInterval time.Duration
// internals
n interfaces.Node
s grpc.NodeService_SubscribeClient
}
func (svc *WorkerService) Init() (err error) {
// do nothing
return nil
}
func (svc *WorkerService) Start() {
// start grpc client
if err := svc.client.Start(); err != nil {
panic(err)
}
// register to master
svc.Register()
// start receiving stream messages
go svc.Recv()
// start sending heartbeat to master
go svc.ReportStatus()
// start handler
go svc.handlerSvc.Start()
// wait for quit signal
svc.Wait()
// stop
svc.Stop()
}
func (svc *WorkerService) Wait() {
utils.DefaultWait()
}
func (svc *WorkerService) Stop() {
_ = svc.client.Stop()
log.Infof("worker[%s] service has stopped", svc.cfgSvc.GetNodeKey())
}
func (svc *WorkerService) Register() {
ctx, cancel := svc.client.Context()
defer cancel()
req := svc.client.NewRequest(svc.GetConfigService().GetBasicNodeInfo())
res, err := svc.client.GetNodeClient().Register(ctx, req)
if err != nil {
panic(err)
}
if err := json.Unmarshal(res.Data, svc.n); err != nil {
panic(err)
}
log.Infof("worker[%s] registered to master. id: %s", svc.GetConfigService().GetNodeKey(), svc.n.GetId().Hex())
return
}
func (svc *WorkerService) Recv() {
msgCh := svc.client.GetMessageChannel()
for {
// return if client is closed
if svc.client.IsClosed() {
return
}
// receive message from channel
msg := <-msgCh
// handle message
if err := svc.handleStreamMessage(msg); err != nil {
continue
}
}
}
func (svc *WorkerService) handleStreamMessage(msg *grpc.StreamMessage) (err error) {
log.Debugf("[WorkerService] handle msg: %v", msg)
switch msg.Code {
case grpc.StreamMessageCode_PING:
if _, err := svc.client.GetNodeClient().SendHeartbeat(context.Background(), svc.client.NewRequest(svc.cfgSvc.GetBasicNodeInfo())); err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_RUN_TASK:
var t models.Task
if err := json.Unmarshal(msg.Data, &t); err != nil {
return trace.TraceError(err)
}
if err := svc.handlerSvc.Run(t.Id); err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_CANCEL_TASK:
var t models.Task
if err := json.Unmarshal(msg.Data, &t); err != nil {
return trace.TraceError(err)
}
if err := svc.handlerSvc.Cancel(t.Id); err != nil {
return trace.TraceError(err)
}
}
return nil
}
func (svc *WorkerService) ReportStatus() {
for {
// return if client is closed
if svc.client.IsClosed() {
return
}
// report status
svc.reportStatus()
// sleep
time.Sleep(svc.heartbeatInterval)
}
}
func (svc *WorkerService) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
return svc.cfgSvc
}
func (svc *WorkerService) GetConfigPath() (path string) {
return svc.cfgPath
}
func (svc *WorkerService) SetConfigPath(path string) {
svc.cfgPath = path
}
func (svc *WorkerService) GetAddress() (address interfaces.Address) {
return svc.address
}
func (svc *WorkerService) SetAddress(address interfaces.Address) {
svc.address = address
}
func (svc *WorkerService) SetHeartbeatInterval(duration time.Duration) {
svc.heartbeatInterval = duration
}
func (svc *WorkerService) reportStatus() {
ctx, cancel := context.WithTimeout(context.Background(), svc.heartbeatInterval)
defer cancel()
_, err := svc.client.GetNodeClient().SendHeartbeat(ctx, &grpc.Request{
NodeKey: svc.cfgSvc.GetNodeKey(),
})
if err != nil {
trace.PrintError(err)
}
}
func NewWorkerService(opts ...Option) (res *WorkerService, err error) {
svc := &WorkerService{
cfgPath: config2.GetConfigPath(),
heartbeatInterval: 15 * time.Second,
n: &models.Node{},
}
// apply options
for _, opt := range opts {
opt(svc)
}
// dependency options
var clientOpts []client.Option
if svc.address != nil {
clientOpts = append(clientOpts, client.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
client interfaces.GrpcClient,
taskHandlerSvc interfaces.TaskHandlerService,
) {
svc.cfgSvc = cfgSvc
svc.client = client
svc.handlerSvc = taskHandlerSvc
}); err != nil {
return nil, err
}
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}
func ProvideWorkerService(path string, opts ...Option) func() (interfaces.NodeWorkerService, error) {
// path
if path == "" || path == config2.GetConfigPath() {
if viper.GetString("config.path") != "" {
path = viper.GetString("config.path")
} else {
path = config2.GetConfigPath()
}
}
opts = append(opts, WithConfigPath(path))
return func() (interfaces.NodeWorkerService, error) {
return NewWorkerService(opts...)
}
}

View File

@@ -0,0 +1,225 @@
package service
import (
"context"
"encoding/json"
"github.com/apex/log"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/client"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/task/handler"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/go-trace"
"time"
)
type WorkerServiceV2 struct {
// dependencies
cfgSvc interfaces.NodeConfigService
client *client.GrpcClientV2
handlerSvc *handler.ServiceV2
// settings
cfgPath string
address interfaces.Address
heartbeatInterval time.Duration
// internals
n interfaces.Node
s grpc.NodeService_SubscribeClient
}
func (svc *WorkerServiceV2) Init() (err error) {
// do nothing
return nil
}
func (svc *WorkerServiceV2) Start() {
// start grpc client
if err := svc.client.Start(); err != nil {
panic(err)
}
// register to master
svc.Register()
// start receiving stream messages
go svc.Recv()
// start sending heartbeat to master
go svc.ReportStatus()
// start handler
go svc.handlerSvc.Start()
// wait for quit signal
svc.Wait()
// stop
svc.Stop()
}
func (svc *WorkerServiceV2) Wait() {
utils.DefaultWait()
}
func (svc *WorkerServiceV2) Stop() {
_ = svc.client.Stop()
log.Infof("worker[%s] service has stopped", svc.cfgSvc.GetNodeKey())
}
func (svc *WorkerServiceV2) Register() {
ctx, cancel := svc.client.Context()
defer cancel()
req := svc.client.NewRequest(svc.GetConfigService().GetBasicNodeInfo())
res, err := svc.client.NodeClient.Register(ctx, req)
if err != nil {
panic(err)
}
if err := json.Unmarshal(res.Data, svc.n); err != nil {
panic(err)
}
log.Infof("worker[%s] registered to master. id: %s", svc.GetConfigService().GetNodeKey(), svc.n.GetId().Hex())
return
}
func (svc *WorkerServiceV2) Recv() {
msgCh := svc.client.GetMessageChannel()
for {
// return if client is closed
if svc.client.IsClosed() {
return
}
// receive message from channel
msg := <-msgCh
// handle message
if err := svc.handleStreamMessage(msg); err != nil {
continue
}
}
}
func (svc *WorkerServiceV2) handleStreamMessage(msg *grpc.StreamMessage) (err error) {
log.Debugf("[WorkerServiceV2] handle msg: %v", msg)
switch msg.Code {
case grpc.StreamMessageCode_PING:
if _, err := svc.client.NodeClient.SendHeartbeat(context.Background(), svc.client.NewRequest(svc.cfgSvc.GetBasicNodeInfo())); err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_RUN_TASK:
var t models.Task
if err := json.Unmarshal(msg.Data, &t); err != nil {
return trace.TraceError(err)
}
if err := svc.handlerSvc.Run(t.Id); err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_CANCEL_TASK:
var t models.Task
if err := json.Unmarshal(msg.Data, &t); err != nil {
return trace.TraceError(err)
}
if err := svc.handlerSvc.Cancel(t.Id); err != nil {
return trace.TraceError(err)
}
}
return nil
}
func (svc *WorkerServiceV2) ReportStatus() {
for {
// return if client is closed
if svc.client.IsClosed() {
return
}
// report status
svc.reportStatus()
// sleep
time.Sleep(svc.heartbeatInterval)
}
}
func (svc *WorkerServiceV2) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
return svc.cfgSvc
}
func (svc *WorkerServiceV2) GetConfigPath() (path string) {
return svc.cfgPath
}
func (svc *WorkerServiceV2) SetConfigPath(path string) {
svc.cfgPath = path
}
func (svc *WorkerServiceV2) GetAddress() (address interfaces.Address) {
return svc.address
}
func (svc *WorkerServiceV2) SetAddress(address interfaces.Address) {
svc.address = address
}
func (svc *WorkerServiceV2) SetHeartbeatInterval(duration time.Duration) {
svc.heartbeatInterval = duration
}
func (svc *WorkerServiceV2) reportStatus() {
ctx, cancel := context.WithTimeout(context.Background(), svc.heartbeatInterval)
defer cancel()
_, err := svc.client.NodeClient.SendHeartbeat(ctx, &grpc.Request{
NodeKey: svc.cfgSvc.GetNodeKey(),
})
if err != nil {
trace.PrintError(err)
}
}
func NewWorkerServiceV2() (res *WorkerServiceV2, err error) {
svc := &WorkerServiceV2{
cfgPath: config2.GetConfigPath(),
heartbeatInterval: 15 * time.Second,
n: &models.Node{},
}
// dependency options
var clientOpts []client.Option
if svc.address != nil {
clientOpts = append(clientOpts, client.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
) {
svc.cfgSvc = cfgSvc
}); err != nil {
return nil, err
}
// grpc client
svc.client, err = client.NewGrpcClientV2()
if err != nil {
return nil, err
}
// handler service
svc.handlerSvc, err = handler.GetTaskHandlerServiceV2()
if err != nil {
return nil, err
}
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}