feat: added modules

This commit is contained in:
Marvin Zhang
2024-06-14 15:42:50 +08:00
parent 4d0adcb6f0
commit c4d795f47f
626 changed files with 60104 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
package config
import (
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/entity"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/spf13/viper"
)
type Config entity.NodeInfo
type Options struct {
Key string
Name string
IsMaster bool
AuthKey string
MaxRunners int
}
var DefaultMaxRunner = 8
var DefaultConfigOptions = &Options{
Key: utils.NewUUIDString(),
IsMaster: utils.IsMaster(),
AuthKey: constants.DefaultGrpcAuthKey,
MaxRunners: 0,
}
func NewConfig(opts *Options) (cfg *Config) {
if opts == nil {
opts = DefaultConfigOptions
}
if opts.Key == "" {
if viper.GetString("node.key") != "" {
opts.Key = viper.GetString("node.key")
} else {
opts.Key = utils.NewUUIDString()
}
}
if opts.Name == "" {
if viper.GetString("node.name") != "" {
opts.Name = viper.GetString("node.name")
} else {
opts.Name = opts.Key
}
}
if opts.AuthKey == "" {
if viper.GetString("grpc.authKey") != "" {
opts.AuthKey = viper.GetString("grpc.authKey")
} else {
opts.AuthKey = constants.DefaultGrpcAuthKey
}
}
if opts.MaxRunners == 0 {
if viper.GetInt("task.handler.maxRunners") != 0 {
opts.MaxRunners = viper.GetInt("task.handler.maxRunners")
} else {
opts.MaxRunners = DefaultMaxRunner
}
}
return &Config{
Key: opts.Key,
Name: opts.Name,
IsMaster: opts.IsMaster,
AuthKey: opts.AuthKey,
MaxRunners: opts.MaxRunners,
}
}

View File

@@ -0,0 +1,130 @@
package config
import (
"encoding/json"
"github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/entity"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/crawlab-team/go-trace"
"os"
"path"
)
type Service struct {
cfg *Config
path string
}
func (svc *Service) Init() (err error) {
// check config directory path
configDirPath := path.Dir(svc.path)
if !utils.Exists(configDirPath) {
if err := os.MkdirAll(configDirPath, os.FileMode(0766)); err != nil {
return trace.TraceError(err)
}
}
if !utils.Exists(svc.path) {
// not exists, set to default config
// and create a config file for persistence
svc.cfg = NewConfig(nil)
data, err := json.Marshal(svc.cfg)
if err != nil {
return trace.TraceError(err)
}
if err := os.WriteFile(svc.path, data, os.FileMode(0766)); err != nil {
return trace.TraceError(err)
}
} else {
// exists, read and set to config
data, err := os.ReadFile(svc.path)
if err != nil {
return trace.TraceError(err)
}
if err := json.Unmarshal(data, svc.cfg); err != nil {
return trace.TraceError(err)
}
}
return nil
}
func (svc *Service) Reload() (err error) {
return svc.Init()
}
func (svc *Service) GetBasicNodeInfo() (res interfaces.Entity) {
return &entity.NodeInfo{
Key: svc.GetNodeKey(),
Name: svc.GetNodeName(),
IsMaster: svc.IsMaster(),
AuthKey: svc.GetAuthKey(),
MaxRunners: svc.GetMaxRunners(),
}
}
func (svc *Service) GetNodeKey() (res string) {
return svc.cfg.Key
}
func (svc *Service) GetNodeName() (res string) {
return svc.cfg.Name
}
func (svc *Service) IsMaster() (res bool) {
return svc.cfg.IsMaster
}
func (svc *Service) GetAuthKey() (res string) {
return svc.cfg.AuthKey
}
func (svc *Service) GetMaxRunners() (res int) {
return svc.cfg.MaxRunners
}
func (svc *Service) GetConfigPath() (path string) {
return svc.path
}
func (svc *Service) SetConfigPath(path string) {
svc.path = path
}
func NewNodeConfigService() (svc2 interfaces.NodeConfigService, err error) {
// cfg
cfg := NewConfig(nil)
// config service
svc := &Service{
cfg: cfg,
}
// normalize config path
cfgPath := config.GetConfigPath()
svc.SetConfigPath(cfgPath)
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}
var _service interfaces.NodeConfigService
func GetNodeConfigService() interfaces.NodeConfigService {
if _service != nil {
return _service
}
var err error
_service, err = NewNodeConfigService()
if err != nil {
panic(err)
}
return _service
}

View File

@@ -0,0 +1,13 @@
package config
import (
"github.com/crawlab-team/crawlab/core/interfaces"
)
type Option func(svc interfaces.NodeConfigService)
func WithConfigPath(path string) Option {
return func(svc interfaces.NodeConfigService) {
svc.SetConfigPath(path)
}
}

View File

@@ -0,0 +1,368 @@
package service
import (
"github.com/apex/log"
"github.com/crawlab-team/crawlab-db/mongo"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/errors"
"github.com/crawlab-team/crawlab/core/grpc/server"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/common"
"github.com/crawlab-team/crawlab/core/models/delegate"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/node/config"
"github.com/crawlab-team/crawlab/core/notification"
"github.com/crawlab-team/crawlab/core/system"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/go-trace"
"github.com/spf13/viper"
"go.mongodb.org/mongo-driver/bson"
mongo2 "go.mongodb.org/mongo-driver/mongo"
"time"
)
type MasterService struct {
// dependencies
modelSvc service.ModelService
cfgSvc interfaces.NodeConfigService
server interfaces.GrpcServer
schedulerSvc interfaces.TaskSchedulerService
handlerSvc interfaces.TaskHandlerService
scheduleSvc interfaces.ScheduleService
notificationSvc *notification.Service
spiderAdminSvc interfaces.SpiderAdminService
systemSvc *system.Service
// settings
cfgPath string
address interfaces.Address
monitorInterval time.Duration
stopOnError bool
}
func (svc *MasterService) Init() (err error) {
// do nothing
return nil
}
func (svc *MasterService) Start() {
// create indexes
common.CreateIndexes()
// start grpc server
if err := svc.server.Start(); err != nil {
panic(err)
}
// register to db
if err := svc.Register(); err != nil {
panic(err)
}
// start monitoring worker nodes
go svc.Monitor()
// start task handler
go svc.handlerSvc.Start()
// start task scheduler
go svc.schedulerSvc.Start()
// start schedule service
go svc.scheduleSvc.Start()
// start notification service
go svc.notificationSvc.Start()
// start spider admin service
go svc.spiderAdminSvc.Start()
// wait for quit signal
svc.Wait()
// stop
svc.Stop()
}
func (svc *MasterService) Wait() {
utils.DefaultWait()
}
func (svc *MasterService) Stop() {
_ = svc.server.Stop()
log.Infof("master[%s] service has stopped", svc.GetConfigService().GetNodeKey())
}
func (svc *MasterService) Monitor() {
log.Infof("master[%s] monitoring started", svc.GetConfigService().GetNodeKey())
for {
if err := svc.monitor(); err != nil {
trace.PrintError(err)
if svc.stopOnError {
log.Errorf("master[%s] monitor error, now stopping...", svc.GetConfigService().GetNodeKey())
svc.Stop()
return
}
}
time.Sleep(svc.monitorInterval)
}
}
func (svc *MasterService) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
return svc.cfgSvc
}
func (svc *MasterService) GetConfigPath() (path string) {
return svc.cfgPath
}
func (svc *MasterService) SetConfigPath(path string) {
svc.cfgPath = path
}
func (svc *MasterService) GetAddress() (address interfaces.Address) {
return svc.address
}
func (svc *MasterService) SetAddress(address interfaces.Address) {
svc.address = address
}
func (svc *MasterService) SetMonitorInterval(duration time.Duration) {
svc.monitorInterval = duration
}
func (svc *MasterService) Register() (err error) {
nodeKey := svc.GetConfigService().GetNodeKey()
nodeName := svc.GetConfigService().GetNodeName()
node, err := svc.modelSvc.GetNodeByKey(nodeKey, nil)
if err != nil && err.Error() == mongo2.ErrNoDocuments.Error() {
// not exists
log.Infof("master[%s] does not exist in db", nodeKey)
node := &models.Node{
Key: nodeKey,
Name: nodeName,
MaxRunners: config.DefaultConfigOptions.MaxRunners,
IsMaster: true,
Status: constants.NodeStatusOnline,
Enabled: true,
Active: true,
ActiveTs: time.Now(),
}
if viper.GetInt("task.handler.maxRunners") > 0 {
node.MaxRunners = viper.GetInt("task.handler.maxRunners")
}
nodeD := delegate.NewModelNodeDelegate(node)
if err := nodeD.Add(); err != nil {
return err
}
log.Infof("added master[%s] in db. id: %s", nodeKey, nodeD.GetModel().GetId().Hex())
return nil
} else if err == nil {
// exists
log.Infof("master[%s] exists in db", nodeKey)
nodeD := delegate.NewModelNodeDelegate(node)
if err := nodeD.UpdateStatusOnline(); err != nil {
return err
}
log.Infof("updated master[%s] in db. id: %s", nodeKey, nodeD.GetModel().GetId().Hex())
return nil
} else {
// error
return err
}
}
func (svc *MasterService) StopOnError() {
svc.stopOnError = true
}
func (svc *MasterService) GetServer() (svr interfaces.GrpcServer) {
return svc.server
}
func (svc *MasterService) monitor() (err error) {
// update master node status in db
if err := svc.updateMasterNodeStatus(); err != nil {
if err.Error() == mongo2.ErrNoDocuments.Error() {
return nil
}
return err
}
// all worker nodes
nodes, err := svc.getAllWorkerNodes()
if err != nil {
return err
}
// error flag
isErr := false
// iterate all nodes
for _, n := range nodes {
// subscribe
if err := svc.subscribeNode(&n); err != nil {
isErr = true
continue
}
// ping client
if err := svc.pingNodeClient(&n); err != nil {
isErr = true
continue
}
// update node available runners
if err := svc.updateNodeAvailableRunners(&n); err != nil {
isErr = true
continue
}
}
if isErr {
return trace.TraceError(errors.ErrorNodeMonitorError)
}
return nil
}
func (svc *MasterService) getAllWorkerNodes() (nodes []models.Node, err error) {
query := bson.M{
"key": bson.M{"$ne": svc.cfgSvc.GetNodeKey()}, // not self
"active": true, // active
}
nodes, err = svc.modelSvc.GetNodeList(query, nil)
if err != nil {
if err == mongo2.ErrNoDocuments {
return nil, nil
}
return nil, trace.TraceError(err)
}
return nodes, nil
}
func (svc *MasterService) updateMasterNodeStatus() (err error) {
nodeKey := svc.GetConfigService().GetNodeKey()
node, err := svc.modelSvc.GetNodeByKey(nodeKey, nil)
if err != nil {
return err
}
nodeD := delegate.NewModelNodeDelegate(node)
return nodeD.UpdateStatusOnline()
}
func (svc *MasterService) setWorkerNodeOffline(n interfaces.Node) (err error) {
return delegate.NewModelNodeDelegate(n).UpdateStatusOffline()
}
func (svc *MasterService) subscribeNode(n interfaces.Node) (err error) {
_, err = svc.server.GetSubscribe("node:" + n.GetKey())
if err != nil {
log.Errorf("cannot subscribe worker node[%s]: %v", n.GetKey(), err)
if err := svc.setWorkerNodeOffline(n); err != nil {
return trace.TraceError(err)
}
return trace.TraceError(err)
}
return nil
}
func (svc *MasterService) pingNodeClient(n interfaces.Node) (err error) {
if err := svc.server.SendStreamMessage("node:"+n.GetKey(), grpc.StreamMessageCode_PING); err != nil {
log.Errorf("cannot ping worker node client[%s]: %v", n.GetKey(), err)
if err := svc.setWorkerNodeOffline(n); err != nil {
return trace.TraceError(err)
}
return trace.TraceError(err)
}
return nil
}
func (svc *MasterService) updateNodeAvailableRunners(n interfaces.Node) (err error) {
query := bson.M{
"node_id": n.GetId(),
"status": constants.TaskStatusRunning,
}
runningTasksCount, err := mongo.GetMongoCol(interfaces.ModelColNameTask).Count(query)
if err != nil {
return trace.TraceError(err)
}
n.SetAvailableRunners(n.GetMaxRunners() - runningTasksCount)
return delegate.NewModelDelegate(n).Save()
}
func NewMasterService(opts ...Option) (res interfaces.NodeMasterService, err error) {
// master service
svc := &MasterService{
cfgPath: config2.GetConfigPath(),
monitorInterval: 15 * time.Second,
stopOnError: false,
}
// apply options
for _, opt := range opts {
opt(svc)
}
// server options
var serverOpts []server.Option
if svc.address != nil {
serverOpts = append(serverOpts, server.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
modelSvc service.ModelService,
server interfaces.GrpcServer,
schedulerSvc interfaces.TaskSchedulerService,
handlerSvc interfaces.TaskHandlerService,
scheduleSvc interfaces.ScheduleService,
spiderAdminSvc interfaces.SpiderAdminService,
) {
svc.cfgSvc = cfgSvc
svc.modelSvc = modelSvc
svc.server = server
svc.schedulerSvc = schedulerSvc
svc.handlerSvc = handlerSvc
svc.scheduleSvc = scheduleSvc
svc.spiderAdminSvc = spiderAdminSvc
}); err != nil {
return nil, err
}
// notification service
svc.notificationSvc = notification.GetService()
// system service
svc.systemSvc = system.GetService()
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}
func ProvideMasterService(path string, opts ...Option) func() (interfaces.NodeMasterService, error) {
// path
if path == "" || path == config2.GetConfigPath() {
if viper.GetString("config.path") != "" {
path = viper.GetString("config.path")
} else {
path = config2.GetConfigPath()
}
}
opts = append(opts, WithConfigPath(path))
return func() (interfaces.NodeMasterService, error) {
return NewMasterService(opts...)
}
}

View File

@@ -0,0 +1,388 @@
package service
import (
"errors"
"github.com/apex/log"
"github.com/cenkalti/backoff/v4"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/server"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/common"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/node/config"
"github.com/crawlab-team/crawlab/core/notification"
"github.com/crawlab-team/crawlab/core/schedule"
"github.com/crawlab-team/crawlab/core/spider/admin"
"github.com/crawlab-team/crawlab/core/system"
"github.com/crawlab-team/crawlab/core/task/handler"
"github.com/crawlab-team/crawlab/core/task/scheduler"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/go-trace"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
mongo2 "go.mongodb.org/mongo-driver/mongo"
"sync"
"time"
)
type MasterServiceV2 struct {
// dependencies
cfgSvc interfaces.NodeConfigService
server *server.GrpcServerV2
schedulerSvc *scheduler.ServiceV2
handlerSvc *handler.ServiceV2
scheduleSvc *schedule.ServiceV2
notificationSvc *notification.Service
spiderAdminSvc *admin.ServiceV2
systemSvc *system.Service
// settings
cfgPath string
address interfaces.Address
monitorInterval time.Duration
stopOnError bool
}
func (svc *MasterServiceV2) Init() (err error) {
// do nothing
return nil
}
func (svc *MasterServiceV2) Start() {
// create indexes
common.CreateIndexes()
// start grpc server
if err := svc.server.Start(); err != nil {
panic(err)
}
// register to db
if err := svc.Register(); err != nil {
panic(err)
}
// start monitoring worker nodes
go svc.Monitor()
// start task handler
go svc.handlerSvc.Start()
// start task scheduler
go svc.schedulerSvc.Start()
// start schedule service
go svc.scheduleSvc.Start()
// start notification service
go svc.notificationSvc.Start()
// start spider admin service
go svc.spiderAdminSvc.Start()
// wait for quit signal
svc.Wait()
// stop
svc.Stop()
}
func (svc *MasterServiceV2) Wait() {
utils.DefaultWait()
}
func (svc *MasterServiceV2) Stop() {
_ = svc.server.Stop()
log.Infof("master[%s] service has stopped", svc.GetConfigService().GetNodeKey())
}
func (svc *MasterServiceV2) Monitor() {
log.Infof("master[%s] monitoring started", svc.GetConfigService().GetNodeKey())
for {
if err := svc.monitor(); err != nil {
trace.PrintError(err)
if svc.stopOnError {
log.Errorf("master[%s] monitor error, now stopping...", svc.GetConfigService().GetNodeKey())
svc.Stop()
return
}
}
time.Sleep(svc.monitorInterval)
}
}
func (svc *MasterServiceV2) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
return svc.cfgSvc
}
func (svc *MasterServiceV2) GetConfigPath() (path string) {
return svc.cfgPath
}
func (svc *MasterServiceV2) SetConfigPath(path string) {
svc.cfgPath = path
}
func (svc *MasterServiceV2) GetAddress() (address interfaces.Address) {
return svc.address
}
func (svc *MasterServiceV2) SetAddress(address interfaces.Address) {
svc.address = address
}
func (svc *MasterServiceV2) SetMonitorInterval(duration time.Duration) {
svc.monitorInterval = duration
}
func (svc *MasterServiceV2) Register() (err error) {
nodeKey := svc.GetConfigService().GetNodeKey()
nodeName := svc.GetConfigService().GetNodeName()
node, err := service.NewModelServiceV2[models.NodeV2]().GetOne(bson.M{"key": nodeKey}, nil)
if err != nil && err.Error() == mongo2.ErrNoDocuments.Error() {
// not exists
log.Infof("master[%s] does not exist in db", nodeKey)
node := models.NodeV2{
Key: nodeKey,
Name: nodeName,
MaxRunners: config.DefaultConfigOptions.MaxRunners,
IsMaster: true,
Status: constants.NodeStatusOnline,
Enabled: true,
Active: true,
ActiveAt: time.Now(),
}
node.SetCreated(primitive.NilObjectID)
node.SetUpdated(primitive.NilObjectID)
id, err := service.NewModelServiceV2[models.NodeV2]().InsertOne(node)
if err != nil {
return err
}
log.Infof("added master[%s] in db. id: %s", nodeKey, id.Hex())
return nil
} else if err == nil {
// exists
log.Infof("master[%s] exists in db", nodeKey)
node.Status = constants.NodeStatusOnline
node.Active = true
node.ActiveAt = time.Now()
err = service.NewModelServiceV2[models.NodeV2]().ReplaceById(node.Id, *node)
if err != nil {
return err
}
log.Infof("updated master[%s] in db. id: %s", nodeKey, node.Id.Hex())
return nil
} else {
// error
return err
}
}
func (svc *MasterServiceV2) StopOnError() {
svc.stopOnError = true
}
func (svc *MasterServiceV2) GetServer() (svr interfaces.GrpcServer) {
return svc.server
}
func (svc *MasterServiceV2) monitor() (err error) {
// update master node status in db
if err := svc.updateMasterNodeStatus(); err != nil {
if err.Error() == mongo2.ErrNoDocuments.Error() {
return nil
}
return err
}
// all worker nodes
workerNodes, err := svc.getAllWorkerNodes()
if err != nil {
return err
}
// iterate all worker nodes
wg := sync.WaitGroup{}
wg.Add(len(workerNodes))
for _, n := range workerNodes {
go func(n *models.NodeV2) {
// subscribe
ok := svc.subscribeNode(n)
if !ok {
go svc.setWorkerNodeOffline(n)
wg.Done()
return
}
// ping client
ok = svc.pingNodeClient(n)
if !ok {
go svc.setWorkerNodeOffline(n)
wg.Done()
return
}
// update node available runners
if err := svc.updateNodeAvailableRunners(n); err != nil {
trace.PrintError(err)
wg.Done()
return
}
// done
wg.Done()
}(&n)
}
wg.Wait()
return nil
}
func (svc *MasterServiceV2) getAllWorkerNodes() (nodes []models.NodeV2, err error) {
query := bson.M{
"key": bson.M{"$ne": svc.cfgSvc.GetNodeKey()}, // not self
"active": true, // active
}
nodes, err = service.NewModelServiceV2[models.NodeV2]().GetMany(query, nil)
if err != nil {
if errors.Is(err, mongo2.ErrNoDocuments) {
return nil, nil
}
return nil, trace.TraceError(err)
}
return nodes, nil
}
func (svc *MasterServiceV2) updateMasterNodeStatus() (err error) {
nodeKey := svc.GetConfigService().GetNodeKey()
node, err := service.NewModelServiceV2[models.NodeV2]().GetOne(bson.M{"key": nodeKey}, nil)
if err != nil {
return err
}
node.Status = constants.NodeStatusOnline
node.Active = true
node.ActiveAt = time.Now()
err = service.NewModelServiceV2[models.NodeV2]().ReplaceById(node.Id, *node)
if err != nil {
return err
}
return nil
}
func (svc *MasterServiceV2) setWorkerNodeOffline(node *models.NodeV2) {
node.Status = constants.NodeStatusOffline
node.Active = false
err := backoff.Retry(func() error {
return service.NewModelServiceV2[models.NodeV2]().ReplaceById(node.Id, *node)
}, backoff.WithMaxRetries(backoff.NewConstantBackOff(1*time.Second), 3))
if err != nil {
trace.PrintError(err)
}
}
func (svc *MasterServiceV2) subscribeNode(n *models.NodeV2) (ok bool) {
_, err := svc.server.GetSubscribe("node:" + n.Key)
if err != nil {
log.Errorf("cannot subscribe worker node[%s]: %v", n.Key, err)
return false
}
return true
}
func (svc *MasterServiceV2) pingNodeClient(n *models.NodeV2) (ok bool) {
if err := svc.server.SendStreamMessage("node:"+n.Key, grpc.StreamMessageCode_PING); err != nil {
log.Errorf("cannot ping worker node client[%s]: %v", n.Key, err)
return false
}
return true
}
func (svc *MasterServiceV2) updateNodeAvailableRunners(node *models.NodeV2) (err error) {
query := bson.M{
"node_id": node.Id,
"status": constants.TaskStatusRunning,
}
runningTasksCount, err := service.NewModelServiceV2[models.TaskV2]().Count(query)
if err != nil {
return trace.TraceError(err)
}
node.AvailableRunners = node.MaxRunners - runningTasksCount
err = service.NewModelServiceV2[models.NodeV2]().ReplaceById(node.Id, *node)
if err != nil {
return err
}
return nil
}
func NewMasterServiceV2() (res interfaces.NodeMasterService, err error) {
// master service
svc := &MasterServiceV2{
cfgPath: config2.GetConfigPath(),
monitorInterval: 15 * time.Second,
stopOnError: false,
}
// server options
var serverOpts []server.Option
if svc.address != nil {
serverOpts = append(serverOpts, server.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
) {
svc.cfgSvc = cfgSvc
}); err != nil {
return nil, err
}
// grpc server
svc.server, err = server.GetGrpcServerV2()
if err != nil {
return nil, err
}
// scheduler service
svc.schedulerSvc, err = scheduler.GetTaskSchedulerServiceV2()
if err != nil {
return nil, err
}
// handler service
svc.handlerSvc, err = handler.GetTaskHandlerServiceV2()
if err != nil {
return nil, err
}
// schedule service
svc.scheduleSvc, err = schedule.GetScheduleServiceV2()
if err != nil {
return nil, err
}
// notification service
svc.notificationSvc = notification.GetService()
// spider admin service
svc.spiderAdminSvc, err = admin.GetSpiderAdminServiceV2()
if err != nil {
return nil, err
}
// system service
svc.systemSvc = system.GetService()
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}

View File

@@ -0,0 +1,47 @@
package service
import (
"github.com/crawlab-team/crawlab/core/interfaces"
"time"
)
type Option func(svc interfaces.NodeService)
func WithConfigPath(path string) Option {
return func(svc interfaces.NodeService) {
svc.SetConfigPath(path)
}
}
func WithAddress(address interfaces.Address) Option {
return func(svc interfaces.NodeService) {
svc.SetAddress(address)
}
}
func WithMonitorInterval(duration time.Duration) Option {
return func(svc interfaces.NodeService) {
svc2, ok := svc.(interfaces.NodeMasterService)
if ok {
svc2.SetMonitorInterval(duration)
}
}
}
func WithStopOnError() Option {
return func(svc interfaces.NodeService) {
svc2, ok := svc.(interfaces.NodeMasterService)
if ok {
svc2.StopOnError()
}
}
}
func WithHeartbeatInterval(duration time.Duration) Option {
return func(svc interfaces.NodeService) {
svc2, ok := svc.(interfaces.NodeWorkerService)
if ok {
svc2.SetHeartbeatInterval(duration)
}
}
}

View File

@@ -0,0 +1,238 @@
package service
import (
"context"
"encoding/json"
"github.com/apex/log"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/client"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/go-trace"
"github.com/spf13/viper"
"time"
)
type WorkerService struct {
// dependencies
cfgSvc interfaces.NodeConfigService
client interfaces.GrpcClient
handlerSvc interfaces.TaskHandlerService
// settings
cfgPath string
address interfaces.Address
heartbeatInterval time.Duration
// internals
n interfaces.Node
s grpc.NodeService_SubscribeClient
}
func (svc *WorkerService) Init() (err error) {
// do nothing
return nil
}
func (svc *WorkerService) Start() {
// start grpc client
if err := svc.client.Start(); err != nil {
panic(err)
}
// register to master
svc.Register()
// start receiving stream messages
go svc.Recv()
// start sending heartbeat to master
go svc.ReportStatus()
// start handler
go svc.handlerSvc.Start()
// wait for quit signal
svc.Wait()
// stop
svc.Stop()
}
func (svc *WorkerService) Wait() {
utils.DefaultWait()
}
func (svc *WorkerService) Stop() {
_ = svc.client.Stop()
log.Infof("worker[%s] service has stopped", svc.cfgSvc.GetNodeKey())
}
func (svc *WorkerService) Register() {
ctx, cancel := svc.client.Context()
defer cancel()
req := svc.client.NewRequest(svc.GetConfigService().GetBasicNodeInfo())
res, err := svc.client.GetNodeClient().Register(ctx, req)
if err != nil {
panic(err)
}
if err := json.Unmarshal(res.Data, svc.n); err != nil {
panic(err)
}
log.Infof("worker[%s] registered to master. id: %s", svc.GetConfigService().GetNodeKey(), svc.n.GetId().Hex())
return
}
func (svc *WorkerService) Recv() {
msgCh := svc.client.GetMessageChannel()
for {
// return if client is closed
if svc.client.IsClosed() {
return
}
// receive message from channel
msg := <-msgCh
// handle message
if err := svc.handleStreamMessage(msg); err != nil {
continue
}
}
}
func (svc *WorkerService) handleStreamMessage(msg *grpc.StreamMessage) (err error) {
log.Debugf("[WorkerService] handle msg: %v", msg)
switch msg.Code {
case grpc.StreamMessageCode_PING:
if _, err := svc.client.GetNodeClient().SendHeartbeat(context.Background(), svc.client.NewRequest(svc.cfgSvc.GetBasicNodeInfo())); err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_RUN_TASK:
var t models.Task
if err := json.Unmarshal(msg.Data, &t); err != nil {
return trace.TraceError(err)
}
if err := svc.handlerSvc.Run(t.Id); err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_CANCEL_TASK:
var t models.Task
if err := json.Unmarshal(msg.Data, &t); err != nil {
return trace.TraceError(err)
}
if err := svc.handlerSvc.Cancel(t.Id); err != nil {
return trace.TraceError(err)
}
}
return nil
}
func (svc *WorkerService) ReportStatus() {
for {
// return if client is closed
if svc.client.IsClosed() {
return
}
// report status
svc.reportStatus()
// sleep
time.Sleep(svc.heartbeatInterval)
}
}
func (svc *WorkerService) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
return svc.cfgSvc
}
func (svc *WorkerService) GetConfigPath() (path string) {
return svc.cfgPath
}
func (svc *WorkerService) SetConfigPath(path string) {
svc.cfgPath = path
}
func (svc *WorkerService) GetAddress() (address interfaces.Address) {
return svc.address
}
func (svc *WorkerService) SetAddress(address interfaces.Address) {
svc.address = address
}
func (svc *WorkerService) SetHeartbeatInterval(duration time.Duration) {
svc.heartbeatInterval = duration
}
func (svc *WorkerService) reportStatus() {
ctx, cancel := context.WithTimeout(context.Background(), svc.heartbeatInterval)
defer cancel()
_, err := svc.client.GetNodeClient().SendHeartbeat(ctx, &grpc.Request{
NodeKey: svc.cfgSvc.GetNodeKey(),
})
if err != nil {
trace.PrintError(err)
}
}
func NewWorkerService(opts ...Option) (res *WorkerService, err error) {
svc := &WorkerService{
cfgPath: config2.GetConfigPath(),
heartbeatInterval: 15 * time.Second,
n: &models.Node{},
}
// apply options
for _, opt := range opts {
opt(svc)
}
// dependency options
var clientOpts []client.Option
if svc.address != nil {
clientOpts = append(clientOpts, client.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
client interfaces.GrpcClient,
taskHandlerSvc interfaces.TaskHandlerService,
) {
svc.cfgSvc = cfgSvc
svc.client = client
svc.handlerSvc = taskHandlerSvc
}); err != nil {
return nil, err
}
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}
func ProvideWorkerService(path string, opts ...Option) func() (interfaces.NodeWorkerService, error) {
// path
if path == "" || path == config2.GetConfigPath() {
if viper.GetString("config.path") != "" {
path = viper.GetString("config.path")
} else {
path = config2.GetConfigPath()
}
}
opts = append(opts, WithConfigPath(path))
return func() (interfaces.NodeWorkerService, error) {
return NewWorkerService(opts...)
}
}

View File

@@ -0,0 +1,225 @@
package service
import (
"context"
"encoding/json"
"github.com/apex/log"
config2 "github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/grpc/client"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/task/handler"
"github.com/crawlab-team/crawlab/core/utils"
grpc "github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/go-trace"
"time"
)
type WorkerServiceV2 struct {
// dependencies
cfgSvc interfaces.NodeConfigService
client *client.GrpcClientV2
handlerSvc *handler.ServiceV2
// settings
cfgPath string
address interfaces.Address
heartbeatInterval time.Duration
// internals
n interfaces.Node
s grpc.NodeService_SubscribeClient
}
func (svc *WorkerServiceV2) Init() (err error) {
// do nothing
return nil
}
func (svc *WorkerServiceV2) Start() {
// start grpc client
if err := svc.client.Start(); err != nil {
panic(err)
}
// register to master
svc.Register()
// start receiving stream messages
go svc.Recv()
// start sending heartbeat to master
go svc.ReportStatus()
// start handler
go svc.handlerSvc.Start()
// wait for quit signal
svc.Wait()
// stop
svc.Stop()
}
func (svc *WorkerServiceV2) Wait() {
utils.DefaultWait()
}
func (svc *WorkerServiceV2) Stop() {
_ = svc.client.Stop()
log.Infof("worker[%s] service has stopped", svc.cfgSvc.GetNodeKey())
}
func (svc *WorkerServiceV2) Register() {
ctx, cancel := svc.client.Context()
defer cancel()
req := svc.client.NewRequest(svc.GetConfigService().GetBasicNodeInfo())
res, err := svc.client.NodeClient.Register(ctx, req)
if err != nil {
panic(err)
}
if err := json.Unmarshal(res.Data, svc.n); err != nil {
panic(err)
}
log.Infof("worker[%s] registered to master. id: %s", svc.GetConfigService().GetNodeKey(), svc.n.GetId().Hex())
return
}
func (svc *WorkerServiceV2) Recv() {
msgCh := svc.client.GetMessageChannel()
for {
// return if client is closed
if svc.client.IsClosed() {
return
}
// receive message from channel
msg := <-msgCh
// handle message
if err := svc.handleStreamMessage(msg); err != nil {
continue
}
}
}
func (svc *WorkerServiceV2) handleStreamMessage(msg *grpc.StreamMessage) (err error) {
log.Debugf("[WorkerServiceV2] handle msg: %v", msg)
switch msg.Code {
case grpc.StreamMessageCode_PING:
if _, err := svc.client.NodeClient.SendHeartbeat(context.Background(), svc.client.NewRequest(svc.cfgSvc.GetBasicNodeInfo())); err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_RUN_TASK:
var t models.Task
if err := json.Unmarshal(msg.Data, &t); err != nil {
return trace.TraceError(err)
}
if err := svc.handlerSvc.Run(t.Id); err != nil {
return trace.TraceError(err)
}
case grpc.StreamMessageCode_CANCEL_TASK:
var t models.Task
if err := json.Unmarshal(msg.Data, &t); err != nil {
return trace.TraceError(err)
}
if err := svc.handlerSvc.Cancel(t.Id); err != nil {
return trace.TraceError(err)
}
}
return nil
}
func (svc *WorkerServiceV2) ReportStatus() {
for {
// return if client is closed
if svc.client.IsClosed() {
return
}
// report status
svc.reportStatus()
// sleep
time.Sleep(svc.heartbeatInterval)
}
}
func (svc *WorkerServiceV2) GetConfigService() (cfgSvc interfaces.NodeConfigService) {
return svc.cfgSvc
}
func (svc *WorkerServiceV2) GetConfigPath() (path string) {
return svc.cfgPath
}
func (svc *WorkerServiceV2) SetConfigPath(path string) {
svc.cfgPath = path
}
func (svc *WorkerServiceV2) GetAddress() (address interfaces.Address) {
return svc.address
}
func (svc *WorkerServiceV2) SetAddress(address interfaces.Address) {
svc.address = address
}
func (svc *WorkerServiceV2) SetHeartbeatInterval(duration time.Duration) {
svc.heartbeatInterval = duration
}
func (svc *WorkerServiceV2) reportStatus() {
ctx, cancel := context.WithTimeout(context.Background(), svc.heartbeatInterval)
defer cancel()
_, err := svc.client.NodeClient.SendHeartbeat(ctx, &grpc.Request{
NodeKey: svc.cfgSvc.GetNodeKey(),
})
if err != nil {
trace.PrintError(err)
}
}
func NewWorkerServiceV2() (res *WorkerServiceV2, err error) {
svc := &WorkerServiceV2{
cfgPath: config2.GetConfigPath(),
heartbeatInterval: 15 * time.Second,
n: &models.Node{},
}
// dependency options
var clientOpts []client.Option
if svc.address != nil {
clientOpts = append(clientOpts, client.WithAddress(svc.address))
}
// dependency injection
if err := container.GetContainer().Invoke(func(
cfgSvc interfaces.NodeConfigService,
) {
svc.cfgSvc = cfgSvc
}); err != nil {
return nil, err
}
// grpc client
svc.client, err = client.NewGrpcClientV2()
if err != nil {
return nil, err
}
// handler service
svc.handlerSvc, err = handler.GetTaskHandlerServiceV2()
if err != nil {
return nil, err
}
// init
if err := svc.Init(); err != nil {
return nil, err
}
return svc, nil
}

206
core/node/test/base.go Normal file
View File

@@ -0,0 +1,206 @@
package test
import (
"github.com/crawlab-team/crawlab/core/entity"
"github.com/crawlab-team/crawlab/core/interfaces"
service2 "github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/node/service"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/spf13/viper"
"go.uber.org/dig"
"io/ioutil"
"os"
"path"
"testing"
"time"
)
func init() {
var err error
T, err = NewTest()
if err != nil {
panic(err)
}
}
var T *Test
type Test struct {
DefaultSvc interfaces.NodeMasterService
MasterSvc interfaces.NodeMasterService
WorkerSvc interfaces.NodeWorkerService
MasterSvcMonitor interfaces.NodeMasterService
WorkerSvcMonitor interfaces.NodeWorkerService
ModelSvc service2.ModelService
}
func NewTest() (res *Test, err error) {
// test
t := &Test{}
// recreate config directory path
_ = os.RemoveAll(viper.GetString("metadata"))
_ = os.MkdirAll(viper.GetString("metadata"), os.FileMode(0766))
// master config and settings
masterNodeConfigName := "config-master.json"
masterNodeConfigPath := path.Join(viper.GetString("metadata"), masterNodeConfigName)
if err := ioutil.WriteFile(masterNodeConfigPath, []byte("{\"key\":\"master\",\"is_master\":true}"), os.FileMode(0766)); err != nil {
return nil, err
}
masterHost := "0.0.0.0"
masterPort := "9667"
// worker config and settings
workerNodeConfigName := "config-worker.json"
workerNodeConfigPath := path.Join(viper.GetString("metadata"), workerNodeConfigName)
if err = ioutil.WriteFile(workerNodeConfigPath, []byte("{\"key\":\"worker\",\"is_master\":false}"), os.FileMode(0766)); err != nil {
return nil, err
}
workerHost := "localhost"
workerPort := masterPort
// master for monitor config and settings
masterNodeMonitorConfigName := "config-master-monitor.json"
masterNodeMonitorConfigPath := path.Join(viper.GetString("metadata"), masterNodeMonitorConfigName)
if err := ioutil.WriteFile(masterNodeMonitorConfigPath, []byte("{\"key\":\"master-monitor\",\"is_master\":true}"), os.FileMode(0766)); err != nil {
return nil, err
}
masterMonitorHost := masterHost
masterMonitorPort := "9668"
// worker for monitor config and settings
workerNodeMonitorConfigName := "config-worker-monitor.json"
workerNodeMonitorConfigPath := path.Join(viper.GetString("metadata"), workerNodeMonitorConfigName)
if err := ioutil.WriteFile(workerNodeMonitorConfigPath, []byte("{\"key\":\"worker-monitor\",\"is_master\":false}"), os.FileMode(0766)); err != nil {
return nil, err
}
workerMonitorHost := workerHost
workerMonitorPort := masterMonitorPort
// dependency injection
c := dig.New()
if err := c.Provide(service.ProvideMasterService(
masterNodeConfigPath,
service.WithMonitorInterval(3*time.Second),
service.WithAddress(entity.NewAddress(&entity.AddressOptions{
Host: masterHost,
Port: masterPort,
})),
)); err != nil {
return nil, err
}
if err := c.Provide(service.ProvideWorkerService(
workerNodeConfigPath,
service.WithHeartbeatInterval(1*time.Second),
service.WithAddress(entity.NewAddress(&entity.AddressOptions{
Host: workerHost,
Port: workerPort,
})),
)); err != nil {
return nil, err
}
if err := c.Provide(service2.NewService); err != nil {
return nil, err
}
if err := c.Invoke(func(masterSvc interfaces.NodeMasterService, workerSvc interfaces.NodeWorkerService, modelSvc service2.ModelService) {
t.MasterSvc = masterSvc
t.WorkerSvc = workerSvc
t.ModelSvc = modelSvc
}); err != nil {
return nil, err
}
// default service
t.DefaultSvc, err = service.NewMasterService()
if err != nil {
return nil, err
}
// master and worker for monitor
t.MasterSvcMonitor, err = service.NewMasterService(
service.WithConfigPath(masterNodeMonitorConfigPath),
service.WithAddress(entity.NewAddress(&entity.AddressOptions{
Host: masterMonitorHost,
Port: masterMonitorPort,
})),
service.WithMonitorInterval(3*time.Second),
service.WithStopOnError(),
)
if err != nil {
return nil, err
}
t.WorkerSvcMonitor, err = service.NewWorkerService(
service.WithConfigPath(workerNodeMonitorConfigPath),
service.WithAddress(entity.NewAddress(&entity.AddressOptions{
Host: workerMonitorHost,
Port: workerMonitorPort,
})),
service.WithHeartbeatInterval(1*time.Second),
service.WithStopOnError(),
)
if err != nil {
return nil, err
}
// removed all data in db
_ = t.ModelSvc.DropAll()
// visualize dependencies
if err := utils.VisualizeContainer(c); err != nil {
return nil, err
}
return t, nil
}
func (t *Test) Setup(t2 *testing.T) {
if err := t.ModelSvc.DropAll(); err != nil {
panic(err)
}
_ = os.RemoveAll(viper.GetString("metadata"))
t2.Cleanup(t.Cleanup)
}
func (t *Test) Cleanup() {
if err := t.ModelSvc.DropAll(); err != nil {
panic(err)
}
_ = os.RemoveAll(viper.GetString("metadata"))
}
func (t *Test) StartMasterWorker() {
startMasterWorker()
}
func (t *Test) StopMasterWorker() {
stopMasterWorker()
}
func startMasterWorker() {
go T.MasterSvc.Start()
time.Sleep(1 * time.Second)
go T.WorkerSvc.Start()
time.Sleep(1 * time.Second)
}
func stopMasterWorker() {
go T.MasterSvc.Stop()
time.Sleep(1 * time.Second)
go T.WorkerSvc.Stop()
time.Sleep(1 * time.Second)
}
func startMasterWorkerMonitor() {
go T.MasterSvcMonitor.Start()
time.Sleep(1 * time.Second)
go T.WorkerSvcMonitor.Start()
time.Sleep(1 * time.Second)
}
func stopMasterWorkerMonitor() {
go T.MasterSvcMonitor.Stop()
time.Sleep(1 * time.Second)
go T.WorkerSvcMonitor.Stop()
time.Sleep(1 * time.Second)
}

View File

@@ -0,0 +1,67 @@
package test
import (
"github.com/crawlab-team/crawlab/core/constants"
"github.com/stretchr/testify/require"
"testing"
"time"
)
func TestNodeServices_Master_Worker(t *testing.T) {
T, _ = NewTest()
T.Setup(t)
startMasterWorker()
// validate master
masterNodeKey := T.MasterSvc.GetConfigService().GetNodeKey()
masterNode, err := T.ModelSvc.GetNodeByKey(masterNodeKey, nil)
require.Nil(t, err)
require.Equal(t, constants.NodeStatusOnline, masterNode.Status)
require.Equal(t, masterNodeKey, masterNode.Key)
require.True(t, masterNode.IsMaster)
// validate worker
workerNodeKey := T.WorkerSvc.GetConfigService().GetNodeKey()
workerNode, err := T.ModelSvc.GetNodeByKey(workerNodeKey, nil)
require.Nil(t, err)
require.Equal(t, constants.NodeStatusOnline, workerNode.Status)
require.Equal(t, workerNodeKey, workerNode.Key)
require.False(t, workerNode.IsMaster)
stopMasterWorker()
}
func TestNodeServices_Default(t *testing.T) {
T, _ = NewTest()
T.Setup(t)
go T.DefaultSvc.Start()
time.Sleep(1 * time.Second)
// validate default
defaultNodeKey := T.DefaultSvc.GetConfigService().GetNodeKey()
defaultNode, err := T.ModelSvc.GetNodeByKey(defaultNodeKey, nil)
require.Nil(t, err)
require.Equal(t, constants.NodeStatusOnline, defaultNode.Status)
require.Equal(t, defaultNodeKey, defaultNode.Key)
require.True(t, defaultNode.IsMaster)
T.DefaultSvc.Stop()
time.Sleep(1 * time.Second)
}
func TestNodeServices_Monitor(t *testing.T) {
T, _ = NewTest()
T.Setup(t)
startMasterWorkerMonitor()
time.Sleep(3 * time.Second)
// stop worker
T.WorkerSvcMonitor.Stop()
time.Sleep(5 * time.Second)
// validate
require.True(t, T.MasterSvcMonitor.GetServer().IsStopped())
stopMasterWorkerMonitor()
}