Files
crawlab/core/grpc/server/task_service_server.go
2024-11-01 15:19:48 +08:00

323 lines
8.5 KiB
Go

package server
import (
"context"
"encoding/json"
"errors"
"github.com/apex/log"
"github.com/crawlab-team/crawlab/core/constants"
"github.com/crawlab-team/crawlab/core/interfaces"
models2 "github.com/crawlab-team/crawlab/core/models/models/v2"
"github.com/crawlab-team/crawlab/core/models/service"
nodeconfig "github.com/crawlab-team/crawlab/core/node/config"
"github.com/crawlab-team/crawlab/core/notification"
"github.com/crawlab-team/crawlab/core/task/stats"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/crawlab-team/crawlab/db/mongo"
"github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/crawlab/trace"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
mongo2 "go.mongodb.org/mongo-driver/mongo"
"io"
"strings"
"sync"
)
// taskServiceMutex serializes access to the subscription map held by
// TaskServiceServer. It is package-level, so it is shared by every copy of the
// server value.
var taskServiceMutex sync.Mutex
// TaskServiceServer is the server-side implementation of the gRPC task
// service. Its methods use value receivers, so copies of the struct share the
// same subs map.
type TaskServiceServer struct {
grpc.UnimplementedTaskServiceServer
// dependencies
// cfgSvc is the node config service obtained in NewTaskServiceServer.
cfgSvc interfaces.NodeConfigService
// statsSvc receives the data records and log lines forwarded by
// handleInsertData and handleInsertLogs.
statsSvc *stats.Service
// internals
// subs maps a task id to the stream registered through Subscribe; reads and
// writes in this file are guarded by taskServiceMutex.
subs map[primitive.ObjectID]grpc.TaskService_SubscribeServer
}
// Subscribe registers stream as the subscription stream of the task named by
// req.TaskId so that it can be looked up with GetSubscribeStream.
//
// The call blocks until the stream's context is done (client disconnect,
// cancellation or deadline) and then removes the registration. Blocking is
// required: gRPC finishes a server stream as soon as its handler returns, so
// returning early would leave an unusable stream in the registry.
//
// It returns an error when the task id is not a valid ObjectID hex string,
// when stream is nil, or when the server's subscription map has not been
// initialized.
func (svr TaskServiceServer) Subscribe(req *grpc.TaskServiceSubscribeRequest, stream grpc.TaskService_SubscribeServer) (err error) {
	// task id
	taskId, err := primitive.ObjectIDFromHex(req.TaskId)
	if err != nil {
		return errors.New("invalid task id")
	}

	// validate stream
	if stream == nil {
		return errors.New("invalid stream")
	}

	// The receiver is a value, so the map cannot be created lazily here (the
	// assignment would be lost with the copy). Fail cleanly instead of
	// panicking on a write to a nil map.
	if svr.subs == nil {
		return errors.New("task subscriptions not initialized")
	}

	// add stream
	taskServiceMutex.Lock()
	svr.subs[taskId] = stream
	taskServiceMutex.Unlock()

	// always unregister when the handler exits
	defer func() {
		taskServiceMutex.Lock()
		delete(svr.subs, taskId)
		taskServiceMutex.Unlock()
	}()

	// keep the handler (and therefore the stream) alive until the client goes away
	<-stream.Context().Done()

	return nil
}
// Connect handles the client stream opened when a task runner in a node
// starts. It reads messages until the stream ends and dispatches each one by
// its code: INSERT_DATA goes to handleInsertData and INSERT_LOGS goes to
// handleInsertLogs.
//
// Messages with an invalid task id or an unknown code are logged and skipped;
// handler errors are logged and do not stop the loop.
//
// It returns nil when the client closes the stream (io.EOF) or the error text
// ends with "context canceled". Any other receive error is returned, because
// the stream is aborted at that point and every further Recv would fail the
// same way (retrying would spin forever).
func (svr TaskServiceServer) Connect(stream grpc.TaskService_ConnectServer) (err error) {
	for {
		msg, err := stream.Recv()
		if errors.Is(err, io.EOF) {
			return nil
		}
		if err != nil {
			if strings.HasSuffix(err.Error(), "context canceled") {
				return nil
			}
			trace.PrintError(err)
			return err
		}

		// validate task id
		taskId, err := primitive.ObjectIDFromHex(msg.TaskId)
		if err != nil {
			log.Errorf("invalid task id: %s", msg.TaskId)
			continue
		}

		switch msg.Code {
		case grpc.TaskServiceConnectCode_INSERT_DATA:
			err = svr.handleInsertData(taskId, msg)
		case grpc.TaskServiceConnectCode_INSERT_LOGS:
			err = svr.handleInsertLogs(taskId, msg)
		default:
			log.Errorf("invalid stream message code: %d", msg.Code)
			continue
		}
		if err != nil {
			log.Errorf("grpc error[%d]: %v", msg.Code, err)
		}
	}
}
// FetchTask returns the id of the next queued task for the node identified by
// the request's node key.
//
// Queue items whose "nid" equals the node's id are tried first, then items
// whose "nid" is nil. Both lookups sort by "p" ascending, then "_id"
// ascending, and take a single item. When nothing is queued the response
// carries the hex form of the zero ObjectID.
//
// An empty node key, a failed node lookup, or a failed dequeue is reported as
// an error.
func (svr TaskServiceServer) FetchTask(ctx context.Context, request *grpc.TaskServiceFetchTaskRequest) (response *grpc.TaskServiceFetchTaskResponse, err error) {
	key := request.GetNodeKey()
	if key == "" {
		return nil, errors.New("invalid node key")
	}

	node, err := service.NewModelServiceV2[models2.NodeV2]().GetOne(bson.M{"key": key}, nil)
	if err != nil {
		return nil, trace.TraceError(err)
	}

	var tid primitive.ObjectID
	findOpts := &mongo.FindOptions{
		Sort: bson.D{
			{Key: "p", Value: 1},
			{Key: "_id", Value: 1},
		},
		Limit: 1,
	}

	// NOTE(review): sc is not handed to the queue helper, so it is unclear from
	// this file whether the reads/writes below take part in the transaction —
	// confirm against the model service.
	dequeue := func(sc mongo2.SessionContext) (err error) {
		// queue item assigned to this node
		tid, err = svr.getTaskQueueItemIdAndDequeue(bson.M{"nid": node.Id}, findOpts, node.Id)
		if err != nil || !tid.IsZero() {
			return err
		}

		// queue item assigned to any node (random mode)
		tid, err = svr.getTaskQueueItemIdAndDequeue(bson.M{"nid": nil}, findOpts, node.Id)
		if !tid.IsZero() {
			return nil
		}
		return err
	}

	if err := mongo.RunTransactionWithContext(ctx, dequeue); err != nil {
		return nil, err
	}

	return &grpc.TaskServiceFetchTaskResponse{TaskId: tid.Hex()}, nil
}
// SendNotification sends the task-related notifications configured for the
// task named by request.TaskId. It does nothing unless utils.IsPro() is true.
//
// It loads the task, its stat, spider, node and (when the task has a schedule
// id) its schedule, and passes them as arguments to every enabled notification
// setting whose trigger matches constants.NotificationTriggerPatternTask:
//   - task finish: sent when the task is neither pending nor running;
//   - task error: sent when the task status is error or abnormal;
//   - empty results: sent when the task is neither pending nor running and its
//     result count is zero.
//
// Each notification is sent in its own goroutine; the call does not wait for
// them. The response is always nil. An error is returned when the task id is
// invalid or any of the lookups fails.
func (svr TaskServiceServer) SendNotification(_ context.Context, request *grpc.TaskServiceSendNotificationRequest) (response *grpc.Response, err error) {
	if !utils.IsPro() {
		return nil, nil
	}

	// task id
	taskId, err := primitive.ObjectIDFromHex(request.TaskId)
	if err != nil {
		log.Errorf("invalid task id: %s", request.TaskId)
		return nil, trace.TraceError(err)
	}

	// arguments
	var args []any

	// task
	task, err := service.NewModelServiceV2[models2.TaskV2]().GetById(taskId)
	if err != nil {
		log.Errorf("task not found: %s", request.TaskId)
		return nil, trace.TraceError(err)
	}
	args = append(args, task)

	// task stat
	taskStat, err := service.NewModelServiceV2[models2.TaskStatV2]().GetById(task.Id)
	if err != nil {
		log.Errorf("task stat not found for task: %s", request.TaskId)
		return nil, trace.TraceError(err)
	}
	args = append(args, taskStat)

	// spider
	spider, err := service.NewModelServiceV2[models2.SpiderV2]().GetById(task.SpiderId)
	if err != nil {
		log.Errorf("spider not found for task: %s", request.TaskId)
		return nil, trace.TraceError(err)
	}
	args = append(args, spider)

	// node
	node, err := service.NewModelServiceV2[models2.NodeV2]().GetById(task.NodeId)
	if err != nil {
		return nil, trace.TraceError(err)
	}
	args = append(args, node)

	// schedule
	var schedule *models2.ScheduleV2
	if !task.ScheduleId.IsZero() {
		schedule, err = service.NewModelServiceV2[models2.ScheduleV2]().GetById(task.ScheduleId)
		if err != nil {
			log.Errorf("schedule not found for task: %s", request.TaskId)
			return nil, trace.TraceError(err)
		}
		args = append(args, schedule)
	}

	// settings
	settings, err := service.NewModelServiceV2[models2.NotificationSettingV2]().GetMany(bson.M{
		"enabled": true,
		"trigger": bson.M{
			"$regex": constants.NotificationTriggerPatternTask,
		},
	}, nil)
	if err != nil {
		return nil, trace.TraceError(err)
	}

	// the task has finished when it is neither pending nor running
	finished := task.Status != constants.TaskStatusPending && task.Status != constants.TaskStatusRunning

	// notification service
	svc := notification.GetNotificationServiceV2()
	for _, s := range settings {
		// Per-iteration copy: the goroutines below outlive the iteration, and
		// with pre-Go-1.22 loop semantics &s would point at a single variable
		// that the loop keeps overwriting.
		setting := s

		// compatible with old settings
		trigger := setting.Trigger
		if trigger == "" {
			trigger = setting.TaskTrigger
		}

		// send notification
		switch trigger {
		case constants.NotificationTriggerTaskFinish:
			if finished {
				go svc.Send(&setting, args...)
			}
		case constants.NotificationTriggerTaskError:
			if task.Status == constants.TaskStatusError || task.Status == constants.TaskStatusAbnormal {
				go svc.Send(&setting, args...)
			}
		case constants.NotificationTriggerTaskEmptyResults:
			if finished && taskStat.ResultCount == 0 {
				go svc.Send(&setting, args...)
			}
		}
	}

	return nil, nil
}
// GetSubscribeStream looks up the stream registered for taskId. ok is false
// when no stream is registered for that task. The lookup is performed while
// holding taskServiceMutex.
func (svr TaskServiceServer) GetSubscribeStream(taskId primitive.ObjectID) (stream grpc.TaskService_SubscribeServer, ok bool) {
	taskServiceMutex.Lock()
	stream, ok = svr.subs[taskId]
	taskServiceMutex.Unlock()
	return stream, ok
}
// handleInsertData decodes msg.Data as a JSON list of records and forwards
// them to the stats service for the given task. A decoding failure is returned
// wrapped by trace.TraceError; otherwise the result of InsertData is returned.
func (svr TaskServiceServer) handleInsertData(taskId primitive.ObjectID, msg *grpc.TaskServiceConnectRequest) (err error) {
	var rows []map[string]any
	if err = json.Unmarshal(msg.Data, &rows); err != nil {
		return trace.TraceError(err)
	}
	return svr.statsSvc.InsertData(taskId, rows...)
}
// handleInsertLogs decodes msg.Data as a JSON list of strings and forwards
// them to the stats service as log lines of the given task. A decoding failure
// is returned wrapped by trace.TraceError; otherwise the result of InsertLogs
// is returned.
func (svr TaskServiceServer) handleInsertLogs(taskId primitive.ObjectID, msg *grpc.TaskServiceConnectRequest) (err error) {
	var lines []string
	if err = json.Unmarshal(msg.Data, &lines); err != nil {
		return trace.TraceError(err)
	}
	return svr.statsSvc.InsertLogs(taskId, lines...)
}
// getTaskQueueItemIdAndDequeue fetches one task queue item matching query
// (using opts), sets the node id of the task with the same id to nid, deletes
// the queue item and returns its id.
//
// When no queue item matches, it returns the zero ObjectID and a nil error.
// Other lookup, replace or delete failures are returned wrapped by
// trace.TraceError together with the zero ObjectID.
func (svr TaskServiceServer) getTaskQueueItemIdAndDequeue(query bson.M, opts *mongo.FindOptions, nid primitive.ObjectID) (tid primitive.ObjectID, err error) {
	item, err := service.NewModelServiceV2[models2.TaskQueueItemV2]().GetOne(query, opts)
	switch {
	case err == nil:
		// found a queue item; continue below
	case errors.Is(err, mongo2.ErrNoDocuments):
		return tid, nil
	default:
		return tid, trace.TraceError(err)
	}

	// NOTE(review): a failed task lookup is ignored here and the queue item is
	// still removed and its id returned — confirm this is intentional.
	if task, findErr := service.NewModelServiceV2[models2.TaskV2]().GetById(item.Id); findErr == nil {
		task.NodeId = nid
		if err = service.NewModelServiceV2[models2.TaskV2]().ReplaceById(task.Id, *task); err != nil {
			return tid, trace.TraceError(err)
		}
	}

	if err = service.NewModelServiceV2[models2.TaskQueueItemV2]().DeleteById(item.Id); err != nil {
		return tid, trace.TraceError(err)
	}

	return item.Id, nil
}
// NewTaskServiceServer builds a TaskServiceServer with its node config
// service, task stats service and an empty subscription map.
//
// The map must be created here: Subscribe writes to it through a value
// receiver, and assigning to a nil map panics.
//
// It returns an error when the task stats service cannot be obtained.
func NewTaskServiceServer() (res *TaskServiceServer, err error) {
	// task server
	svr := &TaskServiceServer{
		subs: make(map[primitive.ObjectID]grpc.TaskService_SubscribeServer),
	}
	svr.cfgSvc = nodeconfig.GetNodeConfigService()
	svr.statsSvc, err = stats.GetTaskStatsServiceV2()
	if err != nil {
		return nil, err
	}
	return svr, nil
}