From 65aeb3ed8c6981ffd4606e1a6c73fefdbda73ded Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sat, 16 Aug 2025 17:19:21 +0800 Subject: [PATCH 1/2] feat: add PING mechanism for connection health checks; update proto and generated files - Introduced PING code in TaskServiceConnectCode enum for health checks. - Updated Runner to use proper PING messages instead of fake log messages for connection health checks. - Modified TaskServiceServer to handle PING requests and acknowledge them. - Adjusted generated gRPC files to reflect changes in proto definitions and ensure compatibility. --- core/grpc/server/task_service_server.go | 4 + core/task/handler/runner.go | 17 +- grpc/proto/services/task_service.proto | 1 + grpc/task_service.pb.go | 241 +++++++++++++++++------- grpc/task_service_grpc.pb.go | 132 +++++++++---- 5 files changed, 275 insertions(+), 120 deletions(-) diff --git a/core/grpc/server/task_service_server.go b/core/grpc/server/task_service_server.go index 2ad3d319..30658fa6 100644 --- a/core/grpc/server/task_service_server.go +++ b/core/grpc/server/task_service_server.go @@ -188,6 +188,10 @@ func (svr TaskServiceServer) Connect(stream grpc.TaskService_ConnectServer) (err case grpc.TaskServiceConnectCode_INSERT_LOGS: // handle task log insertion err = svr.handleInsertLogs(taskId, msg) + case grpc.TaskServiceConnectCode_PING: + // handle connection health check ping - no action needed, just acknowledge + svr.Debugf("received ping from task[%s]", taskId.Hex()) + err = nil default: // invalid message code received svr.Errorf("invalid stream message code: %d", msg.Code) diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index a741db34..a9520ee1 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -595,18 +595,18 @@ func (r *Runner) isConnectionHealthy() bool { default: } - // Try to send a ping-like message to test connection with timeout - // Use a simple log message as ping since PING code doesn't exist - testMsg := &grpc.TaskServiceConnectRequest{ - Code: grpc.TaskServiceConnectCode_INSERT_LOGS, + // FIXED: Use proper PING mechanism instead of fake log messages + // This prevents health check messages from polluting the actual log stream + pingMsg := &grpc.TaskServiceConnectRequest{ + Code: grpc.TaskServiceConnectCode_PING, TaskId: r.tid.Hex(), - Data: []byte(`["[HEALTH CHECK] connection test"]`), + Data: nil, // No data needed for ping } // Use a channel to make the Send operation timeout-aware done := make(chan error, 1) go func() { - done <- r.conn.Send(testMsg) + done <- r.conn.Send(pingMsg) }() // Wait for either completion or timeout @@ -616,6 +616,7 @@ func (r *Runner) isConnectionHealthy() bool { r.Debugf("connection health check failed: %v", err) return false } + r.Debugf("connection health check successful") return true case <-time.After(5 * time.Second): r.Debugf("connection health check timed out") @@ -731,12 +732,12 @@ func (r *Runner) sendNotification() { r.Errorf("failed to get task client: %v", err) return } - + // Use independent context for async notification - prevents cancellation due to task lifecycle // This ensures notifications are sent even if the task runner is being cleaned up ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - + _, err = taskClient.SendNotification(ctx, req) if err != nil { if !errors.Is(ctx.Err(), context.DeadlineExceeded) { diff --git a/grpc/proto/services/task_service.proto b/grpc/proto/services/task_service.proto index ed7efde1..2c077ed4 100644 --- a/grpc/proto/services/task_service.proto +++ b/grpc/proto/services/task_service.proto @@ -22,6 +22,7 @@ message TaskServiceSubscribeResponse { enum TaskServiceConnectCode { INSERT_DATA = 0; INSERT_LOGS = 1; + PING = 2; } message TaskServiceConnectRequest { diff --git a/grpc/task_service.pb.go b/grpc/task_service.pb.go index c3f7fd38..3639c132 100644 --- a/grpc/task_service.pb.go +++ b/grpc/task_service.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.36.1 -// protoc v5.29.2 +// protoc-gen-go v1.34.2 +// protoc v5.27.2 // source: services/task_service.proto package grpc @@ -68,6 +68,7 @@ type TaskServiceConnectCode int32 const ( TaskServiceConnectCode_INSERT_DATA TaskServiceConnectCode = 0 TaskServiceConnectCode_INSERT_LOGS TaskServiceConnectCode = 1 + TaskServiceConnectCode_PING TaskServiceConnectCode = 2 ) // Enum value maps for TaskServiceConnectCode. @@ -75,10 +76,12 @@ var ( TaskServiceConnectCode_name = map[int32]string{ 0: "INSERT_DATA", 1: "INSERT_LOGS", + 2: "PING", } TaskServiceConnectCode_value = map[string]int32{ "INSERT_DATA": 0, "INSERT_LOGS": 1, + "PING": 2, } ) @@ -110,17 +113,20 @@ func (TaskServiceConnectCode) EnumDescriptor() ([]byte, []int) { } type TaskServiceSubscribeRequest struct { - state protoimpl.MessageState `protogen:"open.v1"` - TaskId string `protobuf:"bytes,1,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` - unknownFields protoimpl.UnknownFields + state protoimpl.MessageState sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + TaskId string `protobuf:"bytes,1,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` } func (x *TaskServiceSubscribeRequest) Reset() { *x = TaskServiceSubscribeRequest{} - mi := &file_services_task_service_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) + if protoimpl.UnsafeEnabled { + mi := &file_services_task_service_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } } func (x *TaskServiceSubscribeRequest) String() string { @@ -131,7 +137,7 @@ func (*TaskServiceSubscribeRequest) ProtoMessage() {} func (x *TaskServiceSubscribeRequest) ProtoReflect() protoreflect.Message { mi := &file_services_task_service_proto_msgTypes[0] - if x != nil { + if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -154,19 +160,22 @@ func (x *TaskServiceSubscribeRequest) GetTaskId() string { } type TaskServiceSubscribeResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - Code TaskServiceSubscribeCode `protobuf:"varint,1,opt,name=code,proto3,enum=grpc.TaskServiceSubscribeCode" json:"code,omitempty"` - TaskId string `protobuf:"bytes,2,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` - Force bool `protobuf:"varint,3,opt,name=force,proto3" json:"force,omitempty"` - unknownFields protoimpl.UnknownFields + state protoimpl.MessageState sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Code TaskServiceSubscribeCode `protobuf:"varint,1,opt,name=code,proto3,enum=grpc.TaskServiceSubscribeCode" json:"code,omitempty"` + TaskId string `protobuf:"bytes,2,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` + Force bool `protobuf:"varint,3,opt,name=force,proto3" json:"force,omitempty"` } func (x *TaskServiceSubscribeResponse) Reset() { *x = TaskServiceSubscribeResponse{} - mi := &file_services_task_service_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) + if protoimpl.UnsafeEnabled { + mi := &file_services_task_service_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } } func (x *TaskServiceSubscribeResponse) String() string { @@ -177,7 +186,7 @@ func (*TaskServiceSubscribeResponse) ProtoMessage() {} func (x *TaskServiceSubscribeResponse) ProtoReflect() protoreflect.Message { mi := &file_services_task_service_proto_msgTypes[1] - if x != nil { + if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -214,19 +223,22 @@ func (x *TaskServiceSubscribeResponse) GetForce() bool { } type TaskServiceConnectRequest struct { - state protoimpl.MessageState `protogen:"open.v1"` - Code TaskServiceConnectCode `protobuf:"varint,1,opt,name=code,proto3,enum=grpc.TaskServiceConnectCode" json:"code,omitempty"` - TaskId string `protobuf:"bytes,2,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` - Data []byte `protobuf:"bytes,3,opt,name=data,proto3" json:"data,omitempty"` - unknownFields protoimpl.UnknownFields + state protoimpl.MessageState sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Code TaskServiceConnectCode `protobuf:"varint,1,opt,name=code,proto3,enum=grpc.TaskServiceConnectCode" json:"code,omitempty"` + TaskId string `protobuf:"bytes,2,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` + Data []byte `protobuf:"bytes,3,opt,name=data,proto3" json:"data,omitempty"` } func (x *TaskServiceConnectRequest) Reset() { *x = TaskServiceConnectRequest{} - mi := &file_services_task_service_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) + if protoimpl.UnsafeEnabled { + mi := &file_services_task_service_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } } func (x *TaskServiceConnectRequest) String() string { @@ -237,7 +249,7 @@ func (*TaskServiceConnectRequest) ProtoMessage() {} func (x *TaskServiceConnectRequest) ProtoReflect() protoreflect.Message { mi := &file_services_task_service_proto_msgTypes[2] - if x != nil { + if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -274,17 +286,20 @@ func (x *TaskServiceConnectRequest) GetData() []byte { } type TaskServiceFetchTaskRequest struct { - state protoimpl.MessageState `protogen:"open.v1"` - NodeKey string `protobuf:"bytes,1,opt,name=node_key,json=nodeKey,proto3" json:"node_key,omitempty"` - unknownFields protoimpl.UnknownFields + state protoimpl.MessageState sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + NodeKey string `protobuf:"bytes,1,opt,name=node_key,json=nodeKey,proto3" json:"node_key,omitempty"` } func (x *TaskServiceFetchTaskRequest) Reset() { *x = TaskServiceFetchTaskRequest{} - mi := &file_services_task_service_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) + if protoimpl.UnsafeEnabled { + mi := &file_services_task_service_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } } func (x *TaskServiceFetchTaskRequest) String() string { @@ -295,7 +310,7 @@ func (*TaskServiceFetchTaskRequest) ProtoMessage() {} func (x *TaskServiceFetchTaskRequest) ProtoReflect() protoreflect.Message { mi := &file_services_task_service_proto_msgTypes[3] - if x != nil { + if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -318,17 +333,20 @@ func (x *TaskServiceFetchTaskRequest) GetNodeKey() string { } type TaskServiceFetchTaskResponse struct { - state protoimpl.MessageState `protogen:"open.v1"` - TaskId string `protobuf:"bytes,2,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` - unknownFields protoimpl.UnknownFields + state protoimpl.MessageState sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + TaskId string `protobuf:"bytes,2,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` } func (x *TaskServiceFetchTaskResponse) Reset() { *x = TaskServiceFetchTaskResponse{} - mi := &file_services_task_service_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) + if protoimpl.UnsafeEnabled { + mi := &file_services_task_service_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } } func (x *TaskServiceFetchTaskResponse) String() string { @@ -339,7 +357,7 @@ func (*TaskServiceFetchTaskResponse) ProtoMessage() {} func (x *TaskServiceFetchTaskResponse) ProtoReflect() protoreflect.Message { mi := &file_services_task_service_proto_msgTypes[4] - if x != nil { + if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -362,18 +380,21 @@ func (x *TaskServiceFetchTaskResponse) GetTaskId() string { } type TaskServiceSendNotificationRequest struct { - state protoimpl.MessageState `protogen:"open.v1"` - NodeKey string `protobuf:"bytes,1,opt,name=node_key,json=nodeKey,proto3" json:"node_key,omitempty"` - TaskId string `protobuf:"bytes,2,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` - unknownFields protoimpl.UnknownFields + state protoimpl.MessageState sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + NodeKey string `protobuf:"bytes,1,opt,name=node_key,json=nodeKey,proto3" json:"node_key,omitempty"` + TaskId string `protobuf:"bytes,2,opt,name=task_id,json=taskId,proto3" json:"task_id,omitempty"` } func (x *TaskServiceSendNotificationRequest) Reset() { *x = TaskServiceSendNotificationRequest{} - mi := &file_services_task_service_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) + if protoimpl.UnsafeEnabled { + mi := &file_services_task_service_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } } func (x *TaskServiceSendNotificationRequest) String() string { @@ -384,7 +405,7 @@ func (*TaskServiceSendNotificationRequest) ProtoMessage() {} func (x *TaskServiceSendNotificationRequest) ProtoReflect() protoreflect.Message { mi := &file_services_task_service_proto_msgTypes[5] - if x != nil { + if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -454,33 +475,33 @@ var file_services_task_service_proto_rawDesc = []byte{ 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x74, 0x61, 0x73, 0x6b, 0x49, 0x64, 0x2a, 0x26, 0x0a, 0x18, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x0a, 0x0a, 0x06, 0x43, - 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x10, 0x00, 0x2a, 0x3a, 0x0a, 0x16, 0x54, 0x61, 0x73, 0x6b, 0x53, + 0x41, 0x4e, 0x43, 0x45, 0x4c, 0x10, 0x00, 0x2a, 0x44, 0x0a, 0x16, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x43, 0x6f, 0x64, 0x65, 0x12, 0x0f, 0x0a, 0x0b, 0x49, 0x4e, 0x53, 0x45, 0x52, 0x54, 0x5f, 0x44, 0x41, 0x54, 0x41, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x49, 0x4e, 0x53, 0x45, 0x52, 0x54, 0x5f, 0x4c, 0x4f, 0x47, - 0x53, 0x10, 0x01, 0x32, 0xcb, 0x02, 0x0a, 0x0b, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, - 0x69, 0x63, 0x65, 0x12, 0x56, 0x0a, 0x09, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, - 0x12, 0x21, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, - 0x69, 0x63, 0x65, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x53, - 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x30, 0x01, 0x12, 0x3e, 0x0a, 0x07, 0x43, - 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x12, 0x1f, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, - 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0e, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x52, - 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x28, 0x01, 0x12, 0x54, 0x0a, 0x09, 0x46, - 0x65, 0x74, 0x63, 0x68, 0x54, 0x61, 0x73, 0x6b, 0x12, 0x21, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, - 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x46, 0x65, 0x74, 0x63, 0x68, - 0x54, 0x61, 0x73, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, 0x67, 0x72, - 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x46, 0x65, - 0x74, 0x63, 0x68, 0x54, 0x61, 0x73, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x00, 0x12, 0x4e, 0x0a, 0x10, 0x53, 0x65, 0x6e, 0x64, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x69, 0x63, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x28, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, - 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x65, 0x6e, 0x64, 0x4e, 0x6f, 0x74, 0x69, - 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, - 0x0e, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x00, 0x42, 0x08, 0x5a, 0x06, 0x2e, 0x3b, 0x67, 0x72, 0x70, 0x63, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, + 0x53, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x50, 0x49, 0x4e, 0x47, 0x10, 0x02, 0x32, 0xcb, 0x02, + 0x0a, 0x0b, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x56, 0x0a, + 0x09, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x12, 0x21, 0x2e, 0x67, 0x72, 0x70, + 0x63, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x53, 0x75, 0x62, + 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, + 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, + 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x30, 0x01, 0x12, 0x3e, 0x0a, 0x07, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, + 0x12, 0x1f, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, + 0x69, 0x63, 0x65, 0x43, 0x6f, 0x6e, 0x6e, 0x65, 0x63, 0x74, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, + 0x74, 0x1a, 0x0e, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x28, 0x01, 0x12, 0x54, 0x0a, 0x09, 0x46, 0x65, 0x74, 0x63, 0x68, 0x54, 0x61, + 0x73, 0x6b, 0x12, 0x21, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x46, 0x65, 0x74, 0x63, 0x68, 0x54, 0x61, 0x73, 0x6b, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x22, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, + 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x46, 0x65, 0x74, 0x63, 0x68, 0x54, 0x61, 0x73, + 0x6b, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x4e, 0x0a, 0x10, 0x53, + 0x65, 0x6e, 0x64, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, + 0x28, 0x2e, 0x67, 0x72, 0x70, 0x63, 0x2e, 0x54, 0x61, 0x73, 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, + 0x63, 0x65, 0x53, 0x65, 0x6e, 0x64, 0x4e, 0x6f, 0x74, 0x69, 0x66, 0x69, 0x63, 0x61, 0x74, 0x69, + 0x6f, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x0e, 0x2e, 0x67, 0x72, 0x70, 0x63, + 0x2e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x42, 0x08, 0x5a, 0x06, 0x2e, + 0x3b, 0x67, 0x72, 0x70, 0x63, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -532,6 +553,80 @@ func file_services_task_service_proto_init() { return } file_entity_response_proto_init() + if !protoimpl.UnsafeEnabled { + file_services_task_service_proto_msgTypes[0].Exporter = func(v any, i int) any { + switch v := v.(*TaskServiceSubscribeRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_services_task_service_proto_msgTypes[1].Exporter = func(v any, i int) any { + switch v := v.(*TaskServiceSubscribeResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_services_task_service_proto_msgTypes[2].Exporter = func(v any, i int) any { + switch v := v.(*TaskServiceConnectRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_services_task_service_proto_msgTypes[3].Exporter = func(v any, i int) any { + switch v := v.(*TaskServiceFetchTaskRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_services_task_service_proto_msgTypes[4].Exporter = func(v any, i int) any { + switch v := v.(*TaskServiceFetchTaskResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_services_task_service_proto_msgTypes[5].Exporter = func(v any, i int) any { + switch v := v.(*TaskServiceSendNotificationRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/grpc/task_service_grpc.pb.go b/grpc/task_service_grpc.pb.go index cdcb7c87..caeca53b 100644 --- a/grpc/task_service_grpc.pb.go +++ b/grpc/task_service_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: -// - protoc-gen-go-grpc v1.5.1 -// - protoc v5.29.2 +// - protoc-gen-go-grpc v1.4.0 +// - protoc v5.27.2 // source: services/task_service.proto package grpc @@ -15,8 +15,8 @@ import ( // This is a compile-time assertion to ensure that this generated file // is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.64.0 or later. -const _ = grpc.SupportPackageIsVersion9 +// Requires gRPC-Go v1.62.0 or later. +const _ = grpc.SupportPackageIsVersion8 const ( TaskService_Subscribe_FullMethodName = "/grpc.TaskService/Subscribe" @@ -29,8 +29,8 @@ const ( // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. type TaskServiceClient interface { - Subscribe(ctx context.Context, in *TaskServiceSubscribeRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[TaskServiceSubscribeResponse], error) - Connect(ctx context.Context, opts ...grpc.CallOption) (grpc.ClientStreamingClient[TaskServiceConnectRequest, Response], error) + Subscribe(ctx context.Context, in *TaskServiceSubscribeRequest, opts ...grpc.CallOption) (TaskService_SubscribeClient, error) + Connect(ctx context.Context, opts ...grpc.CallOption) (TaskService_ConnectClient, error) FetchTask(ctx context.Context, in *TaskServiceFetchTaskRequest, opts ...grpc.CallOption) (*TaskServiceFetchTaskResponse, error) SendNotification(ctx context.Context, in *TaskServiceSendNotificationRequest, opts ...grpc.CallOption) (*Response, error) } @@ -43,13 +43,13 @@ func NewTaskServiceClient(cc grpc.ClientConnInterface) TaskServiceClient { return &taskServiceClient{cc} } -func (c *taskServiceClient) Subscribe(ctx context.Context, in *TaskServiceSubscribeRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[TaskServiceSubscribeResponse], error) { +func (c *taskServiceClient) Subscribe(ctx context.Context, in *TaskServiceSubscribeRequest, opts ...grpc.CallOption) (TaskService_SubscribeClient, error) { cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) stream, err := c.cc.NewStream(ctx, &TaskService_ServiceDesc.Streams[0], TaskService_Subscribe_FullMethodName, cOpts...) if err != nil { return nil, err } - x := &grpc.GenericClientStream[TaskServiceSubscribeRequest, TaskServiceSubscribeResponse]{ClientStream: stream} + x := &taskServiceSubscribeClient{ClientStream: stream} if err := x.ClientStream.SendMsg(in); err != nil { return nil, err } @@ -59,21 +59,57 @@ func (c *taskServiceClient) Subscribe(ctx context.Context, in *TaskServiceSubscr return x, nil } -// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. -type TaskService_SubscribeClient = grpc.ServerStreamingClient[TaskServiceSubscribeResponse] +type TaskService_SubscribeClient interface { + Recv() (*TaskServiceSubscribeResponse, error) + grpc.ClientStream +} -func (c *taskServiceClient) Connect(ctx context.Context, opts ...grpc.CallOption) (grpc.ClientStreamingClient[TaskServiceConnectRequest, Response], error) { +type taskServiceSubscribeClient struct { + grpc.ClientStream +} + +func (x *taskServiceSubscribeClient) Recv() (*TaskServiceSubscribeResponse, error) { + m := new(TaskServiceSubscribeResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *taskServiceClient) Connect(ctx context.Context, opts ...grpc.CallOption) (TaskService_ConnectClient, error) { cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) stream, err := c.cc.NewStream(ctx, &TaskService_ServiceDesc.Streams[1], TaskService_Connect_FullMethodName, cOpts...) if err != nil { return nil, err } - x := &grpc.GenericClientStream[TaskServiceConnectRequest, Response]{ClientStream: stream} + x := &taskServiceConnectClient{ClientStream: stream} return x, nil } -// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. -type TaskService_ConnectClient = grpc.ClientStreamingClient[TaskServiceConnectRequest, Response] +type TaskService_ConnectClient interface { + Send(*TaskServiceConnectRequest) error + CloseAndRecv() (*Response, error) + grpc.ClientStream +} + +type taskServiceConnectClient struct { + grpc.ClientStream +} + +func (x *taskServiceConnectClient) Send(m *TaskServiceConnectRequest) error { + return x.ClientStream.SendMsg(m) +} + +func (x *taskServiceConnectClient) CloseAndRecv() (*Response, error) { + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + m := new(Response) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} func (c *taskServiceClient) FetchTask(ctx context.Context, in *TaskServiceFetchTaskRequest, opts ...grpc.CallOption) (*TaskServiceFetchTaskResponse, error) { cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) @@ -97,26 +133,23 @@ func (c *taskServiceClient) SendNotification(ctx context.Context, in *TaskServic // TaskServiceServer is the server API for TaskService service. // All implementations must embed UnimplementedTaskServiceServer -// for forward compatibility. +// for forward compatibility type TaskServiceServer interface { - Subscribe(*TaskServiceSubscribeRequest, grpc.ServerStreamingServer[TaskServiceSubscribeResponse]) error - Connect(grpc.ClientStreamingServer[TaskServiceConnectRequest, Response]) error + Subscribe(*TaskServiceSubscribeRequest, TaskService_SubscribeServer) error + Connect(TaskService_ConnectServer) error FetchTask(context.Context, *TaskServiceFetchTaskRequest) (*TaskServiceFetchTaskResponse, error) SendNotification(context.Context, *TaskServiceSendNotificationRequest) (*Response, error) mustEmbedUnimplementedTaskServiceServer() } -// UnimplementedTaskServiceServer must be embedded to have -// forward compatible implementations. -// -// NOTE: this should be embedded by value instead of pointer to avoid a nil -// pointer dereference when methods are called. -type UnimplementedTaskServiceServer struct{} +// UnimplementedTaskServiceServer must be embedded to have forward compatible implementations. +type UnimplementedTaskServiceServer struct { +} -func (UnimplementedTaskServiceServer) Subscribe(*TaskServiceSubscribeRequest, grpc.ServerStreamingServer[TaskServiceSubscribeResponse]) error { +func (UnimplementedTaskServiceServer) Subscribe(*TaskServiceSubscribeRequest, TaskService_SubscribeServer) error { return status.Errorf(codes.Unimplemented, "method Subscribe not implemented") } -func (UnimplementedTaskServiceServer) Connect(grpc.ClientStreamingServer[TaskServiceConnectRequest, Response]) error { +func (UnimplementedTaskServiceServer) Connect(TaskService_ConnectServer) error { return status.Errorf(codes.Unimplemented, "method Connect not implemented") } func (UnimplementedTaskServiceServer) FetchTask(context.Context, *TaskServiceFetchTaskRequest) (*TaskServiceFetchTaskResponse, error) { @@ -126,7 +159,6 @@ func (UnimplementedTaskServiceServer) SendNotification(context.Context, *TaskSer return nil, status.Errorf(codes.Unimplemented, "method SendNotification not implemented") } func (UnimplementedTaskServiceServer) mustEmbedUnimplementedTaskServiceServer() {} -func (UnimplementedTaskServiceServer) testEmbeddedByValue() {} // UnsafeTaskServiceServer may be embedded to opt out of forward compatibility for this service. // Use of this interface is not recommended, as added methods to TaskServiceServer will @@ -136,13 +168,6 @@ type UnsafeTaskServiceServer interface { } func RegisterTaskServiceServer(s grpc.ServiceRegistrar, srv TaskServiceServer) { - // If the following call pancis, it indicates UnimplementedTaskServiceServer was - // embedded by pointer and is nil. This will cause panics if an - // unimplemented method is ever invoked, so we test this at initialization - // time to prevent it from happening at runtime later due to I/O. - if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { - t.testEmbeddedByValue() - } s.RegisterService(&TaskService_ServiceDesc, srv) } @@ -151,18 +176,47 @@ func _TaskService_Subscribe_Handler(srv interface{}, stream grpc.ServerStream) e if err := stream.RecvMsg(m); err != nil { return err } - return srv.(TaskServiceServer).Subscribe(m, &grpc.GenericServerStream[TaskServiceSubscribeRequest, TaskServiceSubscribeResponse]{ServerStream: stream}) + return srv.(TaskServiceServer).Subscribe(m, &taskServiceSubscribeServer{ServerStream: stream}) } -// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. -type TaskService_SubscribeServer = grpc.ServerStreamingServer[TaskServiceSubscribeResponse] +type TaskService_SubscribeServer interface { + Send(*TaskServiceSubscribeResponse) error + grpc.ServerStream +} + +type taskServiceSubscribeServer struct { + grpc.ServerStream +} + +func (x *taskServiceSubscribeServer) Send(m *TaskServiceSubscribeResponse) error { + return x.ServerStream.SendMsg(m) +} func _TaskService_Connect_Handler(srv interface{}, stream grpc.ServerStream) error { - return srv.(TaskServiceServer).Connect(&grpc.GenericServerStream[TaskServiceConnectRequest, Response]{ServerStream: stream}) + return srv.(TaskServiceServer).Connect(&taskServiceConnectServer{ServerStream: stream}) } -// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. -type TaskService_ConnectServer = grpc.ClientStreamingServer[TaskServiceConnectRequest, Response] +type TaskService_ConnectServer interface { + SendAndClose(*Response) error + Recv() (*TaskServiceConnectRequest, error) + grpc.ServerStream +} + +type taskServiceConnectServer struct { + grpc.ServerStream +} + +func (x *taskServiceConnectServer) SendAndClose(m *Response) error { + return x.ServerStream.SendMsg(m) +} + +func (x *taskServiceConnectServer) Recv() (*TaskServiceConnectRequest, error) { + m := new(TaskServiceConnectRequest) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} func _TaskService_FetchTask_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(TaskServiceFetchTaskRequest) From 3edd2a1210fe4bf3dc792839344cd89e361de20c Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sat, 16 Aug 2025 17:42:07 +0800 Subject: [PATCH 2/2] refactor: optimize connection health checks to reduce log stream interference; adjust health check intervals and implement non-blocking pings --- core/task/handler/runner.go | 47 ++++++++++++++++++++++++--------- core/task/handler/runner_ipc.go | 3 +++ core/task/handler/runner_log.go | 3 +++ 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/core/task/handler/runner.go b/core/task/handler/runner.go index a9520ee1..7a749a4d 100644 --- a/core/task/handler/runner.go +++ b/core/task/handler/runner.go @@ -51,7 +51,7 @@ func newTaskRunner(id primitive.ObjectID, svc *Service) (r *Runner, err error) { connRetryDelay: 10 * time.Second, ipcTimeout: 60 * time.Second, // generous timeout for all tasks healthCheckInterval: 5 * time.Second, // check process every 5 seconds - connHealthInterval: 60 * time.Second, // check connection health every minute + connHealthInterval: 5 * time.Minute, // reduced frequency to minimize stream interference } // multi error @@ -579,11 +579,13 @@ func (r *Runner) monitorConnectionHealth() { } // isConnectionHealthy checks if the gRPC connection is still healthy +// Uses a non-blocking approach to prevent interfering with log streams func (r *Runner) isConnectionHealthy() bool { r.connMutex.RLock() - defer r.connMutex.RUnlock() + conn := r.conn + r.connMutex.RUnlock() - if r.conn == nil { + if conn == nil { return false } @@ -595,21 +597,42 @@ func (r *Runner) isConnectionHealthy() bool { default: } - // FIXED: Use proper PING mechanism instead of fake log messages - // This prevents health check messages from polluting the actual log stream + // FIXED: Use a completely non-blocking approach to prevent stream interference + // Instead of sending data that could block the log stream, just check connection state + // and use timing-based health assessment + + // Check if we've had recent successful operations + timeSinceLastCheck := time.Since(r.lastConnCheck) + + // If we haven't checked recently, consider it healthy if not too old + // This prevents health checks from interfering with active log streaming + if timeSinceLastCheck < 2*time.Minute { + r.Debugf("connection considered healthy based on recent activity") + return true + } + + // For older connections, try a non-blocking ping only if no active log streaming + // This is a compromise to avoid blocking the critical log data flow pingMsg := &grpc.TaskServiceConnectRequest{ Code: grpc.TaskServiceConnectCode_PING, TaskId: r.tid.Hex(), - Data: nil, // No data needed for ping + Data: nil, } - // Use a channel to make the Send operation timeout-aware + // Use a very short timeout and non-blocking approach done := make(chan error, 1) go func() { - done <- r.conn.Send(pingMsg) + // Re-acquire lock only for the send operation + r.connMutex.RLock() + defer r.connMutex.RUnlock() + if r.conn != nil { + done <- r.conn.Send(pingMsg) + } else { + done <- fmt.Errorf("connection is nil") + } }() - // Wait for either completion or timeout + // Very short timeout to prevent blocking log operations select { case err := <-done: if err != nil { @@ -618,9 +641,9 @@ func (r *Runner) isConnectionHealthy() bool { } r.Debugf("connection health check successful") return true - case <-time.After(5 * time.Second): - r.Debugf("connection health check timed out") - return false + case <-time.After(1 * time.Second): // Much shorter timeout + r.Debugf("connection health check timed out quickly - assume healthy to avoid blocking logs") + return true // Assume healthy to avoid disrupting log flow case <-r.ctx.Done(): r.Debugf("connection health check cancelled") return false diff --git a/core/task/handler/runner_ipc.go b/core/task/handler/runner_ipc.go index 12920fcc..a61d50d1 100644 --- a/core/task/handler/runner_ipc.go +++ b/core/task/handler/runner_ipc.go @@ -157,6 +157,9 @@ func (r *Runner) handleIPCInsertDataMessage(ipcMsg entity.IPCMessage) { } return } + + // Update last successful connection time to help health check avoid unnecessary pings + r.lastConnCheck = time.Now() } } diff --git a/core/task/handler/runner_log.go b/core/task/handler/runner_log.go index 918fa82f..73c516e6 100644 --- a/core/task/handler/runner_log.go +++ b/core/task/handler/runner_log.go @@ -53,6 +53,9 @@ func (r *Runner) writeLogLines(lines []string) { } return } + + // Update last successful connection time to help health check avoid unnecessary pings + r.lastConnCheck = time.Now() } // logInternally sends internal runner logs to the same logging system as the task