mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
feat: add PING mechanism for connection health checks; update proto and generated files
- Introduced PING code in TaskServiceConnectCode enum for health checks.
- Updated Runner to use proper PING messages instead of fake log messages for connection health checks.
- Modified TaskServiceServer to handle PING requests and acknowledge them.
- Adjusted generated gRPC files to reflect changes in proto definitions and ensure compatibility.
This commit is contained in:
@@ -188,6 +188,10 @@ func (svr TaskServiceServer) Connect(stream grpc.TaskService_ConnectServer) (err
|
||||
case grpc.TaskServiceConnectCode_INSERT_LOGS:
|
||||
// handle task log insertion
|
||||
err = svr.handleInsertLogs(taskId, msg)
|
||||
case grpc.TaskServiceConnectCode_PING:
|
||||
// handle connection health check ping - no action needed, just acknowledge
|
||||
svr.Debugf("received ping from task[%s]", taskId.Hex())
|
||||
err = nil
|
||||
default:
|
||||
// invalid message code received
|
||||
svr.Errorf("invalid stream message code: %d", msg.Code)
|
||||
|
||||
@@ -595,18 +595,18 @@ func (r *Runner) isConnectionHealthy() bool {
|
||||
default:
|
||||
}
|
||||
|
||||
// Try to send a ping-like message to test connection with timeout
|
||||
// Use a simple log message as ping since PING code doesn't exist
|
||||
testMsg := &grpc.TaskServiceConnectRequest{
|
||||
Code: grpc.TaskServiceConnectCode_INSERT_LOGS,
|
||||
// FIXED: Use proper PING mechanism instead of fake log messages
|
||||
// This prevents health check messages from polluting the actual log stream
|
||||
pingMsg := &grpc.TaskServiceConnectRequest{
|
||||
Code: grpc.TaskServiceConnectCode_PING,
|
||||
TaskId: r.tid.Hex(),
|
||||
Data: []byte(`["[HEALTH CHECK] connection test"]`),
|
||||
Data: nil, // No data needed for ping
|
||||
}
|
||||
|
||||
// Use a channel to make the Send operation timeout-aware
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
done <- r.conn.Send(testMsg)
|
||||
done <- r.conn.Send(pingMsg)
|
||||
}()
|
||||
|
||||
// Wait for either completion or timeout
|
||||
@@ -616,6 +616,7 @@ func (r *Runner) isConnectionHealthy() bool {
|
||||
r.Debugf("connection health check failed: %v", err)
|
||||
return false
|
||||
}
|
||||
r.Debugf("connection health check successful")
|
||||
return true
|
||||
case <-time.After(5 * time.Second):
|
||||
r.Debugf("connection health check timed out")
|
||||
@@ -731,12 +732,12 @@ func (r *Runner) sendNotification() {
|
||||
r.Errorf("failed to get task client: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Use independent context for async notification - prevents cancellation due to task lifecycle
|
||||
// This ensures notifications are sent even if the task runner is being cleaned up
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
|
||||
_, err = taskClient.SendNotification(ctx, req)
|
||||
if err != nil {
|
||||
if !errors.Is(ctx.Err(), context.DeadlineExceeded) {
|
||||
|
||||
Reference in New Issue
Block a user