Hostname: `frontend-us-northeast-3-vm4w` InstanceId: `us-northeast#us-east4-c.Iqb8nNAA` ```SQL DECLARE hostName STRING DEFAULT "frontend-us-northeast-3-vm4w"; DECLARE lookbackDays INT64 DEFAULT 41; DECLARE start_time TIMESTAMP DEFAULT TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -lookbackDays DAY); ----------------------------------------------------------------- -- Step 1: Find the most recent log message indicating draining connections for a specific host WITH LatestDrainLog AS ( SELECT Message FROM `phenix-pcast.pcast_logs_us.syslog` WHERE Timestamp > start_time AND Facility = 'platform' AND HostName = hostName AND Message LIKE 'Websocket connectionids preventing drain%' ORDER BY Timestamp DESC LIMIT 1 -- Step 2: Extract all connection IDs from that single log message DrainingConnectionIds AS ( SELECT connectionId FROM LatestDrainLog, UNNEST(REGEXP_EXTRACT_ALL(Message, r"'([^']*)'")) AS connectionId ) ), -- Step 3: Find all logs that associate sessions with connections SessionConnections AS ( Timestamp SELECT REGEXP_EXTRACT(Message, r'\] \[(.*?)\] Session started with connection') AS sessionId, REGEXP_EXTRACT(Message, r'connection \[(.*?)\] and roles') AS connectionId, FROM `phenix-pcast.pcast_logs_us.syslog` WHERE Timestamp > start_time AND Facility = 'platform' AND Message LIKE '%Session started with connection%' AND REGEXP_EXTRACT(Message, r'\] \[(.*?)\] Session started with connection') IS NOT NULL AND REGEXP_EXTRACT(Message, r'connection \[(.*?)\] and roles') IS NOT NULL ), -- Pattern 2: "Session [sessionId] has a new connection [connectionId], previously [oldConnectionId]" SessionNewConnections AS ( SELECT Timestamp REGEXP_EXTRACT(Message, r'Session \[(.*?)\] has a new connection') AS sessionId, REGEXP_EXTRACT(Message, r'connection \[(.*?)\], previously') AS connectionId FROM `phenix-pcast.pcast_logs_us.syslog` WHERE Timestamp > start_time AND Facility = 'platform' AND Message LIKE '%Session%has a new connection%' AND REGEXP_EXTRACT(Message, r'Session \[(.*?)\] has a new connection') IS NOT NULL AND REGEXP_EXTRACT(Message, r'connection \[(.*?)\], previously') IS NOT NULL AllSessionConnections AS ( SELECT Timestamp, sessionId, connectionId FROM SessionConnections UNION DISTINCT SELECT Timestamp, sessionId, connectionId FROM SessionNewConnections ) ) SELECT * FROM AllSessionConnections ORDER BY Timestmap ``` ------ ```SQL DECLARE TargetHostName STRING DEFAULT "frontend-us-northeast-3-vm4w"; DECLARE TargetInstanceId STRING DEFAULT "us-northeast#us-east4-c.Iqb8nNAA"; DECLARE TargetConnectionId STRING DEFAULT "us-central#ZQiUCdymrZHbmeF12NhUZQ8xZZXxviWD"; ------------ With ConnectionIdsPreventingDrain AS ( SELECT Message FROM `phenix-pcast.pcast_logs_us.syslog` WHERE Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -2 MINUTE) AND Facility = 'platform' AND HostName = hostName AND Message LIKE 'Websocket connectionids preventing drain%' ORDER BY Timestamp DESC LIMIT 1 ) ``` ```SQL DECLARE HostName STRING DEFAULT "frontend-us-northeast-3-vm4w"; CREATE TEMPORARY FUNCTION GET_METRIC_VALUE(statusJson STRING, metricName STRING) RETURNS FLOAT64 AS ( COALESCE( ( SELECT CAST( JSON_EXTRACT_SCALAR(metric, '$.value') AS FLOAT64 ) FROM UNNEST( JSON_EXTRACT_ARRAY(JSON_EXTRACT(statusJson, '$.load')) ) AS metric WHERE JSON_EXTRACT_SCALAR(metric, '$.name') = metricName LIMIT 1 ), 0 ) ); -- WITH LatestInstanceMetricForHost AS ( SELECT Timestamp, Status, (GET_METRIC_VALUE(Status, 'uptime/os/seconds') / 3600 ) AS UptimeHours, (GET_METRIC_VALUE(Status, 'status/seconds') / 3600 ) AS DrainingHours FROM `phenix-pcast.pcast.InstanceMetrics` WHERE Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MINUTE) AND Hostname = Hostname QUALIFY ROW_NUMBER() OVER (PARTITION BY InstanceId ORDER BY Timestamp DESC) = 1 ORDER BY Timestamp DESC LIMIT 1 ``` ```SQL DECLARE TargetInstanceId STRING DEFAULT "us-northeast#us-east4-c.Iqb8nNAA"; CREATE TEMPORARY FUNCTION GET_METRIC_VALUE(statusJson STRING, metricName STRING) RETURNS FLOAT64 AS ( COALESCE( ( SELECT CAST( JSON_EXTRACT_SCALAR(metric, '$.value') AS FLOAT64 ) FROM UNNEST( JSON_EXTRACT_ARRAY(JSON_EXTRACT(statusJson, '$.load')) ) AS metric WHERE JSON_EXTRACT_SCALAR(metric, '$.name') = metricName LIMIT 1 ), 0 ) ); SELECT Timestamp, Status, InstanceId, HostName, Health, HealthAlert, FORMAT('%.2f', (GET_METRIC_VALUE(Status, 'uptime/os/seconds') / 3600 )) AS UptimeHours, FORMAT('%.2f', (GET_METRIC_VALUE(Status, 'status/seconds') / 3600 )) AS DrainingHours, GET_METRIC_VALUE(Status, 'connections/open') AS connectionsOpen, GET_METRIC_VALUE(Status, 'clients') AS clients, GET_METRIC_VALUE(Status, 'clients/subscriptions') AS clientsSubscriptions, GET_METRIC_VALUE(Status, 'clients/replay/events') AS clientsReplayEvents, GET_METRIC_VALUE(Status, 'mq/incoming/pending') AS mqIncomingPending, GET_METRIC_VALUE(Status, 'mq/outgoing/pending') AS mqOutgoingPending, GET_METRIC_VALUE(Status, 'mq/incoming/rate') AS mqIncomingRate, FROM `phenix-pcast.pcast.InstanceMetrics` WHERE Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MINUTE) AND InstanceId = TargetInstanceId QUALIFY ROW_NUMBER() OVER (PARTITION BY InstanceId ORDER BY Timestamp DESC) = 1 ORDER BY Timestamp DESC LIMIT 1 ``` Using ```SQL SELECT Timestamp, Category, Severity, Message, HostName, Region, Zone, FROM `phenix-pcast.pcast_logs_us.syslog` WHERE Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -90 DAY) AND Facility = 'platform' AND Service = 'frontend' AND Message LIKE "%Drain instance%" AND HostName = 'frontend-us-northeast-3-vm4w' ORDER BY Timestamp ``` `HostName`: `frontend-us-northeast-3-vm4w` Went into draining `2025-11-03 19:49:37.012998 UTC` - `[us-northeast#us-east4-c.Iqb8nNAA] Drain instance (undoable=[false])` `Skipping ping as previous ping is still pending since [1760474289916]` 1760474289916 --> `2025-10-14T20:38:09.916Z`