2025-11-25
This commit is contained in:
227
PhenixRTS/frontend-draining/19 days draining.md
Normal file
227
PhenixRTS/frontend-draining/19 days draining.md
Normal file
@@ -0,0 +1,227 @@
|
||||
Hostname: `frontend-us-northeast-3-vm4w`
|
||||
InstanceId: `us-northeast#us-east4-c.Iqb8nNAA`
|
||||
|
||||
|
||||
```SQL
|
||||
DECLARE
|
||||
hostName STRING DEFAULT "frontend-us-northeast-3-vm4w";
|
||||
DECLARE
|
||||
lookbackDays INT64 DEFAULT 41;
|
||||
DECLARE
|
||||
start_time TIMESTAMP DEFAULT TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -lookbackDays DAY);
|
||||
|
||||
-----------------------------------------------------------------
|
||||
-- Step 1: Find the most recent log message indicating draining connections for a specific host
|
||||
WITH LatestDrainLog AS (
|
||||
SELECT Message
|
||||
FROM `phenix-pcast.pcast_logs_us.syslog`
|
||||
WHERE
|
||||
Timestamp > start_time
|
||||
AND Facility = 'platform'
|
||||
AND HostName = hostName
|
||||
AND Message LIKE 'Websocket connectionids preventing drain%'
|
||||
ORDER BY Timestamp DESC LIMIT 1
|
||||
-- Step 2: Extract all connection IDs from that single log message
|
||||
DrainingConnectionIds AS (
|
||||
SELECT connectionId
|
||||
FROM
|
||||
LatestDrainLog,
|
||||
UNNEST(REGEXP_EXTRACT_ALL(Message, r"'([^']*)'")) AS connectionId )
|
||||
),
|
||||
-- Step 3: Find all logs that associate sessions with connections
|
||||
SessionConnections AS (
|
||||
Timestamp
|
||||
SELECT
|
||||
REGEXP_EXTRACT(Message, r'\] \[(.*?)\] Session started with connection') AS sessionId,
|
||||
REGEXP_EXTRACT(Message, r'connection \[(.*?)\] and roles') AS connectionId,
|
||||
FROM `phenix-pcast.pcast_logs_us.syslog`
|
||||
WHERE Timestamp > start_time
|
||||
AND Facility = 'platform'
|
||||
AND Message LIKE '%Session started with connection%'
|
||||
AND REGEXP_EXTRACT(Message, r'\] \[(.*?)\] Session started with connection') IS NOT NULL
|
||||
AND REGEXP_EXTRACT(Message, r'connection \[(.*?)\] and roles') IS NOT NULL
|
||||
),
|
||||
-- Pattern 2: "Session [sessionId] has a new connection [connectionId], previously [oldConnectionId]"
|
||||
SessionNewConnections AS (
|
||||
SELECT
|
||||
Timestamp
|
||||
REGEXP_EXTRACT(Message, r'Session \[(.*?)\] has a new connection') AS sessionId,
|
||||
REGEXP_EXTRACT(Message, r'connection \[(.*?)\], previously') AS connectionId
|
||||
FROM `phenix-pcast.pcast_logs_us.syslog`
|
||||
WHERE
|
||||
Timestamp > start_time
|
||||
AND Facility = 'platform'
|
||||
AND Message LIKE '%Session%has a new connection%'
|
||||
AND REGEXP_EXTRACT(Message, r'Session \[(.*?)\] has a new connection') IS NOT NULL
|
||||
AND REGEXP_EXTRACT(Message, r'connection \[(.*?)\], previously') IS NOT NULL
|
||||
AllSessionConnections AS (
|
||||
SELECT Timestamp, sessionId, connectionId
|
||||
FROM SessionConnections
|
||||
UNION DISTINCT
|
||||
SELECT Timestamp, sessionId, connectionId
|
||||
FROM SessionNewConnections )
|
||||
)
|
||||
SELECT *
|
||||
FROM AllSessionConnections
|
||||
ORDER BY Timestmap
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
------
|
||||
|
||||
```SQL
|
||||
DECLARE TargetHostName STRING DEFAULT "frontend-us-northeast-3-vm4w";
|
||||
DECLARE TargetInstanceId STRING DEFAULT "us-northeast#us-east4-c.Iqb8nNAA";
|
||||
DECLARE TargetConnectionId STRING DEFAULT "us-central#ZQiUCdymrZHbmeF12NhUZQ8xZZXxviWD";
|
||||
------------
|
||||
With ConnectionIdsPreventingDrain AS (
|
||||
SELECT
|
||||
Message
|
||||
FROM
|
||||
`phenix-pcast.pcast_logs_us.syslog`
|
||||
WHERE
|
||||
Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -2 MINUTE)
|
||||
AND Facility = 'platform'
|
||||
AND HostName = hostName
|
||||
AND Message LIKE 'Websocket connectionids preventing drain%'
|
||||
ORDER BY
|
||||
Timestamp DESC
|
||||
LIMIT
|
||||
1
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
```SQL
|
||||
DECLARE HostName STRING DEFAULT "frontend-us-northeast-3-vm4w";
|
||||
|
||||
CREATE TEMPORARY FUNCTION GET_METRIC_VALUE(statusJson STRING, metricName STRING) RETURNS FLOAT64 AS (
|
||||
COALESCE(
|
||||
(
|
||||
SELECT
|
||||
CAST(
|
||||
JSON_EXTRACT_SCALAR(metric, '$.value') AS FLOAT64
|
||||
)
|
||||
FROM
|
||||
UNNEST(
|
||||
JSON_EXTRACT_ARRAY(JSON_EXTRACT(statusJson, '$.load'))
|
||||
) AS metric
|
||||
WHERE
|
||||
JSON_EXTRACT_SCALAR(metric, '$.name') = metricName
|
||||
LIMIT
|
||||
1
|
||||
),
|
||||
0
|
||||
)
|
||||
);
|
||||
-- WITH LatestInstanceMetricForHost AS (
|
||||
SELECT
|
||||
Timestamp,
|
||||
Status,
|
||||
(GET_METRIC_VALUE(Status, 'uptime/os/seconds') / 3600 ) AS UptimeHours,
|
||||
(GET_METRIC_VALUE(Status, 'status/seconds') / 3600 ) AS DrainingHours
|
||||
FROM `phenix-pcast.pcast.InstanceMetrics`
|
||||
WHERE Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MINUTE)
|
||||
AND Hostname = Hostname
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY InstanceId ORDER BY Timestamp DESC) = 1
|
||||
ORDER BY Timestamp DESC
|
||||
LIMIT 1
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
```SQL
|
||||
DECLARE TargetInstanceId STRING DEFAULT "us-northeast#us-east4-c.Iqb8nNAA";
|
||||
|
||||
CREATE TEMPORARY FUNCTION GET_METRIC_VALUE(statusJson STRING, metricName STRING) RETURNS FLOAT64 AS (
|
||||
COALESCE(
|
||||
(
|
||||
SELECT
|
||||
CAST(
|
||||
JSON_EXTRACT_SCALAR(metric, '$.value') AS FLOAT64
|
||||
)
|
||||
FROM
|
||||
UNNEST(
|
||||
JSON_EXTRACT_ARRAY(JSON_EXTRACT(statusJson, '$.load'))
|
||||
) AS metric
|
||||
WHERE
|
||||
JSON_EXTRACT_SCALAR(metric, '$.name') = metricName
|
||||
LIMIT
|
||||
1
|
||||
),
|
||||
0
|
||||
|
||||
|
||||
)
|
||||
);
|
||||
|
||||
SELECT
|
||||
Timestamp,
|
||||
Status,
|
||||
InstanceId,
|
||||
HostName,
|
||||
Health,
|
||||
HealthAlert,
|
||||
FORMAT('%.2f', (GET_METRIC_VALUE(Status, 'uptime/os/seconds') / 3600 )) AS UptimeHours,
|
||||
FORMAT('%.2f', (GET_METRIC_VALUE(Status, 'status/seconds') / 3600 )) AS DrainingHours,
|
||||
GET_METRIC_VALUE(Status, 'connections/open') AS connectionsOpen,
|
||||
GET_METRIC_VALUE(Status, 'clients') AS clients,
|
||||
GET_METRIC_VALUE(Status, 'clients/subscriptions') AS clientsSubscriptions,
|
||||
GET_METRIC_VALUE(Status, 'clients/replay/events') AS clientsReplayEvents,
|
||||
GET_METRIC_VALUE(Status, 'mq/incoming/pending') AS mqIncomingPending,
|
||||
GET_METRIC_VALUE(Status, 'mq/outgoing/pending') AS mqOutgoingPending,
|
||||
GET_METRIC_VALUE(Status, 'mq/incoming/rate') AS mqIncomingRate,
|
||||
|
||||
FROM `phenix-pcast.pcast.InstanceMetrics`
|
||||
WHERE Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MINUTE)
|
||||
AND InstanceId = TargetInstanceId
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY InstanceId ORDER BY Timestamp DESC) = 1
|
||||
ORDER BY Timestamp DESC
|
||||
LIMIT 1
|
||||
|
||||
```
|
||||
|
||||
|
||||
Using
|
||||
```SQL
|
||||
SELECT
|
||||
Timestamp,
|
||||
Category,
|
||||
Severity,
|
||||
Message,
|
||||
HostName,
|
||||
Region,
|
||||
Zone,
|
||||
FROM
|
||||
`phenix-pcast.pcast_logs_us.syslog`
|
||||
WHERE
|
||||
Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -90 DAY)
|
||||
|
||||
AND Facility = 'platform'
|
||||
AND Service = 'frontend'
|
||||
AND Message LIKE "%Drain instance%"
|
||||
AND HostName = 'frontend-us-northeast-3-vm4w'
|
||||
ORDER BY
|
||||
Timestamp
|
||||
|
||||
```
|
||||
|
||||
`HostName`: `frontend-us-northeast-3-vm4w`
|
||||
|
||||
Went into draining
|
||||
`2025-11-03 19:49:37.012998 UTC` - `[us-northeast#us-east4-c.Iqb8nNAA] Drain instance (undoable=[false])`
|
||||
|
||||
`Skipping ping as previous ping is still pending since [1760474289916]`
|
||||
1760474289916 --> `2025-10-14T20:38:09.916Z`
|
||||
|
||||
|
||||
Reference in New Issue
Block a user