228 lines
6.2 KiB
Markdown
228 lines
6.2 KiB
Markdown
Hostname: `frontend-us-northeast-3-vm4w`
|
|
InstanceId: `us-northeast#us-east4-c.Iqb8nNAA`
|
|
|
|
|
|
```SQL
|
|
DECLARE
|
|
hostName STRING DEFAULT "frontend-us-northeast-3-vm4w";
|
|
DECLARE
|
|
lookbackDays INT64 DEFAULT 41;
|
|
DECLARE
|
|
start_time TIMESTAMP DEFAULT TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -lookbackDays DAY);
|
|
|
|
-----------------------------------------------------------------
|
|
-- Step 1: Find the most recent log message indicating draining connections for a specific host
|
|
WITH LatestDrainLog AS (
|
|
SELECT Message
|
|
FROM `phenix-pcast.pcast_logs_us.syslog`
|
|
WHERE
|
|
Timestamp > start_time
|
|
AND Facility = 'platform'
|
|
AND HostName = hostName
|
|
AND Message LIKE 'Websocket connectionids preventing drain%'
|
|
ORDER BY Timestamp DESC LIMIT 1
|
|
-- Step 2: Extract all connection IDs from that single log message
|
|
DrainingConnectionIds AS (
|
|
SELECT connectionId
|
|
FROM
|
|
LatestDrainLog,
|
|
UNNEST(REGEXP_EXTRACT_ALL(Message, r"'([^']*)'")) AS connectionId )
|
|
),
|
|
-- Step 3: Find all logs that associate sessions with connections
|
|
SessionConnections AS (
|
|
Timestamp
|
|
SELECT
|
|
REGEXP_EXTRACT(Message, r'\] \[(.*?)\] Session started with connection') AS sessionId,
|
|
REGEXP_EXTRACT(Message, r'connection \[(.*?)\] and roles') AS connectionId,
|
|
FROM `phenix-pcast.pcast_logs_us.syslog`
|
|
WHERE Timestamp > start_time
|
|
AND Facility = 'platform'
|
|
AND Message LIKE '%Session started with connection%'
|
|
AND REGEXP_EXTRACT(Message, r'\] \[(.*?)\] Session started with connection') IS NOT NULL
|
|
AND REGEXP_EXTRACT(Message, r'connection \[(.*?)\] and roles') IS NOT NULL
|
|
),
|
|
-- Pattern 2: "Session [sessionId] has a new connection [connectionId], previously [oldConnectionId]"
|
|
SessionNewConnections AS (
|
|
SELECT
|
|
Timestamp
|
|
REGEXP_EXTRACT(Message, r'Session \[(.*?)\] has a new connection') AS sessionId,
|
|
REGEXP_EXTRACT(Message, r'connection \[(.*?)\], previously') AS connectionId
|
|
FROM `phenix-pcast.pcast_logs_us.syslog`
|
|
WHERE
|
|
Timestamp > start_time
|
|
AND Facility = 'platform'
|
|
AND Message LIKE '%Session%has a new connection%'
|
|
AND REGEXP_EXTRACT(Message, r'Session \[(.*?)\] has a new connection') IS NOT NULL
|
|
AND REGEXP_EXTRACT(Message, r'connection \[(.*?)\], previously') IS NOT NULL
|
|
AllSessionConnections AS (
|
|
SELECT Timestamp, sessionId, connectionId
|
|
FROM SessionConnections
|
|
UNION DISTINCT
|
|
SELECT Timestamp, sessionId, connectionId
|
|
FROM SessionNewConnections )
|
|
)
|
|
SELECT *
|
|
FROM AllSessionConnections
|
|
ORDER BY Timestmap
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
------
|
|
|
|
```SQL
|
|
DECLARE TargetHostName STRING DEFAULT "frontend-us-northeast-3-vm4w";
|
|
DECLARE TargetInstanceId STRING DEFAULT "us-northeast#us-east4-c.Iqb8nNAA";
|
|
DECLARE TargetConnectionId STRING DEFAULT "us-central#ZQiUCdymrZHbmeF12NhUZQ8xZZXxviWD";
|
|
------------
|
|
With ConnectionIdsPreventingDrain AS (
|
|
SELECT
|
|
Message
|
|
FROM
|
|
`phenix-pcast.pcast_logs_us.syslog`
|
|
WHERE
|
|
Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -2 MINUTE)
|
|
AND Facility = 'platform'
|
|
AND HostName = hostName
|
|
AND Message LIKE 'Websocket connectionids preventing drain%'
|
|
ORDER BY
|
|
Timestamp DESC
|
|
LIMIT
|
|
1
|
|
)
|
|
```
|
|
|
|
|
|
|
|
|
|
```SQL
|
|
DECLARE HostName STRING DEFAULT "frontend-us-northeast-3-vm4w";
|
|
|
|
CREATE TEMPORARY FUNCTION GET_METRIC_VALUE(statusJson STRING, metricName STRING) RETURNS FLOAT64 AS (
|
|
COALESCE(
|
|
(
|
|
SELECT
|
|
CAST(
|
|
JSON_EXTRACT_SCALAR(metric, '$.value') AS FLOAT64
|
|
)
|
|
FROM
|
|
UNNEST(
|
|
JSON_EXTRACT_ARRAY(JSON_EXTRACT(statusJson, '$.load'))
|
|
) AS metric
|
|
WHERE
|
|
JSON_EXTRACT_SCALAR(metric, '$.name') = metricName
|
|
LIMIT
|
|
1
|
|
),
|
|
0
|
|
)
|
|
);
|
|
-- WITH LatestInstanceMetricForHost AS (
|
|
SELECT
|
|
Timestamp,
|
|
Status,
|
|
(GET_METRIC_VALUE(Status, 'uptime/os/seconds') / 3600 ) AS UptimeHours,
|
|
(GET_METRIC_VALUE(Status, 'status/seconds') / 3600 ) AS DrainingHours
|
|
FROM `phenix-pcast.pcast.InstanceMetrics`
|
|
WHERE Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MINUTE)
|
|
AND Hostname = Hostname
|
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY InstanceId ORDER BY Timestamp DESC) = 1
|
|
ORDER BY Timestamp DESC
|
|
LIMIT 1
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
```SQL
|
|
DECLARE TargetInstanceId STRING DEFAULT "us-northeast#us-east4-c.Iqb8nNAA";
|
|
|
|
CREATE TEMPORARY FUNCTION GET_METRIC_VALUE(statusJson STRING, metricName STRING) RETURNS FLOAT64 AS (
|
|
COALESCE(
|
|
(
|
|
SELECT
|
|
CAST(
|
|
JSON_EXTRACT_SCALAR(metric, '$.value') AS FLOAT64
|
|
)
|
|
FROM
|
|
UNNEST(
|
|
JSON_EXTRACT_ARRAY(JSON_EXTRACT(statusJson, '$.load'))
|
|
) AS metric
|
|
WHERE
|
|
JSON_EXTRACT_SCALAR(metric, '$.name') = metricName
|
|
LIMIT
|
|
1
|
|
),
|
|
0
|
|
|
|
|
|
)
|
|
);
|
|
|
|
SELECT
|
|
Timestamp,
|
|
Status,
|
|
InstanceId,
|
|
HostName,
|
|
Health,
|
|
HealthAlert,
|
|
FORMAT('%.2f', (GET_METRIC_VALUE(Status, 'uptime/os/seconds') / 3600 )) AS UptimeHours,
|
|
FORMAT('%.2f', (GET_METRIC_VALUE(Status, 'status/seconds') / 3600 )) AS DrainingHours,
|
|
GET_METRIC_VALUE(Status, 'connections/open') AS connectionsOpen,
|
|
GET_METRIC_VALUE(Status, 'clients') AS clients,
|
|
GET_METRIC_VALUE(Status, 'clients/subscriptions') AS clientsSubscriptions,
|
|
GET_METRIC_VALUE(Status, 'clients/replay/events') AS clientsReplayEvents,
|
|
GET_METRIC_VALUE(Status, 'mq/incoming/pending') AS mqIncomingPending,
|
|
GET_METRIC_VALUE(Status, 'mq/outgoing/pending') AS mqOutgoingPending,
|
|
GET_METRIC_VALUE(Status, 'mq/incoming/rate') AS mqIncomingRate,
|
|
|
|
FROM `phenix-pcast.pcast.InstanceMetrics`
|
|
WHERE Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -1 MINUTE)
|
|
AND InstanceId = TargetInstanceId
|
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY InstanceId ORDER BY Timestamp DESC) = 1
|
|
ORDER BY Timestamp DESC
|
|
LIMIT 1
|
|
|
|
```
|
|
|
|
|
|
Using
|
|
```SQL
|
|
SELECT
|
|
Timestamp,
|
|
Category,
|
|
Severity,
|
|
Message,
|
|
HostName,
|
|
Region,
|
|
Zone,
|
|
FROM
|
|
`phenix-pcast.pcast_logs_us.syslog`
|
|
WHERE
|
|
Timestamp > TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL -90 DAY)
|
|
|
|
AND Facility = 'platform'
|
|
AND Service = 'frontend'
|
|
AND Message LIKE "%Drain instance%"
|
|
AND HostName = 'frontend-us-northeast-3-vm4w'
|
|
ORDER BY
|
|
Timestamp
|
|
|
|
```
|
|
|
|
`HostName`: `frontend-us-northeast-3-vm4w`
|
|
|
|
Went into draining
|
|
`2025-11-03 19:49:37.012998 UTC` - `[us-northeast#us-east4-c.Iqb8nNAA] Drain instance (undoable=[false])`
|
|
|
|
`Skipping ping as previous ping is still pending since [1760474289916]`
|
|
1760474289916 --> `2025-10-14T20:38:09.916Z`
|
|
|
|
|