Skip to content

Commit 6cc8bed

Browse files
committed
filter by latest job runs
1 parent 612c1fa commit 6cc8bed

File tree

1 file changed

+41
-4
lines changed

1 file changed

+41
-4
lines changed

src/databricks/labs/ucx/queries/assessment/main/01_4_table_crawl_failures.sql

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,58 @@
33
--height 4
44
--width 4
55
*/
6+
-- Table crawl failures, restricted to log records of the most recent run of
-- each job in the 'assessment' workflow.
WITH latest_job_runs AS (
  -- One row per job_id: the job_run_id of the log record with the greatest
  -- timestamp for that job.
  SELECT
    timestamp,
    job_id,
    job_run_id
  FROM (
    SELECT
      CAST(timestamp AS TIMESTAMP) AS timestamp,
      job_id,
      job_run_id,
      ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY CAST(timestamp AS TIMESTAMP) DESC) = 1 AS latest_run_of_job
    FROM inventory.logs
  )
  WHERE
    latest_run_of_job
), logs_latest_job_runs AS (
  -- All 'assessment' log records that belong to one of the latest job runs.
  SELECT
    -- Qualified because latest_job_runs also exposes a timestamp column.
    CAST(logs.timestamp AS TIMESTAMP) AS timestamp,
    message,
    job_run_id,
    job_id,
    workflow_name,
    task_name
  FROM inventory.logs AS logs
  INNER JOIN latest_job_runs
    USING (job_id, job_run_id)
  WHERE
    workflow_name IN ('assessment')
), table_crawl_failures AS (
  -- Split 'failed-table-crawl: <reason> -> <entity>: <message>' records into
  -- their three parts. Must read from logs_latest_job_runs: latest_job_runs
  -- carries no message, workflow_name or task_name column.
  SELECT
    timestamp,
    REGEXP_EXTRACT(message, '^failed-table-crawl: (.+?) -> (.+?): (.+)$', 1) AS error_reason,
    REGEXP_EXTRACT(message, '^failed-table-crawl: (.+?) -> (.+?): (.+)$', 2) AS error_entity,
    REGEXP_EXTRACT(message, '^failed-table-crawl: (.+?) -> (.+?): (.+)$', 3) AS error_message,
    job_run_id,
    job_id,
    workflow_name,
    task_name
  FROM logs_latest_job_runs
  WHERE
    STARTSWITH(message, 'failed-table-crawl: ')
)
SELECT
  timestamp,
  error_entity,
  error_reason,
  error_message,
  job_run_id,
  job_id,
  workflow_name,
  task_name
FROM table_crawl_failures
ORDER BY
  timestamp

0 commit comments

Comments
 (0)