Skip to content

Commit 622792e

Browse files
committed
Switch to integer identifiers for run_id and snapshot_id.
1 parent 625f16a commit 622792e

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

src/databricks/labs/ucx/framework/history.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import datetime as dt
44
import json
55
import logging
6-
import uuid
6+
import os
77
from collections.abc import Callable, Sequence
88
from dataclasses import dataclass
99
from functools import cached_property
@@ -21,10 +21,10 @@ class HistoricalRecord:
2121
workspace_id: int
2222
"""The identifier of the workspace where this record was generated."""
2323

24-
run_id: str
25-
"""An identifier of the workflow run that generated this record."""
24+
run_id: int
25+
"""The identifier of the workflow run that generated this record."""
2626

27-
snapshot_id: str
27+
snapshot_id: int
2828
"""An identifier that is unique to the records produced for a given snapshot."""
2929

3030
run_start_time: dt.datetime
@@ -63,7 +63,7 @@ def __init__(
6363
self,
6464
ws: WorkspaceClient,
6565
backend: SqlBackend,
66-
run_id: str,
66+
run_id: int,
6767
catalog: str,
6868
schema: str,
6969
table: str,
@@ -91,7 +91,7 @@ class Appender:
9191
def __init__(
9292
self,
9393
ws: WorkspaceClient,
94-
run_id: str,
94+
run_id: int,
9595
klass: type[Record],
9696
key_from: Callable[[Record], str],
9797
persist: Callable[[str, list[HistoricalRecord]], None],
@@ -116,15 +116,20 @@ def _owner(self) -> str:
116116
return owner
117117

118118
def append_snapshot(self, records: Sequence[Record], *, run_start_time: dt.datetime) -> None:
119-
snapshot_id = uuid.uuid4()
119+
# Equivalent entropy to a type-4 UUID.
120+
snapshot_id = int.from_bytes(os.urandom(16), byteorder="big")
120121
historical_records = [
121122
self._inventory_record_to_historical(record, snapshot_id=snapshot_id, run_start_time=run_start_time)
122123
for record in records
123124
]
124125
self._persist(self._object_type, historical_records)
125126

126127
def _inventory_record_to_historical(
127-
self, record: Record, *, snapshot_id: uuid.UUID, run_start_time: dt.datetime
128+
self,
129+
record: Record,
130+
*,
131+
snapshot_id: int,
132+
run_start_time: dt.datetime,
128133
) -> HistoricalRecord:
129134
object_id = self._key_from(record)
130135
object_as_dict = dataclasses.asdict(record)
@@ -134,7 +139,7 @@ def _inventory_record_to_historical(
134139
return HistoricalRecord(
135140
workspace_id=self._workspace_id,
136141
run_id=self._run_id,
137-
snapshot_id=str(snapshot_id),
142+
snapshot_id=snapshot_id,
138143
run_start_time=run_start_time,
139144
object_type=self._object_type,
140145
object_type_version=self._object_type_version,

0 commit comments

Comments
 (0)