Skip to content

Commit c076a7f

Browse files
Merge branch 'main' into feature/##1938
2 parents 16719bd + 5b3c0f2 commit c076a7f

File tree

20 files changed

+338
-237
lines changed

20 files changed

+338
-237
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ integration:
2323
hatch run integration
2424

2525
coverage:
26-
hatch run coverage && open htmlcov/index.html
26+
hatch run coverage; status=$$?; [ -e "htmlcov/index.html" ] && open htmlcov/index.html; exit $$status
2727

2828
known:
2929
hatch run python src/databricks/labs/ucx/source_code/known.py

README.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ See [contributing instructions](CONTRIBUTING.md) to help improve this project.
9292
* [Metastore related commands](#metastore-related-commands)
9393
* [`show-all-metastores` command](#show-all-metastores-command)
9494
* [`assign-metastore` command](#assign-metastore-command)
95+
* [`create-ucx-catalog` command](#create-ucx-catalog-command)
9596
* [Table migration commands](#table-migration-commands)
9697
* [`principal-prefix-access` command](#principal-prefix-access-command)
9798
* [Access for AWS S3 Buckets](#access-for-aws-s3-buckets)
@@ -1169,9 +1170,23 @@ a region, and you want to see which ones are available for assignment.
11691170
databricks labs ucx assign-metastore --workspace-id <workspace-id> [--metastore-id <metastore-id>]
11701171
```
11711172

1172-
This command assigns a metastore to a workspace with `workspace-id`. If there is only a single metastore in the workspace
1173-
region, it will be automatically assigned to the workspace. If there are multiple metastores available, you need to specify
1174-
the metastore id of the metastore you want to assign to the workspace.
1173+
This command assigns a metastore to a workspace with `--workspace-id`. If there is only a single metastore in the
1174+
workspace region, the command automatically assigns that metastore to the workspace. If there are multiple metastores
1175+
available, the command prompts for specification of the metastore (id) you want to assign to the workspace.
1176+
1177+
[[back to top](#databricks-labs-ucx)]
1178+
1179+
## `create-ucx-catalog` command
1180+
1181+
```commandline
1182+
databricks labs ucx create-ucx-catalog
1183+
16:12:59 INFO [d.l.u.hive_metastore.catalog_schema] Validating UC catalog: ucx
1184+
Please provide storage location url for catalog: ucx (default: metastore): ...
1185+
16:13:01 INFO [d.l.u.hive_metastore.catalog_schema] Creating UC catalog: ucx
1186+
```
1187+
1188+
Create and setup UCX artifact catalog. Amongst other things, the artifacts are used for tracking the migration progress
1189+
across workspaces.
11751190

11761191
# Table migration commands
11771192

labs.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,15 +261,18 @@ commands:
261261

262262
- name: assign-metastore
263263
is_account_level: true
264-
description: Enable Unity Catalog features on a workspace by assign a metastore to it
264+
description: Enable Unity Catalog features on a workspace by assigning a metastore to it.
265265
flags:
266266
- name: workspace-id
267-
description: (Optional) Workspace ID to assign a metastore to
267+
description: Workspace ID to assign a metastore to
268268
- name: metastore-id
269269
description: (Optional) If there are multiple metastores in the region, specify the metastore ID to assign
270270
- name: default-catalog
271271
description: (Optional) Default catalog to assign to the workspace. If not provided, it will be hive_metastore
272272

273+
- name: create-ucx-catalog
274+
description: Create UCX artifact catalog
275+
273276
- name: migrate-tables
274277
description: |
275278
Trigger the `migrate-tables` workflow and, optionally, `migrate-external-hiveserde-tables-in-place-experimental`

src/databricks/labs/ucx/account/metastores.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,38 +27,33 @@ def show_all_metastores(self, workspace_id: str | None = None):
2727
def assign_metastore(
2828
self,
2929
prompts: Prompts,
30-
str_workspace_id: str | None = None,
30+
workspace_id: int,
31+
*,
3132
metastore_id: str | None = None,
3233
default_catalog: str | None = None,
3334
):
34-
if not str_workspace_id:
35-
workspace_choices = self._get_all_workspaces()
36-
workspace_id = prompts.choice_from_dict("Please select a workspace:", workspace_choices)
37-
else:
38-
workspace_id = int(str_workspace_id)
39-
if not metastore_id:
35+
if metastore_id is None:
4036
# search for all matching metastores
4137
metastore_choices = self._get_all_metastores(self._get_region(workspace_id))
4238
if len(metastore_choices) == 0:
43-
raise ValueError(f"No matching metastore found for workspace {workspace_id}")
39+
raise ValueError(f"No matching metastore found for workspace: {workspace_id}")
4440
# if there are multiple matches, prompt users to select one
4541
if len(metastore_choices) > 1:
4642
metastore_id = prompts.choice_from_dict(
4743
"Multiple metastores found, please select one:", metastore_choices
4844
)
4945
else:
5046
metastore_id = list(metastore_choices.values())[0]
51-
if metastore_id is not None:
52-
self._ac.metastore_assignments.create(workspace_id, metastore_id)
47+
self._ac.metastore_assignments.create(workspace_id, metastore_id)
5348
# set the default catalog using the default_namespace setting API
5449
if default_catalog is not None:
5550
self._set_default_catalog(workspace_id, default_catalog)
5651

57-
def _get_region(self, workspace_id: int) -> str:
52+
def _get_region(self, workspace_id: int) -> str | None:
5853
workspace = self._ac.workspaces.get(workspace_id)
5954
if self._ac.config.is_aws:
60-
return str(workspace.aws_region)
61-
return str(workspace.location)
55+
return workspace.aws_region
56+
return workspace.location
6257

6358
def _get_all_workspaces(self) -> dict[str, int]:
6459
output = dict[str, int]()

src/databricks/labs/ucx/cli.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -590,11 +590,27 @@ def assign_metastore(
590590
workspace_id: str | None = None,
591591
metastore_id: str | None = None,
592592
default_catalog: str | None = None,
593+
ctx: AccountContext | None = None,
593594
):
594595
"""Assign metastore to a workspace"""
595596
logger.info(f"Account ID: {a.config.account_id}")
596-
ctx = AccountContext(a)
597-
ctx.account_metastores.assign_metastore(ctx.prompts, workspace_id, metastore_id, default_catalog)
597+
ctx = ctx or AccountContext(a)
598+
ctx.account_metastores.assign_metastore(
599+
ctx.prompts,
600+
workspace_id,
601+
metastore_id=metastore_id,
602+
default_catalog=default_catalog,
603+
)
604+
605+
606+
@ucx.command
607+
def create_ucx_catalog(w: WorkspaceClient, prompts: Prompts, ctx: WorkspaceContext | None = None) -> None:
608+
"""Create and setup UCX artifact catalog
609+
610+
Amongst other things, the artifacts are used for tracking the migration progress across workspaces.
611+
"""
612+
workspace_context = ctx or WorkspaceContext(w)
613+
workspace_context.catalog_schema.create_ucx_catalog(prompts)
598614

599615

600616
@ucx.command

src/databricks/labs/ucx/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes
1111
__version__ = 2
1212

1313
inventory_database: str
14+
ucx_catalog: str = "ucx" # Catalog to store UCX artifact tables (shared across workspaces)
1415
# Group name conversion parameters.
1516
workspace_group_regex: str | None = None
1617
workspace_group_replace: str | None = None
@@ -25,7 +26,7 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes
2526
connect: Config | None = None
2627
num_threads: int | None = 10
2728
database_to_catalog_mapping: dict[str, str] | None = None
28-
default_catalog: str | None = "ucx_default"
29+
default_catalog: str | None = "ucx_default" # DEPRECATED: Keeping to avoid errors when loading old configurations
2930
log_level: str | None = "INFO"
3031

3132
# Starting path for notebooks and directories crawler

src/databricks/labs/ucx/contexts/account_cli.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
from databricks.sdk import AccountClient
55

6-
76
from databricks.labs.ucx.account.aggregate import AccountAggregate
87
from databricks.labs.ucx.account.metastores import AccountMetastores
98
from databricks.labs.ucx.account.workspaces import AccountWorkspaces

src/databricks/labs/ucx/contexts/application.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessCrawler
1919
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
2020
from databricks.sdk import AccountClient, WorkspaceClient, core
21-
from databricks.sdk.errors import ResourceDoesNotExist
21+
from databricks.sdk.errors import NotFound
2222
from databricks.sdk.service import sql
2323

2424
from databricks.labs.ucx.account.workspaces import WorkspaceInfo
@@ -314,7 +314,7 @@ def principal_locations(self):
314314
eligible_locations = self.aws_acl.get_eligible_locations_principals()
315315
if self.is_gcp:
316316
raise NotImplementedError("Not implemented for GCP.")
317-
except ResourceDoesNotExist:
317+
except NotFound:
318318
pass
319319
return eligible_locations
320320

@@ -354,6 +354,7 @@ def catalog_schema(self):
354354
self.principal_acl,
355355
self.sql_backend,
356356
self.grants_crawler,
357+
self.config.ucx_catalog,
357358
)
358359

359360
@cached_property

src/databricks/labs/ucx/hive_metastore/catalog_schema.py

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,49 @@
1818

1919

2020
class CatalogSchema:
21+
2122
def __init__(
2223
self,
2324
ws: WorkspaceClient,
2425
table_mapping: TableMapping,
2526
principal_grants: PrincipalACL,
2627
sql_backend: SqlBackend,
2728
grants_crawler: GrantsCrawler,
29+
ucx_catalog: str,
2830
):
2931
self._ws = ws
3032
self._table_mapping = table_mapping
3133
self._external_locations = self._ws.external_locations.list()
3234
self._principal_grants = principal_grants
3335
self._backend = sql_backend
3436
self._hive_grants_crawler = grants_crawler
37+
self._ucx_catalog = ucx_catalog
38+
39+
def create_ucx_catalog(self, prompts: Prompts, *, properties: dict[str, str] | None = None) -> None:
40+
"""Create the UCX catalog.
41+
42+
Args:
43+
prompts : Prompts
44+
The prompts object to use for interactive input.
45+
properties : (dict[str, str] | None), default None
46+
The properties to pass to the catalog. If None, no properties are passed.
47+
"""
48+
try:
49+
self._create_catalog_validate(self._ucx_catalog, prompts, properties=properties)
50+
except BadRequest as e:
51+
if "already exists" in str(e):
52+
logger.warning(f"Catalog '{self._ucx_catalog}' already exists. Skipping.")
53+
return
54+
raise
3555

3656
def create_all_catalogs_schemas(self, prompts: Prompts) -> None:
3757
candidate_catalogs, candidate_schemas = self._get_missing_catalogs_schemas()
3858
for candidate_catalog in candidate_catalogs:
3959
try:
40-
self._create_catalog_validate(candidate_catalog, prompts)
60+
self._create_catalog_validate(candidate_catalog, prompts, properties=None)
4161
except BadRequest as e:
4262
if "already exists" in str(e):
43-
logger.warning(f"Catalog {candidate_catalog} already exists. Skipping.")
63+
logger.warning(f"Catalog '{candidate_catalog}' already exists. Skipping.")
4464
continue
4565
for candidate_catalog, schemas in candidate_schemas.items():
4666
for candidate_schema in schemas:
@@ -49,7 +69,7 @@ def create_all_catalogs_schemas(self, prompts: Prompts) -> None:
4969
except BadRequest as e:
5070
if "already exists" in str(e):
5171
logger.warning(
52-
f"Schema {candidate_schema} in catalog {candidate_catalog} " f"already exists. Skipping."
72+
f"Schema '{candidate_schema}' in catalog '{candidate_catalog}' already exists. Skipping."
5373
)
5474
continue
5575
self._apply_from_legacy_table_acls()
@@ -121,20 +141,19 @@ def _get_database_source_target_mapping(self) -> dict[str, list[SchemaInfo]]:
121141
src_trg_schema_mapping[table_mapping.src_schema].append(schema)
122142
return src_trg_schema_mapping
123143

124-
def _create_catalog_validate(self, catalog, prompts: Prompts):
125-
logger.info(f"Creating UC catalog: {catalog}")
126-
# create catalogs
144+
def _create_catalog_validate(self, catalog: str, prompts: Prompts, *, properties: dict[str, str] | None) -> None:
145+
logger.info(f"Validating UC catalog: {catalog}")
127146
attempts = 3
128147
while True:
129148
catalog_storage = prompts.question(
130-
f"Please provide storage location url for catalog:{catalog}.", default="metastore"
149+
f"Please provide storage location url for catalog: {catalog}", default="metastore"
131150
)
132151
if self._validate_location(catalog_storage):
133152
break
134153
attempts -= 1
135154
if attempts == 0:
136155
raise NotFound(f"Failed to validate location for {catalog} catalog")
137-
self._create_catalog(catalog, catalog_storage)
156+
self._create_catalog(catalog, catalog_storage, properties=properties)
138157

139158
def _list_existing(self) -> tuple[set[str], dict[str, set[str]]]:
140159
"""generate a list of existing UC catalogs and schema."""
@@ -199,12 +218,17 @@ def _validate_location(self, location: str):
199218
return True
200219
return False
201220

202-
def _create_catalog(self, catalog, catalog_storage):
221+
def _create_catalog(self, catalog: str, catalog_storage: str, *, properties: dict[str, str] | None) -> None:
203222
logger.info(f"Creating UC catalog: {catalog}")
204223
if catalog_storage == "metastore":
205-
self._ws.catalogs.create(catalog, comment="Created by UCX")
224+
self._ws.catalogs.create(catalog, comment="Created by UCX", properties=properties)
206225
else:
207-
self._ws.catalogs.create(catalog, storage_root=catalog_storage, comment="Created by UCX")
226+
self._ws.catalogs.create(
227+
catalog,
228+
storage_root=catalog_storage,
229+
comment="Created by UCX",
230+
properties=properties,
231+
)
208232

209233
def _create_schema(self, catalog, schema):
210234
logger.info(f"Creating UC schema: {schema} in catalog: {catalog}")

src/databricks/labs/ucx/install.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig:
234234
inventory_database = self.prompts.question(
235235
"Inventory Database stored in hive_metastore", default=default_database, valid_regex=r"^\w+$"
236236
)
237+
ucx_catalog = self.prompts.question("Catalog to store UCX artifacts in", default="ucx", valid_regex=r"^\w+$")
237238
log_level = self.prompts.question("Log level", default="INFO").upper()
238239
num_threads = int(self.prompts.question("Number of threads", default="8", valid_number=True))
239240
configure_groups = ConfigureGroups(self.prompts)
@@ -248,6 +249,7 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig:
248249
)
249250
return WorkspaceConfig(
250251
inventory_database=inventory_database,
252+
ucx_catalog=ucx_catalog,
251253
workspace_group_regex=configure_groups.workspace_group_regex,
252254
workspace_group_replace=configure_groups.workspace_group_replace,
253255
account_group_regex=configure_groups.account_group_regex,

0 commit comments

Comments
 (0)