diff --git a/README.md b/README.md index 676739ec22..7c1c11dc33 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,10 @@ so that you'll be able to [scope the migration](docs/assessment.md) and execute The [README notebook](#readme-notebook), which can be found in the installation folder contains further instructions and explanations of the different ucx workflows & dashboards. Once the migration is scoped, you can start with the [table migration process](#Table-Migration). + More workflows, like notebook code migration are coming in future releases. + UCX also provides a number of command line utilities accessible via `databricks labs ucx`. For questions, troubleshooting or bug fixes, please see our [troubleshooting guide](docs/troubleshooting.md) or submit [an issue](https://github.com/databrickslabs/ucx/issues). @@ -90,6 +92,7 @@ See [contributing instructions](CONTRIBUTING.md) to help improve this project. * [`open-remote-config` command](#open-remote-config-command) * [`installations` command](#installations-command) * [`report-account-compatibility` command](#report-account-compatibility-command) + * [`export-assessment` command](#export-assessment-command) * [Metastore related commands](#metastore-related-commands) * [`show-all-metastores` command](#show-all-metastores-command) * [`assign-metastore` command](#assign-metastore-command) @@ -1167,6 +1170,42 @@ databricks labs ucx report-account-compatibility --profile labs-azure-account 12:56:21 INFO [d.l.u.account.aggregate] Non-DELTA format: UNKNOWN: 5 objects ``` +[[back to top](#databricks-labs-ucx)] +## `export-assessment` command + +```commandline +databricks labs ucx export-assessment +``` +The export-assessment command is used to export UCX assessment results to a specified location. When you run this command, you will be prompted to provide details on the destination path and the type of report you wish to generate. 
If you do not specify these details, the command will default to exporting the main results to the current directory. The exported file is named after the selected report type, e.g. `export_{query_choice}_results.zip`. +- **Choose a path to save the UCX Assessment results:** + - **Description:** Specify the path where the results should be saved. If not provided, results will be saved in the current directory. + +- **Choose which assessment results to export:** + - **Description:** Select the type of results to export. Options include: + - `azure` + - `estimates` + - `interactive` + - `main` + - **Default:** `main` + +[[back to top](#databricks-labs-ucx)] + +## `export-assessment` command + +```commandline +databricks labs ucx export-assessment +``` +The export-assessment command is used to export UCX assessment results to a specified location. When you run this command, you will be prompted to provide details on the destination path and the type of report you wish to generate. If you do not specify these details, the command will default to exporting the main results to the current directory. The exported file is named after the selected report type, e.g. `export_{query_choice}_results.zip`. +- **Choose a path to save the UCX Assessment results:** + - **Description:** Specify the path where the results should be saved. If not provided, results will be saved in the current directory. + +- **Choose which assessment results to export:** + - **Description:** Select the type of results to export. Options include: + - `azure` + - `estimates` + - `interactive` + - `main` + [[back to top](#databricks-labs-ucx)] # Metastore related commands diff --git a/labs.yml b/labs.yml index c27fd791c7..9e25ef0494 100644 --- a/labs.yml +++ b/labs.yml @@ -330,3 +330,6 @@ commands: description: The file to download - name: run-as-collection description: Run the command for the collection of workspaces with ucx installed. Default is False. 
class AssessmentExporter:
    """Export the results of the bundled assessment queries to a ZIP archive of CSV files."""

    def __init__(self, sql_backend: SqlBackend, config: WorkspaceConfig):
        self._sql_backend = sql_backend
        self._config = config

    def export_results(self, prompts: Prompts) -> Path:
        """Ask where and what to export, then write the results to a ZIP archive.

        Args:
            prompts: Interactive prompts used to ask for the destination
                directory and for the query set to export.

        Returns:
            The path reported by the dashboard export for the written archive,
            named ``export_<query set>_results.zip`` inside the chosen directory.
        """
        # This module lives in `<package>/assessment/`, so the bundled queries are
        # one level up, under `<package>/queries/assessment`.
        queries_path_root = Path(__file__).resolve().parents[1] / "queries/assessment"

        results_directory = Path(
            prompts.question(
                "Choose a path to save the UCX Assessment results",
                default=Path.cwd().as_posix(),
                # The answer is used as a directory below, so an existing regular
                # file must be rejected as well as a missing path.
                validate=lambda p_: Path(p_).is_dir(),
            )
        )

        # Every sub-directory of the query root is one exportable query set.
        query_choice = prompts.choice(
            "Choose which assessment results to export",
            [subdir.name for subdir in queries_path_root.iterdir() if subdir.is_dir()],
        )

        export_path = results_directory / f"export_{query_choice}_results.zip"
        queries_path = queries_path_root / query_choice

        # The bundled queries reference the placeholder database `inventory`;
        # point them at the configured inventory database instead.
        assessment_results = DashboardMetadata.from_path(queries_path).replace_database(
            database=self._config.inventory_database, database_to_replace="inventory"
        )

        logger.info("Exporting assessment results....")
        results_path = assessment_results.export_to_zipped_csv(self._sql_backend, export_path)
        logger.info("Results exported to %s", results_path)

        return results_path
@ucx.command
def export_assessment(w: WorkspaceClient, prompts: Prompts):
    """Export UCX assessment results through the workspace's assessment exporter.

    Builds a workspace context from the given client and passes the prompts on
    to the exporter's `export_results`.
    """
    WorkspaceContext(w).assessment_exporter.export_results(prompts)
dbutils.widgets.get('parent_run_id')) """ +EXPORT_TO_EXCEL_NOTEBOOK = """# Databricks notebook source +# MAGIC %md +# MAGIC ##### Exporter of UCX assessment results +# MAGIC ##### Instructions: +# MAGIC 1. Execute using an all-purpose cluster with Databricks Runtime 14 or higher. +# MAGIC 1. Hit **Run all** button and wait for completion. +# MAGIC 1. Go to the bottom of the notebook and click the Download UCX Results button. +# MAGIC +# MAGIC ##### Important: +# MAGIC Please note that this is only meant to serve as example code. +# MAGIC +# MAGIC Example code developed by **Databricks Shared Technical Services team**. + +# COMMAND ---------- + +# DBTITLE 1,Installing Packages +# MAGIC %pip install {remote_wheel} -qqq +# MAGIC %pip install xlsxwriter -qqq +# MAGIC dbutils.library.restartPython() + +# COMMAND ---------- + +# DBTITLE 1,Libraries Import and Setting UCX +import os +import logging +import threading +import shutil +from pathlib import Path +from threading import Lock +from functools import partial + +import pandas as pd +import xlsxwriter + +from databricks.sdk.config import with_user_agent_extra +from databricks.labs.blueprint.logger import install_logger +from databricks.labs.blueprint.parallel import Threads +from databricks.labs.lsql.dashboards import Dashboards +from databricks.labs.lsql.lakeview.model import Dataset +from databricks.labs.ucx.contexts.workflow_task import RuntimeContext + +# ctx +install_logger() +with_user_agent_extra("cmd", "export-assessment") +named_parameters = dict(config="/Workspace{config_file}") +ctx = RuntimeContext(named_parameters) +lock = Lock() + +# COMMAND ---------- + +# DBTITLE 1,Assessment Export +FILE_NAME = "ucx_assessment_main.xlsx" +TMP_PATH = f"/Workspace{{ctx.installation.install_folder()}}/tmp/" +DOWNLOAD_PATH = "/dbfs/FileStore/excel-export" + + +def _cleanup() -> None: + '''Move the temporary results file to the download path and clean up the temp directory.''' + shutil.move( + os.path.join(TMP_PATH, 
FILE_NAME), + os.path.join(DOWNLOAD_PATH, FILE_NAME), + ) + shutil.rmtree(TMP_PATH) + + +def _prepare_directories() -> None: + '''Ensure that the necessary directories exist.''' + os.makedirs(TMP_PATH, exist_ok=True) + os.makedirs(DOWNLOAD_PATH, exist_ok=True) + + +def _to_excel(dataset: Dataset, writer: ...) -> None: + '''Execute a SQL query and write the result to an Excel sheet.''' + worksheet_name = dataset.display_name[:31] + df = spark.sql(dataset.query).toPandas() + with lock: + df.to_excel(writer, sheet_name=worksheet_name, index=False) + + +def _render_export() -> None: + '''Render an HTML link for downloading the results.''' + html_content = ''' + +

Export Results

Download Results
+ + ''' + displayHTML(html_content) + + +def export_results() -> None: + '''Main method to export results to an Excel file.''' + _prepare_directories() + + dashboard_path = ( + Path(ctx.installation.install_folder()) + / "dashboards/[UCX] UCX Assessment (Main).lvdash.json" + ) + dashboard = Dashboards(ctx.workspace_client) + dashboard_datasets = dashboard.get_dashboard(dashboard_path).datasets + try: + target = TMP_PATH + "/ucx_assessment_main.xlsx" + with pd.ExcelWriter(target, engine="xlsxwriter") as writer: + tasks = [] + for dataset in dashboard_datasets: + tasks.append(partial(_to_excel, dataset, writer)) + Threads.strict("exporting", tasks) + _cleanup() + _render_export() + except Exception as e: + print(f"Error exporting results ", e) + +# COMMAND ---------- + +# DBTITLE 1,Data Export +export_results() +""" + class DeployedWorkflows: def __init__(self, ws: WorkspaceClient, install_state: InstallState): @@ -484,6 +601,7 @@ def create_jobs(self) -> None: self.remove_jobs(keep=desired_workflows) self._install_state.save() self._create_debug(remote_wheels) + self._create_export(remote_wheels) self._create_readme() def remove_jobs(self, *, keep: set[str] | None = None) -> None: @@ -822,6 +940,16 @@ def _create_debug(self, remote_wheels: list[str]): ).encode("utf8") self._installation.upload('DEBUG.py', content) + def _create_export(self, remote_wheels: list[str]): + remote_wheels_str = " ".join(remote_wheels) + content = EXPORT_TO_EXCEL_NOTEBOOK.format( + remote_wheel=remote_wheels_str, + config_file=self._config_file, + workspace_host=self._ws.config.host, + workspace_id=self._ws.get_workspace_id(), + ).encode("utf8") + self._installation.upload('EXPORT_ASSESSMENT_TO_EXCEL.py', content) + class MaxedStreamHandler(logging.StreamHandler): diff --git a/tests/unit/assessment/test_export.py b/tests/unit/assessment/test_export.py new file mode 100644 index 0000000000..c67355f367 --- /dev/null +++ b/tests/unit/assessment/test_export.py @@ -0,0 +1,41 @@ +from 
def test_export(tmp_path):
    """Check that export_results returns the expected ZIP path inside the chosen directory."""
    assessment_name, option = "main", 3

    # Directory handed to the destination prompt
    destination = tmp_path / "export"
    destination.mkdir(parents=True, exist_ok=True)

    # Canned rows served by the mock backend for the stubbed query
    rows = [
        Row(location="s3://bucket1/folder1", table_count=1),
        Row(location="abfss://container1@storage1.dfs.core.windows.net/folder1", table_count=1),
        Row(location="gcp://folder1", table_count=2),
    ]
    backend = MockBackend(rows={"SELECT\n one\nFROM ucx.external_locations": rows})

    answers = {
        "Choose a path to save the UCX Assessment results": destination.as_posix(),
        "Choose which assessment results to export": option,
    }

    exporter = AssessmentExporter(backend, WorkspaceConfig(inventory_database="ucx"))
    exported = exporter.export_results(MockPrompts(answers))

    # The archive name is derived from the selected query set
    assert exported == destination / f"export_{assessment_name}_results.zip"
def test_export_assessment(ws, tmp_path):
    """Check that the CLI command leaves exactly one matching ZIP archive in the chosen directory."""
    assessment_name, option = "main", 3
    answers = {
        "Choose a path to save the UCX Assessment results": tmp_path.as_posix(),
        "Choose which assessment results to export": option,
    }

    export_assessment(ws, MockPrompts(answers))

    # The archive name is derived from the selected query set
    archives = list(tmp_path.glob(f"export_{assessment_name}_results.zip"))
    assert len(archives) == 1