1
1
import logging
2
2
import os
3
+ import shutil
3
4
import sys
4
5
from dataclasses import dataclass , field
5
6
from pathlib import Path
13
14
from databricks .labs .ucx .hive_metastore .table_migration_status import TableMigrationIndex
14
15
from databricks .labs .ucx .source_code .base import LocatedAdvice
15
16
from databricks .labs .ucx .source_code .linters .context import LinterContext
17
+ from databricks .labs .ucx .source_code .path_lookup import PathLookup
16
18
17
19
logger = logging .getLogger ("verify-accelerators" )
18
20
@@ -79,16 +81,16 @@ class SolaccContext:
79
81
missing_imports : dict [str , dict [str , int ]] = field (default_factory = dict )
80
82
81
83
@classmethod
def create(cls, for_all_dirs: bool) -> "SolaccContext":
    """Build a SolaccContext, optionally preparing state for a full-directory run.

    Args:
        for_all_dirs: True when linting every accelerator directory; enables the
            "solacc-unparsed.txt" scratch file and the malformed-files skip list.

    Returns:
        A SolaccContext wired with the unparsed-file path and skip set (both may be None).
    """
    unparsed_path: Path | None = None
    # For a full run, recreate "solacc-unparsed.txt" so it only records this run's failures.
    if for_all_dirs:
        unparsed_path = Path(Path(__file__).parent, "solacc-unparsed.txt")
        if unparsed_path.exists():
            os.remove(unparsed_path)
    files_to_skip: set[str] | None = None
    malformed = Path(__file__).parent / "solacc-malformed.txt"
    if for_all_dirs and malformed.exists():
        lines = malformed.read_text(encoding="utf-8").split("\n")
        # Skip-list entries are the non-empty, non-comment lines.
        files_to_skip = {line for line in lines if len(line) > 0 and not line.startswith("#")}
    return SolaccContext(unparsed_path=unparsed_path, files_to_skip=files_to_skip)
@@ -103,28 +105,30 @@ def register_missing_import(self, missing_import: str):
103
105
details [missing_import ] = count + 1
104
106
105
107
def log_missing_imports(self):
    """Log every missing-import prefix, ordered by total occurrence count (descending)."""
    ranked = sorted(
        self.missing_imports.items(),
        key=lambda entry: sum(entry[1].values()),
        reverse=True,
    )
    for prefix, details in ranked:
        logger.info(f"Missing import '{prefix}'")
        for item, count in details.items():
            logger.info(f"  {item}: {count} occurrences")
112
116
113
-
114
117
def lint_one (solacc : SolaccContext , file : Path , ctx : LocalCheckoutContext ) -> None :
115
118
try :
116
119
advices = list (ctx .local_code_linter .lint_path (file , set ()))
117
120
solacc .parseable_count += 1
118
- missing_imports = collect_missing_imports (advices )
119
- for missing_import in missing_imports :
121
+ for missing_import in collect_missing_imports (advices ):
120
122
solacc .register_missing_import (missing_import )
121
- uninferrable_count = collect_uninferrable_count (advices )
122
- solacc .uninferrable_count += uninferrable_count
123
+ solacc .uninferrable_count += collect_uninferrable_count (advices )
123
124
print_advices (advices , file )
124
125
except Exception as e : # pylint: disable=broad-except
125
126
# here we're most likely catching astroid & sqlglot errors
126
127
# when linting single file, log exception details
127
- logger .error (f"Error during parsing of { file } : { e } " .replace ("\n " , " " ), exc_info = e if solacc .unparsed_path is None else None )
128
+ logger .error (
129
+ f"Error during parsing of { file } : { e } " .replace ("\n " , " " ),
130
+ exc_info = e if solacc .unparsed_path is None else None ,
131
+ )
128
132
if solacc .unparsed_path :
129
133
logger .error (f"Error during parsing of { file } : { e } " .replace ("\n " , " " ))
130
134
# populate solacc-unparsed.txt
@@ -133,17 +137,37 @@ def lint_one(solacc: SolaccContext, file: Path, ctx: LocalCheckoutContext) -> No
133
137
f .write ("\n " )
134
138
135
139
136
- def lint_dir (solacc : SolaccContext , dir : Path , file_to_lint : str | None = None ):
140
class _CleanablePathLookup(PathLookup):
    """PathLookup that snapshots the interpreter's sys.path entries at construction
    so entries added later (temporary files/dirs created during linting) can be
    deleted from disk afterwards."""

    def __init__(self):
        super().__init__(Path.cwd(), [Path(path) for path in sys.path])
        # Remember what was present up front; anything beyond these is temporary.
        self._original_sys_paths = set(self._sys_paths)

    def clean_tmp_sys_paths(self):
        """Remove every sys-path entry added since construction, file or directory."""
        added = (entry for entry in self._sys_paths if entry not in self._original_sys_paths)
        for path in added:
            if path.is_file():
                path.unlink()
            if path.is_dir():
                shutil.rmtree(path)
156
def lint_dir(solacc: SolaccContext, soldir: Path, file_to_lint: str | None = None):
    """Lint Python files under *soldir*, accumulating results into *solacc*.

    Args:
        solacc: Accumulator for counts, missing imports and unparsable files.
        soldir: Accelerator directory to scan for ``**/*.py`` files.
        file_to_lint: Optional single file (relative to *soldir*) to lint
            instead of the whole tree.
    """
    path_lookup = _CleanablePathLookup()
    ws = WorkspaceClient(host='...', token='...')
    ctx = LocalCheckoutContext(ws).replace(
        linter_context_factory=lambda session_state: LinterContext(TableMigrationIndex([]), session_state),
        path_lookup=path_lookup,
    )
    all_files = list(soldir.glob('**/*.py')) if file_to_lint is None else [Path(soldir, file_to_lint)]
    solacc.total_count += len(all_files)
    try:
        for file in all_files:
            # Honor the malformed-files skip list (entries are stored relative to `dist`).
            if solacc.files_to_skip and file.relative_to(dist).as_posix() in solacc.files_to_skip:
                continue
            lint_one(solacc, file, ctx)
    finally:
        # Always remove temporary sys.path entries, even if linting raises.
        path_lookup.clean_tmp_sys_paths()
147
171
148
172
149
173
def lint_file (file_to_lint : str ):
@@ -154,8 +178,8 @@ def lint_file(file_to_lint: str):
154
178
155
179
def lint_all ():
156
180
solacc = SolaccContext .create (True )
157
- for dir in os .listdir (dist ):
158
- lint_dir (solacc , dist / dir )
181
+ for soldir in os .listdir (dist ):
182
+ lint_dir (solacc , dist / soldir )
159
183
all_files_len = solacc .total_count - (len (solacc .files_to_skip ) if solacc .files_to_skip else 0 )
160
184
parseable_pct = int (solacc .parseable_count / all_files_len * 100 )
161
185
missing_imports_count = sum (sum (details .values ()) for details in solacc .missing_imports .values ())
0 commit comments