1
1
import logging
2
2
import os
3
+ import shutil
3
4
import sys
5
+ from dataclasses import dataclass , field
4
6
from pathlib import Path
5
7
6
8
import requests
12
14
from databricks .labs .ucx .hive_metastore .table_migration_status import TableMigrationIndex
13
15
from databricks .labs .ucx .source_code .base import LocatedAdvice
14
16
from databricks .labs .ucx .source_code .linters .context import LinterContext
17
+ from databricks .labs .ucx .source_code .path_lookup import PathLookup
15
18
16
19
logger = logging .getLogger ("verify-accelerators" )
17
20
@@ -54,7 +57,7 @@ def collect_missing_imports(advices: list[LocatedAdvice]):
54
57
return missing_imports
55
58
56
59
57
- def collect_not_computed (advices : list [LocatedAdvice ]):
60
+ def collect_uninferrable_count (advices : list [LocatedAdvice ]):
58
61
not_computed = 0
59
62
for located_advice in advices :
60
63
if "computed" in located_advice .advice .message :
@@ -68,93 +71,143 @@ def print_advices(advices: list[LocatedAdvice], file: Path):
68
71
sys .stdout .write (f"{ message } \n " )
69
72
70
73
71
- def lint_one (file : Path , ctx : LocalCheckoutContext , unparsed : Path | None ) -> tuple [set [str ], int , int ]:
74
+ @dataclass
75
+ class SolaccContext :
76
+ unparsed_path : Path | None = None
77
+ files_to_skip : set [str ] | None = None
78
+ total_count = 0
79
+ parseable_count = 0
80
+ uninferrable_count = 0
81
+ missing_imports : dict [str , dict [str , int ]] = field (default_factory = dict )
82
+
83
+ @classmethod
84
+ def create (cls , for_all_dirs : bool ):
85
+ unparsed_path : Path | None = None
86
+ # if lint_all, recreate "solacc-unparsed.txt"
87
+ if for_all_dirs :
88
+ unparsed_path = Path (Path (__file__ ).parent , "solacc-unparsed.txt" )
89
+ if unparsed_path .exists ():
90
+ os .remove (unparsed_path )
91
+ files_to_skip : set [str ] | None = None
92
+ malformed = Path (__file__ ).parent / "solacc-malformed.txt"
93
+ if for_all_dirs and malformed .exists ():
94
+ lines = malformed .read_text (encoding = "utf-8" ).split ("\n " )
95
+ files_to_skip = set (line for line in lines if len (line ) > 0 and not line .startswith ("#" ))
96
+ return SolaccContext (unparsed_path = unparsed_path , files_to_skip = files_to_skip )
97
+
98
+ def register_missing_import (self , missing_import : str ):
99
+ prefix = missing_import .split ("." )[0 ]
100
+ details = self .missing_imports .get (prefix , None )
101
+ if details is None :
102
+ details = {}
103
+ self .missing_imports [prefix ] = details
104
+ count = details .get (missing_import , 0 )
105
+ details [missing_import ] = count + 1
106
+
107
+ def log_missing_imports (self ):
108
+ missing_imports = dict (
109
+ sorted (self .missing_imports .items (), key = lambda item : sum (item [1 ].values ()), reverse = True )
110
+ )
111
+ for prefix , details in missing_imports .items ():
112
+ logger .info (f"Missing import '{ prefix } '" )
113
+ for item , count in details .items ():
114
+ logger .info (f" { item } : { count } occurrences" )
115
+
116
+
117
+ def lint_one (solacc : SolaccContext , file : Path , ctx : LocalCheckoutContext ) -> None :
72
118
try :
73
119
advices = list (ctx .local_code_linter .lint_path (file , set ()))
74
- missing_imports = collect_missing_imports (advices )
75
- not_computed = collect_not_computed (advices )
120
+ solacc .parseable_count += 1
121
+ for missing_import in collect_missing_imports (advices ):
122
+ solacc .register_missing_import (missing_import )
123
+ solacc .uninferrable_count += collect_uninferrable_count (advices )
76
124
print_advices (advices , file )
77
- return missing_imports , 1 , not_computed
78
125
except Exception as e : # pylint: disable=broad-except
79
126
# here we're most likely catching astroid & sqlglot errors
80
- if unparsed is None : # linting single file, log exception details
81
- logger .error (f"Error during parsing of { file } : { e } " .replace ("\n " , " " ), exc_info = e )
82
- else :
127
+ # when linting single file, log exception details
128
+ logger .error (
129
+ f"Error during parsing of { file } : { e } " .replace ("\n " , " " ),
130
+ exc_info = e if solacc .unparsed_path is None else None ,
131
+ )
132
+ if solacc .unparsed_path :
83
133
logger .error (f"Error during parsing of { file } : { e } " .replace ("\n " , " " ))
84
134
# populate solacc-unparsed.txt
85
- with unparsed .open (mode = "a" , encoding = "utf-8" ) as f :
135
+ with solacc . unparsed_path .open (mode = "a" , encoding = "utf-8" ) as f :
86
136
f .write (file .relative_to (dist ).as_posix ())
87
137
f .write ("\n " )
88
- return set (), 0 , 0
89
138
90
139
91
- def lint_all (file_to_lint : str | None ):
140
+ class _CleanablePathLookup (PathLookup ):
141
+
142
+ def __init__ (self ):
143
+ super ().__init__ (Path .cwd (), [Path (path ) for path in sys .path ])
144
+ self ._original_sys_paths = set (self ._sys_paths )
145
+
146
+ def clean_tmp_sys_paths (self ):
147
+ for path in self ._sys_paths :
148
+ if path in self ._original_sys_paths :
149
+ continue
150
+ if path .is_file ():
151
+ path .unlink ()
152
+ if path .is_dir ():
153
+ shutil .rmtree (path )
154
+
155
+
156
+ def lint_dir (solacc : SolaccContext , soldir : Path , file_to_lint : str | None = None ):
157
+ path_lookup = _CleanablePathLookup ()
92
158
ws = WorkspaceClient (host = '...' , token = '...' )
93
159
ctx = LocalCheckoutContext (ws ).replace (
94
- linter_context_factory = lambda session_state : LinterContext (TableMigrationIndex ([]), session_state )
160
+ linter_context_factory = lambda session_state : LinterContext (TableMigrationIndex ([]), session_state ),
161
+ path_lookup = path_lookup ,
95
162
)
96
- parseable = 0
97
- not_computed = 0
98
- missing_imports : dict [str , dict [str , int ]] = {}
99
- all_files = list (dist .glob ('**/*.py' )) if file_to_lint is None else [Path (dist , file_to_lint )]
100
- unparsed : Path | None = None
101
- if file_to_lint is None :
102
- unparsed = Path (Path (__file__ ).parent , "solacc-unparsed.txt" )
103
- if unparsed .exists ():
104
- os .remove (unparsed )
105
- skipped : set [str ] | None = None
106
- malformed = Path (__file__ ).parent / "solacc-malformed.txt"
107
- if file_to_lint is None and malformed .exists ():
108
- lines = malformed .read_text (encoding = "utf-8" ).split ("\n " )
109
- skipped = set (line for line in lines if len (line ) > 0 and not line .startswith ("#" ))
163
+ all_files = list (soldir .glob ('**/*.py' )) if file_to_lint is None else [Path (soldir , file_to_lint )]
164
+ solacc .total_count += len (all_files )
110
165
for file in all_files :
111
- if skipped and file .relative_to (dist ).as_posix () in skipped :
166
+ if solacc . files_to_skip and file .relative_to (dist ).as_posix () in solacc . files_to_skip :
112
167
continue
113
- _missing_imports , _parseable , _not_computed = lint_one (file , ctx , unparsed )
114
- for _import in _missing_imports :
115
- register_missing_import (missing_imports , _import )
116
- parseable += _parseable
117
- not_computed += _not_computed
118
- all_files_len = len (all_files ) - (len (skipped ) if skipped else 0 )
119
- parseable_pct = int (parseable / all_files_len * 100 )
120
- missing_imports_count = sum (sum (details .values ()) for details in missing_imports .values ())
168
+ lint_one (solacc , file , ctx )
169
+ # cleanup tmp dirs
170
+ path_lookup .clean_tmp_sys_paths ()
171
+
172
+
173
+ def lint_file (file_to_lint : str ):
174
+ solacc = SolaccContext .create (False )
175
+ file_path = Path (file_to_lint )
176
+ lint_dir (solacc , file_path .parent , file_path .name )
177
+
178
+
179
+ def lint_all ():
180
+ solacc = SolaccContext .create (True )
181
+ for soldir in os .listdir (dist ):
182
+ lint_dir (solacc , dist / soldir )
183
+ all_files_len = solacc .total_count - (len (solacc .files_to_skip ) if solacc .files_to_skip else 0 )
184
+ parseable_pct = int (solacc .parseable_count / all_files_len * 100 )
185
+ missing_imports_count = sum (sum (details .values ()) for details in solacc .missing_imports .values ())
121
186
logger .info (
122
- f"Skipped: { len (skipped or [])} , parseable: { parseable_pct } % ({ parseable } /{ all_files_len } ), missing imports: { missing_imports_count } , not computed: { not_computed } "
187
+ f"Skipped: { len (solacc .files_to_skip or [])} , "
188
+ f"parseable: { parseable_pct } % ({ solacc .parseable_count } /{ all_files_len } ), "
189
+ f"missing imports: { missing_imports_count } , "
190
+ f"not computed: { solacc .uninferrable_count } "
123
191
)
124
- log_missing_imports (missing_imports )
192
+ solacc . log_missing_imports ()
125
193
# fail the job if files are unparseable
126
194
if parseable_pct < 100 :
127
195
sys .exit (1 )
128
196
129
197
130
- def register_missing_import (missing_imports : dict [str , dict [str , int ]], missing_import : str ):
131
- prefix = missing_import .split ("." )[0 ]
132
- details = missing_imports .get (prefix , None )
133
- if details is None :
134
- details = {}
135
- missing_imports [prefix ] = details
136
- count = details .get (missing_import , 0 )
137
- details [missing_import ] = count + 1
138
-
139
-
140
- def log_missing_imports (missing_imports : dict [str , dict [str , int ]]):
141
- missing_imports = dict (sorted (missing_imports .items (), key = lambda item : sum (item [1 ].values ()), reverse = True ))
142
- for prefix , details in missing_imports .items ():
143
- logger .info (f"Missing import '{ prefix } '" )
144
- for item , count in details .items ():
145
- logger .info (f" { item } : { count } occurrences" )
146
-
147
-
148
198
def main (args : list [str ]):
149
199
install_logger ()
150
200
logging .root .setLevel (logging .INFO )
151
201
file_to_lint = args [1 ] if len (args ) > 1 else None
152
- if not file_to_lint :
202
+ if file_to_lint :
153
203
# don't clone if linting just one file, assumption is we're troubleshooting
154
- logger .info ("Cloning..." )
155
- clone_all ()
204
+ logger .info ("Linting..." )
205
+ lint_file (file_to_lint )
206
+ return
207
+ logger .info ("Cloning..." )
208
+ clone_all ()
156
209
logger .info ("Linting..." )
157
- lint_all (file_to_lint )
210
+ lint_all ()
158
211
159
212
160
213
if __name__ == "__main__" :
0 commit comments