 from difflib import unified_diff
 from enum import Enum
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Generator, Callable

 from render import Renderer, RenderStatus, MissingMetadataError
 from scanner import Scanner

 from aws_doc_sdk_examples_tools.doc_gen import DocGen
+from aws_doc_sdk_examples_tools.metadata_errors import MetadataError
+from collections import defaultdict
+import re
+
+# Folders to exclude from processing (can be extended as needed)
+EXCLUDED_FOLDERS = {'.kiro', '.git', 'node_modules', '__pycache__'}
+
+
+def apply_folder_exclusion_patches():
+    """
+    Apply patches to exclude specified folders from processing.
+    This integrates folder exclusion as a core feature.
+    """
+    from aws_doc_sdk_examples_tools import file_utils, validator_config
+    from aws_doc_sdk_examples_tools.fs import Fs, PathFs
+
+    def patched_skip(path: Path) -> bool:
+        """Enhanced skip function that ignores specified folders."""
+        # Check if path contains any excluded folders
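+        # e.g. "examples/node_modules/lodash/index.js" is skipped because
+        # "node_modules" is one of its path parts (illustrative path)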
+        if any(excluded_folder in path.parts for excluded_folder in EXCLUDED_FOLDERS):
+            return True
+
+        # Replicate the original skip logic (extension and ignore-file checks)
+        return path.suffix.lower() not in validator_config.EXT_LOOKUP or path.name in validator_config.IGNORE_FILES
+
+    def patched_get_files(
+        root: Path, skip: Callable[[Path], bool] = lambda _: False, fs: Fs = PathFs()
+    ) -> Generator[Path, None, None]:
+        """Enhanced get_files that uses our patched skip function."""
+        for path in file_utils.walk_with_gitignore(root, fs=fs):
+            if not patched_skip(path):
+                yield path
+
+    # Apply the patches
+    validator_config.skip = patched_skip
+    file_utils.get_files = patched_get_files
+
+    excluded_list = ', '.join(sorted(EXCLUDED_FOLDERS))
+    print(f"Applied folder exclusion: {excluded_list} folders excluded")
+
+
+# Apply folder exclusion patches when module is imported
+apply_folder_exclusion_patches()


 # Default to not using Rich
 logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper(), force=True)


+class UnmatchedSnippetTagError(MetadataError):
+    def __init__(self, file, id, tag=None, line=None, tag_type=None):
+        super().__init__(file=file, id=id)
+        self.tag = tag
+        self.line = line
+        self.tag_type = tag_type  # 'start' or 'end'
+
+    def message(self):
+        return f"Unmatched snippet-{self.tag_type} tag '{self.tag}' at line {self.line}"
+
+
+class DuplicateSnippetTagError(MetadataError):
+    def __init__(self, file, id, tag=None, line=None):
+        super().__init__(file=file, id=id)
+        self.tag = tag
+        self.line = line
+
+    def message(self):
+        return f"Duplicate snippet tag '{self.tag}' found at line {self.line}"
+
+
+def validate_snippet_tags(doc_gen: DocGen):
+    """Validate snippet-start/snippet-end pairs across all files."""
+    errors = []
+
+    # We need to scan files directly since DocGen.snippets only contains valid pairs
+    from aws_doc_sdk_examples_tools.file_utils import get_files
+    from aws_doc_sdk_examples_tools.validator_config import skip
+
+    for file_path in get_files(doc_gen.root, skip, fs=doc_gen.fs):
+        try:
+            content = doc_gen.fs.read(file_path)
+            lines = content.splitlines()
+
+            snippet_starts = {}  # Track all snippet-start tags and their line numbers
+            snippet_ends = {}  # Track all snippet-end tags and their line numbers
+            snippet_tags_seen = set()  # Track all tags in this file to detect duplicates
+
+            for line_num, line in enumerate(lines, 1):
+                # Look for snippet-start patterns (# or // comment styles)
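+                # e.g. a line like "# snippet-start:[python.example_code.demo.hello]"
+                # (the tag name shown is illustrative)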
+                start_match = re.search(r'(#|//)\s*snippet-start:\[([^\]]+)\]', line)
+                if start_match:
+                    tag = start_match.group(2)
+
+                    # Check for duplicate start tags in the same file
+                    if tag in snippet_starts:
+                        errors.append(DuplicateSnippetTagError(
+                            file=file_path,
+                            id=f"Duplicate snippet-start tag in {file_path}",
+                            tag=tag,
+                            line=line_num
+                        ))
+                    else:
+                        snippet_starts[tag] = line_num
+                        snippet_tags_seen.add(tag)
+
+                # Look for snippet-end patterns
+                end_match = re.search(r'(#|//)\s*snippet-end:\[([^\]]+)\]', line)
+                if end_match:
+                    tag = end_match.group(2)
+
+                    # Check for duplicate end tags in the same file
+                    if tag in snippet_ends:
+                        errors.append(DuplicateSnippetTagError(
+                            file=file_path,
+                            id=f"Duplicate snippet-end tag in {file_path}",
+                            tag=tag,
+                            line=line_num
+                        ))
+                    else:
+                        snippet_ends[tag] = line_num
+
+            # Check that every snippet-start has a corresponding snippet-end
+            for tag, start_line in snippet_starts.items():
+                if tag not in snippet_ends:
+                    errors.append(UnmatchedSnippetTagError(
+                        file=file_path,
+                        id=f"Unclosed snippet-start in {file_path}",
+                        tag=tag,
+                        line=start_line,
+                        tag_type='start'
+                    ))
+
+            # Check that every snippet-end has a corresponding snippet-start
+            for tag, end_line in snippet_ends.items():
+                if tag not in snippet_starts:
+                    errors.append(UnmatchedSnippetTagError(
+                        file=file_path,
+                        id=f"Unmatched snippet-end in {file_path}",
+                        tag=tag,
+                        line=end_line,
+                        tag_type='end'
+                    ))
+
+        except Exception as e:
+            # Skip files that can't be read (binary files, etc.)
+            continue
+
+    return errors
+
+
 def prepare_scanner(doc_gen: DocGen) -> Optional[Scanner]:
     for path in (doc_gen.root / ".doc_gen/metadata").glob("*_metadata.yaml"):
         doc_gen.process_metadata(path)
     doc_gen.collect_snippets()
     doc_gen.validate()
+
+    # Validate snippet tag pairs
+    snippet_errors = validate_snippet_tags(doc_gen)
+    if snippet_errors:
+        doc_gen.errors.extend(snippet_errors)
+
     if doc_gen.errors:
         error_strings = [str(error) for error in doc_gen.errors]
         failed_list = "\n".join(f"DocGen Error: {e}" for e in error_strings)
@@ -200,4 +350,4 @@ def make_diff(renderer, id):
     current = renderer.read_current().split("\n")
     expected = renderer.readme_text.split("\n")
     diff = unified_diff(current, expected, f"{id}/current", f"{id}/expected")
-    return "\n".join(diff)
+    return "\n".join(diff)