-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Writeme: integrate folder exclusion into writeme and fix validation issues #7539
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,12 +9,52 @@ | |
from difflib import unified_diff | ||
from enum import Enum | ||
from pathlib import Path | ||
from typing import Optional | ||
from typing import Optional, Generator, Callable | ||
|
||
from render import Renderer, RenderStatus, MissingMetadataError | ||
from scanner import Scanner | ||
|
||
from aws_doc_sdk_examples_tools.doc_gen import DocGen | ||
from aws_doc_sdk_examples_tools.metadata_errors import MetadataError | ||
from collections import defaultdict | ||
import re | ||
|
||
# Folders to exclude from processing
EXCLUDED_FOLDERS = {'.kiro', '.git', 'node_modules', '__pycache__'}


def _configure_folder_exclusion():
    """Monkey-patch aws_doc_sdk_examples_tools so file discovery skips EXCLUDED_FOLDERS.

    Replaces ``validator_config.skip`` and ``file_utils.get_files`` at import
    time so that every downstream scan ignores the excluded folders in
    addition to the standard extension/ignore-file rules.

    NOTE(review): this duplicates gitignore-style filtering already done by
    ``walk_with_gitignore`` — consider adding these folders to .gitignore
    upstream instead of patching here.
    """
    from aws_doc_sdk_examples_tools import file_utils, validator_config
    from aws_doc_sdk_examples_tools.fs import Fs, PathFs

    def enhanced_skip(path: Path) -> bool:
        """Skip paths inside excluded folders, plus the standard skip rules."""
        # Any path component matching an excluded folder disqualifies the path.
        if any(folder in path.parts for folder in EXCLUDED_FOLDERS):
            return True

        # Standard skip logic: unknown extension or explicitly ignored name.
        return (
            path.suffix.lower() not in validator_config.EXT_LOOKUP
            or path.name in validator_config.IGNORE_FILES
        )

    def enhanced_get_files(
        root: Path,
        skip: Callable[[Path], bool] = lambda _: False,
        fs: Fs = PathFs(),  # NOTE: default evaluated once at def time, as upstream does
    ) -> Generator[Path, None, None]:
        """Yield files under root, honoring both enhanced_skip and the caller's skip.

        Bug fix: the previous version accepted ``skip`` but never called it,
        so a caller-supplied filter was silently ignored.
        """
        for path in file_utils.walk_with_gitignore(root, fs=fs):
            if not (enhanced_skip(path) or skip(path)):
                yield path

    # Install the replacements for all downstream consumers.
    validator_config.skip = enhanced_skip
    file_utils.get_files = enhanced_get_files

    excluded_list = ', '.join(sorted(EXCLUDED_FOLDERS))
    print(f"Folder exclusion configured: {excluded_list} folders excluded")


# Configure folder exclusion when module is imported
_configure_folder_exclusion()
|
||
|
||
# Default to not using Rich | ||
|
@@ -26,11 +66,118 @@ | |
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper(), force=True) | ||
|
||
|
||
class UnmatchedSnippetTagError(MetadataError):
    """Reported when a snippet-start or snippet-end tag lacks its partner tag."""

    def __init__(self, file, id, tag=None, line=None, tag_type=None):
        super().__init__(file=file, id=id)
        # The snippet tag text, the line it appeared on, and which side
        # ('start' or 'end') is the unmatched one.
        self.tag = tag
        self.line = line
        self.tag_type = tag_type

    def message(self):
        return "Unmatched snippet-{} tag '{}' at line {}".format(
            self.tag_type, self.tag, self.line
        )
|
||
|
||
class DuplicateSnippetTagError(MetadataError):
    """Reported when the same snippet tag appears more than once in a file."""

    def __init__(self, file, id, tag=None, line=None):
        super().__init__(file=file, id=id)
        # The duplicated tag text and the line of the repeated occurrence.
        self.tag = tag
        self.line = line

    def message(self):
        return "Duplicate snippet tag '{}' found at line {}".format(
            self.tag, self.line
        )
|
||
|
||
def validate_snippet_tags(doc_gen: DocGen):
    """Validate snippet-start/snippet-end pairs across all files under *doc_gen*.

    ``DocGen.snippets`` only retains well-formed pairs, so the files are
    re-scanned directly to surface unmatched or duplicated tags.

    Returns a list of MetadataError instances (empty when everything matches).
    """
    # We need to scan files directly since DocGen.snippets only contains valid pairs
    from aws_doc_sdk_examples_tools.file_utils import get_files
    from aws_doc_sdk_examples_tools.validator_config import skip

    # Recognize '#' and '//' comment styles; compile once, reuse per line.
    start_re = re.compile(r'(#|//)\s*snippet-start:\[([^\]]+)\]')
    end_re = re.compile(r'(#|//)\s*snippet-end:\[([^\]]+)\]')

    errors = []

    for file_path in get_files(doc_gen.root, skip, fs=doc_gen.fs):
        # Keep the try narrow: only the read can legitimately fail (binary
        # files, missing/unreadable paths); log instead of silently skipping.
        try:
            lines = doc_gen.fs.read(file_path).splitlines()
        except Exception as e:
            logging.debug(
                "Skipping %s during snippet validation: %s", file_path, e
            )
            continue

        snippet_starts = {}  # tag -> line number of first snippet-start
        snippet_ends = {}  # tag -> line number of first snippet-end

        for line_num, line in enumerate(lines, 1):
            start_match = start_re.search(line)
            if start_match:
                tag = start_match.group(2)
                # A second start for the same tag in one file is a duplicate.
                if tag in snippet_starts:
                    errors.append(DuplicateSnippetTagError(
                        file=file_path,
                        id=f"Duplicate snippet-start tag in {file_path}",
                        tag=tag,
                        line=line_num,
                    ))
                else:
                    snippet_starts[tag] = line_num

            end_match = end_re.search(line)
            if end_match:
                tag = end_match.group(2)
                if tag in snippet_ends:
                    errors.append(DuplicateSnippetTagError(
                        file=file_path,
                        id=f"Duplicate snippet-end tag in {file_path}",
                        tag=tag,
                        line=line_num,
                    ))
                else:
                    snippet_ends[tag] = line_num

        # Every snippet-start needs a corresponding snippet-end.
        for tag, start_line in snippet_starts.items():
            if tag not in snippet_ends:
                errors.append(UnmatchedSnippetTagError(
                    file=file_path,
                    id=f"Unclosed snippet-start in {file_path}",
                    tag=tag,
                    line=start_line,
                    tag_type='start',
                ))

        # Every snippet-end needs a corresponding snippet-start.
        for tag, end_line in snippet_ends.items():
            if tag not in snippet_starts:
                errors.append(UnmatchedSnippetTagError(
                    file=file_path,
                    id=f"Unmatched snippet-end in {file_path}",
                    tag=tag,
                    line=end_line,
                    tag_type='end',
                ))

    return errors
|
||
|
||
def prepare_scanner(doc_gen: DocGen) -> Optional[Scanner]: | ||
for path in (doc_gen.root / ".doc_gen/metadata").glob("*_metadata.yaml"): | ||
doc_gen.process_metadata(path) | ||
doc_gen.collect_snippets() | ||
doc_gen.validate() | ||
|
||
# Validate snippet tag pairs | ||
snippet_errors = validate_snippet_tags(doc_gen) | ||
if snippet_errors: | ||
doc_gen.errors.extend(snippet_errors) | ||
|
||
if doc_gen.errors: | ||
error_strings = [str(error) for error in doc_gen.errors] | ||
failed_list = "\n".join(f"DocGen Error: {e}" for e in error_strings) | ||
|
@@ -200,4 +347,4 @@ def make_diff(renderer, id): | |
current = renderer.read_current().split("\n") | ||
expected = renderer.readme_text.split("\n") | ||
diff = unified_diff(current, expected, f"{id}/current", f"{id}/expected") | ||
return "\n".join(diff) | ||
return "\n".join(diff) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,14 @@ | |
|
||
from typer import run | ||
|
||
run(writeme) | ||
# Run writeme and ensure proper exit code handling | ||
try: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought we added exit code handing by using typer.exit in runner.py? Is this necessary here also? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should writeme.py assume runner.py is handling all possibilities? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd say yes. |
||
result = run(writeme) | ||
if result is not None and result != 0: | ||
sys.exit(result) | ||
except SystemExit as e: | ||
# Ensure we exit with the proper code | ||
sys.exit(e.code) | ||
else: | ||
from .runner import writeme | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if walk_with_git_ignore is ignoring everything in the git ignore, can we just add these directories to that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think they're guaranteed to be 1:1, but it's probably pretty close
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, then, this seems like duplicated functionality.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
walk_with_gitignore should work at any level of gitignore granularity.
{'.kiro', '.git', 'node_modules', '__pycache__'}
These should definitely be in your .gitignore already? I suppose I can see an argument for
.kiro
return path.suffix.lower() not in validator_config.EXT_LOOKUP or path.name in validator_config.IGNORE_FILES
Again, seems like
*.ext
in the .gitignore orpath
in the folders' .gitignore would be equivalent?