Writeme: integrate folder exclusion into writeme and fix validation issues #7539

Open · wants to merge 2 commits into base: main
151 changes: 149 additions & 2 deletions .tools/readmes/runner.py
@@ -9,12 +9,52 @@
from difflib import unified_diff
from enum import Enum
from pathlib import Path
from typing import Optional
from typing import Optional, Generator, Callable

from render import Renderer, RenderStatus, MissingMetadataError
from scanner import Scanner

from aws_doc_sdk_examples_tools.doc_gen import DocGen
from aws_doc_sdk_examples_tools.metadata_errors import MetadataError
from collections import defaultdict
import re

# Folders to exclude from processing
EXCLUDED_FOLDERS = {'.kiro', '.git', 'node_modules', '__pycache__'}


def _configure_folder_exclusion():
"""Configure file processing to exclude specified folders."""
from aws_doc_sdk_examples_tools import file_utils, validator_config
from aws_doc_sdk_examples_tools.fs import Fs, PathFs

def enhanced_skip(path: Path) -> bool:
"""Skip function that ignores excluded folders and standard ignored files."""
# Check if path contains any excluded folders
if any(excluded_folder in path.parts for excluded_folder in EXCLUDED_FOLDERS):
return True

# Apply standard skip logic
return path.suffix.lower() not in validator_config.EXT_LOOKUP or path.name in validator_config.IGNORE_FILES

def enhanced_get_files(
root: Path, skip: Callable[[Path], bool] = lambda _: False, fs: Fs = PathFs()
) -> Generator[Path, None, None]:
"""Get files using enhanced skip function."""
for path in file_utils.walk_with_gitignore(root, fs=fs):
Review thread:

Contributor: If walk_with_gitignore is ignoring everything in the .gitignore, can we just add these directories to that?

Member Author: I don't think they're guaranteed to be 1:1, but it's probably pretty close.

Contributor (@rlhagerm, Jul 28, 2025): Ok, then this seems like duplicated functionality.

Contributor: walk_with_gitignore should work at any level of gitignore granularity.

> {'.kiro', '.git', 'node_modules', '__pycache__'}

These should definitely be in your .gitignore already? I suppose I can see an argument for .kiro.

> return path.suffix.lower() not in validator_config.EXT_LOOKUP or path.name in validator_config.IGNORE_FILES

Again, it seems like *.ext in the .gitignore, or the path in the folder's .gitignore, would be equivalent?

if not enhanced_skip(path):
yield path

# Configure the file processing functions
validator_config.skip = enhanced_skip
file_utils.get_files = enhanced_get_files

excluded_list = ', '.join(sorted(EXCLUDED_FOLDERS))
print(f"Folder exclusion configured: {excluded_list} folders excluded")


# Configure folder exclusion when module is imported
_configure_folder_exclusion()
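For reference, a minimal standalone sketch of the folder-exclusion check that enhanced_skip applies; the sample paths are hypothetical, and the real predicate also consults validator_config.EXT_LOOKUP and IGNORE_FILES:

from pathlib import Path

EXCLUDED_FOLDERS = {'.kiro', '.git', 'node_modules', '__pycache__'}

def in_excluded_folder(path: Path) -> bool:
    # Mirrors only the folder-membership half of enhanced_skip above.
    return any(folder in path.parts for folder in EXCLUDED_FOLDERS)

print(in_excluded_folder(Path("node_modules/lib/index.js")))         # True
print(in_excluded_folder(Path("python/example_code/kms/hello.py")))  # False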


# Default to not using Rich
@@ -26,11 +66,118 @@
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO").upper(), force=True)


class UnmatchedSnippetTagError(MetadataError):
def __init__(self, file, id, tag=None, line=None, tag_type=None):
super().__init__(file=file, id=id)
self.tag = tag
self.line = line
self.tag_type = tag_type # 'start' or 'end'

def message(self):
return f"Unmatched snippet-{self.tag_type} tag '{self.tag}' at line {self.line}"


class DuplicateSnippetTagError(MetadataError):
def __init__(self, file, id, tag=None, line=None):
super().__init__(file=file, id=id)
self.tag = tag
self.line = line

def message(self):
return f"Duplicate snippet tag '{self.tag}' found at line {self.line}"


def validate_snippet_tags(doc_gen: DocGen):
"""Validate snippet-start/snippet-end pairs across all files."""
errors = []

# We need to scan files directly since DocGen.snippets only contains valid pairs
from aws_doc_sdk_examples_tools.file_utils import get_files
from aws_doc_sdk_examples_tools.validator_config import skip

for file_path in get_files(doc_gen.root, skip, fs=doc_gen.fs):
try:
content = doc_gen.fs.read(file_path)
lines = content.splitlines()

snippet_starts = {} # Track all snippet-start tags and their line numbers
snippet_ends = {} # Track all snippet-end tags and their line numbers
snippet_tags_seen = set() # Track all tags in this file to detect duplicates

for line_num, line in enumerate(lines, 1):
# Look for snippet-start patterns (# or // comment styles)
start_match = re.search(r'(#|//)\s*snippet-start:\[([^\]]+)\]', line)
if start_match:
tag = start_match.group(2)

# Check for duplicate start tags in the same file
if tag in snippet_starts:
errors.append(DuplicateSnippetTagError(
file=file_path,
id=f"Duplicate snippet-start tag in {file_path}",
tag=tag,
line=line_num
))
else:
snippet_starts[tag] = line_num
snippet_tags_seen.add(tag)

# Look for snippet-end patterns
end_match = re.search(r'(#|//)\s*snippet-end:\[([^\]]+)\]', line)
if end_match:
tag = end_match.group(2)

# Check for duplicate end tags in the same file
if tag in snippet_ends:
errors.append(DuplicateSnippetTagError(
file=file_path,
id=f"Duplicate snippet-end tag in {file_path}",
tag=tag,
line=line_num
))
else:
snippet_ends[tag] = line_num

# Check that every snippet-start has a corresponding snippet-end
for tag, start_line in snippet_starts.items():
if tag not in snippet_ends:
Review thread:

Contributor: This code that checks for matching starts and ends: doesn't it duplicate what's already in -tools in snippets.py? Is there a problem with that logic, or a reason we should duplicate it here?

Member Author: There is at least some problem with the previous logic, because this caught the error in BatchActions.java, which boils down to a missing end tag (malformed).

Contributor: In that case, could you upstream a fix for that?

Contributor: Oh, not a missing tag, but a malformed one? Yeah, I bet the existing logic (and related tests) should be modified to handle that.

errors.append(UnmatchedSnippetTagError(
file=file_path,
id=f"Unclosed snippet-start in {file_path}",
tag=tag,
line=start_line,
tag_type='start'
))

# Check that every snippet-end has a corresponding snippet-start
for tag, end_line in snippet_ends.items():
if tag not in snippet_starts:
errors.append(UnmatchedSnippetTagError(
file=file_path,
id=f"Unmatched snippet-end in {file_path}",
tag=tag,
line=end_line,
tag_type='end'
))

except Exception as e:
# Skip files that can't be read (binary files, etc.)
Review thread:

Contributor: Are all exceptions files that can't be read? What if the file is missing?

Contributor: At least log them.

Member Author: How would this know if a file is missing? It just wouldn't show up in the list to check, right?

Contributor: Or misnamed, maybe. I was questioning the very wide net cast by that except, and the assumption that all the errors are of one type.

continue

return errors
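To illustrate what this scan catches, here is a standalone sketch of the malformed-end-tag case mentioned in the review thread above (the sample file contents are hypothetical; the regular expressions are the same ones used in validate_snippet_tags):

import re

# Hypothetical file contents: a well-formed start tag and an end tag that is
# missing its closing ']', like the BatchActions.java fix later in this PR.
sample = """\
// snippet-start:[batch.java2.disable.compute.environment.main]
// ... code ...
// snippet-end:[batch.java2.disable.compute.environment.main
"""

starts = re.findall(r'(#|//)\s*snippet-start:\[([^\]]+)\]', sample)
ends = re.findall(r'(#|//)\s*snippet-end:\[([^\]]+)\]', sample)
print(len(starts), len(ends))  # 1 0 -> the start tag is reported as unmatched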


def prepare_scanner(doc_gen: DocGen) -> Optional[Scanner]:
for path in (doc_gen.root / ".doc_gen/metadata").glob("*_metadata.yaml"):
doc_gen.process_metadata(path)
doc_gen.collect_snippets()
doc_gen.validate()

# Validate snippet tag pairs
snippet_errors = validate_snippet_tags(doc_gen)
if snippet_errors:
doc_gen.errors.extend(snippet_errors)

if doc_gen.errors:
error_strings = [str(error) for error in doc_gen.errors]
failed_list = "\n".join(f"DocGen Error: {e}" for e in error_strings)
@@ -200,4 +347,4 @@ def make_diff(renderer, id):
current = renderer.read_current().split("\n")
expected = renderer.readme_text.split("\n")
diff = unified_diff(current, expected, f"{id}/current", f"{id}/expected")
return "\n".join(diff)
return "\n".join(diff)
9 changes: 8 additions & 1 deletion .tools/readmes/writeme.py
@@ -24,7 +24,14 @@

from typer import run

run(writeme)
# Run writeme and ensure proper exit code handling
try:
Review thread:

Contributor: I thought we added exit code handling by using typer.exit in runner.py? Is this necessary here also?

Member Author: Should writeme.py assume runner.py is handling all possibilities?

Contributor: I'd say yes.

result = run(writeme)
if result is not None and result != 0:
sys.exit(result)
except SystemExit as e:
# Ensure we exit with the proper code
sys.exit(e.code)
else:
from .runner import writeme

@@ -458,7 +458,7 @@ public CompletableFuture<UpdateComputeEnvironmentResponse> disableComputeEnvironment

return responseFuture;
}
// snippet-end:[batch.java2.disable.compute.environment.main
// snippet-end:[batch.java2.disable.compute.environment.main]

// snippet-start:[batch.java2.submit.job.main]
/**
2 changes: 1 addition & 1 deletion python/example_code/kms/README.md
@@ -136,4 +136,4 @@ in the `python` folder.

Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
SPDX-License-Identifier: Apache-2.0
2 changes: 1 addition & 1 deletion python/example_code/lambda/README.md
@@ -212,4 +212,4 @@ in the `python` folder.

Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
SPDX-License-Identifier: Apache-2.0