From 9f9fdd66632205e084e397589e6d20a7c3850b98 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru
Date: Sat, 21 Jun 2025 13:29:16 +0200
Subject: [PATCH] implement fluent (ftl) check

Fluent is a file format used by Firefox and other programs for
translation strings.
---
Note: check_fluent.py and its tests were revised after the original
submission; the blob ids on the "index" lines below are the original ones.

 README.md                        |   3 +
 pre_commit_hooks/check_fluent.py | 209 +++++++++++++++++++++++++++++++
 setup.cfg                        |   1 +
 tests/check_fluent_test.py       | 186 +++++++++++++++++++++++++++
 4 files changed, 399 insertions(+)
 create mode 100644 pre_commit_hooks/check_fluent.py
 create mode 100644 tests/check_fluent_test.py

diff --git a/README.md b/README.md
index 2556f239..16579721 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,9 @@ Checks for a common error of placing code before the docstring.
 #### `check-executables-have-shebangs`
 Checks that non-binary executables have a proper shebang.
 
+#### `check-fluent`
+Checks that fluent files are correctly formatted.
+
 #### `check-illegal-windows-names`
 Check for files that cannot be created on Windows.
 
diff --git a/pre_commit_hooks/check_fluent.py b/pre_commit_hooks/check_fluent.py
new file mode 100644
index 00000000..9e7f5536
--- /dev/null
+++ b/pre_commit_hooks/check_fluent.py
@@ -0,0 +1,209 @@
+from __future__ import annotations
+
+import argparse
+from collections.abc import Sequence
+
+
+class _Placeable:
+    """State of one ``{ ... }`` placeable that has not been closed yet."""
+
+    __slots__ = ('opened_at', 'is_select', 'has_default')
+
+    def __init__(self, opened_at: int) -> None:
+        # Line number of the opening brace, used in error messages.
+        self.opened_at = opened_at
+        # True once '->' has been seen directly inside this placeable.
+        self.is_select = False
+        # True once a '*' variant has been seen for this select expression.
+        self.has_default = False
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """Check the given Fluent files; return 0 if all pass, 1 otherwise.
+
+    A file fails when it cannot be read as UTF-8 or when
+    ``_validate_fluent_syntax`` reports a problem in it.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to check.')
+    args = parser.parse_args(argv)
+
+    retval = 0
+    for filename in args.filenames:
+        try:
+            with open(filename, encoding='UTF-8') as f:
+                content = f.read()
+        except (OSError, UnicodeDecodeError) as exc:
+            print(f"{filename}: Failed to read file ({exc})")
+            retval = 1
+            continue
+
+        if not _validate_fluent_syntax(content, filename):
+            retval = 1
+
+    return retval
+
+
+def _validate_fluent_syntax(content: str, filename: str) -> bool:
+    """Run line-based sanity checks on the text of a Fluent (FTL) file.
+
+    This is a heuristic, not a full Fluent parser.  It checks that
+    message, term and attribute identifiers are valid, that indented
+    content follows a message or term, that variants only appear inside
+    a select expression, that every select expression has a default
+    (``*``) variant, and that every ``{`` is closed within its entry.
+
+    Each problem is printed as ``<filename>: Line <n>: <message>``.
+    Returns True when no problem was found, False otherwise.
+    """
+    lines = content.splitlines()
+    errors: list[str] = []
+
+    # Becomes True at the first message or term definition.
+    entry_seen = False
+    # Placeables that are still open; the innermost one is last.
+    placeables: list[_Placeable] = []
+
+    for line_num, line in enumerate(lines, 1):
+        stripped = line.strip()
+
+        # Skip empty lines and comments
+        if not stripped or stripped.startswith('#'):
+            continue
+
+        indented = line.startswith((' ', '\t'))
+
+        # Message or term definition (identifier = value)
+        if not indented and '=' in line:
+            # Placeables never span entries, so one that is still open
+            # here was never closed.
+            if placeables:
+                errors.append(_unterminated_error(placeables))
+                placeables.clear()
+
+            entry_seen = True
+            entry_name = line.partition('=')[0].strip()
+
+            # Terms are written "-name"; the leading dash is not part of
+            # the identifier.
+            identifier = entry_name
+            if identifier.startswith('-'):
+                identifier = identifier[1:]
+            if not _is_valid_identifier(identifier):
+                errors.append(
+                    f"Line {line_num}: Invalid message identifier "
+                    f'"{entry_name}"',
+                )
+
+        # Indented content (attributes, variants, multiline values)
+        elif indented:
+            if not entry_seen:
+                errors.append(
+                    f"Line {line_num}: Indented content without "
+                    f"message context",
+                )
+                continue
+
+            # A variant belongs to the innermost placeable that was open
+            # before this line's own braces are read.
+            in_select = bool(placeables) and placeables[-1].is_select
+
+            if stripped.startswith('.') and '=' in stripped:
+                # Attribute definition: drop the leading dot
+                attr_name = stripped.partition('=')[0].strip()[1:]
+                if not _is_valid_identifier(attr_name):
+                    errors.append(
+                        f"Line {line_num}: Invalid attribute identifier "
+                        f'"{attr_name}"',
+                    )
+            elif stripped.startswith('*') or (
+                stripped.startswith('[') and stripped.endswith(']')
+            ):
+                if not in_select:
+                    errors.append(
+                        f"Line {line_num}: Variant definition outside "
+                        f"select expression",
+                    )
+                elif stripped.startswith('*'):
+                    placeables[-1].has_default = True
+
+        # Every line, indented or not, may open or close placeables.
+        for _ in range(_scan_placeables(line, line_num, placeables)):
+            errors.append(
+                f"Line {line_num}: Select expression missing "
+                f"default variant (marked with *)",
+            )
+
+    if placeables:
+        errors.append(_unterminated_error(placeables))
+
+    # Report errors
+    for error in errors:
+        print(f"{filename}: {error}")
+
+    return not errors
+
+
+def _scan_placeables(
+        line: str,
+        line_num: int,
+        placeables: list[_Placeable],
+) -> int:
+    """Update *placeables* for the braces and arrows found on *line*.
+
+    Returns the number of select expressions that were closed on this
+    line without a default variant having been seen for them.
+    """
+    missing_default = 0
+    pos = 0
+    while pos < len(line):
+        char = line[pos]
+        if char == '{':
+            placeables.append(_Placeable(line_num))
+        elif not placeables:
+            # Outside placeables '}', '"' and '->' are ordinary text.
+            pass
+        elif char == '}':
+            closed = placeables.pop()
+            if closed.is_select and not closed.has_default:
+                missing_default += 1
+        elif char == '"' and not placeables[-1].is_select:
+            # String literal in an expression: braces inside it are text.
+            # (Directly inside a select we are in variant text, where a
+            # quote is an ordinary character.)
+            pos += 1
+            while pos < len(line) and line[pos] != '"':
+                # Skip the character after a backslash (e.g. \").
+                pos += 2 if line[pos] == '\\' else 1
+        elif line.startswith('->', pos):
+            placeables[-1].is_select = True
+            pos += 1
+        pos += 1
+    return missing_default
+
+
+def _unterminated_error(placeables: list[_Placeable]) -> str:
+    """Return the error message for the outermost unclosed placeable."""
+    return (
+        f"Line {placeables[0].opened_at}: Unterminated placeable "
+        '(missing "}")'
+    )
+
+
+def _is_valid_identifier(identifier: str) -> bool:
+    """Check that identifier matches Fluent's ``[a-zA-Z][a-zA-Z0-9_-]*``."""
+    # isalpha()/isalnum() also accept non-ASCII letters and digits, which
+    # Fluent identifiers do not allow, so require ASCII first.
+    if not identifier or not identifier.isascii():
+        return False
+
+    # Must start with a letter
+    if not identifier[0].isalpha():
+        return False
+
+    # Can contain letters, numbers, underscores, and hyphens
+    return all(char.isalnum() or char in '_-' for char in identifier)
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/setup.cfg b/setup.cfg
index c5e6e0bd..9b5bd87c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,6 +35,7 @@ console_scripts =
     check-case-conflict = pre_commit_hooks.check_case_conflict:main
     check-docstring-first = pre_commit_hooks.check_docstring_first:main
     check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
+    check-fluent = pre_commit_hooks.check_fluent:main
     check-json = pre_commit_hooks.check_json:main
     check-merge-conflict = pre_commit_hooks.check_merge_conflict:main
     check-shebang-scripts-are-executable = pre_commit_hooks.check_shebang_scripts_are_executable:main
diff --git a/tests/check_fluent_test.py b/tests/check_fluent_test.py
new file mode 100644
index 00000000..2f63adf4
--- /dev/null
+++ b/tests/check_fluent_test.py
@@ -0,0 +1,186 @@
+from __future__ import annotations
+
+import pytest
+
+from pre_commit_hooks.check_fluent import main
+
+
+def test_valid_fluent_file(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(
+        'hello = Hello, world!\n'
+        'greeting = Hello, { $name }!\n'
+        ' .title = Greeting\n'
+        'menu-item = Menu Item\n',
+    )
+    assert main([str(f)]) == 0
+
+
+def test_fluent_file_with_select_expression(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(
+        'emails = { $unreadEmails ->\n'
+        ' [0] You have no unread emails.\n'
+        ' [one] You have one unread email.\n'
+        ' *[other] You have { $unreadEmails } unread emails.\n'
+        '}\n',
+    )
+    assert main([str(f)]) == 0
+
+
+def test_fluent_file_with_comments(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(
+        '# This is a comment\n'
+        'hello = Hello, world!\n'
+        '\n'
+        '## Another comment\n'
+        'goodbye = Goodbye!\n',
+    )
+    assert main([str(f)]) == 0
+
+
+def test_fluent_file_with_invalid_identifier(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text('123invalid = Invalid identifier\n')
+    assert main([str(f)]) == 1
+
+
+def test_fluent_file_with_invalid_attribute_identifier(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text('hello = Hello\n' ' .123invalid = Invalid attribute\n')
+    assert main([str(f)]) == 1
+
+
+def test_fluent_file_missing_default_variant(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(
+        'emails = { $unreadEmails ->\n'
+        ' [0] You have no unread emails.\n'
+        ' [one] You have one unread email.\n'
+        '}\n',
+    )
+    assert main([str(f)]) == 1
+
+
+def test_fluent_file_variant_outside_select(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text('hello = Hello\n' ' *[default] This should not be here\n')
+    assert main([str(f)]) == 1
+
+
+def test_fluent_file_missing_indentation(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text('hello = Hello\n' '.title = This should be indented\n')
+    assert main([str(f)]) == 1
+
+
+def test_fluent_file_indented_without_context(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(' orphaned = This line has no message context\n')
+    assert main([str(f)]) == 1
+
+
+def test_non_utf8_file(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_bytes(b'\xa9\xfe\x12')
+    assert main([str(f)]) == 1
+
+
+def test_nonexistent_file():
+    assert main(['nonexistent.ftl']) == 1
+
+
+def test_empty_file(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text('')
+    assert main([str(f)]) == 0
+
+
+def test_multiple_files(tmp_path):
+    f1 = tmp_path / 'valid.ftl'
+    f1.write_text('hello = Hello, world!\n')
+
+    f2 = tmp_path / 'invalid.ftl'
+    f2.write_text('123invalid = Invalid identifier\n')
+
+    assert main([str(f1), str(f2)]) == 1
+
+
+def test_multiple_valid_files(tmp_path):
+    f1 = tmp_path / 'valid1.ftl'
+    f1.write_text('hello = Hello, world!\n')
+
+    f2 = tmp_path / 'valid2.ftl'
+    f2.write_text('goodbye = Goodbye!\n')
+
+    assert main([str(f1), str(f2)]) == 0
+
+
+@pytest.mark.parametrize(
+    'identifier,expected',
+    [
+        ('hello', True),
+        ('hello-world', True),
+        ('hello_world', True),
+        ('hello123', True),
+        ('123hello', False),
+        ('hello-', True),
+        ('-hello', False),
+        ('', False),
+        ('hello.world', False),
+        ('hello world', False),
+        ('h\xe9llo', False),
+    ],
+)
+def test_identifier_validation(identifier, expected):
+    from pre_commit_hooks.check_fluent import _is_valid_identifier
+
+    assert _is_valid_identifier(identifier) == expected
+
+
+def test_fluent_file_non_default_variant_with_closing_brace(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(
+        'emails = { $unreadEmails ->\n'
+        ' [0] You have no unread emails. }\n',
+    )
+    assert main([str(f)]) == 1  # Should fail due to missing default variant
+
+
+def test_fluent_file_non_star_variant_with_closing_check(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(
+        'test = { $var ->\n'
+        ' [case]\n'  # Comment
+        ' Value here\n'
+        ' *[other] Default\n'
+        '}\n',
+    )
+    assert main([str(f)]) == 0
+
+
+def test_fluent_file_with_term_definition(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(
+        '-brand-name = Firefox\n'
+        'about = About { -brand-name }\n',
+    )
+    assert main([str(f)]) == 0
+
+
+def test_fluent_file_placeable_before_default_variant(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text(
+        'emails = { $unreadEmails ->\n'
+        ' [one] You have { $unreadEmails } unread email.\n'
+        ' *[other] You have { $unreadEmails } unread emails.\n'
+        '}\n',
+    )
+    assert main([str(f)]) == 0
+
+
+def test_fluent_file_unterminated_placeable(tmp_path):
+    f = tmp_path / 'test.ftl'
+    f.write_text('hello = Hello, { $name\n' 'bye = Goodbye\n')
+    assert main([str(f)]) == 1