From 9f9fdd66632205e084e397589e6d20a7c3850b98 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Sat, 21 Jun 2025 13:29:16 +0200
Subject: [PATCH] implement fluent (ftl) check

Fluent is a file format used by Firefox and other programs for translation strings.
---
 README.md                        |   3 +
 pre_commit_hooks/check_fluent.py | 137 ++++++++++++++++++++++++++
 setup.cfg                        |   1 +
 tests/check_fluent_test.py       | 159 +++++++++++++++++++++++++++++++
 4 files changed, 300 insertions(+)
 create mode 100644 pre_commit_hooks/check_fluent.py
 create mode 100644 tests/check_fluent_test.py

diff --git a/README.md b/README.md
index 2556f239..16579721 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,9 @@ Checks for a common error of placing code before the docstring.
 #### `check-executables-have-shebangs`
 Checks that non-binary executables have a proper shebang.
 
+#### `check-fluent`
+Checks that Fluent (`.ftl`) localization files have valid syntax.
+
 #### `check-illegal-windows-names`
 Check for files that cannot be created on Windows.
 
diff --git a/pre_commit_hooks/check_fluent.py b/pre_commit_hooks/check_fluent.py
new file mode 100644
index 00000000..9e7f5536
--- /dev/null
+++ b/pre_commit_hooks/check_fluent.py
@@ -0,0 +1,137 @@
+from __future__ import annotations
+
+import argparse
+from collections.abc import Sequence
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """Check every given file; return 1 if any fails, otherwise 0."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to check.')
+    options = parser.parse_args(argv)
+
+    exit_code = 0
+    for path in options.filenames:
+        try:
+            with open(path, encoding='UTF-8') as ftl_file:
+                text = ftl_file.read()
+            is_valid = _validate_fluent_syntax(text, path)
+        except (OSError, UnicodeDecodeError) as err:
+            print(f"{path}: Failed to read file ({err})")
+            is_valid = False
+        if not is_valid:
+            exit_code = 1
+
+    return exit_code
+
+
+def _validate_fluent_syntax(content: str, filename: str) -> bool:
+    """Run line-based sanity checks on Fluent (FTL) text.
+
+    Prints ``filename: Line N: ...`` for every problem found and returns
+    True when there is none. Braces are simply counted per line, so braces
+    inside string literals (``{ "{" }``) are not understood.
+    """
+    errors = []
+    current_message = None
+    # Placeables ("{ ... }") still open at the end of the previous line
+    brace_depth = 0
+    # Open select expressions, innermost last: brace depth / default seen
+    select_depths: list[int] = []
+    select_defaults: list[bool] = []
+
+    for line_num, line in enumerate(content.splitlines(), 1):
+        stripped = line.strip()
+        # Skip empty lines and comments
+        if not stripped or stripped.startswith('#'):
+            continue
+        indented = line.startswith((' ', '\t'))
+
+        # Check for entry definitions (identifier = value)
+        if '=' in line and not indented:
+            current_message = line.split('=')[0].strip()
+            # A new entry abandons whatever the previous one left open
+            brace_depth = 0
+            select_depths.clear()
+            select_defaults.clear()
+            # Terms are written "-name = value"; the dash is only a marker
+            name = current_message
+            if name.startswith('-'):
+                name = name[1:]
+            if not _is_valid_identifier(name):
+                errors.append(
+                    f"Line {line_num}: Invalid message identifier "
+                    f'"{current_message}"',
+                )
+
+        # Handle indented content (attributes, variants, multiline values)
+        elif indented:
+            if current_message is None:
+                errors.append(
+                    f"Line {line_num}: Indented content without "
+                    f"message context",
+                )
+                continue
+
+            # Check for attribute definitions
+            if stripped.startswith('.') and '=' in stripped:
+                attr_name = stripped.split('=')[0].strip()[1:]
+                if not _is_valid_identifier(attr_name):
+                    errors.append(
+                        f"Line {line_num}: Invalid attribute identifier "
+                        f'"{attr_name}"',
+                    )
+
+            # Check for variants in select expressions
+            elif stripped.startswith('*') or (
+                stripped.startswith('[') and stripped.endswith(']')
+            ):
+                if not select_depths:
+                    errors.append(
+                        f"Line {line_num}: Variant definition outside "
+                        f"select expression",
+                    )
+                elif stripped.startswith('*'):
+                    select_defaults[-1] = True
+
+        # "->" inside an open placeable starts a select, on any kind of line
+        head, arrow, _ = line.partition('->')
+        arrow_depth = brace_depth + head.count('{') - head.count('}')
+        if arrow and arrow_depth > 0:
+            select_depths.append(arrow_depth)
+            select_defaults.append(False)
+
+        # Only the select's own "}" ends it, not one from e.g. "{ $count }"
+        brace_depth = max(brace_depth + line.count('{') - line.count('}'), 0)
+        while select_depths and brace_depth < select_depths[-1]:
+            select_depths.pop()
+            if not select_defaults.pop():
+                errors.append(
+                    f"Line {line_num}: Select expression missing "
+                    f"default variant (marked with *)",
+                )
+
+    for error in errors:
+        print(f"{filename}: {error}")
+    return not errors
+
+
+def _is_valid_identifier(identifier: str) -> bool:
+    """Check if identifier matches Fluent's ``[a-zA-Z][a-zA-Z0-9_-]*``."""
+
+    # str.isalpha()/str.isalnum() also accept non-ASCII letters and digits
+    # (accented letters, superscript digits, ...), which Fluent identifiers
+    # may not contain, so anything that is not pure ASCII is rejected first.
+    if not identifier or not identifier.isascii():
+        return False
+
+    # Must start with letter
+    if not identifier[0].isalpha():
+        return False
+
+    # Can contain letters, numbers, underscores, and hyphens
+    return all(char.isalnum() or char in '_-' for char in identifier)
+
+
+if __name__ == '__main__':  # run the hook when executed as a script
+    raise SystemExit(main())
diff --git a/setup.cfg b/setup.cfg
index c5e6e0bd..9b5bd87c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,6 +35,7 @@ console_scripts =
     check-case-conflict = pre_commit_hooks.check_case_conflict:main
     check-docstring-first = pre_commit_hooks.check_docstring_first:main
     check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
+    check-fluent = pre_commit_hooks.check_fluent:main
     check-json = pre_commit_hooks.check_json:main
     check-merge-conflict = pre_commit_hooks.check_merge_conflict:main
     check-shebang-scripts-are-executable = pre_commit_hooks.check_shebang_scripts_are_executable:main
diff --git a/tests/check_fluent_test.py b/tests/check_fluent_test.py
new file mode 100644
index 00000000..2f63adf4
--- /dev/null
+++ b/tests/check_fluent_test.py
@@ -0,0 +1,159 @@
+from __future__ import annotations
+
+import pytest
+
+from pre_commit_hooks.check_fluent import main
+
+
+def test_valid_fluent_file(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text(
+        'hello = Hello, world!\n'
+        'greeting = Hello, { $name }!\n'
+        '    .title = Greeting\n'
+        'menu-item = Menu Item\n',
+    )
+    assert main([str(path)]) == 0  # messages, placeable and attribute pass
+
+
+def test_fluent_file_with_select_expression(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text(
+        'emails = { $unreadEmails ->\n'
+        '    [0] You have no unread emails.\n'
+        '    [one] You have one unread email.\n'
+        '   *[other] You have { $unreadEmails } unread emails.\n'
+        '}\n',
+    )
+    assert main([str(path)]) == 0  # select with a default variant passes
+
+
+def test_fluent_file_with_comments(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text(
+        '# This is a comment\n'
+        'hello = Hello, world!\n'
+        '\n'
+        '## Another comment\n'
+        'goodbye = Goodbye!\n',
+    )
+    assert main([str(path)]) == 0  # comments and blank lines are skipped
+
+
+def test_fluent_file_with_invalid_identifier(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text('123invalid = Invalid identifier\n')
+    assert main([str(path)]) == 1  # identifiers may not start with a digit
+
+
+def test_fluent_file_with_invalid_attribute_identifier(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text('hello = Hello\n    .123invalid = Invalid attribute\n')
+    assert main([str(path)]) == 1  # attribute names follow the same rule
+
+
+def test_fluent_file_missing_default_variant(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text(
+        'emails = { $unreadEmails ->\n'
+        '    [0] You have no unread emails.\n'
+        '    [one] You have one unread email.\n'
+        '}\n',
+    )
+    assert main([str(path)]) == 1  # no "*" variant before the closing brace
+
+
+def test_fluent_file_variant_outside_select(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text('hello = Hello\n   *[default] This should not be here\n')
+    assert main([str(path)]) == 1  # variant without an open select
+
+
+def test_fluent_file_missing_indentation(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text('hello = Hello\n.title = This should be indented\n')
+    assert main([str(path)]) == 1  # ".title" at column 0 is a bad identifier
+
+
+def test_fluent_file_indented_without_context(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text('    orphaned = This line has no message context\n')
+    assert main([str(path)]) == 1  # indented line before any message
+
+
+def test_non_utf8_file(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_bytes(b'\xa9\xfe\x12')
+    assert main([str(path)]) == 1  # undecodable bytes are reported, not raised
+
+
+def test_nonexistent_file(tmp_path):
+    assert main([str(tmp_path / 'nonexistent.ftl')]) == 1  # cwd-independent
+
+
+def test_empty_file(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text('')
+    assert main([str(path)]) == 0  # nothing to check means nothing fails
+
+
+def test_multiple_files(tmp_path):
+    good = tmp_path / 'valid.ftl'
+    good.write_text('hello = Hello, world!\n')
+
+    bad = tmp_path / 'invalid.ftl'
+    bad.write_text('123invalid = Invalid identifier\n')
+
+    assert main([str(good), str(bad)]) == 1  # one bad file fails the run
+
+
+def test_multiple_valid_files(tmp_path):
+    first = tmp_path / 'valid1.ftl'
+    first.write_text('hello = Hello, world!\n')
+
+    second = tmp_path / 'valid2.ftl'
+    second.write_text('goodbye = Goodbye!\n')
+
+    assert main([str(first), str(second)]) == 0
+
+
+@pytest.mark.parametrize(
+    ('identifier', 'expected'),
+    [
+        ('hello', True),
+        ('hello-world', True),
+        ('hello_world', True),
+        ('hello123', True),
+        ('123hello', False),  # must start with a letter
+        ('hello-', True),  # a trailing hyphen is allowed
+        ('-hello', False),  # must start with a letter
+        ('', False),
+        ('hello.world', False),
+        ('hello world', False),
+    ],
+)
+def test_identifier_validation(identifier, expected):
+    from pre_commit_hooks import check_fluent
+
+    assert check_fluent._is_valid_identifier(identifier) == expected
+
+
+def test_fluent_file_non_default_variant_with_closing_brace(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text(
+        'emails = { $unreadEmails ->\n'
+        '    [0] You have no unread emails. }\n',
+    )
+    assert main([str(path)]) == 1  # select closes without a "*" variant
+
+
+def test_fluent_file_non_star_variant_with_closing_check(tmp_path):
+    path = tmp_path / 'test.ftl'
+    path.write_text(
+        'test = { $var ->\n'
+        '    [case]\n'  # variant key alone; its value is on the next line
+        '        Value here\n'
+        '   *[other] Default\n'
+        '}\n',
+    )
+    assert main([str(path)]) == 0