Files
OmniRoute/scripts/validate_translation.py
Diego Rodrigues de Sa e Souza 1442c47bbb
Build Electron Desktop App / Validate version (push) Failing after 24s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
Build Electron Desktop App / Publish to npm (push) Has been skipped
chore(release): v3.5.6 — email masking, model toggle, OpenRouter registries & bug fixes (#1080)
* fix(minimax): switch auth from x-api-key to Authorization Bearer (#1076)

Integrated into release/v3.5.6 — MiniMax auth fix with authHeader consistency normalization

* feat(CI,i18n): autogenerate language files + Add missing strings (#1071)

Integrated into release/v3.5.6 — i18n translations for memory, skills, and missing keys across 31 languages

* fix(ci): restore i18n continue-on-error, remove auto-commit race condition

* fix(husky): load nvm in hooks for VS Code compatibility

* fix(husky): gracefully skip hooks when npm is not in PATH

* fix: convert OpenAI function tool_choice to Claude tool format (#1072)

* fix: prevent EPIPE feedback loop filling logs at GB/s (#1006)

* fix: fallback to native fetch when undici dispatcher fails (#1054)

* fix: improve Qoder PAT validation with actionable error messages (#966)

- Add QODER_PERSONAL_ACCESS_TOKEN env var fallback for both validation and execution
- Pre-flight ping check to diagnose connectivity issues (Docker/proxy)
- Detect encrypted auth blobs from ~/.qoder/.auth/user and guide to website PAT
- Clear error messages for auth failures with link to integrations page
- Treat non-auth 4xx as auth-pass (request format issue, not token issue)
- Update tests to cover new validation paths (23 tests, all passing)

* feat: Improve the Chinese translation (#1079)

Integrated into release/v3.5.6

* chore(release): v3.5.6 — i18n updates and credential security fixes

* fix(ci): resolve e2e and docs-sync pipeline failures

* fix(security): bump next to 16.2.3 to resolve SNYK-JS-NEXT-15954202

* fix: guard Memory/Cache UI against null toLocaleString crash (#1083)

* fix: translate OpenAI tool_choice type 'function' to Claude 'tool' format (#1072)

* fix: pass custom baseUrl in provider API key validation (#1078)

* docs: update CHANGELOG with v3.5.6 bug fixes and security patches

* docs: rewrite implement-features workflow with 5-phase harvest-research-report-plan-execute pipeline

* docs: organize _ideia/ into viable/defer/notfit + add Phase 2.5 auto-response workflow

* docs: implementation plans for #1025, #750, #960, #1046 + close already-implemented #833, #973, #982

* feat: mask email addresses in dashboard for privacy (#1025)

* feat: add OpenRouter and GitHub to embedding/image provider registries (#960)

* feat: add model visibility toggle and search filter to provider page (#750)

* docs: move implemented features to notfit, update task plans status

* chore: untrack _ideia/ and _tasks/ from git — private/internal only

* chore(release): bump to v3.5.6 — changelog, docs, version sync & any-budget fix

* fix: remove explicit .ts extension in qoderCli import that caused 500 error in production build

---------

Co-authored-by: Jean Brito <jeanfbrito@gmail.com>
Co-authored-by: zenobit <zenobit@disroot.org>
Co-authored-by: diegosouzapw <diegosouzapw@users.noreply.github.com>
Co-authored-by: Ethan Hunt <136065060+only4copilot@users.noreply.github.com>
2026-04-09 15:55:59 -03:00

634 lines
21 KiB
Python
Executable File

#!/usr/bin/env python3
"""
OmniRoute i18n Translation Validator
Script for comparing source (en.json) with any translation
Detects missing translations and source changes needing updates
Usage:
python validate_translation.py # Uses TRANSLATION_LANG env or --lang argument
python validate_translation.py --lang cs # Validate Czech (cs.json)
python validate_translation.py -l de # Validate German (de.json)
TRANSLATION_LANG=fr python validate_translation.py # Validate French
Environment variables:
TRANSLATION_LANG Target language code (e.g., cs, de, fr)
"""
import json
import sys
import os
from pathlib import Path
from typing import Dict, List, Set, Tuple, Any
import argparse
# Colors (ANSI)
# ANSI SGR escape sequences wrapped around messages by the print_* helpers.
RED = "\033[0;31m"  # red foreground
GREEN = "\033[0;32m"  # green foreground
YELLOW = "\033[1;33m"  # bold + yellow foreground
BLUE = "\033[0;34m"  # blue foreground
NC = "\033[0m"  # "no color": resets all attributes
# Configuration: locate the repository root relative to this script.
_script_dir = Path(__file__).parent.resolve()
# A script living in a "scripts" folder has the repo root one level up;
# otherwise the script's own directory is treated as the root.
SCRIPT_DIR = _script_dir.parent if _script_dir.name == "scripts" else _script_dir
# Directory holding one <lang>.json message catalog per language.
MESSAGES_DIR = SCRIPT_DIR.joinpath("src", "i18n", "messages")
# The English catalog is the reference every translation is compared against.
SOURCE_FILE = MESSAGES_DIR.joinpath("en.json")
# Get target language from CLI argument or environment
def get_target_lang() -> str:
    """Return the target language code.

    Resolution order:
      1. ``get_target_lang.cli_lang`` when set (main() stores ``--lang`` there),
      2. the ``TRANSLATION_LANG`` environment variable,
      3. ``"cs"`` for backwards compatibility.

    An explicit command-line choice wins over the environment so that
    ``TRANSLATION_LANG=fr ... --lang de`` validates ``de`` instead of
    silently ignoring the flag.
    """
    cli_lang = getattr(get_target_lang, "cli_lang", None)
    if cli_lang:
        return cli_lang
    env_lang = os.environ.get("TRANSLATION_LANG")
    if env_lang:
        return env_lang
    # Default to cs for backwards compatibility
    return "cs"
# Keys that should NOT be translated (technical terms, proper names, etc.).
# They live in an external JSON file (under its "keys" entry) so the list can
# be maintained without touching this script; a missing file means "no keys".
_UNTRANSLATABLE_KEYS_FILE = _script_dir / "i18n" / "untranslatable-keys.json"
UNTRANSLATABLE_KEYS = set()
if _UNTRANSLATABLE_KEYS_FILE.exists():
    with open(_UNTRANSLATABLE_KEYS_FILE, "r", encoding="utf-8") as _f:
        UNTRANSLATABLE_KEYS = set(json.load(_f).get("keys", []))
def print_header(msg: str) -> None:
    """Print *msg* in blue between two 50-character '=' rules, after a blank line."""
    rule = f"{BLUE}{'=' * 50}{NC}"
    print(f"\n{rule}")
    print(f"{BLUE}{msg}{NC}")
    print(rule)
def print_success(msg: str) -> None:
    """Print *msg* in green."""
    print("{}{}{}".format(GREEN, msg, NC))
def print_warning(msg: str) -> None:
    """Print *msg* in yellow."""
    print("{}{}{}".format(YELLOW, msg, NC))
def print_error(msg: str) -> None:
    """Print *msg* in red."""
    print("{}{}{}".format(RED, msg, NC))
def load_json(path: Path) -> Dict:
    """Load and parse the UTF-8 JSON file at *path*.

    Returns the parsed document. If the file cannot be read or does not
    contain valid UTF-8 JSON, an error message is printed and the process
    exits with status 1 instead of surfacing a traceback.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print_error(f"Invalid JSON in {path}: {e}")
        sys.exit(1)
    except OSError as e:
        # Missing file, permission problem, path is a directory, ...
        print_error(f"Cannot read {path}: {e}")
        sys.exit(1)
def get_all_keys(obj: Any, prefix: str = "") -> Set[str]:
    """Collect the dotted paths of every leaf under a JSON object.

    Nested dicts are joined with "."; a list whose first element is a dict
    is expanded per element as "name[i]" (non-dict elements of such a list
    contribute nothing). Any other value, including other lists, counts as
    a leaf. A non-dict *obj* yields an empty set.
    """
    collected: Set[str] = set()
    if not isinstance(obj, dict):
        return collected
    for name, child in obj.items():
        path = f"{prefix}.{name}" if prefix else name
        if isinstance(child, dict):
            collected |= get_all_keys(child, path)
            continue
        list_of_dicts = (
            isinstance(child, list) and bool(child) and isinstance(child[0], dict)
        )
        if not list_of_dicts:
            collected.add(path)
            continue
        # Arrays of objects: descend into each element under an indexed prefix.
        for index, element in enumerate(child):
            collected |= get_all_keys(element, f"{path}[{index}]")
    return collected
def find_missing_keys(source: Dict, trans: Dict) -> Set[str]:
    """Return the leaf paths present in *source* but absent from *trans*."""
    return get_all_keys(source).difference(get_all_keys(trans))
def find_extra_keys(source: Dict, trans: Dict) -> Set[str]:
    """Return the leaf paths present in *trans* but absent from *source*."""
    reference_paths = get_all_keys(source)
    return get_all_keys(trans).difference(reference_paths)
def get_value_by_path(obj: Dict, path: str) -> Any:
    """Resolve a dotted/indexed path such as ``"a.b[2].c"`` inside *obj*.

    ``[i]`` segments are rewritten to ``.i`` and the path is walked one
    segment at a time. Whether a segment is an index or a key is decided by
    the container being traversed, not by how the segment looks: a list
    requires a decimal index, a dict is looked up by key. This lets
    all-digit dict keys (e.g. ``"errors.404"``) resolve correctly.

    Returns the value found, or None if any segment cannot be resolved.
    """
    segments = path.replace("[", ".").replace("]", "").split(".")
    current: Any = obj
    for segment in segments:
        if isinstance(current, list):
            # isdecimal() (unlike isdigit()) only accepts what int() can parse.
            if not segment.isdecimal():
                return None
            idx = int(segment)
            if idx >= len(current):
                return None
            current = current[idx]
        elif isinstance(current, dict) and segment in current:
            current = current[segment]
        else:
            return None
    return current
def find_untranslated(source: Dict, trans: Dict) -> Set[str]:
    """Return source leaf paths whose translation value equals the source value.

    Paths listed in UNTRANSLATABLE_KEYS are skipped, as are paths whose
    source value resolves to None.
    """
    same_as_source: Set[str] = set()
    for path in get_all_keys(source):
        if path in UNTRANSLATABLE_KEYS:
            # Deliberately identical in every language.
            continue
        original = get_value_by_path(source, path)
        if original is None:
            continue
        if original == get_value_by_path(trans, path):
            same_as_source.add(path)
    return same_as_source
def find_placeholder_issues(source: Dict, trans: Dict) -> List[Tuple[str, str, str]]:
    """Find source placeholders that are missing from the translation.

    For every source leaf whose source and translation values are both
    strings, the identifier following each ``{`` is extracted (``name`` from
    ``{name}``, ``count`` from ``{count, plural, ...}``). A key is reported
    when the source has an identifier the translation lacks; identifiers
    that only appear in the translation are not reported.

    Note: the pattern matches any ``{`` followed by an identifier, so text
    inside an ICU branch that starts with a letter right after ``{`` is
    picked up as well.

    Returns a list of ``(key, source_placeholders, trans_placeholders)``
    where the last two are the ``str()`` of the extracted sets, ordered by
    key so that repeated runs produce the same report.
    """
    # Function-scope import: the module's top-level imports do not include re.
    import re

    # Compiled once instead of once per key.
    placeholder_pattern = re.compile(r"\{\s*([a-zA-Z][a-zA-Z0-9_]*)")
    issues = []
    for key in sorted(get_all_keys(source)):
        source_val = get_value_by_path(source, key)
        trans_val = get_value_by_path(trans, key)
        if source_val is None or trans_val is None:
            continue
        if not isinstance(source_val, str) or not isinstance(trans_val, str):
            continue
        source_placeholders = set(placeholder_pattern.findall(source_val))
        trans_placeholders = set(placeholder_pattern.findall(trans_val))
        # Only placeholders dropped by the translation count as an issue.
        if source_placeholders - trans_placeholders:
            issues.append((key, str(source_placeholders), str(trans_placeholders)))
    return issues
def compare_category(
    source: Dict, trans: Dict, category: str
) -> Tuple[bool, List[str]]:
    """Check one top-level category of the translation against the source.

    Returns ``(complete, missing_keys)``. When the category is absent from
    either document, ``complete`` is False and the list holds a single
    explanatory message instead of key paths.
    """
    if category not in source:
        return False, [f"Category '{category}' not in source"]
    if category not in trans:
        return False, [f"Category '{category}' missing in translation"]
    absent = get_all_keys(source[category]).difference(get_all_keys(trans[category]))
    return not absent, list(absent)
def get_translation_file() -> Path:
    """Return the path of ``<lang>.json`` in MESSAGES_DIR for the target language."""
    return MESSAGES_DIR.joinpath(f"{get_target_lang()}.json")
def generate_report() -> int:
    """Print the full translation report for the target language.

    Sections: key statistics, missing keys, extra keys, untranslated keys
    (value identical to source), placeholder mismatches, per-category
    status and a summary. Long lists are truncated (50 keys, 20 placeholder
    issues) with a trailing "... and N more" line.

    Always returns 0; the report is informational.
    """
    translation_file = get_translation_file()
    print_header("OmniRoute Translation Report")
    print(f"Source: {SOURCE_FILE}")
    print(f"Translation: {translation_file}\n")
    source = load_json(SOURCE_FILE)
    trans = load_json(translation_file)

    # Count keys
    source_count = len(get_all_keys(source))
    trans_count = len(get_all_keys(trans))
    print(f"{BLUE}Key Statistics:{NC}")
    print(f" Source keys: {source_count}")
    print(f" Translation keys: {trans_count}\n")

    # Missing keys
    print_header("Missing Translations")
    missing = find_missing_keys(source, trans)
    if missing:
        print(f"{RED}Found {len(missing)} missing keys:{NC}")
        for key in sorted(missing)[:50]:  # Limit output
            print(f" - {key}")
        if len(missing) > 50:
            print(f" ... and {len(missing) - 50} more")
    else:
        print_success("No missing translations!")

    # Extra keys
    print_header("Extra Keys")
    extra = find_extra_keys(source, trans)
    if extra:
        print(f"{YELLOW}Found {len(extra)} extra keys:{NC}")
        for key in sorted(extra)[:50]:
            print(f" - {key}")
        # Same truncation notice as the other sections.
        if len(extra) > 50:
            print(f" ... and {len(extra) - 50} more")
    else:
        print_success("No extra keys!")

    # Untranslated
    print_header("Untranslated Keys (same as source)")
    untranslated = find_untranslated(source, trans)
    if untranslated:
        print(f"{YELLOW}Found {len(untranslated)} untranslated keys:{NC}")
        for key in sorted(untranslated)[:50]:
            print(f" - {key}")
        if len(untranslated) > 50:
            print(f" ... and {len(untranslated) - 50} more")
    else:
        print_success("All keys appear to be translated!")

    # Placeholder issues
    print_header("Placeholder Mismatches")
    placeholder_issues = find_placeholder_issues(source, trans)
    if placeholder_issues:
        print(f"{YELLOW}Found {len(placeholder_issues)} placeholder mismatches:{NC}")
        for key, src_ph, trans_ph in placeholder_issues[:20]:
            print(f" - {key}")
            print(f" Source: {src_ph}")
            print(f" Trans: {trans_ph}")
        if len(placeholder_issues) > 20:
            print(f" ... and {len(placeholder_issues) - 20} more")
    else:
        print_success("All placeholders match!")

    # Per-category status
    print_header("Per-Category Status")
    for category in sorted(source):
        # Use a dedicated name: rebinding `missing` here would make the
        # summary below report the last category instead of the whole file.
        complete, category_missing = compare_category(source, trans, category)
        if complete:
            print_success(f"{category} - complete")
        else:
            print_error(f"{category} - missing {len(category_missing)} keys")

    # Summary
    print_header("Summary")
    if not missing and not extra and not untranslated:
        print(f"{GREEN}🎉 Translation is fully synchronized!{NC}")
    else:
        print(f"{YELLOW}Translation needs attention:{NC}")
        print(f" - Missing: {len(missing)}")
        print(f" - Extra: {len(extra)}")
        print(f" - Untranslated: {len(untranslated)}")
    return 0
def quick_check() -> int:
    """Print the missing / untranslated / ignored counts for the target language.

    Always returns 0: missing and untranslated keys are both reported as
    non-critical warnings rather than failures. When keys are missing, only
    the missing-keys warning is printed.
    """
    translation_path = get_translation_file()
    source = load_json(SOURCE_FILE)
    trans = load_json(translation_path)
    missing_total = len(find_missing_keys(source, trans))
    untranslated_total = len(find_untranslated(source, trans))
    print(f"Missing: {missing_total}")
    print(f"Untranslated: {untranslated_total}")
    print(f"Ignored (UNTRANSLATABLE_KEYS): {len(UNTRANSLATABLE_KEYS)}")
    # Both findings are soft warnings; the exit status stays 0 either way.
    if missing_total:
        print_warning(f"{missing_total} missing keys (non-critical)")
    elif untranslated_total:
        # Translations exist, they are just not localized.
        print_warning(f"{untranslated_total} untranslated keys (non-critical)")
    return 0
def show_diff(category: str) -> int:
    """Print a key / source / translation table for one top-level category.

    Each row ends with a status: "(missing)" when the translation has no
    value for the key, "(same)" when it equals the source, "(ok)" otherwise.
    Values are truncated to 25 characters.

    Returns 0 on success, or 1 (after listing the available categories) when
    *category* does not exist in the source file.
    """
    translation_file = get_translation_file()
    source = load_json(SOURCE_FILE)
    trans = load_json(translation_file)
    if category not in source:
        print_error(f"Category '{category}' not found in source")
        print("Available categories:")
        for cat in sorted(source):
            print(f" - {cat}")
        return 1
    print_header(f"Diff for category: {category}")
    print(f"{BLUE}{'Key':<30} | {'Source':<25} | {'Translation':<25}{NC}")
    print("-" * 85)
    source_keys = get_all_keys(source[category])
    for key in sorted(source_keys):
        source_val = get_value_by_path(source[category], key)
        trans_val = get_value_by_path(trans.get(category, {}), key)
        # Truncate long values. Compare against None explicitly so that
        # legitimate falsy values ("", 0, False) are shown as they are.
        source_str = str(source_val)[:25] if source_val is not None else "(null)"
        trans_str = str(trans_val)[:25] if trans_val is not None else "(missing)"
        # "missing" is checked first, matching the status order of export_csv.
        if trans_val is None:
            status = f"{RED}(missing){NC}"
        elif source_val == trans_val:
            status = f"{YELLOW}(same){NC}"
        else:
            status = f"{GREEN}(ok){NC}"
        print(f"{key:<30} | {source_str:<25} | {trans_str:<25} {status}")
    return 0
def export_csv(output_file: str) -> int:
    """Export every source key with its source value, translation and status.

    Columns: key, source_value, translation_value, status, where status is
    MISSING (no translation value), UNTRANSLATED (equal to source) or OK.
    Rows are written with the ``csv`` module so commas, quotes and newlines
    inside values are escaped correctly instead of being rewritten.

    Returns 0.
    """
    # Function-scope import: csv is not among the module's top-level imports.
    import csv

    translation_file = get_translation_file()
    source = load_json(SOURCE_FILE)
    trans = load_json(translation_file)
    print_header(f"Exporting to CSV: {output_file}")
    source_keys = get_all_keys(source)
    # newline="" lets the csv writer control line endings itself.
    with open(output_file, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["key", "source_value", "translation_value", "status"])
        for key in sorted(source_keys):
            source_val = get_value_by_path(source, key)
            trans_val = get_value_by_path(trans, key)
            if trans_val is None:
                status = "MISSING"
            elif source_val == trans_val:
                status = "UNTRANSLATED"
            else:
                status = "OK"
            trans_str = "" if trans_val is None else str(trans_val)
            writer.writerow([key, str(source_val), trans_str, status])
    print_success(f"Exported to {output_file}")
    return 0
def export_markdown(output_file: str) -> int:
    """Export the key status to two Markdown tables.

    *output_file* receives the untranslated keys (value equal to source)
    followed by a summary. A sibling file named ``<stem>_translated<suffix>``
    receives the translated keys, i.e. source keys that are neither missing
    nor untranslated. Table cells have ``|`` escaped and are truncated.

    Returns 0.
    """
    translation_file = get_translation_file()
    translation_filename = translation_file.name
    source = load_json(SOURCE_FILE)
    trans = load_json(translation_file)
    print_header(f"Exporting to Markdown: {output_file}")
    source_keys = get_all_keys(source)
    missing = find_missing_keys(source, trans)
    untranslated = find_untranslated(source, trans)

    # Separate translated and untranslated
    untranslated_sorted = sorted(untranslated)
    translated_keys = [
        key
        for key in sorted(source_keys)
        if key not in missing and key not in untranslated
    ]
    translated_count = len(translated_keys)
    untranslated_count = len(untranslated_sorted)

    # Export untranslated (main output file)
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("# Nepřeložené klíče (Untranslated Keys)\n\n")
        # Use the resolved translation path; there is no module-level
        # TRANSLATION_FILE constant to read the name from.
        f.write(f"Zdroj: `{SOURCE_FILE.name}` | Překlad: `{translation_filename}`\n\n")
        f.write(f"**Celkem: {untranslated_count} nepreložených klíčů**\n\n")
        f.write("| # | Klíč (Key) | Originál | Nepřeloženo |\n")
        f.write("|---|------------|----------|------------|\n")
        for i, key in enumerate(untranslated_sorted, 1):
            source_val = get_value_by_path(source, key)
            trans_val = get_value_by_path(trans, key)
            source_str = str(source_val).replace("|", "\\|")[:60]
            trans_str = str(trans_val).replace("|", "\\|")[:60]
            f.write(f"| {i} | `{key}` | {source_str} | {trans_str} |\n")
        f.write("\n## Shrnutí (Summary)\n\n")
        f.write(f"- Celkem klíčů: {len(source_keys)}\n")
        f.write(f"- Chybějících: {len(missing)}\n")
        f.write(f"- Nepřeložených: {untranslated_count}\n")
        f.write(f"- Přeložených: {translated_count}\n")

    # Export translated to separate file. Build the name from stem + suffix
    # so an output name without ".md" never maps back onto the main report.
    output_path = Path(output_file)
    translated_file = str(
        output_path.with_name(f"{output_path.stem}_translated{output_path.suffix}")
    )
    with open(translated_file, "w", encoding="utf-8") as f:
        f.write("# Přeložené klíče (Translated Keys)\n\n")
        f.write(f"Zdroj: `{SOURCE_FILE.name}` | Překlad: `{translation_filename}`\n\n")
        f.write(f"**Celkem: {translated_count} přeložených klíčů**\n\n")
        f.write("| # | Klíč (Key) | Originál | Překlad |\n")
        f.write("|---|------------|----------|---------|\n")
        for i, key in enumerate(translated_keys, 1):
            source_val = get_value_by_path(source, key)
            trans_val = get_value_by_path(trans, key)
            source_str = str(source_val).replace("|", "\\|")[:40]
            trans_str = str(trans_val).replace("|", "\\|")[:40]
            f.write(f"| {i} | `{key}` | {source_str} | {trans_str} |\n")
    print_success(f"Exported: {output_file} ({untranslated_count} keys)")
    print_success(f"Exported: {translated_file} ({translated_count} keys)")
    return 0
def usage() -> None:
    """Print the command-line help text (options, commands and examples)."""
    # The Commands list includes `fix`, which main() dispatches to
    # fix_missing_keys() and which the Examples section already shows.
    print("""
OmniRoute i18n Translation Validator
Usage: validate_translation.py [command] [options]
Options:
-l, --lang <code> Target language code (e.g., cs, de, fr)
Default: cs or TRANSLATION_LANG env variable
Commands:
(default) Generate full report
quick Quick check - just show counts
diff <category> Show detailed diff for a category
csv [file] Export to CSV (default: translation_report.csv)
md [file] Export to Markdown (default: translation_report.md)
fix Auto-generate missing keys from en.json
Examples:
python validate_translation.py # Full report (default: cs)
python validate_translation.py --lang de # Validate German
python validate_translation.py -l fr # Validate French
TRANSLATION_LANG=es python validate_translation.py # Validate Spanish
python validate_translation.py quick # Quick status check
python validate_translation.py diff common # Diff common category
python validate_translation.py csv # Export to CSV
python validate_translation.py md # Export to Markdown
python validate_translation.py fix # Auto-generate missing keys from en.json
""")
def fix_missing_keys() -> int:
    """Add keys missing from the translation by copying them from en.json.

    Every non-dict leaf of the source that has no leaf at the same path in
    the translation is copied over with its source (English) value, whatever
    its JSON type. Lists are treated as leaves and copied whole. Keys are
    added in source order so the rewritten file is the same on every run.
    Where the translation holds a non-object value at a position where the
    source has an object, that value is replaced by an object so the missing
    children can be inserted.

    The translation file is rewritten in place (UTF-8, 2-space indent,
    trailing newline) only when something was missing.

    Returns 0 on success, 1 if the translation root is not a JSON object.
    """
    translation_file = get_translation_file()
    source = load_json(SOURCE_FILE)
    trans = load_json(translation_file)
    if not isinstance(trans, dict):
        print_error(f"Translation root is not a JSON object: {translation_file}")
        return 1

    def iter_leaves(node, path=()):
        """Yield (path_tuple, value) for each non-dict leaf, in document order."""
        if not isinstance(node, dict):
            return
        for name, child in node.items():
            child_path = path + (name,)
            if isinstance(child, dict):
                yield from iter_leaves(child, child_path)
            else:
                yield child_path, child

    # Paths are tuples of key names, so keys containing "." stay unambiguous.
    existing = {path for path, _ in iter_leaves(trans)}
    missing = [
        (path, value) for path, value in iter_leaves(source) if path not in existing
    ]
    if not missing:
        print_success("No missing keys - translation file is complete!")
        return 0
    print(f"Found {len(missing)} missing keys")

    # Add missing keys to translation
    for path, value in missing:
        parent = trans
        for part in path[:-1]:
            # Create the intermediate object, also replacing a scalar that
            # sits where the source has a nested object.
            if not isinstance(parent.get(part), dict):
                parent[part] = {}
            parent = parent[part]
        # Use the English value as fallback, keeping its original JSON type.
        parent[path[-1]] = value

    # Write back
    with open(translation_file, "w", encoding="utf-8") as f:
        json.dump(trans, f, ensure_ascii=False, indent=2)
        f.write("\n")
    print_success(f"Added {len(missing)} missing keys to {translation_file.name}")
    return 0
def main() -> int:
    """Parse the command line and run the requested command.

    Recognized arguments: ``-l/--lang <code>``, ``-h/--help``, an optional
    command (``quick``, ``diff``, ``csv``, ``md``, ``fix``, ``help``) and an
    optional argument for it. With no command the full report is generated.

    Returns the exit status: the command's own return value, 0 after
    printing help, or 1 for an unknown command, a missing ``diff`` category
    or a translation file that does not exist.
    """
    # Parse global arguments first. add_help=False keeps argparse from
    # printing its own help; -h/--help is declared explicitly so it can be
    # routed to usage().
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("-l", "--lang", dest="lang", default=None)
    parser.add_argument("-h", "--help", dest="show_help", action="store_true")
    parser.add_argument("command", nargs="?")
    parser.add_argument("arg", nargs="?")
    # Parse known args only to allow commands to handle their own args
    args, _ = parser.parse_known_args()

    # Help must work without a translation file, so handle it first.
    if args.show_help or args.command == "help":
        usage()
        return 0

    # Set language from argument or use default
    if args.lang:
        get_target_lang.cli_lang = args.lang
    elif not os.environ.get("TRANSLATION_LANG"):
        # Default to cs for backwards compatibility
        get_target_lang.cli_lang = "cs"

    # Check if translation file exists
    translation_file = get_translation_file()
    if not translation_file.exists():
        print_error(f"Translation file not found: {translation_file}")
        print("Available languages:")
        for f in sorted(MESSAGES_DIR.glob("*.json")):
            if f.name != "en.json":
                print(f" - {f.stem}")
        return 1

    # Execute command
    if not args.command:
        return generate_report()
    if args.command == "quick":
        return quick_check()
    if args.command == "diff":
        if not args.arg:
            print_error("Please specify category")
            usage()
            return 1
        return show_diff(args.arg)
    if args.command == "csv":
        output = args.arg if args.arg else "translation_report.csv"
        return export_csv(output)
    if args.command == "md":
        output = args.arg if args.arg else "translation_report.md"
        return export_markdown(output)
    if args.command == "fix":
        return fix_missing_keys()
    print_error(f"Unknown command: {args.command}")
    usage()
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())