Copies for all (#1780)

* initial script * copies for all! * revert intl files * mistaken en to vi translation * improve translation * add vi translation, fix translate script to respect existing metadata * revert translation files * fix translation to only add more without changing too much existing translations --------- Co-authored-by: ggurdin <46800240+ggurdin@users.noreply.github.com>
9 months ago · 76f6222e95
parent 9a94045e5d
commit 76f6222e95
5 changed files with 1518 additions and 99 deletions
--- a/.gitignore
+++ b/.gitignore
@ -74,3 +74,4 @@ scripts/.credentials
 olm
 needed-translations.txt
 .venv
--- a/assets/l10n/intl_en.arb
+++ b/assets/l10n/intl_en.arb
@ -1,6 +1,6 @@
 {
  "@@locale": "en",
-  "@@last_modified": "2021-08-14 12:38:37.885451",
+  "@@last_modified": "2025-02-13 15:39:10.589408",
  "alwaysUse24HourFormat": "false",
  "@alwaysUse24HourFormat": {
    "description": "Set to true to always display time of day in 24 hour format."
@ -3033,12 +3033,12 @@
    "type": "text",
    "placeholders": {}
  },
-  "publicProfileTitle": "Public Profile",
+  "publicProfileTitle": "Allow my profile to be found in search",
  "@publicProfileTitle": {
    "type": "text",
    "placeholders": {}
  },
-  "publicProfileDesc": "Your profile must be public in order to search or be found as a conversation partner.",
+  "publicProfileDesc": "By turning on, you enable other users to find your profile in the global search bar and send requests to chat. At this point, you can choose to accept or deny the request.",
  "@publicProfileDesc": {
    "type": "text",
    "placeholders": {}
@ -3114,7 +3114,7 @@
    "placeholders": {}
  },
  "toggleToolSettingsDescription": "Here you can toggle your individual language tool settings.",
-  "connectedToStaging": "You are connected to the staging server.",
+  "connectedToStaging": "Connected to Staging",
  "@connectedToStaging": {
    "type": "text",
    "placeholders": {}
@ -4126,7 +4126,6 @@
  "versionNotFound": "Version Not Found",
  "fetchingVersion": "Fetching version...",
  "versionFetchError": "Error fetching version",
  "connectedToStaging": "Connected to Staging",
  "versionText": "Version: {version}+{buildNumber}",
  "@versionText": {
    "description": "Text displaying the app version and build number.",
@ -4500,7 +4499,6 @@
  "grammarCopyPRONTYPEart": "Article",
  "grammarCopyPRONTYPEind": "Indefinite",
  "grammarCopyPRONTYPEintrel": "Interrogative-Relative",
  "grammarCopyPRONTYPEint": "Interrogative",
  "grammarCopyPUNCTSIDEfin": "Final Punctuation",
  "grammarCopyPUNCTTYPEperi": "Period",
  "grammarCopyREFLEXyes": "Reflexive",
@ -4570,7 +4568,6 @@
  "addSubspaceWarning": "Once you add this, it will not appear in public search results, and it will be visible to all members of the parent space.",
  "nestedSpaceError": "Spaces should not be added as children of other spaces",
  "addChatToSpace": "Add chat",
  "continueText": "Continue",
  "welcomeText": "Hey Hey 👋 This is FluffyChat. You can sign in to any homeserver, which is compatible with https://matrix.org. And then chat with anyone. It's a huge decentralized messaging network!",
  "blur": "Blur:",
  "opacity": "Opacity:",
@ -4661,10 +4658,8 @@
  "pleaseEnterEmail": "Please enter a valid email address.",
  "pleaseSelectALanguage": "Please select a language",
  "myBaseLanguage": "My base language",
  "publicProfileTitle": "Allow my profile to be found in search",
  "publicProfileDesc": "By turning on, you enable other users to find your profile in the global search bar and send requests to chat. At this point, you can choose to accept or deny the request.",
  "clickWordsInstructions": "Click on a word or the buttons below to learn more",
-  "chooseBestDefinition": "Choose the best definition",
+  "chooseBestDefinition": "What does this word mean?",
  "meaningSectionHeader": "Meaning:",
  "formSectionHeader": "Forms used in chats:",
  "noEmojiSelectedTooltip": "No emoji selected",
@ -4673,10 +4668,8 @@
  "readingExercisesTooltip": "Reading activities",
  "meaningNotFound": "Meaning could not be found.",
  "formsNotFound": "Forms could not be found.",
  "chooseBestDefinition": "What does this word mean?",
  "chooseBaseForm": "Choose the base form",
  "notTheCodeError": "Sorry, that's not the code!",
  "previous": "Previous",
  "totalXP": "Total XP",
  "numLemmas": "Total number of lemmas",
  "listOfLemmas": "List of lemmas",
@ -4724,7 +4717,6 @@
  "dataAvailable": "Data availability",
  "lemmasNeverUsedCorrectly": "Number of lemmas used correctly 0 times",
  "available": "Available",
  "unavailable": "Unavailable",
  "accessingMemberAnalytics": "Accessing member analytics...",
  "pangeaBotIsFallible": "Pangea Bot makes mistakes too!",
  "whatIsMeaning": "What does '{lemma}' mean?",
@ -4754,9 +4746,8 @@
  },
  "notInClass": "Not in a class!",
  "noClassCode": "No class code!",
  "previous": "Previous",
  "otherPartyNotLoggedIn": "The other party is currently not logged in and therefore cannot receive messages!",
-  "chooseCorrectLabel": "Choose the correct label",
+  "chooseCorrectLabel": "Choose the correct label.",
  "levelPopupTitle": "Congratulations on reaching\nLevel {level}",
  "@levelPopupTitle": {
    "type": "text",
@ -4764,7 +4755,6 @@
      "level": {}
    }
  },
  "chooseCorrectLabel": "Choose the correct label.",
  "activityPlannerTitle": "Activity Planner",
  "topicLabel": "Topic",
  "topicPlaceholder": "Choose a topic...",
@ -4780,20 +4770,17 @@
  "launchActivityButton": "Launch Activity",
  "image": "Image",
  "video": "Video",
  "voiceMessage": "Voice message",
  "nan": "Not applicable",
  "activityPlannerOverviewInstructionsBody": "Choose a topic, mode, learning objective and generate an activity for the chat!",
  "completeActivitiesToUnlock": "Complete the highlighted word activities to unlock",
  "myBookmarkedActivities": "My Bookmarked Activities",
  "noBookmarkedActivities": "No bookmarked activities",
  "noBookmarkedActivities": "When you bookmark activities, they will appear here. Bookmarked activities can be re-used across spaces and chats.",
  "activityTitle": "Activity Title",
  "addVocabulary": "Add Vocabulary",
  "instructions": "Instructions",
  "bookmark": "Bookmark this activity",
  "numberOfLearners": "Number of learners",
  "mustBeInteger": "Must be an integer e.g. 1, 2, 3, ...",
  "noBookmarkedActivities": "No bookmarked activities",
  "noLemmasFound": "There's no vocabulary with more than {xp} XP. Keep practicing!",
  "@noLemmasFound": {
    "type": "text",
@ -4804,7 +4791,6 @@
  "constructUsePvmDesc": "Produced in voice message",
  "lockedMorphFeature": "Waiting to be unlocked",
  "leaveSpaceDescription": "The space will be moved to the archive. Other users will be able to see that you have left the chat.",
  "otherPartyNotLoggedIn": "The other party is currently not logged in and therefore cannot receive messages!",
  "appWantsToUseForLogin": "Use '{server}' to log in",
  "@appWantsToUseForLogin": {
    "type": "text",
@ -4822,4 +4808,4 @@
  "constructUseIgnMmDesc": "Ignored message meaning",
  "clickForMeaningActivity": "Click here for a Meaning Challenge",
  "meaning": "Meaning"
-}
+}
--- a/assets/l10n/intl_es.arb
+++ b/assets/l10n/intl_es.arb
--- a/assets/l10n/intl_vi.arb
+++ b/assets/l10n/intl_vi.arb
@ -1,5 +1,6 @@
 {
-  "@@last_modified": "2021-08-14 12:41:09.781172",
+  "@@locale": "vi",
  "@@last_modified": "2025-02-13 16:15:00.051226",
  "about": "Giới thiệu",
  "@about": {
    "type": "text",
@ -2605,5 +2606,22 @@
  "accountInformation": "Thông tin tài khoản",
  "addGroupDescription": "Thêm mô tả cho cuộc trò chuyện",
  "addNewFriend": "Thêm bạn mới",
-  "alreadyHaveAnAccount": "Bạn đã có tài khoản"
+  "alreadyHaveAnAccount": "Bạn đã có tài khoản",
  "writeAMessageLangCodes": "Gõ bằng {l1} hoặc {l2}...",
  "@writeAMessageLangCodes": {
    "type": "text",
    "placeholders": {
      "l1": {
        "type": "String"
      },
      "l2": {
        "type": "String"
      }
    }
  },
  "grammarCopyVERBFORMaux": "Trợ động từ",
  "@grammarCopyVERBFORMaux": {
    "type": "text",
    "placeholders": {}
  }
 }
--- a/scripts/translate.py
+++ b/scripts/translate.py
@ -0,0 +1,304 @@
 """
 Prerequisites:
 - Ensure you have an up-to-date `needed-translations.txt` file should you wish to translate only the missing translation keys. To generate an updated `needed-translations.txt` file, run `flutter gen-l10n`
 - Ensure you have python `openai` package installed. If not, run `pip install openai`.
 - Ensure you have an OpenAI API key set in your environment variable `OPENAI_API_KEY`. If not, you can set it by running `export OPENAI_API_KEY=your-api-key` on MacOS/Linux.
 Usage:
 python scripts/translate.py
 """
 def load_needed_translations() -> dict[str, list[str]]:
    """
    Read `needed-translations.txt` from the parent of this script's directory
    and return its parsed JSON content.

    Raises:
        FileNotFoundError: if `needed-translations.txt` does not exist.
    """
    from pathlib import Path
    import json

    report_path = Path(__file__).parent.parent.joinpath("needed-translations.txt")
    if report_path.exists():
        # The whole file is a single JSON document; parse it straight from the handle.
        with open(report_path) as report_file:
            return json.load(report_file)
    raise FileNotFoundError(
        f"File not found: {report_path}. Please run `flutter gen-l10n` to generate the file."
    )
 def load_translations(lang_code: str) -> dict[str, str]:
    """
    Load `assets/l10n/intl_<lang_code>.arb` (resolved relative to the parent of
    this script's directory) and return its parsed JSON content.

    The returned mapping holds the translated strings and, for keys that start
    with "@", whatever metadata values the file stores for them.

    Args:
        lang_code: language code used in the ARB file name, e.g. "en" or "vi".

    Raises:
        FileNotFoundError: if the ARB file for `lang_code` does not exist.
    """
    import json
    from pathlib import Path

    path_to_translations = (
        Path(__file__).parent.parent / "assets" / "l10n" / f"intl_{lang_code}.arb"
    )
    if not path_to_translations.exists():
        raise FileNotFoundError(
            f"File not found: {path_to_translations}. Please make sure an ARB file exists for language code '{lang_code}'."
        )
    # ARB files contain non-ASCII text (e.g. Vietnamese, emoji); decode as UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(path_to_translations, encoding="utf-8") as f:
        return json.load(f)
 def save_translations(lang_code: str, translations: dict[str, str]) -> None:
    """
    Write `translations` to `assets/l10n/intl_<lang_code>.arb` as UTF-8 JSON.

    Key order in the written file:
      1. "@@locale" and "@@last_modified" (both are set on `translations`
         in place before writing, so the caller's dict is modified);
      2. keys already present in the existing file, in their existing order,
         each followed by its "@<key>" metadata entry when one is provided;
      3. new translation keys, each followed by its metadata entry;
      4. any remaining "@"-prefixed keys.

    Keys that exist in the file but are absent from `translations` are dropped.
    If the existing file is not valid JSON its ordering is ignored.

    Args:
        lang_code: language code used in the ARB file name and as "@@locale".
        translations: full mapping of keys (and metadata keys) to write.
    """
    import json
    from pathlib import Path
    from datetime import datetime

    path_to_translations = (
        Path(__file__).parent.parent / "assets" / "l10n" / f"intl_{lang_code}.arb"
    )
    translations["@@locale"] = lang_code
    translations["@@last_modified"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")

    # Only the key order of the existing file is needed; values come from `translations`.
    existing_order = []
    if path_to_translations.exists():
        # Explicit UTF-8: the file is written with ensure_ascii=False below, so it
        # must not be decoded with a platform-dependent default encoding.
        with open(path_to_translations, "r", encoding="utf-8") as f:
            try:
                existing_data = json.load(f)
            except json.JSONDecodeError:
                existing_data = {}
        if isinstance(existing_data, dict):
            existing_order = list(existing_data)

    special_keys = ("@@locale", "@@last_modified")
    final_ordered = {key: translations[key] for key in special_keys}

    def place(key):
        # Re-assigning a key that is already placed keeps its position, so a
        # metadata entry always stays right after the first time it was placed.
        final_ordered[key] = translations[key]
        meta_key = f"@{key}"
        if meta_key in translations:
            final_ordered[meta_key] = translations[meta_key]

    # Keep the order of keys that already exist in the file.
    for key in existing_order:
        if key not in special_keys and key in translations:
            place(key)
    # Append new translation keys, each immediately followed by its metadata.
    for key in translations:
        if not key.startswith("@") and key not in final_ordered:
            place(key)
    # Append leftover metadata keys that have no matching translation key.
    for key in translations:
        if key not in final_ordered:
            final_ordered[key] = translations[key]

    with open(path_to_translations, "w", encoding="utf-8") as f:
        json.dump(final_ordered, f, indent=2, ensure_ascii=False)
        # End the file with a newline so version control does not flag the last line.
        f.write("\n")
 def _merge_placeholder_metadata(existing_meta, placeholder_names, default_entry):
    """
    Return a copy of `existing_meta` that has a "type" (default "text") and a
    "placeholders" entry for every name in `placeholder_names`.

    Placeholder entries that already exist are kept untouched; only missing
    names are added, each with a copy of `default_entry`.
    """
    merged_meta = dict(existing_meta)
    merged_meta.setdefault("type", "text")
    current = merged_meta.get("placeholders")
    placeholders = dict(current) if isinstance(current, dict) else {}
    for name in placeholder_names:
        placeholders.setdefault(name, dict(default_entry))
    merged_meta["placeholders"] = placeholders
    return merged_meta

 def reconcile_metadata(lang_code: str, translation_keys: list[str]) -> None:
    """
    For each translation key, update its metadata (the key prefixed with '@') by merging
    any existing metadata with computed metadata, then save the file.

    - Basic translations (no "{"): metadata is added only when none exists.
    - Translations with placeholders (no pluralization): every `{name}` found in
      the string gets a `{"type": "String"}` placeholder entry unless the
      existing metadata already describes that placeholder.
    - Translations with pluralization ("plural," and "other{" present): every
      variable used as `{name, plural, ...}` gets an empty placeholder entry
      unless the existing metadata already describes it.

    Keys in `translation_keys` that have no string value in the loaded file are
    reported and skipped instead of aborting the whole run.

    Args:
        lang_code: language code of the ARB file to load and save.
        translation_keys: translation keys whose metadata should be reconciled.
    """
    import re

    translations = load_translations(lang_code)
    for key in translation_keys:
        translation = translations.get(key)
        if not isinstance(translation, str):
            print(f"Skipping metadata for '{key}': no translated string found.")
            continue
        meta_key = f"@{key}"
        existing_meta = translations.get(meta_key)
        if not isinstance(existing_meta, dict):
            existing_meta = {}
        # Case 1: Basic translations, no placeholders.
        if "{" not in translation:
            if not existing_meta:
                translations[meta_key] = {"type": "text", "placeholders": {}}
            # if metadata exists, leave it as is.
        # Case 2: Translations with placeholders (no pluralization).
        elif "plural," not in translation and "other{" not in translation:
            found = [name.strip() for name in re.findall(r"\{([^{}]*)\}", translation)]
            placeholder_names = [name for name in found if name]
            translations[meta_key] = _merge_placeholder_metadata(
                existing_meta, placeholder_names, {"type": "String"}
            )
        # Case 3: Translations with pluralization.
        elif "plural," in translation and "other{" in translation:
            # Match the variable of each `{name, plural, ...}` argument directly so
            # that text or other placeholders before the plural block cannot be
            # mistaken for placeholder names.
            placeholder_names = re.findall(
                r"\{\s*([^\s{},]+)\s*,\s*plural\s*,", translation
            )
            translations[meta_key] = _merge_placeholder_metadata(
                existing_meta, placeholder_names, {}
            )
    save_translations(lang_code, translations)
 def translate(lang_code: str, lang_display_name: str) -> None:
    """
    Translate the needed translations from English to the target language.

    The keys listed for `lang_code` in `needed-translations.txt` are sent to the
    OpenAI chat completions API in chunks of 20, together with a few
    English/Vietnamese example pairs that illustrate the expected JSON shape.
    The returned translations are merged into the target ARB file, and the
    metadata of the translated keys is reconciled afterwards.

    Args:
        lang_code: language code of the target ARB file, e.g. "vi".
        lang_display_name: language name used in the prompt, e.g. "Vietnamese".

    Raises:
        ValueError: if the model returns an empty reply or a reply that is not
            a JSON object.
    """
    import json
    import random
    from openai import OpenAI

    chunk_size = 20
    needed_translations = load_needed_translations().get(lang_code, [])
    if not needed_translations:
        # Nothing to translate: do not rewrite the ARB file just to bump its timestamp.
        print(f"No missing translations listed for '{lang_code}'.")
        return
    english_translations_dict = load_translations("en")
    vietnamese_translations_dict = load_translations("vi")

    # Examples must exist in BOTH files: the Vietnamese file only covers part of
    # the English keys, so sampling from English alone can hit a missing key.
    example_candidates = [
        k
        for k, v in english_translations_dict.items()
        if not k.startswith("@")
        and isinstance(v, str)
        and isinstance(vietnamese_translations_dict.get(k), str)
    ]

    def pick_examples(keys):
        return random.sample(keys, 2) if len(keys) > 2 else keys

    # there are 3 types of translation keys: basic, with placeholders, with pluralization. Read more: TRANSLATORS_GUIDE.md
    basic_translation_keys = [
        k for k in example_candidates if "{" not in english_translations_dict[k]
    ]
    placeholder_translation_keys = [
        k
        for k in example_candidates
        if "{" in english_translations_dict[k]
        and "plural," not in english_translations_dict[k]
        and "other{" not in english_translations_dict[k]
    ]
    plural_translation_keys = [
        k
        for k in example_candidates
        if "{" in english_translations_dict[k]
        and "plural," in english_translations_dict[k]
        and "other{" in english_translations_dict[k]
    ]
    example_keys = (
        pick_examples(basic_translation_keys)
        + pick_examples(placeholder_translation_keys)
        + pick_examples(plural_translation_keys)
    )
    # build example translations
    example_english_translations = {
        key: english_translations_dict[key] for key in example_keys
    }
    example_vietnamese_translations = {
        key: vietnamese_translations_dict[key] for key in example_keys
    }

    # One client is enough for all chunks.
    client = OpenAI()
    new_translations = {}
    progress = 0
    for i in range(0, len(needed_translations), chunk_size):
        chunk = needed_translations[i : i + chunk_size]
        translation_requests = {
            key: english_translations_dict[key]
            for key in chunk
            if isinstance(english_translations_dict.get(key), str)
        }
        progress += len(chunk)
        if not translation_requests:
            continue
        # ensure_ascii=False keeps the examples readable for the model instead of
        # turning every accented character into a \uXXXX escape.
        prompt = f"""
        Please translate the following text from English to {lang_display_name}.
        Example (English to Vietnamese, shown only to illustrate the expected JSON format and placeholder handling):
        req: {json.dumps(example_english_translations, indent=2, ensure_ascii=False)}
        res: {json.dumps(example_vietnamese_translations, indent=2, ensure_ascii=False)}
        ========================
        req: {json.dumps(translation_requests, indent=2, ensure_ascii=False)}
        res:
        """
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are a translator that will only response to translation requests in json format without any additional information.",
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ],
            model="gpt-4o-mini",
            temperature=0.0,
            # Ask for a bare JSON object so the reply can be parsed directly.
            response_format={"type": "json_object"},
        )
        response = chat_completion.choices[0].message.content
        if not response:
            raise ValueError("The model returned an empty response.")
        _new_translations = json.loads(response)
        if not isinstance(_new_translations, dict):
            raise ValueError("The model response is not a JSON object.")
        # Keep only the keys that were requested and whose value is a string, so
        # stray keys in the reply never end up in the ARB file.
        new_translations.update(
            {
                key: value
                for key, value in _new_translations.items()
                if key in translation_requests and isinstance(value, str)
            }
        )
        print(f"Translated {progress}/{len(needed_translations)}")
    # save translations
    current_translations = load_translations(lang_code)
    current_translations.update(new_translations)
    save_translations(lang_code, current_translations)
    # reconcile metadata, only for keys that actually have a translation now
    reconcile_metadata(
        lang_code, [key for key in needed_translations if key in current_translations]
    )
 """Example usage:
 python scripts/translate.py
 """
 if __name__ == "__main__":
    # Both answers are stripped so stray whitespace reaches neither the ARB file
    # name nor the translation prompt.
    lang_code = input("Enter the language code (e.g. vi, en): ").strip()
    lang_display_name = input(
        "Enter the language display name (e.g. Vietnamese, English): "
    ).strip()
    # Fail early with a clear message instead of a confusing missing-file error later.
    if not lang_code or not lang_display_name:
        raise SystemExit(
            "Both a language code and a language display name are required."
        )
    translate(
        lang_code=lang_code,
        lang_display_name=lang_display_name,
    )