feat: update Chinese translations in zh.json and zh-Hant.json using locale_updater.py (#1506)

update zh.json and zh-Hant.json using locale_updater.py
3 years ago · 0020498c10
parent 66ed43cbcb
commit 0020498c10
3 changed files with 324 additions and 14 deletions
--- a/web/src/locales/locale_updater.py
+++ b/web/src/locales/locale_updater.py
@ -0,0 +1,202 @@
+# Author: Oaklight
+# GitHub profile: https://github.com/Oaklight
+# Date: April 9, 2023
+# Description: This script is used to patch missing translations in a locale file.
+
+# The script uses 'en.json' as the reference file to find missing keys in other locale files.
+# It iterates through each field and their entries in 'en.json' and checks if the same field/entry exists in other files.
+# If a field/entry is missing, the script shows the source string and a reference Google translation, then asks you to confirm or correct it.
+# The resulting file is saved as './*.proposed.json', and you should review it before merging and uploading.
+
+# usage: locale_updater.py [-h] ref_locale tgt_locale
+
+# TODO: support other NMT systems to suit different preferences and accuracy needs
+
+import json
+import requests
+
+
+def flatten_json(nested_json, parent_key="", sep=":"):
+    """Collapse a nested dict into a one-level dict keyed by joined paths.
+
+    Every leaf value is stored under the chain of keys leading to it,
+    joined with ``sep`` (``{"a": {"b": 1}}`` becomes ``{"a:b": 1}``).
+    Only dict values are descended into; any other value is kept as a
+    leaf. An empty nested dict contributes no entries.
+
+    ``parent_key`` is the path prefix accumulated by the recursion and is
+    normally left at its default by callers.
+    """
+    flat = {}
+    prefix = parent_key + sep if parent_key else ""
+    for name, child in nested_json.items():
+        path = prefix + name
+        if not isinstance(child, dict):
+            flat[path] = child
+            continue
+        # Recurse with the extended path and merge the child's leaves.
+        flat.update(flatten_json(child, path, sep))
+    return flat
+
+
+def unflatten_json(flattened_dict, sep=":"):
+    """Rebuild a nested dict from a dict keyed by ``sep``-joined paths.
+
+    Each key is split on ``sep``; every part except the last names a
+    nested dict (created on first use) and the last part names the leaf
+    that receives the value.
+    """
+    root = {}
+    for path, leaf in flattened_dict.items():
+        *branches, last = path.split(sep)
+        node = root
+        for branch in branches:
+            # Walk down one level, creating the intermediate dict if needed.
+            node = node.setdefault(branch, {})
+        node[last] = leaf
+    return root
+
+
+def sort_nested_json(nested_json):
+    """Return a copy of a JSON-like value with dict keys sorted at every level.
+
+    Dicts are rebuilt with their keys in sorted order. Lists keep their
+    element order, but each element is processed recursively. Any other
+    value is returned unchanged.
+    """
+    if isinstance(nested_json, dict):
+        return {
+            name: sort_nested_json(nested_json[name])
+            for name in sorted(nested_json)
+        }
+    if isinstance(nested_json, list):
+        return [sort_nested_json(entry) for entry in nested_json]
+    return nested_json
+
+
+def google_translate(
+    source_text, source_language="en", target_language="zh-CN", timeout=10
+):
+    """Translate ``source_text`` through the public Google Translate endpoint.
+
+    Args:
+        source_text: Text to translate. ``"\\r\\n"`` sequences are replaced
+            with a single space before the text is sent.
+        source_language: Language code of ``source_text``.
+        target_language: Language code to translate into.
+        timeout: Seconds to wait for the HTTP request before giving up.
+
+    Returns:
+        The translated text, with all translated segments concatenated.
+
+    Raises:
+        requests.RequestException: On network failure, timeout, or a
+            non-success HTTP status.
+    """
+    new_line = "\r\n"
+
+    url = "https://translate.googleapis.com/translate_a/single"
+    query = {
+        "client": "gtx",
+        "sl": source_language,
+        "tl": target_language,
+        "dt": "t",
+    }
+    # Passing a dict lets requests form-encode the body, so characters such
+    # as "&", "+", "%" or "=" in the text no longer corrupt the request.
+    form = {"q": source_text.replace(new_line, " ")}
+
+    response = requests.post(url, params=query, data=form, timeout=timeout)
+    # Fail loudly on an HTTP error instead of trying to parse an error page.
+    response.raise_for_status()
+    json_value = response.json()
+
+    # The first element of the response is read as a list of segments whose
+    # first item is the translated text. Longer inputs come back as several
+    # segments, so join them all instead of keeping only the first one.
+    segments = [item[0] for item in (json_value[0] or []) if item[0]]
+    return "".join(segments).replace(new_line, "")
+
+
+def get_code_name(json_filename):
+    """Map a locale file name or path to a translation language code.
+
+    The file name (without directories or extension) is expected to look
+    like ``<lang>.json`` or ``<lang>_<country>.json``, for example
+    ``zh.json``, ``zh-Hant.json`` or ``pt_BR.json``.
+
+    Args:
+        json_filename: Locale file name or path, e.g. ``"./zh.json"``.
+
+    Returns:
+        The mapped language code (e.g. ``"zh-CN"``). When the file name
+        carries a country part, the result is ``<language>-<COUNTRY>``.
+        An empty string is returned for languages missing from the map.
+    """
+    import os
+
+    # Strip directories first: splitting the raw path on "." turned
+    # "./zh.json" into an empty language code.
+    stem = os.path.basename(json_filename).split(".")[0]
+    file_parts = stem.split("_")
+    lang_code = file_parts[0]
+    country_code = file_parts[1] if len(file_parts) > 1 else ""
+
+    # Map language code to code name
+    lang_map = {
+        "de": "de",
+        "en": "en",
+        "es": "es",
+        "fr": "fr",
+        "it": "it",
+        "ko": "ko",
+        "nl": "nl",
+        "pl": "pl",
+        "pt": "pt-BR",
+        "ru": "ru",
+        "sl": "sl",
+        "sv": "sv",
+        "tr": "tr",
+        "uk": "uk",
+        "vi": "vi",
+        "zh-Hant": "zh-TW",
+        "zh": "zh-CN",
+    }
+    code_name = lang_map.get(lang_code, "")
+    if not code_name:
+        return ""
+
+    if country_code:
+        # An explicit country replaces the map's default region rather than
+        # being appended to it ("pt_BR" -> "pt-BR", not "pt-BR-BR").
+        code_name = code_name.split("-")[0] + "-" + country_code.upper()
+
+    return code_name
+
+
+if __name__ == "__main__":
+    # Command-line entry point.
+    #   1. Load the reference and target locale files.
+    #   2. List every key present in the reference but absent in the target.
+    #   3. For each missing key, show a machine translation and ask the
+    #      user to accept it (empty input) or type a replacement.
+    #   4. Write the completed target locale to "<name>.proposed.json" in
+    #      the current directory for review.
+    import argparse
+    import os
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("ref_locale", help="reference locale file")
+    parser.add_argument("tgt_locale", help="target locale file")
+    args = parser.parse_args()
+    ref_locale = args.ref_locale
+    tgt_locale = args.tgt_locale
+
+    ref_codename = get_code_name(ref_locale)
+    tgt_codename = get_code_name(tgt_locale)
+
+    # Locale files contain non-ASCII text, so read them as UTF-8 explicitly
+    # instead of relying on the platform's default encoding.
+    with open(ref_locale, "r", encoding="utf-8") as f:
+        ref = json.load(f)
+
+    with open(tgt_locale, "r", encoding="utf-8") as f:
+        tgt = json.load(f)
+
+    # Flatten both locales so nested entries can be compared by key path.
+    ref_flat = flatten_json(ref)
+    tgt_flat = flatten_json(tgt)
+
+    # Keep the missing keys in reference-file order. A set would make the
+    # prompt order, and the key order of the output file, vary between runs.
+    missing_keys = [key for key in ref_flat if key not in tgt_flat]
+    if not missing_keys:
+        print("\033[92m All keys are present in the target locale \033[0m")
+        raise SystemExit(0)
+    else:
+        print(f"\033[91m Total missing keys: \033[0m {len(missing_keys)}")
+
+    # List each missing key (red) with its reference text (green).
+    for key in missing_keys:
+        print(
+            "\033[91m"
+            + f"Missing key: {key}"
+            + "\033[0m"
+            + " | "
+            + "\033[92m"
+            + f"English: {ref_flat[key]}"
+            + "\033[0m"
+        )
+    print("=============================================")
+    print(f"\033[91m Total missing keys: \033[0m {len(missing_keys)}")
+
+    # For every missing key: fetch a reference translation with
+    # google_translate, show it, and let the user accept or override it.
+    for i, key in enumerate(missing_keys):
+        print(
+            f"============================================= {i + 1}/{len(missing_keys)}"
+        )
+        source_text = ref_flat[key]
+        if not isinstance(source_text, str):
+            # Non-text leaves (numbers, lists, ...) cannot be translated;
+            # copy them over unchanged instead of failing on concatenation.
+            tgt_flat[key] = source_text
+            continue
+        print("\033[91m" + "Missing key: " + "\033[0m" + key)
+        print("\033[92m" + f"{ref_codename}: " + "\033[0m" + source_text)
+        # Reference translation from Google Translate, printed in blue.
+        proposal_google = google_translate(source_text, ref_codename, tgt_codename)
+        print("\033[94m" + f"Reference {tgt_codename} translation: " + "\033[0m" + proposal_google)
+        # Empty input accepts the reference translation.
+        proposal = input("\033[92m" + "Enter translation: " + "\033[0m")
+        if proposal == "":
+            proposal = proposal_google
+        tgt_flat[key] = proposal
+
+    # Restore the nested layout and save next to the current directory as
+    # "<target name>.proposed.json".
+    tgt_unflat = unflatten_json(tgt_flat)
+    tgt_locale_name = os.path.basename(tgt_locale).split(".")[0]
+    # ensure_ascii=False emits raw non-ASCII characters, so the output file
+    # must be opened as UTF-8 as well.
+    with open(f"{tgt_locale_name}.proposed.json", "w", encoding="utf-8") as f:
+        json.dump(tgt_unflat, f, indent=2, ensure_ascii=False)
--- a/web/src/locales/zh-Hant.json
+++ b/web/src/locales/zh-Hant.json
@ -52,12 +52,20 @@
    "vacuum": "清理",
    "select": "選擇",
    "database": "資料庫",
-    "avatar": "頭像"
+    "avatar": "頭像",
+    "upload": "上傳",
+    "rename": "改名",
+    "name": "姓名",
+    "visibility": "可見性",
+    "clear": "清除",
+    "preview": "預覽"
  },
  "auth": {
    "signup-as-host": "註冊為管理員",
    "host-tip": "你正在註冊為管理員帳號。",
-    "not-host-tip": "如果你沒有帳號，請聯絡網站管理員。"
+    "not-host-tip": "如果你沒有帳號，請聯絡網站管理員。",
+    "new-password": "新密碼",
+    "repeat-new-password": "重複新密碼"
  },
  "resource": {
    "description": "查看在 Memo 中的靜態資源。例如：圖片",
@ -78,7 +86,24 @@
    "no-files-selected": "沒有文件被選中❗",
    "upload-successfully": "上傳成功",
    "file-drag-drop-prompt": "將您的文件拖放到此處以上傳文件",
-    "select": "選擇"
+    "select": "選擇",
+    "create-dialog": {
+      "upload-method": "上傳方式",
+      "local-file": {
+        "choose": "選擇一個文件...",
+        "option": "本地文件"
+      },
+      "external-link": {
+        "file-name-placeholder": "文件名",
+        "option": "外部鏈接",
+        "type-placeholder": "文件類型",
+        "link": "鏈接",
+        "type": "類型",
+        "file-name": "文件名"
+      },
+      "title": "創建資源"
+    },
+    "search-bar-placeholder": "搜索資源"
  },
  "archived": {
    "archived-memos": "已封存的 Memo",
@ -104,7 +129,14 @@
      "protected": "登入使用者可見",
      "public": "所有人可見",
      "disabled": "公共memos已禁用"
-    }
+    },
+    "fetching-data": "正在獲取數據...",
+    "archived-memos": "歸檔備忘錄",
+    "archived-at": "存檔於",
+    "fetch-more": "點擊此處獲取更多",
+    "embed": "嵌入",
+    "no-archived-memos": "沒有存檔的備忘錄。",
+    "search-placeholder": "搜索備忘錄"
  },
  "memo-list": {
    "fetching-data": "讀取資料中...",
@ -144,7 +176,10 @@
    "text-placeholder": "以 ^ 開頭使用正則表達式"
  },
  "tag-list": {
-    "tip-text": "輸入`#tag `來新增標籤"
+    "tip-text": "輸入`#tag `來新增標籤",
+    "create-tag": "創建標籤",
+    "tag-name": "標籤名",
+    "all-tags": "所有標籤"
  },
  "search": {
    "quickly-filter": "快速過濾"
@ -170,7 +205,9 @@
      "mobile-editor-style": "手機版編輯器樣式",
      "default-memo-sort-option": "Memo 顯示時間",
      "created_ts": "建立時間",
-      "updated_ts": "更新時間"
+      "updated_ts": "更新時間",
+      "daily-review-time-offset": "每日回顧時間偏移",
+      "default-resource-visibility": "默認資源可見性"
    },
    "storage-section": {
      "storage-services-list": "存儲服務列表",
@ -252,7 +289,8 @@
    "succeed-update-additional-script": "更新附加腳本成功",
    "update-succeed": "更新成功",
    "page-not-found": "404 - 未找到網頁 😥",
-    "succeed-vacuum-database": "清理資料庫成功"
+    "succeed-vacuum-database": "清理資料庫成功",
+    "resource-ready": "所有資源都準備好了"
  },
  "days": {
    "monday": "星期一",
@ -269,5 +307,23 @@
    "sat": "六",
    "sunday": "星期天",
    "sun": "日"
+  },
+  "router": {
+    "back-to-home": "回到首頁"
+  },
+  "ask-ai": {
+    "not-enabled": "您尚未設置 OpenAI API 密鑰。",
+    "title": "問AI",
+    "placeholder": "隨便問",
+    "go-to-settings": "前往設置"
+  },
+  "embed-memo": {
+    "only-public-supported": "* 僅公開備忘錄支持。",
+    "title": "嵌入備忘錄",
+    "copy": "複製",
+    "text": "將以下代碼複製並粘貼到您的博客或網站中。"
+  },
+  "daily-review": {
+    "title": "每日回顧"
  }
 }
--- a/web/src/locales/zh.json
+++ b/web/src/locales/zh.json
@ -52,12 +52,20 @@
    "vacuum": "清理",
    "select": "选择",
    "database": "数据库",
-    "avatar": "头像"
+    "avatar": "头像",
+    "rename": "改名",
+    "upload": "上传",
+    "visibility": "可见性",
+    "preview": "预览",
+    "name": "姓名",
+    "clear": "清除"
  },
  "auth": {
    "signup-as-host": "注册为 Host",
    "host-tip": "你正在注册为 Host 用户账号。",
-    "not-host-tip": "如果你没有账号，请联系站点 Host"
+    "not-host-tip": "如果你没有账号，请联系站点 Host",
+    "new-password": "新密码",
+    "repeat-new-password": "重复新密码"
  },
  "resource": {
    "description": "查看在 Memo 中的静态资源。例如：图片",
@ -78,7 +86,24 @@
    "no-files-selected": "没有文件被选中❗",
    "upload-successfully": "上传成功",
    "file-drag-drop-prompt": "将您的文件拖放到此处以上传文件",
-    "select": "选择"
+    "select": "选择",
+    "create-dialog": {
+      "external-link": {
+        "type-placeholder": "文件类型",
+        "link": "链接",
+        "file-name": "文件名",
+        "type": "类型",
+        "file-name-placeholder": "文件名",
+        "option": "外部链接"
+      },
+      "local-file": {
+        "choose": "选择一个文件...",
+        "option": "本地文件"
+      },
+      "upload-method": "上传方式",
+      "title": "创建资源"
+    },
+    "search-bar-placeholder": "搜索资源"
  },
  "archived": {
    "archived-memos": "已归档的 Memo",
@ -104,7 +129,14 @@
      "protected": "登录用户可见",
      "public": "所有人可见",
      "disabled": "公共memos已禁用"
-    }
+    },
+    "embed": "嵌入",
+    "search-placeholder": "搜索备忘录",
+    "archived-at": "存档于",
+    "no-archived-memos": "没有存档的备忘录。",
+    "fetching-data": "正在获取数据...",
+    "archived-memos": "归档备忘录",
+    "fetch-more": "点击此处获取更多"
  },
  "memo-list": {
    "fetching-data": "请求数据中...",
@ -173,7 +205,9 @@
      "mobile-editor-style": "移动端编辑器样式",
      "default-memo-sort-option": "Memo 显示时间",
      "created_ts": "创建时间",
-      "updated_ts": "更新时间"
+      "updated_ts": "更新时间",
+      "daily-review-time-offset": "每日回顾时间偏移",
+      "default-resource-visibility": "默认资源可见性"
    },
    "storage-section": {
      "storage-services-list": "存储服务列表",
@ -272,5 +306,23 @@
    "sat": "六",
    "sunday": "星期天",
    "sun": "日"
+  },
+  "embed-memo": {
+    "title": "嵌入备忘录",
+    "copy": "复制",
+    "only-public-supported": "* 仅支持公开备忘录。",
+    "text": "将以下代码复制并粘贴到您的博客或网站中。"
+  },
+  "ask-ai": {
+    "placeholder": "随便问",
+    "title": "问AI",
+    "not-enabled": "您尚未设置 OpenAI API 密钥。",
+    "go-to-settings": "前往设置"
+  },
+  "daily-review": {
+    "title": "每日回顾"
+  },
+  "router": {
+    "back-to-home": "回到首页"
  }
 }