move to v3.4.9-beta

Files changed:

- README.md +6 -1
- demo/cote_replacements.json +3 -2
- demo/genders.json +140 -0
- demo/translation_settings.json +9 -7
- handlers/json_handler.py +12 -18
- jsons/cote_replacements.json +3 -3
- lib/common/translation_settings_description.txt +4 -0
- lib/gui/HUGGING_FACE_README.md +5 -0
- lib/gui/save_to_file.js +2 -1
- modules/common/file_ensurer.py +25 -1
- modules/common/gender_util.py +420 -0
- modules/common/toolkit.py +1 -1
- modules/common/translator.py +34 -40
- modules/gui/gui_json_util.py +2 -1
- requirements.txt +2 -2
- util/token_counter.py +1 -1
- webgui.py +172 -74
README.md
CHANGED
@@ -113,6 +113,11 @@ For further details on the settings file, see [here](#translation-with-llms-settings)
 batch_retry_timeout : How long Kudasai will try to translate a batch, in seconds. If a request exceeds this duration, Kudasai will leave it untranslated.
 
 number_of_concurrent_batches : How many translation batches Kudasai will send to the translation API at a time. For OpenAI, be conservative as rate-limiting is aggressive; I'd suggest 3-5. For Gemini, do not exceed 15 for 1.0 or 2 for 1.5. This setting more or less doesn't matter for DeepL.
+
+gender_context_insertion : true or false - Whether to insert gender info into system prompts when translating text. Kudasai will look for a "genders.json" (exactly) in the root directory and scan each text to translate for the names in that file. If a name is found, Kudasai will insert the gender info into the system prompt. For more info, see the README.md.
+
+is_cote : true or false - Whether you are translating COTE (Classroom of the Elite). Kudasai has specialized functions for COTE that will be enabled if this is set to true.
+
 ----------------------------------------------------------------------------------
 Open AI Settings:
 See https://platform.openai.com/docs/api-reference/chat/create for further details
@@ -223,4 +228,4 @@ I actively encourage and welcome any feedback on this project.
 
 Kudasai gets its original name idea from its inspiration, Atreyagaurav's Onegai, which also means "please". You can find that [here](https://github.com/Atreyagaurav/onegai)
 
----------------------------------------------------------------------------------------------------------------------------------------------------
+---------------------------------------------------------------------------------------------------------------------------------------------------
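When gender_context_insertion is enabled, the assembled gender list is appended to the LLM system message. A minimal sketch of the resulting prompt shape (the names and base message below are made up; the real assembly lives in modules/common/translator.py):

```python
## Illustrative only: mirrors the "Additional Information" block Kudasai appends
## to the system message when gender_context_insertion is true.
assumptions = ["Horikita : Female\n", "Ayanokōji : Male\n"]

assumption_string = "Additional Information:\nCharacter Genders:\n" + "".join(assumptions)
system_message = "As a Japanese to English translator, translate narration into simple past tense." + "\n" + assumption_string

print(system_message)
```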
demo/cote_replacements.json
CHANGED
@@ -87,7 +87,7 @@
 "Mika": ["美香"],
 "Minamikawa": ["南川"],
 "Namekawa": ["滑川"],
-"…
+"Ōba": ["大場"],
 "Obokata": ["小保方"],
 "Onizuka": ["鬼塚"],
 "Ryūko": ["竜子"],
@@ -209,7 +209,8 @@
 },
 
 "enhanced_check_whitelist": {
-"Hoshinomiya Chie": ["星之宮","知恵"]
+"Hoshinomiya Chie": ["星之宮","知恵"],
+"Kijima": ["鬼島"]
 }
 
 }
demo/genders.json
ADDED
@@ -0,0 +1,140 @@
{
    "Male": {
        "Ijūin": ["伊集院"],
        "Iki": ["井木"],
        "Ishida": ["石田"],
        "Keisei": ["啓誠"],
        "Kijima": ["鬼島"],
        "Kisarazu": ["木更津"],
        "Kinugasa": ["衣笠"],
        "Ohba": ["大場"],
        "Onizuka": ["鬼塚"],
        "Satonaka": ["里中"],
        "Shiro": ["志朗"],
        "Sonezaki": ["曽根崎"],
        "Sōya": ["宗谷"],
        "Sugio": ["杉尾"],
        "Tatebayashi": ["舘林"],
        "Tomose": ["トモセ"],
        "Toyohashi": ["豊橋"],
        "Asama Hisashi": ["浅間", "久"],
        "Ayanokōji Atsuomi": ["綾小路", "篤臣"],
        "Ayanokōji Kiyotaka": ["綾小路", "清隆"],
        "Hamaguchi Tetsuya": ["浜口", "哲也"],
        "Hashimoto Masayoshi": ["橋本", "正義"],
        "Hirata Yōsuke": ["平田", "洋介"],
        "Hondō Ryōtarō": ["本堂", "遼太郎"],
        "Hōsen Kazuomi": ["宝泉", "和臣"],
        "Ike Kanji": ["池", "寛治"],
        "Ishigami Kyō": ["石上", "京"],
        "Ishizaki Daichi": ["石崎", "大地"],
        "Isomaru Yōkō": ["磯丸", "容幸"],
        "Kamogawa Toshizō": ["鴨川", "俊三"],
        "Kanzaki Ryūji": ["神崎", "隆二"],
        "Katsuragi Kōhei": ["葛城", "康平"],
        "Kiriyama Ikuto": ["桐山", "生叶"],
        "Kitō Hayato": ["鬼頭", "隼"],
        "Komiya Kyōgo": ["小宮", "叶吾"],
        "Kondō Reo": ["近藤", "玲音"],
        "Kōenji Rokusuke": ["高円寺", "六助"],
        "Kōji Machida": ["浩二", "町田"],
        "Kusuda Yukitsu": ["楠田", "ゆきつ"],
        "Mashima Tomonari": ["真嶋", "智也"],
        "Miyake Akito": ["三宅", "明人"],
        "Miyamoto Sōshi": ["宮本", "蒼士"],
        "Nagumo Miyabi": ["南雲", "雅"],
        "Naoe Jinnosuke": ["直江", "仁之助"],
        "Norihito Watanabe": ["渡辺", "紀仁"],
        "Okitani Kyōsuke": ["沖谷", "京介"],
        "Ryūen Kakeru": ["龍園", "翔"],
        "Sakagami Kazuma": ["坂上", "数馬"],
        "Sakayanagi Narimori": ["坂柳", "成守"],
        "Sanada Kousei": ["真田", "康生"],
        "Shiba Katsunori": ["司馬", "克典"],
        "Shibata Sō": ["柴田", "颯"],
        "Sotomura Hideo": ["外村", "秀雄"],
        "Sudō Ken": ["須藤", "健"],
        "Suzukake Tanji": ["鈴懸", "鍛治"],
        "Tachibana Kento": ["立花", "賢人"],
        "Takahashi Osamu": ["高橋", "修"],
        "Tokitō Hiroya": ["時任", "裕也"],
        "Totsuka Yahiko": ["戸塚", "弥彦"],
        "Tsukishiro Tokinari": ["月城", "常成"],
        "Utomiya Riku": ["宇都宮", "陸"],
        "Yagami Takuya": ["八神", "拓也"],
        "Yamada Albert": ["山田", "アルベルト"],
        "Yamauchi Haruki": ["山内", "春樹"],
        "Yanagi Yasuhisa": ["柳", "安久"],
        "Yukimura Teruhiko": ["幸村", "輝彦"]
    },
    "Female": {
        "Akiyama": ["秋山"],
        "Isoyama": ["磯山"],
        "Mii": ["みー"],
        "Mika": ["美香"],
        "Minamikawa": ["南川"],
        "Amasawa Ichika": ["天沢", "一夏"],
        "Amikura Mako": ["網倉", "麻子"],
        "Andō Sayo": ["安藤", "紗代"],
        "Asahina Nazuna": ["朝比奈", "なずな"],
        "Azuma Sana": ["東", "咲菜"],
        "Chabashira Sae": ["茶柱", "佐枝"],
        "Enoshima Midoriko": ["榎嶋", "翠子"],
        "Hasebe Haruka": ["長谷部", "波瑠加"],
        "Himeno Yuki": ["姫野", "ユキ"],
        "Horikita Suzune": ["堀北", "鈴音"],
        "Hoshinomiya Chie": ["星之宮", "知恵"],
        "Ibuki Mio": ["伊吹", "澪"],
        "Ichinose Honami": ["一之瀬", "帆波"],
        "Inogashira Kokoro": ["井の頭", "心"],
        "Kamuro Masumi": ["神室", "真澄"],
        "Karuizawa Kei": ["軽井沢", "恵"],
        "Kikyō Kushida": ["桔梗", "櫛田"],
        "Kinoshita Minori": ["木下", "美野里"],
        "Kiryūin Fūka": ["鬼龍院", "楓花"],
        "Kobashi Yume": ["小橋", "夢"],
        "Manabe Shiho": ["真鍋", "志保"],
        "Matsushita Chiaki": ["松下", "千秋"],
        "Mori Nene": ["森", "寧々"],
        "Morishita Ai": ["森下", "藍"],
        "Nanase Tsubasa": ["七瀬", "翼"],
        "Nishino Takeko": ["西野", "武子"],
        "Onodera Kayano": ["小野寺", "かや乃"],
        "Sakayanagi Arisu": ["坂柳", "有栖"],
        "Sakura Airi": ["佐倉", "愛里"],
        "Satō Maya": ["佐藤", "麻耶"],
        "Shiina Hiyori": ["椎名", "ひより"],
        "Shinohara Satsuki": ["篠原", "さつき"],
        "Shiranami Chihiro": ["白波", "千尋"],
        "Suchi Moeka": ["須知", "萌香"],
        "Tachibana Akane": ["橘", "茜"],
        "Tsubaki Sakurako": ["椿", "桜子"],
        "Wan Mei-Yui": ["王", "美雨"],
        "Yabu Nanami": ["藪", "菜々美"],
        "Yamamura Miki": ["山村", "美紀"],
        "Yamanaka Ikuko": ["山中", "郁子"],
        "Yamashita Saki": ["鈴代", "紗弓"]
    },
    "Unknown": {
        "Anzai": ["安在"],
        "Jima": ["島"],
        "Kosumi": ["小角"],
        "Midora": ["美空"],
        "Namekawa": ["滑川"],
        "Obokata": ["小保方"],
        "Ryūko": ["竜子"],
        "Yano": ["矢野"],
        "king": ["Wan-sama"],
        "Eikura Mami": ["榮倉", "まみ"],
        "Fukuchi Hinano": ["福地陽", "菜乃"],
        "Hagiwara Chihaya": ["萩原", "千颯"],
        "Hatsukawa Maho": ["初川", "舞峰"],
        "Iguchi Yuri": ["井口", "由里"],
        "Nishimura Ryūko": ["西村", "龍子"],
        "Sawada Yasumi": ["沢田", "恭美"],
        "Shintoku Tarō": ["新徳", "太郎"],
        "Tatewaki Aoi": ["帯刀", "碧"],
        "Tōdō Rin": ["藤堂", "凛"],
        "Yamashita Naohisa": ["山下", "直久"]
    }
}
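A rough sketch of how a genders.json like the one above can be queried (Kudasai's actual matching, with honorific stripping and partial-name checks, lives in modules/common/gender_util.py; the path and lookup below are illustrative):

```python
import json

## assumes the demo file above is on disk at this relative path
with open("demo/genders.json", encoding="utf-8") as file:
    genders = json.load(file)

def gender_of(name: str) -> str:
    ## exact-key lookup only; the real util also matches partial names
    for gender, names in genders.items():
        if(name in names):
            return gender
    return "Undetermined"

print(gender_of("Horikita Suzune"))  ## -> Female
```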
demo/translation_settings.json
CHANGED
@@ -6,13 +6,15 @@
 "je_check_mode": 2,
 "number_of_malformed_batch_retries": 1,
 "batch_retry_timeout": 700,
-"number_of_concurrent_batches": 2
+"number_of_concurrent_batches": 2,
+"gender_context_insertion": true,
+"is_cote": true
 },
 
 "openai settings": {
-"openai_model": "gpt-…
-"openai_system_message": "As a Japanese to English translator, translate narration into…
-"openai_temperature": 0.…
+"openai_model": "gpt-4-turbo",
+"openai_system_message": "As a Japanese to English translator, translate narration into simple past tense, everything else should remain in its original tense. Maintain original formatting, spacing, punctuation, and paragraph structure. Keep pre-translated terms and anticipate names not replaced. Calls and text messages should be enclosed in brackets. Preserve terms and markers marked with >>><<< and match the output's line count to the input's. Note: 〇 indicates chapter changes.",
+"openai_temperature": 0.1,
 "openai_top_p": 1.0,
 "openai_n": 1,
 "openai_stream": false,
@@ -24,9 +26,9 @@
 },
 
 "gemini settings": {
-"gemini_model": "gemini-…
-"gemini_prompt": "As a Japanese to English translator, translate narration into…
-"gemini_temperature": 0.…
+"gemini_model": "gemini-pro",
+"gemini_prompt": "As a Japanese to English translator, translate narration into simple past tense, everything else should remain in its original tense. Maintain original formatting, spacing, punctuation, and paragraph structure. Keep pre-translated terms and anticipate names not replaced. Calls and text messages should be enclosed in brackets. Preserve terms and markers marked with >>><<< and match the output's line count to the input's. Note: 〇 indicates chapter changes.",
+"gemini_temperature": 0.1,
 "gemini_top_p": null,
 "gemini_top_k": null,
 "gemini_candidate_count": 1,
handlers/json_handler.py
CHANGED
@@ -43,7 +43,9 @@ class JsonHandler:
     "je_check_mode",
     "number_of_malformed_batch_retries",
     "batch_retry_timeout",
-    "number_of_concurrent_batches"
+    "number_of_concurrent_batches",
+    "gender_context_insertion",
+    "is_cote"
 ]
 
 openai_keys = [
@@ -86,6 +88,8 @@ class JsonHandler:
     "je_check_mode": lambda x: isinstance(x, int) and 1 <= x <= 2,
     "number_of_malformed_batch_retries": lambda x: isinstance(x, int) and x >= 0,
     "batch_retry_timeout": lambda x: isinstance(x, int) and x >= 0,
+    "gender_context_insertion": lambda x: isinstance(x, bool),
+    "is_cote": lambda x: isinstance(x, bool),
     "number_of_concurrent_batches": lambda x: isinstance(x, int) and x >= 0,
     "openai_model": lambda x: isinstance(x, str) and x in ALLOWED_OPENAI_MODELS,
     "openai_system_message": lambda x: x not in ["", "None", None],
@@ -326,6 +330,8 @@ class JsonHandler:
     "number_of_malformed_batch_retries": {"type": int, "constraints": lambda x: x >= 0},
     "batch_retry_timeout": {"type": int, "constraints": lambda x: x >= 0},
     "number_of_concurrent_batches": {"type": int, "constraints": lambda x: x >= 0},
+    "gender_context_insertion": {"type": bool, "constraints": lambda x: isinstance(x, bool)},
+    "is_cote": {"type": bool, "constraints": lambda x: isinstance(x, bool)},
     "openai_model": {"type": str, "constraints": lambda x: x in ALLOWED_OPENAI_MODELS},
     "openai_system_message": {"type": str, "constraints": lambda x: x not in ["", "None", None]},
     "openai_temperature": {"type": float, "constraints": lambda x: 0 <= x <= 2},
@@ -412,26 +418,14 @@ class SettingsChanger:
     menu = """
 Current settings:
 ----------------------------------------------------------------
-
     """
 
-        for key,value in JsonHandler.current_translation_settings["base translation settings"].items():
-            menu += key + " : " + str(value) + "\n"
-
-        print("\n")
-
-        for key,value in JsonHandler.current_translation_settings["openai settings"].items():
-            menu += key + " : " + str(value) + "\n"
-
-        print("\n")
-
-        for key,value in JsonHandler.current_translation_settings["gemini settings"].items():
-            menu += key + " : " + str(value) + "\n"
-
-        print("\n")
 
-        for …
-
+        settings = ["base translation settings", "openai settings", "gemini settings", "deepl settings"]
 
+        for setting in settings:
+            for key, value in JsonHandler.current_translation_settings[setting].items():
+                menu += key + " : " + str(value) + "\n"
+            menu += "\n"
 
     menu += """
 It is recommended that you maximize the console window for this. You will have to to see the settings above.
jsons/cote_replacements.json
CHANGED
@@ -87,7 +87,7 @@
 "Mika": ["美香"],
 "Minamikawa": ["南川"],
 "Namekawa": ["滑川"],
-"…
+"Ōba": ["大場"],
 "Obokata": ["小保方"],
 "Onizuka": ["鬼塚"],
 "Ryūko": ["竜子"],
@@ -103,7 +103,6 @@
 "king": ["Wan-sama"]
 },
 
-
 "full_names": {
 "Amasawa Ichika": ["天沢","一夏"],
 "Amikura Mako": ["網倉","麻子"],
@@ -209,7 +208,8 @@
 },
 
 "enhanced_check_whitelist": {
-"Hoshinomiya Chie": ["星之宮","知恵"]
+"Hoshinomiya Chie": ["星之宮","知恵"],
+"Kijima": ["鬼島"]
 }
 
 }
lib/common/translation_settings_description.txt
CHANGED
@@ -14,6 +14,10 @@ number_of_malformed_batch_retries : (A malformed batch is when je-fixing fails) Ho…
 batch_retry_timeout : How long Kudasai will try to translate a batch, in seconds. If a request exceeds this duration, Kudasai will leave it untranslated.
 
 number_of_concurrent_batches : How many translation batches Kudasai will send to the translation API at a time. For OpenAI, be conservative as rate-limiting is aggressive; I'd suggest 3-5. For Gemini, do not exceed 15 for 1.0 or 2 for 1.5. This setting more or less doesn't matter for DeepL.
+
+gender_context_insertion : true or false - Whether to insert gender info into system prompts when translating text. Kudasai will look for a "genders.json" (exactly) in the root directory and scan each text to translate for the names in that file. If a name is found, Kudasai will insert the gender info into the system prompt. For more info, see the README.md.
+
+is_cote : true or false - Whether you are translating COTE (Classroom of the Elite). Kudasai has specialized functions for COTE that will be enabled if this is set to true.
 ----------------------------------------------------------------------------------
 Open AI Settings:
 See https://platform.openai.com/docs/api-reference/chat/create for further details
lib/gui/HUGGING_FACE_README.md
CHANGED
@@ -113,6 +113,11 @@ For further details on the settings file, see [here](#translation-with-llms-settings)
 batch_retry_timeout : How long Kudasai will try to translate a batch, in seconds. If a request exceeds this duration, Kudasai will leave it untranslated.
 
 number_of_concurrent_batches : How many translation batches Kudasai will send to the translation API at a time. For OpenAI, be conservative as rate-limiting is aggressive; I'd suggest 3-5. For Gemini, do not exceed 15 for 1.0 or 2 for 1.5. This setting more or less doesn't matter for DeepL.
+
+gender_context_insertion : true or false - Whether to insert gender info into system prompts when translating text. Kudasai will look for a "genders.json" (exactly) in the root directory and scan each text to translate for the names in that file. If a name is found, Kudasai will insert the gender info into the system prompt. For more info, see the README.md.
+
+is_cote : true or false - Whether you are translating COTE (Classroom of the Elite). Kudasai has specialized functions for COTE that will be enabled if this is set to true.
+
 ----------------------------------------------------------------------------------
 Open AI Settings:
 See https://platform.openai.com/docs/api-reference/chat/create for further details
lib/gui/save_to_file.js
CHANGED
@@ -1,4 +1,5 @@
-(text) =>
+(text) =>
+{
     const blob = new Blob([text], { type: 'text/plain;charset=utf-8' });
     const url = URL.createObjectURL(blob);
     const a = document.createElement('a');
modules/common/file_ensurer.py
CHANGED
@@ -58,6 +58,9 @@ class FileEnsurer():
     ## translation settings
     external_translation_settings_path = os.path.join(script_dir,'translation_settings.json')
     config_translation_settings_path = os.path.join(config_dir,'translation_settings.json')
+
+    external_translation_genders_path = os.path.join(script_dir,'genders.json')
+    config_translation_genders_path = os.path.join(config_dir, 'genders.json')
 
     ## api keys
     deepl_api_key_path = os.path.join(secrets_dir, "deepl_api_key.txt")
@@ -85,7 +88,9 @@ class FileEnsurer():
     "je_check_mode": 2,
     "number_of_malformed_batch_retries": 1,
     "batch_retry_timeout": 300,
-    "number_of_concurrent_batches": 5
+    "number_of_concurrent_batches": 5,
+    "gender_context_insertion": False,
+    "is_cote": False,
 },
 
 "openai settings": {
@@ -121,6 +126,13 @@ class FileEnsurer():
         "deepl_formality": "default"
     }
 }
+
+DEFAULT_GENDER_SETTINGS = {
+    "Male": {},
+    "Female": {},
+    "Unknown": {}
+}
+
 INVALID_TRANSLATION_SETTINGS_PLACEHOLDER = {
     "INVALID JSON":
     {
@@ -128,6 +140,13 @@ class FileEnsurer():
     }
 }
 
+INVALID_GENDER_SETTINGS_PLACEHOLDER = {
+    "INVALID JSON":
+    {
+        "INVALID JSON":"INVALID JSON"
+    }
+}
+
 ## rules
 blank_rules_path = os.path.join(jsons_dir, "blank_replacements.json")
 
@@ -200,6 +219,11 @@ class FileEnsurer():
         with open(FileEnsurer.config_translation_settings_path, 'w+', encoding='utf-8') as file:
             json.dump(FileEnsurer.DEFAULT_TRANSLATION_SETTING, file)
 
+        ## creates the genders file if it doesn't exist
+        if(os.path.exists(FileEnsurer.config_translation_genders_path) == False):
+            with open(FileEnsurer.config_translation_genders_path, 'w+', encoding='utf-8') as file:
+                json.dump(FileEnsurer.DEFAULT_GENDER_SETTINGS, file)
+
 ##-------------------start-of-purge_storage()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 
 @staticmethod
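The genders bootstrap above follows the same idempotent pattern as the settings file: write the default only when the config copy is missing, so user edits survive repeated startups. A minimal standalone sketch of that pattern (the path and default below are illustrative, not Kudasai's real config paths):

```python
import json
import os

def ensure_json(path: str, default: dict) -> None:
    ## write the default only if the file does not already exist
    if(not os.path.exists(path)):
        with open(path, 'w+', encoding='utf-8') as file:
            json.dump(default, file)

ensure_json("genders.json", {"Male": {}, "Female": {}, "Unknown": {}})
```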
modules/common/gender_util.py
ADDED
@@ -0,0 +1,420 @@
## built-in libraries
import json
import typing

## third-party libraries
import regex

## custom modules
from modules.common.file_ensurer import FileEnsurer

##-------------------start-of-GenderUtil---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

class GenderUtil:

    genders:typing.Optional[dict] = None
    cache = {}

    is_cote:bool = False

    ##-------------------start-of-find_english_words()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def find_english_words(text:str) -> list[tuple[str, int]]:

        """

        Finds the English words in the text.

        Parameters:
        text (str) : The text to be searched.

        Returns:
        (list[tuple[str, int]]) : The list of words and their starting indices.

        """

        return [(match.group(), match.start()) for match in regex.finditer(r'\p{Latin}+', text)]

    ##-------------------start-of-is_potential_name()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def is_potential_name(word:str) -> bool:

        """

        Treats words as potential names, excluding any word that contains full-width characters (U+FF00-U+FFEF).

        Parameters:
        word (str) : The word to be checked.

        Returns:
        (bool) : The result of the check.

        """

        return not any(0xFF00 <= ord(ch) <= 0xFFEF for ch in word)

    ##-------------------start-of-group_names()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def group_names(text, names_with_positions: list[tuple[str, int]], max_distance: int = 10) -> list[str]:

        """

        Groups names together if they follow one another within a certain distance and are separated by spaces.

        Parameters:
        text (str) : The text to be searched.
        names_with_positions (list[tuple[str, int]]) : The names with their positions.
        max_distance (int) : The maximum distance between names.

        Returns:
        (list[str]) : The grouped names.

        """

        honorifics = [
            "chan",
            "dono",
            "kun",
            "kōhai",
            "paisen",
            "sama",
            "san",
            "senpai",
            "sensei",
            "shi",
            "ue"
        ]

        blacklist = [
            "contents",
        ]

        grouped_names = []
        i = 0
        skip_next = False
        length = len(names_with_positions)

        while i < length - 1:

            if(skip_next):
                skip_next = False

            else:
                current_name, current_pos = names_with_positions[i]
                next_name, next_pos = names_with_positions[i + 1]

                if(current_name in blacklist):
                    i += 1
                    continue

                ## Check if names are separated by spaces and are within the maximum distance.
                separator = text[current_pos + len(current_name):next_pos]

                if(GenderUtil.is_potential_name(next_name) and (separator.isspace()) and next_pos - current_pos <= max_distance):
                    grouped_names.append(current_name + " " + next_name)
                    skip_next = True
                else:
                    grouped_names.append(current_name)

            i += 1

        if(not skip_next and names_with_positions):
            grouped_names.append(names_with_positions[-1][0])

        ## merge honorifics with names
        for i, name in enumerate(grouped_names):
            if(i + 1 < len(grouped_names) and grouped_names[i + 1].lower() in honorifics):
                grouped_names[i] += "-" + grouped_names[i + 1]
                grouped_names.pop(i + 1)

        return grouped_names

    ##-------------------start-of-load_genders()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def load_genders() -> dict:

        """

        Loads the genders from the config genders.json file and clears the cache.

        Returns:
        (dict) : The loaded json.

        """

        GenderUtil.cache = {}

        with open(FileEnsurer.config_translation_genders_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    ##-------------------start-of-discard_non_names()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def discard_non_names(names: list[str]) -> list[str]:

        """

        Discards any names that are not in the gender list.

        Parameters:
        names (list[str]) : The names to be filtered.

        Returns:
        new_names (list[str]) : The filtered names.

        """

        GenderUtil.genders = GenderUtil.load_genders()

        new_names = [
            name for name in names
            if any(
                any(
                    part == full_part
                    for part in GenderUtil.honorific_stripper(name).split(' ')
                    for full_part in full_name.split(' ')
                )
                for gender, gender_names in GenderUtil.genders.items()
                for full_name, _ in gender_names.items()
            )
        ]

        if(GenderUtil.is_cote):
            ## known issues with cote
            new_names = [name for name in new_names if name not in ["king"] and len(name) > 1]

        return new_names

    ##-------------------start-of-honorific_stripper()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def honorific_stripper(name:str) -> str:

        """

        Strips the honorific from the name.

        Parameters:
        name (str) : The name to be stripped.

        Returns:
        (str) : The stripped name.

        """

        if("-" in name):
            return name.split("-")[0]

        return name

    ##-------------------start-of-reverse_honorific_stripper()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def reverse_honorific_stripper(name:str) -> str:

        """

        Removes the name from the honorific. (Gets the honorific.)

        Parameters:
        name (str) : The name to be stripped.

        Returns:
        (str) : The honorific, or an empty string if there is none.

        """

        if("-" in name):
            return name.split("-")[1]

        return ""

    ##-------------------start-of-discard_similar_names()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def discard_similar_names(names: list[str]) -> list[str]:

        """

        Discards any names that are similar to each other.

        This totally didn't take me literally 2 hours because I'm a dipshit who overcomplicates things.

        Parameters:
        names (list[str]) : The names to be filtered.

        Returns:
        (list[str]) : The filtered names.

        """

        seen = set()
        result = []

        ## Sort names by length (shortest first)
        names.sort(key=len)

        for name in names:
            base_name = GenderUtil.honorific_stripper(name)
            if(not any(base_name in seen_name or seen_name in base_name for seen_name in seen)):
                result.append(name)
                seen.add(base_name)

        return result

    ##-------------------start-of-find_name_gender()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def find_name_gender(name:str) -> list[str]:

        """

        Finds the gender associated with a name.

        Parameters:
        name (str) : The name to look up.

        Returns:
        result (list[str]) : The gender(s) matched for the name.

        """

        ## known names that are literally 95% this
        cote_predetermined: typing.Dict[typing.Tuple[str, str], str] = {
            ("Sakayanagi", "san"): "Female",
            ("Horikita", "san"): "Female",
            ("Horikita", ""): "Female",
            ("Sakayanagi", ""): "Female",
            ("Sakayanagi", "sama"): "Male",
            ("Sakayanagi", "sensei"): "Male",
            ("Kei", ""): "Female"
        }

        GenderUtil.genders = GenderUtil.load_genders()

        if(name in GenderUtil.cache):
            return GenderUtil.cache[name]

        honorific = GenderUtil.reverse_honorific_stripper(name)
        stripped_name = GenderUtil.honorific_stripper(name)

        ## check if the name is predetermined
        if((stripped_name, honorific) in cote_predetermined and GenderUtil.is_cote):
            result = [cote_predetermined[(stripped_name, honorific)]]
            GenderUtil.cache[name] = result
            return result

        ## this does an in operation
        ## so it could return too many (Kei, for instance, will trigger Keisei and Kei)
        result = [gender for gender, names in GenderUtil.genders.items() for full_name in names if stripped_name in full_name]

        ## so we go through it again, split the full name into first and last name, compare them to the stripped name, and set the result
        if(len(result) > 1):
            for gender, names in GenderUtil.genders.items():
                for full_name in names:
                    first_name = full_name.split(" ")[0]
                    last_name = full_name.split(" ")[-1]

                    if((first_name == stripped_name or last_name == stripped_name) and gender in result):
                        ## need to re-add; done because this can match for multiple genders
                        result.remove(gender)
                        result.append(gender)

        if(len(set(result)) > 1 or result in ["Undetermined", "Unknown"]):
            if(honorific == "kun"):
                result = ["Male"]
            elif(honorific == "chan"):
                result = ["Female"]

            else:
                result = ["Undetermined"]

        GenderUtil.cache[name] = result

        return result

    ##-------------------start-of-get_pronoun_assumption_for_system_prompt()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def get_pronoun_assumption_for_system_prompt(sample:str) -> typing.List[str]:

        """

        Gets the pronoun assumptions for a text sample so they can be used in the system prompt.

        Parameters:
        sample (str) : The text to be analyzed.

        Returns:
        pronoun_assumptions (list[str]) : The pronoun assumptions.

        """

        gender_to_pronoun_map = {
            "Male": "he",
            "Female": "she",
            ## we used Unknown in the json file, but we should use Undetermined (and no, I'm not changing the json file)
            "Undetermined": "they",
            "Unknown": "they"
        }

        names_with_positions = GenderUtil.find_english_words(sample)
        potential_names_with_positions = [(name, pos) for name, pos in names_with_positions if GenderUtil.is_potential_name(name)]
        grouped_names = GenderUtil.group_names(sample, potential_names_with_positions)
        actual_names = GenderUtil.discard_non_names(grouped_names)
        filtered_names = GenderUtil.discard_similar_names(actual_names)

        assumptions = [
            "{} : {}\n".format(name, gender[0]) if gender and len(set(gender)) == 1 and gender not in ["Undetermined", "Unknown"] else "{} : Undetermined\n".format(name)
            for name in filtered_names
            for gender in [GenderUtil.find_name_gender(name)]
        ]

        pronoun_assumptions = [
            "{} : {}\n".format(name.strip(), gender_to_pronoun_map.get(gender.strip(), "they"))
            for assumption in assumptions
            for name, gender in [assumption.split(":")]
        ]

        return pronoun_assumptions

    ##----------------start-of-get_gender_assumption_for_system_prompt()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    @staticmethod
    def get_gender_assumption_for_system_prompt(sample:str) -> typing.List[str]:

        """

        Gets the gender assumptions for a text sample.

        Parameters:
        sample (str) : The text to be analyzed.

        Returns:
        gender_assumptions (list[str]) : The gender assumptions.

        """

        names_with_positions = GenderUtil.find_english_words(sample)
        potential_names_with_positions = [(name, pos) for name, pos in names_with_positions if GenderUtil.is_potential_name(name)]
        grouped_names = GenderUtil.group_names(sample, potential_names_with_positions)
        actual_names = GenderUtil.discard_non_names(grouped_names)
        filtered_names = GenderUtil.discard_similar_names(actual_names)

        assumptions = [
            "{} : {}\n".format(name, gender[0]) if gender and len(set(gender)) == 1 and gender not in ["Undetermined", "Unknown"] else "{} : Undetermined\n".format(name)
            for name in filtered_names
            for gender in [GenderUtil.find_name_gender(name)]
        ]

        gender_assumptions = [
            "{} : {}\n".format(name.strip(), gender.strip().replace("Unknown", "Undetermined"))
            for assumption in assumptions
            for name, gender in [assumption.split(":")]
        ]

        return gender_assumptions
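A usage sketch for the pipeline above (it assumes a populated genders.json already exists at FileEnsurer.config_translation_genders_path; the sample text is made up):

```python
from modules.common.gender_util import GenderUtil

GenderUtil.is_cote = True  ## enables the COTE-specific overrides shown above

sample = "Horikita-san was speaking with Ayanokōji Kiyotaka."
assumptions = GenderUtil.get_gender_assumption_for_system_prompt(sample)

## prints something like "Horikita : Female" and "Ayanokōji Kiyotaka : Male"
print("".join(assumptions))
```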
modules/common/toolkit.py
CHANGED
@@ -15,7 +15,7 @@ class Toolkit():
 
     """
 
-    CURRENT_VERSION = "v3.4.…
+    CURRENT_VERSION = "v3.4.9-beta"
 
     ##-------------------start-of-clear_console()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
modules/common/translator.py
CHANGED
@@ -2,6 +2,7 @@
 import typing
 import base64
 import re
+import shutil
 import time
 import typing
 import asyncio
@@ -21,6 +22,7 @@ from modules.common.file_ensurer import FileEnsurer
 from modules.common.toolkit import Toolkit
 from modules.common.exceptions import OpenAIAuthenticationError, MaxBatchDurationExceededException, DeepLAuthorizationException, OpenAIInternalServerError, OpenAIRateLimitError, OpenAIAPITimeoutError, GoogleAuthError, OpenAIAPIStatusError, OpenAIAPIConnectionError, DeepLException, GoogleAPIError
 from modules.common.decorators import permission_error_decorator
+from modules.common.gender_util import GenderUtil
 
 ##-------------------start-of-Translator--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 
@@ -74,6 +76,8 @@ class Translator:
     number_of_malformed_batch_retries:int
     batch_retry_timeout:float
     num_concurrent_batches:int
+    gender_context_insertion:bool
+    is_cote:bool
 
     decorator_to_use:typing.Callable
 
@@ -432,6 +436,15 @@
 
     """
 
+    if(os.path.exists(FileEnsurer.external_translation_genders_path) and not is_webgui):
+        logging.info("External genders.json file found, overriding config...")
+        shutil.copy2(FileEnsurer.external_translation_genders_path, FileEnsurer.config_translation_genders_path)
+
+    if(not os.path.exists(FileEnsurer.external_translation_settings_path) and not is_webgui):
+        logging.info("External translation_settings.json file not found, using config...")
+        print("External translation_settings.json file not found, using config...")
+        time.sleep(2)
+
     logging.debug(f"Translator Activated, Translation Method : {Translator.TRANSLATION_METHOD} "
                   f"Settings are as follows : ")
 
@@ -444,6 +457,10 @@
     Translator.num_of_malform_retries = int(JsonHandler.current_translation_settings["base translation settings"]["number_of_malformed_batch_retries"])
     Translator.max_batch_duration = float(JsonHandler.current_translation_settings["base translation settings"]["batch_retry_timeout"])
     Translator.num_concurrent_batches = int(JsonHandler.current_translation_settings["base translation settings"]["number_of_concurrent_batches"])
+    Translator.gender_context_insertion = bool(JsonHandler.current_translation_settings["base translation settings"]["gender_context_insertion"])
+    Translator.is_cote = bool(JsonHandler.current_translation_settings["base translation settings"]["is_cote"])
+
+    GenderUtil.is_cote = Translator.is_cote
 
     Translator._semaphore = asyncio.Semaphore(Translator.num_concurrent_batches)
 
@@ -552,36 +569,39 @@
     """
 
     async_requests = []
+    logging_message = "Built Messages: \n\n"
 
     translation_batches_methods = {
-        "openai": Translator.openai_translation_batches,
-        "gemini": Translator.gemini_translation_batches,
-        "deepl": Translator.deepl_translation_batches,
-        "google translate": Translator.google_translate_translation_batches
+        method_name: getattr(Translator, f"{method_name}_translation_batches" if method_name != "google translate" else "google_translate_translation_batches")
+        for method_name in ["openai", "gemini", "deepl", "google translate"]
     }
 
     translation_batches = translation_batches_methods[Translator.TRANSLATION_METHOD]
     batch_length = len(translation_batches)
-    batch_number = 1 # Initialize batch number
 
+    ## if openai/gemini, which are LLMs, the batches come in instructions/prompt pairs
     if(Translator.TRANSLATION_METHOD not in ["deepl", "google translate"]):
-        for i in range(0, batch_length, 2):
-            instructions = translation_batches[i]
-            prompt = translation_batches[i + 1]
-
+        for batch_number, (instructions, prompt) in enumerate(zip(translation_batches[::2], translation_batches[1::2]), start=1):
             assert isinstance(instructions, (SystemTranslationMessage, str))
             assert isinstance(prompt, (ModelTranslationMessage, str))
 
-            …
+            if(Translator.gender_context_insertion):
+                assumption = list(set(GenderUtil.get_gender_assumption_for_system_prompt(prompt if isinstance(prompt, str) else prompt.content)))
+                assumption_string = "Additional Information:\nCharacter Genders:\n" + "".join(assumption) if len(assumption) > 0 else ""
+                instructions = SystemTranslationMessage(content=f"{instructions.content if isinstance(instructions, Message) else instructions}\n{assumption_string}")
 
+            logging_message += f"\n------------------------\n{instructions.content if isinstance(instructions, Message) else instructions}\n{prompt if isinstance(prompt, str) else prompt.content}"
+            async_requests.append(Translator.handle_translation(model, batch_number, batch_length//2, prompt, instructions))
+
+    ## if deepl/google translate, the batches are just the text
     else:
-        for batch in translation_batches:
+        for batch_number, batch in enumerate(translation_batches, start=1):
             assert isinstance(batch, str)
+            logging_message += f"\n------------------------\n{batch}"
             async_requests.append(Translator.handle_translation(model, batch_number, batch_length, batch, None))
-            batch_number += 1
 
+    logging.debug(logging_message)
+
     return async_requests
 
 ##-------------------start-of-generate_text_to_translate_batches()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -675,32 +695,6 @@
     elif(Translator.TRANSLATION_METHOD == 'google translate'):
         Translator.google_translate_translation_batches.append(batch)
 
-    logging_message = "Built Messages: \n\n"
-
-    batches_to_iterate = {
-        "openai": Translator.openai_translation_batches,
-        "gemini": Translator.gemini_translation_batches,
-        "deepl": Translator.deepl_translation_batches,
-        "google translate": Translator.google_translate_translation_batches
-    }
-
-    i = 0
-
-    batches = batches_to_iterate[Translator.TRANSLATION_METHOD]
-
-    for message in batches:
-
-        i+=1
-
-        message = str(message) if Translator.TRANSLATION_METHOD != 'openai' else message.content # type: ignore
-
-        if(i % 2 == 1 and Translator.TRANSLATION_METHOD not in ['deepl', 'google_translate']):
-            logging_message += "\n" "------------------------" "\n"
-
-        logging_message += message + "\n"
-
-    logging.debug(logging_message)
-
 ##-------------------start-of-handle_cost_estimate_prompt()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 
 @staticmethod
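The refactored loop above pairs a flat [system, user, system, user, ...] list back into (instructions, prompt) tuples. A standalone sketch of the idiom, with made-up data:

```python
## zip over two offset slices: even indices are system messages, odd are prompts
batches = ["sys 1", "user 1", "sys 2", "user 2"]

for batch_number, (instructions, prompt) in enumerate(zip(batches[::2], batches[1::2]), start=1):
    print(batch_number, instructions, prompt)

## 1 sys 1 user 1
## 2 sys 2 user 2
```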
modules/gui/gui_json_util.py
CHANGED
@@ -56,7 +56,8 @@ class GuiJsonUtil:
 
     for header in new_rules.keys():
         for key, value in new_values:
-            new_rules[header]…
+            if(key in new_rules[header]):
+                new_rules[header][key] = JsonHandler.convert_to_correct_type(key, str(value))
 
     JsonHandler.current_translation_settings = new_rules
     JsonHandler.validate_json()
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
 backoff==2.2.1
 gradio==4.20.0
-kairyou==1.6.…
-easytl==0.4.…
+kairyou==1.6.5
+easytl==0.4.3
 ja_core_news_lg @ https://github.com/explosion/spacy-models/releases/download/ja_core_news_lg-3.7.0/ja_core_news_lg-3.7.0-py3-none-any.whl#sha256=f08eecb4d40523045c9478ce59a67564fd71edd215f32c076fa91dc1f05cc7fd
util/token_counter.py
CHANGED
@@ -67,7 +67,7 @@ class TokenCounter:
     print("\nNote that the cost estimate is not always accurate, and may be higher than the actual cost. However cost calculation now includes output tokens.\n")
 
     if(self.service == "gemini"):
-        print(f"As of Kudasai {Toolkit.CURRENT_VERSION}, Gemini Pro 1.0 is free to use under…
+        print(f"As of Kudasai {Toolkit.CURRENT_VERSION}, Gemini Pro 1.0 is free to use under 15 requests per minute, Gemini Pro 1.5 is free to use under 2 requests per minute.\nIt is up to you to set these in the settings json.\nIt is currently unknown whether the ultra model parameter is connecting to the actual ultra model and not a pro one. As it works, but does not appear on any documentation.\n")
 
     print("Estimated number of tokens : " + str(num_tokens))
     print("Estimated minimum cost : " + str(min_cost) + " USD")
webgui.py
CHANGED
@@ -3,6 +3,7 @@ import typing
3 |   import base64
4 |   import asyncio
5 |   import os
6 |
7 |   ## third-party libraries
8 |   import gradio as gr
@@ -55,35 +56,37 @@ class KudasaiGUI:
55 |   "number_of_malformed_batch_retries": lines[12-1].strip(),
56 |   "batch_retry_timeout": lines[14-1].strip(),
57 |   "number_of_concurrent_batches": lines[16-1].strip(),
58-86 | - (the remaining "openai_*", "gemini_*", and "deepl_*" description entries, truncated in the page rendering; the new block further down lists them in full)
87 |   }
88 |
89 |   ##-------------------start-of-build_gui()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@@ -96,7 +99,7 @@ class KudasaiGUI:
96 |
97 |   """
98 |
99 | - with gr.Blocks(title="Kudasai") as self.gui:
100 |
101 |   ##-------------------start-of-Utility-Functions---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
102 |
@@ -259,30 +262,32 @@ class KudasaiGUI:
259 |   4: "number_of_malformed_batch_retries",
260 |   5: "batch_retry_timeout",
261 |   6: "number_of_concurrent_batches",
262-285 | - (entries 7: through 30:, truncated in the page rendering; the renumbered mapping in the new block further down lists them in full)
286 |   }
287 |
288 |   for index, setting in enumerate(translation_settings):
@@ -381,7 +386,8 @@ class KudasaiGUI:
381 |   with gr.Column():
382 |       self.input_txt_file_translator = gr.File(label='TXT file with Japanese Text', file_count='single', file_types=['.txt'], type='filepath', interactive=True)
383 |       self.input_text_translator = gr.Textbox(label='Japanese Text', placeholder='Use this or the text file input, if you provide both, Kudasai will use the file input.', lines=10, show_label=True, interactive=True, type='text')
384 | -     self.input_translation_rules_file = gr.File(value = FileEnsurer.config_translation_settings_path, label='Translation Settings File', file_count='single', file_types=['.json'], type='filepath')
385 |
386 |   with gr.Row():
387 |       self.llm_option_dropdown = gr.Dropdown(label='Translation Method', choices=["OpenAI", "Gemini", "DeepL", "Google Translate"], value="DeepL", show_label=True, interactive=True)
@@ -488,6 +494,20 @@ class KudasaiGUI:
488 |   interactive=True,
489 |   elem_id="number_of_concurrent_batches",
490 |   show_copy_button=True)
491 |
492 |   with gr.Column():
493 |
@@ -956,17 +976,14 @@ class KudasaiGUI:
956 |   JsonHandler.current_translation_settings = GuiJsonUtil.current_translation_settings
957 |
958 |   ## next, set the llm type
959-967 | - (the removed if/elif branches for the other translation methods, truncated in the page rendering)
968 | - elif(translation_method == "Google Translate"):
969 | -     Translator.TRANSLATION_METHOD = "google translate"
970 |
971 |   ## api key as well
972 |   await set_translator_api_key(api_key)
@@ -1131,7 +1148,7 @@ class KudasaiGUI:
1131 |
1132 |   ##-------------------start-of-clear_translator_tab()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1133 |
1134 | - def clear_translator_tab() -> typing.Tuple[None, str, gr.File, str, str, str]:
1135 |
1136 |   """
1137 |
@@ -1140,6 +1157,8 @@ class KudasaiGUI:
1140 |   Returns:
1141 |       input_txt_file_translator (gr.File) : An empty file.
1142 |       input_text_translator (str) : An empty string.
1143 |       translator_translated_text_output_field (str) : An empty string.
1144 |       je_check_text_field_translator (str) : An empty string.
1145 |       translator_debug_log_output_field (str) : An empty string.
@@ -1157,13 +1176,16 @@ class KudasaiGUI:
1157 |   input_text_translator = ""
1158 |
1159 |   ## Also gonna want to reset the json input field to the default json file
1160 | - input_translation_rules_file = gr.File(value = FileEnsurer.config_translation_settings_path, label='Translation Settings File', file_count='single', file_types=['.json'], type='filepath')
1161 |
1162 |   translator_translated_text_output_field = ""
1163 |   je_check_text_field_translator = ""
1164 |   translator_debug_log_output_field = ""
1165 |
1166 | - return input_file_translator, input_text_translator, input_translation_rules_file, translator_translated_text_output_field, je_check_text_field_translator, translator_debug_log_output_field
1167 |
1168 |   ##-------------------start-of-clear_log_button_click()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1169 |
@@ -1197,6 +1219,8 @@ class KudasaiGUI:
1197 |   num_malformed_batch_retries:int,
1198 |   batch_retry_timeout:int,
1199 |   num_concurrent_batches:int,
1200 |   openai_model:str,
1201 |   openai_system_message:str,
1202 |   openai_temperature:float,
@@ -1241,6 +1265,8 @@ class KudasaiGUI:
1241 |   num_malformed_batch_retries,
1242 |   batch_retry_timeout,
1243 |   num_concurrent_batches,
1244 |   openai_model,
1245 |   openai_system_message,
1246 |   openai_temperature,
@@ -1270,9 +1296,9 @@ class KudasaiGUI:
1270 |
1271 |   ## create the new key-value pair list
1272 |   new_key_value_tuple_pairs = create_new_key_value_tuple_pairs(settings_list)
1273 | -
1274 |   try:
1275 | -     ## and then have the GuiJsonUtil apply the new translator settings
1276 |       GuiJsonUtil.update_translation_settings_with_new_values(input_translation_rules_file, new_key_value_tuple_pairs)
1277 |
1278 |   except:
@@ -1303,6 +1329,8 @@ class KudasaiGUI:
1303 |   ("base translation settings", "number_of_malformed_batch_retries", str),
1304 |   ("base translation settings", "batch_retry_timeout", str),
1305 |   ("base translation settings", "number_of_concurrent_batches", str),
1306 |   ("openai settings", "openai_model", str),
1307 |   ("openai settings", "openai_system_message", str),
1308 |   ("openai settings", "openai_temperature", float),
@@ -1360,6 +1388,8 @@ class KudasaiGUI:
1360 |   ("base translation settings", "number_of_malformed_batch_retries", str),
1361 |   ("base translation settings", "batch_retry_timeout", str),
1362 |   ("base translation settings", "number_of_concurrent_batches", str),
1363 |   ("openai settings", "openai_model", str),
1364 |   ("openai settings", "openai_system_message", str),
1365 |   ("openai settings", "openai_temperature", float),
@@ -1393,6 +1423,51 @@ class KudasaiGUI:
1393 |   raise gr.Error("Invalid Custom Translation Settings File")
1394 |
1395 |   return return_batch
1396 |
1397 |   ##-------------------start-of-clear_translation_settings_input_fields()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1398 |
@@ -1414,6 +1489,8 @@ class KudasaiGUI:
1414 |   "num_malformed_batch_retries_value": None,
1415 |   "batch_retry_timeout_value": None,
1416 |   "num_concurrent_batches_value": None,
1417 |   "openai_model_value": None,
1418 |   "openai_system_message_value": None,
1419 |   "openai_temperature_value": None,
@@ -1566,8 +1643,6 @@ class KudasaiGUI:
1566 |   self.logging_tab_debug_log_output_field, ## debug log on log tab
1567 |   self.logging_tab_error_log_output_field]) ## error log on log tab
1568 |
1569 | -
1570 | -
1571 |   ##-------------------start-of-translate_with_translator()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1572 |
1573 |   ## for the actual translation, and the je check text
@@ -1592,7 +1667,6 @@ class KudasaiGUI:
1592 |
1593 |   every=.1) ## update every 100ms
1594 |
1595 | -
1596 |   ##-------------------start-of translator_calculate_costs_button_click()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1597 |
1598 |   self.translator_calculate_cost_button.click(translator_calculate_costs_button_click,
@@ -1641,6 +1715,7 @@ class KudasaiGUI:
1641 |   self.input_txt_file_translator, ## input txt file
1642 |   self.input_text_translator, ## input text
1643 |   self.input_translation_rules_file, ## Translation Settings File
1644 |   self.translator_translated_text_output_field, ## translation output field
1645 |   self.translator_je_check_text_output_field, ## je check text field on translator tab
1646 |   self.translator_debug_log_output_field], ## debug log on translator tab
@@ -1668,6 +1743,8 @@ class KudasaiGUI:
1668 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1669 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1670 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1671 |   self.openai_model_input_field, ## openai model input field
1672 |   self.openai_system_message_input_field, ## openai system message input field
1673 |   self.openai_temperature_input_field, ## openai temperature input field
@@ -1708,6 +1785,8 @@ class KudasaiGUI:
1708 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1709 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1710 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1711 |   self.openai_model_input_field, ## openai model input field
1712 |   self.openai_system_message_input_field, ## openai system message input field
1713 |   self.openai_temperature_input_field, ## openai temperature input field
@@ -1746,6 +1825,8 @@ class KudasaiGUI:
1746 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1747 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1748 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1749 |   self.openai_model_input_field, ## openai model input field
1750 |   self.openai_system_message_input_field, ## openai system message input field
1751 |   self.openai_temperature_input_field, ## openai temperature input field
@@ -1784,6 +1865,8 @@ class KudasaiGUI:
1784 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1785 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1786 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1787 |   self.openai_model_input_field, ## openai model input field
1788 |   self.openai_system_message_input_field, ## openai system message input field
1789 |   self.openai_temperature_input_field, ## openai temperature input field
@@ -1821,6 +1904,8 @@ class KudasaiGUI:
1821 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1822 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1823 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1824 |   self.openai_model_input_field, ## openai model input field
1825 |   self.openai_system_message_input_field, ## openai system message input field
1826 |   self.openai_temperature_input_field, ## openai temperature input field
@@ -1845,6 +1930,19 @@ class KudasaiGUI:
1845 |   self.deepl_split_sentences_input_field, ## deepl split sentences input field
1846 |   self.deepl_preserve_formatting_input_field, ## deepl preserve formatting input field
1847 |   self.deepl_formality_input_field]) ## deepl formality input field
1848 |
1849 |   ##-------------------start-of-logging_tab.select()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1850 |

3 |   import base64
4 |   import asyncio
5 |   import os
6 | + import json
7 |
8 |   ## third-party libraries
9 |   import gradio as gr
56 |   "number_of_malformed_batch_retries": lines[12-1].strip(),
57 |   "batch_retry_timeout": lines[14-1].strip(),
58 |   "number_of_concurrent_batches": lines[16-1].strip(),
59 | + "gender_context_insertion": lines[18-1].strip(),
60 | + "is_cote": lines[20-1].strip(),
61 | + "openai_help_link": lines[23-1].strip(),
62 | + "openai_model": lines[25-1].strip(),
63 | + "openai_system_message": lines[27-1].strip(),
64 | + "openai_temperature": lines[29-1].strip(),
65 | + "openai_top_p": lines[31-1].strip(),
66 | + "openai_n": lines[33-1].strip(),
67 | + "openai_stream": lines[35-1].strip(),
68 | + "openai_stop": lines[37-1].strip(),
69 | + "openai_logit_bias": lines[39-1].strip(),
70 | + "openai_max_tokens": lines[41-1].strip(),
71 | + "openai_presence_penalty": lines[43-1].strip(),
72 | + "openai_frequency_penalty": lines[45-1].strip(),
73 | + "openai_disclaimer": lines[47-1].strip(),
74 | + "gemini_help_link": lines[50-1].strip(),
75 | + "gemini_model": lines[52-1].strip(),
76 | + "gemini_prompt": lines[54-1].strip(),
77 | + "gemini_temperature": lines[56-1].strip(),
78 | + "gemini_top_p": lines[58-1].strip(),
79 | + "gemini_top_k": lines[60-1].strip(),
80 | + "gemini_candidate_count": lines[62-1].strip(),
81 | + "gemini_stream": lines[64-1].strip(),
82 | + "gemini_stop_sequences": lines[66-1].strip(),
83 | + "gemini_max_output_tokens": lines[68-1].strip(),
84 | + "gemini_disclaimer": lines[70-1].strip(),
85 | + "deepl_help_link": lines[73-1].strip(),
86 | + "deepl_context": lines[75-1].strip(),
87 | + "deepl_split_sentences": lines[77-1].strip(),
88 | + "deepl_preserve_formatting": lines[79-1].strip(),
89 | + "deepl_formality": lines[81-1].strip(),
90 |   }
91 |
92 |   ##-------------------start-of-build_gui()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
100 |
"""
|
101 |
|
102 |
+
with gr.Blocks(title="Kudasai", delete_cache=(300, 300)) as self.gui:
|
103 |
|
104 |
##-------------------start-of-Utility-Functions---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
105 |
|
|
|
262 |   4: "number_of_malformed_batch_retries",
263 |   5: "batch_retry_timeout",
264 |   6: "number_of_concurrent_batches",
265 | + 7: "gender_context_insertion",
266 | + 8: "is_cote",
267 | + 9: "openai_model",
268 | + 10: "openai_system_message",
269 | + 11: "openai_temperature",
270 | + 12: "openai_top_p",
271 | + 13: "openai_n",
272 | + 14: "openai_stream",
273 | + 15: "openai_stop",
274 | + 16: "openai_logit_bias",
275 | + 17: "openai_max_tokens",
276 | + 18: "openai_presence_penalty",
277 | + 19: "openai_frequency_penalty",
278 | + 20: "gemini_model",
279 | + 21: "gemini_prompt",
280 | + 22: "gemini_temperature",
281 | + 23: "gemini_top_p",
282 | + 24: "gemini_top_k",
283 | + 25: "gemini_candidate_count",
284 | + 26: "gemini_stream",
285 | + 27: "gemini_stop_sequences",
286 | + 28: "gemini_max_output_tokens",
287 | + 29: "deepl_context",
288 | + 30: "deepl_split_sentences",
289 | + 31: "deepl_preserve_formatting",
290 | + 32: "deepl_formality",
291 |   }
292 |
293 |   for index, setting in enumerate(translation_settings):
386 |   with gr.Column():
387 |       self.input_txt_file_translator = gr.File(label='TXT file with Japanese Text', file_count='single', file_types=['.txt'], type='filepath', interactive=True)
388 |       self.input_text_translator = gr.Textbox(label='Japanese Text', placeholder='Use this or the text file input, if you provide both, Kudasai will use the file input.', lines=10, show_label=True, interactive=True, type='text')
389 | +     self.input_translation_rules_file = gr.File(value = FileEnsurer.config_translation_settings_path, label='Translation Settings File', file_count='single', file_types=['.json'], type='filepath', interactive=True)
390 | +     self.input_genders_file = gr.File(value=FileEnsurer.config_translation_genders_path, label='Genders.json File', file_count='single', file_types=['.json'], type='filepath', interactive=True)
391 |
392 |   with gr.Row():
393 |       self.llm_option_dropdown = gr.Dropdown(label='Translation Method', choices=["OpenAI", "Gemini", "DeepL", "Google Translate"], value="DeepL", show_label=True, interactive=True)
494 |   interactive=True,
495 |   elem_id="number_of_concurrent_batches",
496 |   show_copy_button=True)
497 | +
498 | + self.gender_context_insertion_input_field = gr.Checkbox(label="Gender Context Insertion",
499 | +                                                         value=bool(GuiJsonUtil.fetch_translation_settings_key_values("base translation settings","gender_context_insertion")),
500 | +                                                         info=KudasaiGUI.description_dict.get("gender_context_insertion"),
501 | +                                                         show_label=True,
502 | +                                                         interactive=True,
503 | +                                                         elem_id="number_of_concurrent_batches")
504 | +
505 | + self.is_cote_input_field = gr.Checkbox(label="Is Cote",
506 | +                                        value=bool(GuiJsonUtil.fetch_translation_settings_key_values("base translation settings","is_cote")),
507 | +                                        info=KudasaiGUI.description_dict.get("is_cote"),
508 | +                                        show_label=True,
509 | +                                        interactive=True,
510 | +                                        elem_id="is_cote")
511 |
512 |   with gr.Column():
513 |
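One caveat with the two checkboxes above: if `fetch_translation_settings_key_values` hands back the stored setting as a string, `bool(...)` is True for any non-empty value, including `"false"`. A stricter coercion would look roughly like this (the `to_bool` helper is illustrative, not from the repo):

```python
def to_bool(value) -> bool:
    ## treat the usual textual falsy spellings as False instead of
    ## relying on bool(), which is True for any non-empty string
    if isinstance(value, str):
        return value.strip().lower() in ("true", "1", "yes")
    return bool(value)

assert to_bool("false") is False
assert to_bool("true") is True
```

Note also that the gender checkbox reuses `elem_id="number_of_concurrent_batches"`, which looks like a copy-paste carry-over from the field above it.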
976 |   JsonHandler.current_translation_settings = GuiJsonUtil.current_translation_settings
977 |
978 |   ## next, set the llm type
979 | + translation_methods = {
980 | +     "OpenAI": "openai",
981 | +     "Gemini": "gemini",
982 | +     "DeepL": "deepl",
983 | +     "Google Translate": "google translate"
984 | + }
985 | +
986 | + Translator.TRANSLATION_METHOD = translation_methods.get(translation_method, "") # type: ignore
987 |
988 |   ## api key as well
989 |   await set_translator_api_key(api_key)
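The dictionary lookup above replaces the removed if/elif chain from the old block, and `dict.get` with a `""` default makes the fallback for an unrecognized dropdown value explicit. The same pattern in isolation (the names mirror the diff; the asserts are illustrative):

```python
translation_methods = {
    "OpenAI": "openai",
    "Gemini": "gemini",
    "DeepL": "deepl",
    "Google Translate": "google translate",
}

## unknown choices fall back to "" instead of raising KeyError,
## which is what an unguarded translation_methods[choice] would do
assert translation_methods.get("DeepL", "") == "deepl"
assert translation_methods.get("not a method", "") == ""
```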
1148 |
1149 |   ##-------------------start-of-clear_translator_tab()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1150 |
1151 | + def clear_translator_tab() -> typing.Tuple[None, str, gr.File, gr.File, str, str, str]:
1152 |
1153 |   """
1154 |
1157 |   Returns:
1158 |       input_txt_file_translator (gr.File) : An empty file.
1159 |       input_text_translator (str) : An empty string.
1160 | +     input_translation_rules_file (gr.File) : An empty file.
1161 | +     input_genders_file (gr.File) : An empty file.
1162 |       translator_translated_text_output_field (str) : An empty string.
1163 |       je_check_text_field_translator (str) : An empty string.
1164 |       translator_debug_log_output_field (str) : An empty string.
1176 |   input_text_translator = ""
1177 |
1178 |   ## Also gonna want to reset the json input field to the default json file
1179 | + input_translation_rules_file = gr.File(value = FileEnsurer.config_translation_settings_path, label='Translation Settings File', file_count='single', file_types=['.json'], type='filepath', interactive=True)
1180 | +
1181 | + ## same for genders shit
1182 | + input_genders_file = gr.File(value=(FileEnsurer.config_translation_genders_path), label='Genders.json File', file_count='single', file_types=['.json'], type='filepath', interactive=True)
1183 |
1184 |   translator_translated_text_output_field = ""
1185 |   je_check_text_field_translator = ""
1186 |   translator_debug_log_output_field = ""
1187 |
1188 | + return input_file_translator, input_text_translator, input_translation_rules_file, input_genders_file, translator_translated_text_output_field, je_check_text_field_translator, translator_debug_log_output_field
1189 |
1190 |   ##-------------------start-of-clear_log_button_click()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1191 |
1219 |   num_malformed_batch_retries:int,
1220 |   batch_retry_timeout:int,
1221 |   num_concurrent_batches:int,
1222 | + gender_context_insertion:bool,
1223 | + is_cote:bool,
1224 |   openai_model:str,
1225 |   openai_system_message:str,
1226 |   openai_temperature:float,
1265 |
num_malformed_batch_retries,
|
1266 |
batch_retry_timeout,
|
1267 |
num_concurrent_batches,
|
1268 |
+
gender_context_insertion,
|
1269 |
+
is_cote,
|
1270 |
openai_model,
|
1271 |
openai_system_message,
|
1272 |
openai_temperature,
|
|
|
1296 |
|
1297 |
## create the new key-value pair list
|
1298 |
new_key_value_tuple_pairs = create_new_key_value_tuple_pairs(settings_list)
|
1299 |
+
|
1300 |
try:
|
1301 |
+
## and then have the GuiJsonUtil apply the new translator settings
|
1302 |
GuiJsonUtil.update_translation_settings_with_new_values(input_translation_rules_file, new_key_value_tuple_pairs)
|
1303 |
|
1304 |
except:
|
|
|
1329 |
("base translation settings", "number_of_malformed_batch_retries", str),
|
1330 |
("base translation settings", "batch_retry_timeout", str),
|
1331 |
("base translation settings", "number_of_concurrent_batches", str),
|
1332 |
+
("base translation settings", "gender_context_insertion", bool),
|
1333 |
+
("base translation settings", "is_cote", bool),
|
1334 |
("openai settings", "openai_model", str),
|
1335 |
("openai settings", "openai_system_message", str),
|
1336 |
("openai settings", "openai_temperature", float),
|
|
|
1388 |
("base translation settings", "number_of_malformed_batch_retries", str),
|
1389 |
("base translation settings", "batch_retry_timeout", str),
|
1390 |
("base translation settings", "number_of_concurrent_batches", str),
|
1391 |
+
("base translation settings", "gender_context_insertion", bool),
|
1392 |
+
("base translation settings", "is_cote", bool),
|
1393 |
("openai settings", "openai_model", str),
|
1394 |
("openai settings", "openai_system_message", str),
|
1395 |
("openai settings", "openai_temperature", float),
|
|
|
1423 |
raise gr.Error("Invalid Custom Translation Settings File")
|
1424 |
|
1425 |
return return_batch
|
1426 |
+
|
1427 |
+
##-------------------start-of-set_genders_file()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
1428 |
+
|
1429 |
+
def set_genders_file(input_gender_file:gr.File):
|
1430 |
+
|
1431 |
+
"""
|
1432 |
+
|
1433 |
+
Sets the genders file.
|
1434 |
+
|
1435 |
+
"""
|
1436 |
+
|
1437 |
+
try:
|
1438 |
+
|
1439 |
+
contents = gui_get_json_from_file(input_gender_file)
|
1440 |
+
|
1441 |
+
with open(FileEnsurer.config_translation_genders_path, "w", encoding="utf-8") as f:
|
1442 |
+
json.dump(contents, f, indent=4)
|
1443 |
+
|
1444 |
+
except Exception as e:
|
1445 |
+
raise gr.Error(f"Error {e} occurred while setting genders file.")
|
1446 |
+
|
1447 |
+
##----------------start-of-clear_genders_file()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
1448 |
+
|
1449 |
+
|
1450 |
+
def clear_genders_file(temp_file:gr.File) -> None:
|
1451 |
+
|
1452 |
+
"""
|
1453 |
+
|
1454 |
+
Clears the genders file.
|
1455 |
+
|
1456 |
+
"""
|
1457 |
+
|
1458 |
+
try:
|
1459 |
+
|
1460 |
+
with open(FileEnsurer.config_translation_genders_path, "w", encoding="utf-8") as f:
|
1461 |
+
json.dump(FileEnsurer.DEFAULT_GENDER_SETTINGS, f, indent=4)
|
1462 |
+
|
1463 |
+
with open(temp_file.name, "w", encoding="utf-8") as f: # type: ignore
|
1464 |
+
json.dump(FileEnsurer.DEFAULT_GENDER_SETTINGS, f, indent=4)
|
1465 |
+
|
1466 |
+
except:
|
1467 |
+
|
1468 |
+
pass
|
1469 |
+
|
1470 |
+
return None
|
1471 |
|
1472 |
##-------------------start-of-clear_translation_settings_input_fields()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
1473 |
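For reference, the file these two handlers copy around is the `genders.json` that `gender_context_insertion` scans names from. Its exact schema is not shown in this diff (the repo ships a `demo/genders.json` with the authoritative format), so the sketch below is only a plausible shape and usage:

```python
## plausible genders.json shape: a character-name-to-gender mapping (an
## assumption; see demo/genders.json in the repo for the real format)
genders = {
    "Ayanokouji Kiyotaka": "Male",
    "Horikita Suzune": "Female",
}

def build_gender_context(batch: str) -> str:
    ## emit one line per known name found in the batch; something like this
    ## is what would get appended to the system prompt (illustrative)
    return "\n".join(f"{name} is {gender}." for name, gender in genders.items() if name in batch)
```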
1489 |   "num_malformed_batch_retries_value": None,
1490 |   "batch_retry_timeout_value": None,
1491 |   "num_concurrent_batches_value": None,
1492 | + "gender_context_insertion_value": None,
1493 | + "is_cote_value": None,
1494 |   "openai_model_value": None,
1495 |   "openai_system_message_value": None,
1496 |   "openai_temperature_value": None,
1643 |   self.logging_tab_debug_log_output_field, ## debug log on log tab
1644 |   self.logging_tab_error_log_output_field]) ## error log on log tab
1645 |
1646 |   ##-------------------start-of-translate_with_translator()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1647 |
1648 |   ## for the actual translation, and the je check text
1667 |
1668 |   every=.1) ## update every 100ms
1669 |
1670 |   ##-------------------start-of translator_calculate_costs_button_click()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1671 |
1672 |   self.translator_calculate_cost_button.click(translator_calculate_costs_button_click,
1715 |   self.input_txt_file_translator, ## input txt file
1716 |   self.input_text_translator, ## input text
1717 |   self.input_translation_rules_file, ## Translation Settings File
1718 | + self.input_genders_file, ## Gender File
1719 |   self.translator_translated_text_output_field, ## translation output field
1720 |   self.translator_je_check_text_output_field, ## je check text field on translator tab
1721 |   self.translator_debug_log_output_field], ## debug log on translator tab
1743 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1744 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1745 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1746 | + self.gender_context_insertion_input_field, ## gender context insertion input field
1747 | + self.is_cote_input_field, ## is cote input field
1748 |   self.openai_model_input_field, ## openai model input field
1749 |   self.openai_system_message_input_field, ## openai system message input field
1750 |   self.openai_temperature_input_field, ## openai temperature input field
1785 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1786 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1787 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1788 | + self.gender_context_insertion_input_field, ## gender context insertion input field
1789 | + self.is_cote_input_field, ## is cote input field
1790 |   self.openai_model_input_field, ## openai model input field
1791 |   self.openai_system_message_input_field, ## openai system message input field
1792 |   self.openai_temperature_input_field, ## openai temperature input field
1825 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1826 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1827 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1828 | + self.gender_context_insertion_input_field, ## gender context insertion input field
1829 | + self.is_cote_input_field, ## is cote input field
1830 |   self.openai_model_input_field, ## openai model input field
1831 |   self.openai_system_message_input_field, ## openai system message input field
1832 |   self.openai_temperature_input_field, ## openai temperature input field
1865 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1866 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1867 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1868 | + self.gender_context_insertion_input_field, ## gender context insertion input field
1869 | + self.is_cote_input_field, ## is cote input field
1870 |   self.openai_model_input_field, ## openai model input field
1871 |   self.openai_system_message_input_field, ## openai system message input field
1872 |   self.openai_temperature_input_field, ## openai temperature input field
1904 |   self.number_of_malformed_batch_retries_input_field, ## num malformed batch retries input field
1905 |   self.batch_retry_timeout_input_field, ## batch retry timeout input field
1906 |   self.number_of_concurrent_batches_input_field, ## num concurrent batches input field
1907 | + self.gender_context_insertion_input_field, ## gender context insertion input field
1908 | + self.is_cote_input_field, ## is cote input field
1909 |   self.openai_model_input_field, ## openai model input field
1910 |   self.openai_system_message_input_field, ## openai system message input field
1911 |   self.openai_temperature_input_field, ## openai temperature input field
1930 |   self.deepl_split_sentences_input_field, ## deepl split sentences input field
1931 |   self.deepl_preserve_formatting_input_field, ## deepl preserve formatting input field
1932 |   self.deepl_formality_input_field]) ## deepl formality input field
1933 | +
1934 | +
1935 | + ##----------------start-of-input_genders_file_upload()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1936 | +
1937 | + self.input_genders_file.upload(set_genders_file,
1938 | +                                inputs=[self.input_genders_file],
1939 | +                                outputs=[]
1940 | +                                )
1941 | +
1942 | + self.input_genders_file.clear(clear_genders_file,
1943 | +                               inputs=[self.input_genders_file],
1944 | +                               outputs=[self.input_genders_file]
1945 | +                               )
1946 |
1947 |   ##-------------------start-of-logging_tab.select()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1948 |
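The two listeners above follow standard Gradio event wiring: `File.upload` fires with the uploaded file so it can be persisted, and `File.clear` restores the default and hands the component back through `outputs`. A stripped-down sketch of the same wiring (the handler bodies are illustrative):

```python
import gradio as gr

def on_upload(path):
    ## with type='filepath' the handler receives the temp file's path as a str
    print(f"received {path}")

def on_clear(path):
    return None  ## returning None empties the component again

with gr.Blocks() as demo:
    genders_file = gr.File(label="Genders.json File", file_types=[".json"], type="filepath")
    genders_file.upload(on_upload, inputs=[genders_file], outputs=[])
    genders_file.clear(on_clear, inputs=[genders_file], outputs=[genders_file])

## demo.launch()
```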