|
|
|
|
|
|
|
import string |
|
import time |
|
import re |
|
import base64 |
|
import time |
|
|
|
|
|
from translation_services.deepl_service import DeepLService |
|
from modules.common.toolkit import Toolkit |
|
from modules.common.file_ensurer import FileEnsurer |
|
from modules.common.logger import Logger |
|
from modules.common.decorators import permission_error_decorator |
|
from modules.common.exceptions import AuthorizationException, QuotaExceededException |
|
|
|
|
|
|
|
class Kaiseki:

    """

    Kaiseki is a secondary class that is used to interact with the Deepl API and translate Japanese text sentence by sentence.

    Kaiseki is considered inferior to Kijiku, please consider using Kijiku instead.

    All state lives in class-level attributes and every method is static, so the class is used without being instantiated.

    """

    # Input lines, and the three output buffers that are written to disk by write_kaiseki_results().
    text_to_translate = []
    translated_text = []
    je_check_text = []
    error_text = []

    # Human readable summary that is built up by translate() / assemble_results().
    translation_print_result = ""

    # Per-sentence scratch state, rebuilt by separate_sentence() for every sentence.
    # sentence_punctuation holds the punctuation found after each part, or None when the sentence ends without any.
    sentence_parts = []
    sentence_punctuation = []

    # Five flags set by separate_sentence():
    #   [0] double quotes were present (stripped, re-added after translation)
    #   [1] a tilde was present
    #   [2] exactly two single quotes, neither at the very start nor the very end (kept in the text)
    #   [3] exactly two single quotes otherwise (stripped, re-added after translation)
    #   [4] both "(" and ")" were present (stripped, re-added after translation)
    special_punctuation = []

    current_sentence = ""
    translated_sentence = ""

    # Multi-character punctuation groups recognised by separate_sentence(), longest first.
    # Every tuple holds groups of one single width; the first matching tuple wins.
    _PUNCTUATION_GROUPS = (
        ("......",),
        (".....", "...!?"),
        ("...!", "...?", "---.", "....", "!..."),
        ("---", "..."),
        ("!?",),
    )

    @staticmethod
    def translate() -> None:

        """

        Translates the text.

        Runs initialize() and commence_translation(); whatever happens, the result summary is assembled afterwards.

        """

        Logger.clear_batch()

        # Set once up front so the summary can still be built if initialize() fails.
        time_start = time.time()

        try:

            Kaiseki.initialize()

            # Restart the clock so time spent entering the api key is not counted.
            time_start = time.time()

            Kaiseki.commence_translation()

        except Exception as e:

            Kaiseki.translation_print_result += "An error has occurred, outputting results so far..."

            FileEnsurer.handle_critical_exception(e)

        finally:

            time_end = time.time()

            Kaiseki.assemble_results(time_start, time_end)

    @staticmethod
    def initialize() -> None:

        """

        Initializes the Kaiseki class by getting the API key and creating the translator object.

        The saved key file is tried first; if anything about that fails, the user is prompted for a key,
        which is saved (base64 encoded) once DeepLService has accepted it.

        Raises:
        AuthorizationException : re-raised when it occurs while setting up the key the user entered.
        Exception : any other error raised while setting up or saving the entered key is re-raised.

        """

        try:

            # The key is stored base64 encoded; this is an encoding, not encryption.
            with open(FileEnsurer.deepl_api_key_path, 'r', encoding='utf-8') as file:
                api_key = base64.b64decode((file.read()).encode('utf-8')).decode('utf-8')

            DeepLService.set_api_key(api_key)
            DeepLService.test_api_key_validity()

            Logger.log_action("Used saved api key in " + FileEnsurer.deepl_api_key_path, output=True)

        except Exception:

            # Any failure with the saved key (missing file, bad contents, rejected key) falls back to asking the user.
            api_key = input("DO NOT DELETE YOUR COPY OF THE API KEY\n\nPlease enter the deepL api key you have : ")

            try:

                DeepLService.set_api_key(api_key)
                DeepLService.test_api_key_validity()

                time.sleep(.1)

                FileEnsurer.standard_overwrite_file(FileEnsurer.deepl_api_key_path, base64.b64encode(api_key.encode('utf-8')).decode('utf-8'), omit=True)

                time.sleep(.1)

            except AuthorizationException:

                Toolkit.clear_console()

                Logger.log_action("Authorization error with creating translator object, please double check your api key as it appears to be incorrect.\nKaiseki will now exit.", output=True)

                Toolkit.pause_console()

                # Bare raise: re-raise the AuthorizationException itself. The previous "raise e" re-raised the
                # unrelated exception from the failed saved-key attempt, because this handler binds no name.
                raise

            except Exception as e:

                Toolkit.clear_console()

                Logger.log_action("Unknown error with creating translator object, The error is as follows " + str(e) + "\nKaiseki will now exit.", output=True)

                Toolkit.pause_console()

                raise

        Toolkit.clear_console()
        Logger.log_barrier()

    @staticmethod
    def reset_static_variables() -> None:

        """

        Resets the static variables of the Kaiseki class.
        For when running multiple translations in a row through webgui.

        """

        Logger.clear_batch()

        Kaiseki.text_to_translate = []
        Kaiseki.translated_text = []
        Kaiseki.je_check_text = []
        Kaiseki.error_text = []
        Kaiseki.translation_print_result = ""
        Kaiseki.sentence_parts = []
        Kaiseki.sentence_punctuation = []
        Kaiseki.special_punctuation = []
        Kaiseki.current_sentence = ""
        Kaiseki.translated_sentence = ""

    @staticmethod
    def commence_translation() -> None:

        """

        Commences the translation process using all the functions in the Kaiseki class.

        Every entry of text_to_translate produces exactly one entry in translated_text. Lines that are pov markers,
        part markers, pure punctuation, plain english or empty are copied through untouched; everything else is
        split, translated and reassembled.

        Raises:
        Exception : when FileEnsurer.do_interrupt is set.

        """

        total = len(Kaiseki.text_to_translate)

        for i, sentence in enumerate(Kaiseki.text_to_translate):

            if(FileEnsurer.do_interrupt):
                raise Exception("Interrupted by user.")

            Kaiseki.current_sentence = sentence

            Logger.log_action("Initial Sentence : " + sentence)

            # Only the digits 1-9 and spaces, with at least one digit.
            is_numeric_marker = all(char in "123456789 " for char in sentence) and any(char != " " for char in sentence)

            if(any(char in sentence for char in ["▼", "△", "◇"])):
                Kaiseki.translated_text.append(sentence + '\n')
                Logger.log_action("Sentence : " + sentence + ", Sentence is a pov change... leaving intact.")

            elif("part" in sentence.lower() or is_numeric_marker):
                Kaiseki.translated_text.append(sentence + '\n')
                Logger.log_action("Sentence : " + sentence + ", Sentence is part marker... leaving intact.")

            elif(re.match(r'^[\W_\s\n-]+$', sentence) and not any(char in sentence for char in ["」", "「", "«", "»"])):
                Logger.log_action("Sentence : " + sentence + ", Sentence is punctuation... skipping.")
                Kaiseki.translated_text.append(sentence + "\n")

            elif(re.match(r'^[A-Za-z0-9\s\.,\'\?!]+\n*$', sentence)):
                Logger.log_action("Sentence : " + sentence + ", Sentence is english... skipping translation.")
                Kaiseki.translated_text.append(sentence + "\n")

            elif(len(sentence) == 0 or sentence.isspace()):
                Logger.log_action("Sentence is empty... skipping translation.\n")
                Kaiseki.translated_text.append(sentence + "\n")

            else:

                Kaiseki.separate_sentence()

                Kaiseki.translate_sentence()

                Kaiseki._reassemble_translated_sentence(i)

            Toolkit.clear_console()

            Logger.log_action(str(i + 1) + "/" + str(total) + " completed.", output=True)
            Logger.log_barrier()

    @staticmethod
    def _reassemble_translated_sentence(index:int) -> None:

        """

        Restores the punctuation that separate_sentence() stripped from the sentence at translated_text[index],
        terminates it with a newline and records the J->E check entry.

        Parameters:
        index (int) : position of the freshly translated sentence in translated_text.

        """

        text = Kaiseki.translated_text[index]

        if(text):

            # The second-to-last character is inspected, as before. Indexing [-2] on a one-character
            # translation raised IndexError and aborted the whole run, so fall back to the only character.
            # NOTE(review): confirm whether the last character was the intended probe.
            probe = text[-2] if len(text) > 1 else text[-1]

            # Add a period when the source sentence ended without punctuation.
            if(probe not in string.punctuation and Kaiseki.sentence_punctuation[-1] is None):
                text += "."

        if(Kaiseki.special_punctuation[0]):
            text = '"' + text + '"'

        # The source had no double quotes, so any that appear in the translation are turned into single quotes.
        elif('"' in text):
            text = text.replace('"', "'")

        if(Kaiseki.special_punctuation[3]):
            text = "'" + text + "'"

        if(Kaiseki.special_punctuation[4]):
            text = "(" + text + ")"

        Logger.log_action("Translated and Reassembled Sentence : " + text)

        text += "\n"

        Kaiseki.translated_text[index] = text

        # current_sentence is the version already stripped by separate_sentence().
        Kaiseki.je_check_text.append(str(index + 1) + ": " + Kaiseki.current_sentence + "\n " + text + "\n")

    @staticmethod
    def _match_punctuation_group(sentence:str, index:int) -> int:

        """

        Returns the width of the punctuation group that starts at sentence[index], or 0 when none does.

        Parameters:
        sentence (str) : the sentence being split.
        index (int) : position to test.

        Returns:
        width (int) : length of the first matching group in _PUNCTUATION_GROUPS, 0 if nothing matches.

        """

        for group in Kaiseki._PUNCTUATION_GROUPS:

            width = len(group[0])

            # A slice that runs off the end is shorter than width and therefore never matches.
            if(sentence[index:index + width] in group):
                return width

        return 0

    @staticmethod
    def separate_sentence() -> None:

        """

        This function separates the sentence into parts and punctuation.

        Reads current_sentence, strips the special punctuation recorded in special_punctuation from it
        (current_sentence is updated accordingly) and fills sentence_parts / sentence_punctuation.

        """

        Kaiseki.sentence_parts = []
        Kaiseki.sentence_punctuation = []
        Kaiseki.special_punctuation = [False,False,False,False,False]

        sentence = Kaiseki.current_sentence

        if('"' in sentence):
            sentence = sentence.replace('"', '')
            Kaiseki.special_punctuation[0] = True

        if('~' in sentence):
            Kaiseki.special_punctuation[1] = True

        if(sentence.count("'") == 2):

            if(sentence[0] != "'" and sentence[-1] != "'"):
                Kaiseki.special_punctuation[2] = True

            else:
                Kaiseki.special_punctuation[3] = True
                sentence = sentence.replace("'", "")

        if("(" in sentence and ")" in sentence):
            Kaiseki.special_punctuation[4] = True
            sentence = sentence.replace("(", "").replace(")", "")

        Kaiseki.current_sentence = sentence

        fragment = ""
        i = 0

        while(i < len(sentence)):

            char = sentence[i]

            if(char not in ".!?-"):
                fragment += char
                i += 1
                continue

            # All groups are tested in one first-match-wins pass. Previously the six-dot test was a
            # separate "if", so after consuming "......" the chain ran again on the last dot and
            # emitted an extra "." punctuation entry.
            width = Kaiseki._match_punctuation_group(sentence, i)

            if(width):

                # A single quote right after the group is pulled into the current part.
                if(sentence[i + width:i + width + 1] == "'"):
                    fragment += "'"
                    i += 1

                if(fragment != ""):
                    Kaiseki.sentence_parts.append(fragment)

                # NOTE(review): after the quote skip above this slice is shifted by one character, so it
                # drops the first punctuation character and includes the quote. Behaviour kept as is; confirm intent.
                Kaiseki.sentence_punctuation.append(sentence[i:i + width])

                # The shared increment at the bottom of the loop consumes the final character of the group.
                i += width - 1
                fragment = ""

            elif(char != "-"):

                # NOTE(review): the quote is added to this part but not skipped, so the next
                # iteration reads it again as ordinary text. Behaviour kept as is; confirm intent.
                if(i + 1 < len(sentence) and sentence[i + 1] == "'"):
                    fragment += "'"

                if(fragment != ""):
                    Kaiseki.sentence_parts.append(fragment)

                Kaiseki.sentence_punctuation.append(char)
                fragment = ""

            else:
                # A lone "-" is ordinary text.
                fragment += char

            i += 1

        # Trailing text without closing punctuation is marked with None.
        if(fragment):
            Kaiseki.sentence_parts.append(fragment)
            Kaiseki.sentence_punctuation.append(None)

        Logger.log_action("Fragmented Sentence Parts " + str(Kaiseki.sentence_parts))
        Logger.log_action("Sentence Punctuation " + str(Kaiseki.sentence_punctuation))
        Logger.log_action("Does Sentence Have Special Punctuation : " + str(Kaiseki.special_punctuation))

        Kaiseki.sentence_parts = [part.strip() for part in Kaiseki.sentence_parts]

    @staticmethod
    def translate_sentence() -> None:

        """

        This function translates each part of a sentence.

        Every entry of sentence_parts is sent to DeepLService, rejoined with its punctuation from
        sentence_punctuation, and the assembled sentence is appended to translated_text.

        Raises:
        QuotaExceededException : re-raised after being logged.

        """

        # Trailing punctuation (other than quotes) is stripped from what DeepL returns.
        strippable = ''.join(c for c in string.punctuation if c not in "'\"")

        quote = ""

        tilde_active = False
        single_quote_active = False

        for i, part in enumerate(Kaiseki.sentence_parts):

            # Tildes are removed before translating and re-added afterwards.
            if(Kaiseki.special_punctuation[1] and "~" in part):
                part = part.replace("~","")
                tilde_active = True

            # An embedded 'quoted' span is swapped for the placeholder 'quote' and translated on its own.
            if(Kaiseki.special_punctuation[2] and "'" in part and (part[0] != "'" and part[-1] != "'")):

                quote_start = part.index("'")
                quote_end = part.rindex("'")

                quote = part[quote_start+1:quote_end]
                part = part[:quote_start+1] + "quote" + part[quote_end:]

                single_quote_active = True

            Kaiseki.sentence_parts[i] = part

            try:

                results = DeepLService.translate(part, source_lang= "JA", target_lang="EN-US")

                translated_part = results.rstrip(strippable).rstrip()

                if(tilde_active):
                    translated_part += "~"
                    tilde_active = False

                if(single_quote_active):

                    quote = DeepLService.translate(quote, source_lang= "JA", target_lang="EN-US")

                    quote = quote.rstrip(strippable).rstrip()

                    translated_part = translated_part.replace("'quote'","'" + quote + "'",1)

                    # Reset like tilde_active. Without this every later part re-sent the already
                    # translated quote to DeepL.
                    single_quote_active = False

                # More punctuation than parts means the sentence opened with punctuation; emit it first.
                if(len(Kaiseki.sentence_punctuation) > len(Kaiseki.sentence_parts)):
                    Kaiseki.translated_sentence += Kaiseki.sentence_punctuation[0]
                    Kaiseki.sentence_punctuation.pop(0)

                if(Kaiseki.sentence_punctuation[i] is not None):
                    Kaiseki.translated_sentence += translated_part + Kaiseki.sentence_punctuation[i]

                else:
                    Kaiseki.translated_sentence += translated_part

                # Separate parts with a space, except after the last one.
                if(i != len(Kaiseki.sentence_punctuation)-1):
                    Kaiseki.translated_sentence += " "

            except QuotaExceededException:

                Logger.log_action("DeepL API quota exceeded.", output=True)

                Toolkit.pause_console()

                raise

            except ValueError as e:

                # An empty part contributes nothing; any other ValueError is recorded and marked in the output.
                if(str(e) == "Text must not be empty."):
                    Kaiseki.translated_sentence += ""

                else:
                    Kaiseki.translated_sentence += "ERROR"
                    error = str(e)

                    Logger.log_action("Error is : " + error)
                    Kaiseki.error_text.append("Error is : " + error)

        Kaiseki.translated_text.append(Kaiseki.translated_sentence)
        Kaiseki.translated_sentence = ""

    @staticmethod
    def assemble_results(time_start:float, time_end:float) -> None:

        """

        Prepares the results of the translation for printing.

        Parameters:
        time_start (float) : the time the translation started.
        time_end (float) : the time the translation ended.

        """

        Kaiseki.translation_print_result += "Time Elapsed : " + Toolkit.get_elapsed_time(time_start, time_end)

        Kaiseki.translation_print_result += "\n\nDebug text have been written to : " + FileEnsurer.debug_log_path
        Kaiseki.translation_print_result += "\nJ->E text have been written to : " + FileEnsurer.je_check_path
        Kaiseki.translation_print_result += "\nTranslated text has been written to : " + FileEnsurer.translated_text_path
        Kaiseki.translation_print_result += "\nErrors have been written to : " + FileEnsurer.error_log_path + "\n"

    @staticmethod
    @permission_error_decorator()
    def write_kaiseki_results() -> None:

        """

        This function is called to write the results of the Kaiseki translation module to the output directory.

        The same results, together with the debug log, are then handed to FileEnsurer.archive_results().

        """

        FileEnsurer.standard_create_directory(FileEnsurer.output_dir)

        # The error log is appended to; the other two files are overwritten.
        with open(FileEnsurer.error_log_path, 'a+', encoding='utf-8') as file:
            file.writelines(Kaiseki.error_text)

        with open(FileEnsurer.je_check_path, 'w', encoding='utf-8') as file:
            file.writelines(Kaiseki.je_check_text)

        with open(FileEnsurer.translated_text_path, 'w', encoding='utf-8') as file:
            file.writelines(Kaiseki.translated_text)

        FileEnsurer.standard_create_directory(FileEnsurer.archive_dir)

        timestamp = Toolkit.get_timestamp(is_archival=True)

        # Push the batch before the log file is read back for the archive below.
        Logger.push_batch()
        Logger.clear_batch()

        list_of_result_tuples = [('kaiseki_translated_text', Kaiseki.translated_text),
                                 ('kaiseki_je_check_text', Kaiseki.je_check_text),
                                 ('kaiseki_error_log', Kaiseki.error_text),
                                 ('debug_log', FileEnsurer.standard_read_file(Logger.log_file_path))]

        FileEnsurer.archive_results(list_of_result_tuples,
                                    module='kaiseki', timestamp=timestamp)
|
|