## Kudasai/handlers/json_handler.py
## built-in libraries
import json
import typing
import logging
## third-party libraries
from easytl import ALLOWED_GEMINI_MODELS, ALLOWED_OPENAI_MODELS
## custom modules
from modules.common.file_ensurer import FileEnsurer
from modules.common.toolkit import Toolkit
##-------------------start-of-JsonHandler---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
class JsonHandler:
"""
Handles the translation_settings.json file and interactions with it.
"""
current_translation_settings = dict()
with open(FileEnsurer.translation_settings_description_path, 'r', encoding='utf-8') as file:
translation_settings_message = file.read()
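## class-level state: the settings dict starts out empty and the settings description text is read once, when the class is first defined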
##-------------------start-of-validate_json()--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def validate_json() -> None:
"""
Validates the translation_settings.json file.
"""
base_translation_keys = [
"prompt_assembly_mode",
"number_of_lines_per_batch",
"sentence_fragmenter_mode",
"je_check_mode",
"number_of_malformed_batch_retries",
"batch_retry_timeout",
"number_of_concurrent_batches",
"gender_context_insertion",
"is_cote"
]
openai_keys = [
"openai_model",
"openai_system_message",
"openai_temperature",
"openai_top_p",
"openai_n",
"openai_stream",
"openai_stop",
"openai_logit_bias",
"openai_max_tokens",
"openai_presence_penalty",
"openai_frequency_penalty"
]
gemini_keys = [
"gemini_model",
"gemini_prompt",
"gemini_temperature",
"gemini_top_p",
"gemini_top_k",
"gemini_candidate_count",
"gemini_stream",
"gemini_stop_sequences",
"gemini_max_output_tokens"
]
deepl_keys = [
"deepl_context",
"deepl_split_sentences",
"deepl_preserve_formatting",
"deepl_formality"
]
validation_rules = {
"prompt_assembly_mode": lambda x: isinstance(x, int) and 1 <= x <= 2,
"number_of_lines_per_batch": lambda x: isinstance(x, int) and x > 0,
"sentence_fragmenter_mode": lambda x: isinstance(x, int) and 1 <= x <= 2,
"je_check_mode": lambda x: isinstance(x, int) and 1 <= x <= 2,
"number_of_malformed_batch_retries": lambda x: isinstance(x, int) and x >= 0,
"batch_retry_timeout": lambda x: isinstance(x, int) and x >= 0,
"gender_context_insertion": lambda x: isinstance(x, bool),
"is_cote": lambda x: isinstance(x, bool),
"number_of_concurrent_batches": lambda x: isinstance(x, int) and x >= 0,
"openai_model": lambda x: isinstance(x, str) and x in ALLOWED_OPENAI_MODELS,
"openai_system_message": lambda x: x not in ["", "None", None],
"openai_temperature": lambda x: isinstance(x, float) and 0 <= x <= 2,
"openai_top_p": lambda x: isinstance(x, float) and 0 <= x <= 1,
"openai_max_tokens": lambda x: x is None or isinstance(x, int) and x > 0,
"openai_presence_penalty": lambda x: isinstance(x, float) and -2 <= x <= 2,
"gemini_model": lambda x: isinstance(x, str) and x in ALLOWED_GEMINI_MODELS,
"gemini_prompt": lambda x: x not in ["", "None", None],
"gemini_temperature": lambda x: isinstance(x, float) and 0 <= x <= 2,
"gemini_top_p": lambda x: x is None or (isinstance(x, float) and 0 <= x <= 2),
"gemini_top_k": lambda x: x is None or (isinstance(x, int) and x >= 0),
"gemini_max_output_tokens": lambda x: x is None or isinstance(x, int),
"deepl_context": lambda x: isinstance(x, str),
"deepl_split_sentences": lambda x: isinstance(x, str),
"deepl_preserve_formatting": lambda x: isinstance(x, bool),
"deepl_formality": lambda x: isinstance(x, str)
}
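## note: keys that are forcibly overwritten further down (openai_n, openai_stream, openai_stop, openai_logit_bias,
## gemini_candidate_count, gemini_stream, gemini_stop_sequences) intentionally have no rule here;
## openai_frequency_penalty is only range-checked when changed through convert_to_correct_type()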
try:
## ensure categories are present
assert "base translation settings" in JsonHandler.current_translation_settings, "base translation settings not found"
assert "openai settings" in JsonHandler.current_translation_settings, "openai settings not found"
assert "gemini settings" in JsonHandler.current_translation_settings, "gemini settings not found"
assert "deepl settings" in JsonHandler.current_translation_settings, "deepl settings not found"
## assign to variables to reduce repetitive access
base_translation_settings = JsonHandler.current_translation_settings["base translation settings"]
openai_settings = JsonHandler.current_translation_settings["openai settings"]
gemini_settings = JsonHandler.current_translation_settings["gemini settings"]
deepl_settings = JsonHandler.current_translation_settings["deepl settings"]
## ensure all keys are present
assert all(key in base_translation_settings for key in base_translation_keys), "base translation settings keys missing"
assert all(key in openai_settings for key in openai_keys), "openai settings keys missing"
assert all(key in gemini_settings for key in gemini_keys), "gemini settings keys missing"
assert all(key in deepl_settings for key in deepl_keys), "deepl settings keys missing"
## ensure that those sections don't have any extra keys
assert all(key in base_translation_keys for key in base_translation_settings), "base translation settings has extra keys"
assert all(key in openai_keys for key in openai_settings), "openai settings has extra keys"
assert all(key in gemini_keys for key in gemini_settings), "gemini settings has extra keys"
assert all(key in deepl_keys for key in deepl_settings), "deepl settings has extra keys"
## validate each key using the validation rules
for key, validate in validation_rules.items():
if(key in base_translation_settings and not validate(base_translation_settings[key])):
raise ValueError(f"Invalid value for {key}")
elif(key in openai_settings and not validate(openai_settings[key])):
raise ValueError(f"Invalid value for {key}")
elif(key in gemini_settings and not validate(gemini_settings[key])):
raise ValueError(f"Invalid value for {key}")
elif(key in deepl_settings and not validate(deepl_settings[key])):
raise ValueError(f"Invalid value for {key}")
## streaming, stop sequences, and logit bias are not user-configurable; force them to safe defaults
openai_settings["openai_stop"] = None
openai_settings["openai_logit_bias"] = None
openai_settings["openai_stream"] = False
gemini_settings["gemini_stop_sequences"] = None
gemini_settings["gemini_stream"] = False
## force n and candidate_count to 1
openai_settings["openai_n"] = 1
gemini_settings["gemini_candidate_count"] = 1
## ensure deepl_formality and deepl_split_sentences are in allowed values
if(isinstance(deepl_settings["deepl_formality"], str) and deepl_settings["deepl_formality"] not in ["default", "more", "less", "prefer_more", "prefer_less"]):
raise ValueError("Invalid value for deepl_formality")
if(isinstance(deepl_settings["deepl_split_sentences"], str) and deepl_settings["deepl_split_sentences"] not in ["OFF", "ALL", "NO_NEWLINES"]):
raise ValueError("Invalid value for deepl_split_sentences")
except Exception as e:
logging.warning("translation_settings.json is not valid, setting to invalid_placeholder, current:"
f"\n{JsonHandler.current_translation_settings}"
f"\nReason: {e}")
JsonHandler.current_translation_settings = FileEnsurer.INVALID_TRANSLATION_SETTINGS_PLACEHOLDER
## only log the settings as valid when no exception was raised above
else:
logging.debug("translation_settings.json is valid, current:"
f"\n{JsonHandler.current_translation_settings}")
##-------------------start-of-reset_translation_settings_to_default()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def reset_translation_settings_to_default() -> None:
"""
Resets the translation_settings.json to default.
"""
JsonHandler.current_translation_settings = FileEnsurer.DEFAULT_TRANSLATION_SETTING
JsonHandler.dump_translation_settings()
JsonHandler.load_translation_settings()
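## resetting writes the defaults back to disk and reloads them, so the in-memory copy and the file stay in sync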
##-------------------start-of-dump_translation_settings()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def dump_translation_settings() -> None:
"""
Writes the current in-memory translation settings to the translation_settings.json file on disk.
"""
with open(FileEnsurer.config_translation_settings_path, 'w+', encoding='utf-8') as file:
json.dump(JsonHandler.current_translation_settings, file)
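## 'w+' truncates any existing file before the current settings are written out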
##-------------------start-of-load_translation_settings()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def load_translation_settings() -> None:
"""
Loads the translation_settings.json file into memory.
"""
with open(FileEnsurer.config_translation_settings_path, 'r', encoding='utf-8') as file:
JsonHandler.current_translation_settings = json.load(file)
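## note: no validation happens here; validate_json() is expected to be called separately after loading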
##-------------------start-of-log_translation_settings()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def log_translation_settings(output_to_console:bool=False, specific_section:str | None = None) -> None:
"""
Prints the translation_settings.json file to the log.
Logs by default, but can be set to print to console as well.
Parameters:
output_to_console (bool | optional | default=False) : Whether to print to console as well.
specific_section (str | None | optional | default=None) : If given, only that section (plus the base translation settings) is logged.
"""
sections = ["base translation settings", "openai settings", "gemini settings", "deepl settings"]
## if a specific section is provided, only print that section and base translation settings
if(specific_section is not None):
specific_section = specific_section.lower()
sections = [section for section in sections if section.lower() == specific_section or section == "base translation settings"]
for section in sections:
print("-------------------")
print(f"{section.capitalize()}:")
print("-------------------")
for key, value in JsonHandler.current_translation_settings.get(section, {}).items():
log_message = f"{key} : {value}"
logging.debug(log_message)
if(output_to_console):
print(log_message)
##-------------------start-of-change_translation_settings()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def change_translation_settings() -> None:
"""
Allows the user to change the settings of the translation_settings.json file
"""
while(True):
Toolkit.clear_console()
settings_print_message = JsonHandler.translation_settings_message + SettingsChanger.generate_settings_change_menu()
action = input(settings_print_message).lower()
if(action == 'q'):
break
## loads a custom json directly
if(action == "c"):
SettingsChanger.load_custom_json()
elif(action == "d"):
print("Resetting to default settings.")
JsonHandler.reset_translation_settings_to_default()
elif(action in JsonHandler.current_translation_settings["base translation settings"]):
SettingsChanger.change_setting("base translation settings", action)
elif(action in JsonHandler.current_translation_settings["openai settings"]):
SettingsChanger.change_setting("openai settings", action)
elif(action in JsonHandler.current_translation_settings["gemini settings"]):
SettingsChanger.change_setting("gemini settings", action)
elif(action in JsonHandler.current_translation_settings["deepl settings"]):
SettingsChanger.change_setting("deepl settings", action)
else:
print("Invalid setting name. Please try again.")
Toolkit.pause_console("\nPress enter to continue.")
JsonHandler.dump_translation_settings()
##-------------------start-of-convert_to_correct_type()-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def convert_to_correct_type(setting_name:str, initial_value:str) -> typing.Any:
"""
Converts the input string to the correct type based on the setting name.
Parameters:
setting_name (str) : The name of the setting to convert.
initial_value (str) : The initial value to convert.
Returns:
(typing.Any) : The converted value.
"""
value = initial_value
type_expectations = {
"prompt_assembly_mode": {"type": int, "constraints": lambda x: 1 <= x <= 2},
"number_of_lines_per_batch": {"type": int, "constraints": lambda x: x > 0},
"sentence_fragmenter_mode": {"type": int, "constraints": lambda x: 1 <= x <= 2},
"je_check_mode": {"type": int, "constraints": lambda x: 1 <= x <= 2},
"number_of_malformed_batch_retries": {"type": int, "constraints": lambda x: x >= 0},
"batch_retry_timeout": {"type": int, "constraints": lambda x: x >= 0},
"number_of_concurrent_batches": {"type": int, "constraints": lambda x: x >= 0},
"gender_context_insertion": {"type": bool, "constraints": lambda x: isinstance(x, bool)},
"is_cote": {"type": bool, "constraints": lambda x: isinstance(x, bool)},
"openai_model": {"type": str, "constraints": lambda x: x in ALLOWED_OPENAI_MODELS},
"openai_system_message": {"type": str, "constraints": lambda x: x not in ["", "None", None]},
"openai_temperature": {"type": float, "constraints": lambda x: 0 <= x <= 2},
"openai_top_p": {"type": float, "constraints": lambda x: 0 <= x <= 2},
"openai_n": {"type": int, "constraints": lambda x: x == 1},
"openai_stream": {"type": bool, "constraints": lambda x: x is False},
"openai_stop": {"type": None, "constraints": lambda x: x is None},
"openai_logit_bias": {"type": None, "constraints": lambda x: x is None},
"openai_max_tokens": {"type": int, "constraints": lambda x: x is None or isinstance(x, int)},
"openai_presence_penalty": {"type": float, "constraints": lambda x: -2 <= x <= 2},
"openai_frequency_penalty": {"type": float, "constraints": lambda x: -2 <= x <= 2},
"gemini_model": {"type": str, "constraints": lambda x: x in ALLOWED_GEMINI_MODELS},
"gemini_prompt": {"type": str, "constraints": lambda x: x not in ["", "None", None]},
"gemini_temperature": {"type": float, "constraints": lambda x: 0 <= x <= 2},
"gemini_top_p": {"type": float, "constraints": lambda x: x is None or (isinstance(x, float) and 0 <= x <= 2)},
"gemini_top_k": {"type": int, "constraints": lambda x: x is None or x >= 0},
"gemini_candidate_count": {"type": int, "constraints": lambda x: x == 1},
"gemini_stream": {"type": bool, "constraints": lambda x: x is False},
"gemini_stop_sequences": {"type": None, "constraints": lambda x: x is None},
"gemini_max_output_tokens": {"type": int, "constraints": lambda x: x is None or isinstance(x, int)},
"deepl_context": {"type": str, "constraints": lambda x: isinstance(x, str)},
"deepl_split_sentences": {"type": str, "constraints": lambda x: isinstance(x, str)},
"deepl_preserve_formatting": {"type": bool, "constraints": lambda x: isinstance(x, bool)},
"deepl_formality": {"type": str, "constraints": lambda x: isinstance(x, str)}
}
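## a "type" of None marks settings that only ever hold None (they are forced to None in validate_json())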
if(setting_name not in type_expectations):
raise ValueError("Invalid setting name")
setting_info = type_expectations[setting_name]
if("stream" in setting_name):
value = Toolkit.string_to_bool(initial_value)
elif(initial_value.lower() in ["none","null"]):
value = None
if(setting_info["type"] is None):
converted_value = None
elif(setting_info["type"] == int) or (setting_info["type"] == float):
if(value is None or value == ''):
converted_value = None
elif(setting_info["type"] == int):
converted_value = int(value)
else:
converted_value = float(value)
else:
converted_value = setting_info["type"](value)
if("constraints" in setting_info and not setting_info["constraints"](converted_value)):
raise ValueError(f"{setting_name} out of range")
return converted_value
##-------------------start-of-SettingsChanger---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
class SettingsChanger:
"""
Handles changing the settings of the translation_settings.json file.
"""
##-------------------start-of-generate_settings_change_menu()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def generate_settings_change_menu() -> str:
"""
Generates the settings change menu.
Returns:
menu (str) : The settings change menu.
"""
menu = """
Current settings:
----------------------------------------------------------------
"""
settings = ["base translation settings", "openai settings", "gemini settings", "deepl settings"]
for setting in settings:
for key, value in JsonHandler.current_translation_settings[setting].items():
menu += key + " : " + str(value) + "\n"
menu += "\n"
menu += """
It is recommended that you maximize the console window for this; you will need the space to see all of the settings above.
Enter the name of the setting you want to change, type 'd' to reset to default, type 'c' to load an external/custom json directly, or type 'q' to quit settings change :
"""
return menu
##-------------------start-of-load_custom_json()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def load_custom_json() -> None:
"""
Loads a custom json into the translation_settings.json file.
"""
Toolkit.clear_console()
## save the old settings in case the custom json is invalid
old_translation_settings = JsonHandler.current_translation_settings
try:
## loads the custom json file
with open(FileEnsurer.external_translation_settings_path, 'r', encoding='utf-8') as file:
JsonHandler.current_translation_settings = json.load(file)
JsonHandler.validate_json()
## validate_json() replaces the settings with the invalid placeholder when validation fails, so compare against it to detect an invalid file
assert JsonHandler.current_translation_settings != FileEnsurer.INVALID_TRANSLATION_SETTINGS_PLACEHOLDER
JsonHandler.dump_translation_settings()
print("Settings loaded successfully.")
except AssertionError:
print("Invalid JSON file. Please try again.")
JsonHandler.current_translation_settings = old_translation_settings
except FileNotFoundError:
print("Missing JSON file. Make sure you have a json in the same directory as kudasai.py and that the json is named \"translation_settings.json\". Please try again.")
JsonHandler.current_translation_settings = old_translation_settings
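## the custom json is read from FileEnsurer.external_translation_settings_path (presumably the location described in the FileNotFoundError message above)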
##-------------------start-of-change_setting()---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
@staticmethod
def change_setting(setting_area:str, setting_name:str) -> None:
"""
Changes the setting of the translation_settings.json file.
Parameters:
setting_area (str) : The area of the setting to change.
setting_name (str) : The name of the setting to change.
"""
new_value = input(f"\nEnter a new value for {setting_name}: ")
try:
converted_value = JsonHandler.convert_to_correct_type(setting_name, new_value)
JsonHandler.current_translation_settings[setting_area][setting_name] = converted_value
print(f"Updated {setting_name} to {converted_value}.")
except Exception as e:
print(f"Invalid input. No changes made. {e}")