Spaces:

Aeon-Avinash
/

GenAI_Multi_Language_Translator

Runtime error

App Files Files Community

GenAI_Multi_Language_Translator / lang_codes.py

Aeon-Avinash

Update lang_codes.py

c96c4a2 verified about 1 month ago

raw history blame contribute delete

No virus

2.21 kB

	import requests
	import json

	def generate_lang_code_file():
	    """Download the Flores README and save its language table as JSON.

	    Scans the README for the first line containing "Language coverage",
	    then treats every later line containing a pipe as a table row. Each row
	    becomes a ``{"code", "identifier", "name"}`` object; the list is
	    written to ``flores_language_codes.json`` in the current working
	    directory.

	    Raises:
	        requests.RequestException: If the download fails, times out, or
	            the server answers with an HTTP error status.
	    """
	    # URL of the Flores README containing the language codes
	    url = 'https://raw.githubusercontent.com/openlanguagedata/flores/main/README.md'

	    # Bound the wait and fail loudly on an HTTP error, so an error page is
	    # never parsed and saved as if it were the language table.
	    response = requests.get(url, timeout=30)
	    response.raise_for_status()

	    languages = []
	    start_parsing = False

	    for line in response.text.split('\n'):
	        if "Language coverage" in line:
	            start_parsing = True
	            continue

	        # Blank lines and prose contain no pipe, so one check skips both.
	        if not start_parsing or '|' not in line:
	            continue

	        parts = line.split('|')
	        # A row "| code | identifier | name |" splits into at least four
	        # parts (the first is the empty text before the leading pipe).
	        # Shorter rows cannot supply all three columns.
	        if len(parts) < 4:
	            continue

	        languages.append({
	            # [1:-1] drops the first and last character of the cell --
	            # presumably the backticks around the value; TODO confirm
	            # against the README's current table format.
	            "code": parts[1].strip()[1:-1],
	            "identifier": parts[2].strip()[1:-1],
	            "name": parts[3].strip(),
	        })

	    # Omit the labels and divider
	    languages = languages[2:]

	    # Convert to JSON
	    json_data = json.dumps(languages, indent=4)

	    # Save the JSON data to a file
	    file_path = 'flores_language_codes.json'
	    with open(file_path, 'w', encoding='utf-8') as file:
	        file.write(json_data)

	    print(f"JSON data saved to {file_path}")

	# generate_lang_code_file()

	def get_language_code(language_name,
	                      json_file_path='flores_language_codes.json'):
	    """Return the code of the language whose name matches *language_name*.

	    The comparison is case-insensitive (both sides are lower-cased) and the
	    first matching entry wins.

	    Args:
	        language_name: Language name to look up.
	        json_file_path: Path to a JSON file holding a list of objects with
	            at least ``"name"`` and ``"code"`` keys.

	    Returns:
	        The matching entry's ``"code"`` value, or ``None`` if no entry has
	        that name.
	    """
	    # JSON text is UTF-8; read it as such rather than with the platform's
	    # default encoding, which would garble non-ASCII names on some systems.
	    with open(json_file_path, 'r', encoding='utf-8') as file:
	        languages = json.load(file)

	    # First entry whose name matches, or None when nothing matches.
	    return next(
	        (
	            entry['code']
	            for entry in languages
	            if entry['name'].lower() == language_name.lower()
	        ),
	        None,
	    )

	def get_language_list(
	        json_file_path='flores_language_codes.json'):
	    """Return the names of all languages listed in the JSON file.

	    Args:
	        json_file_path: Path to a JSON file holding a list of objects with
	            at least a ``"name"`` key.

	    Returns:
	        A list of the ``"name"`` values, in file order.
	    """
	    # JSON text is UTF-8; read it as such rather than with the platform's
	    # default encoding, which would garble non-ASCII names on some systems.
	    with open(json_file_path, 'r', encoding='utf-8') as file:
	        languages = json.load(file)

	    return [language['name'] for language in languages]