"""Build and query a JSON table of language codes taken from the Flores README."""

import json
from typing import Dict, List, Optional


def _parse_language_table(markdown_text: str) -> List[Dict[str, str]]:
    """Extract language rows from the README's "Language coverage" table.

    Every line containing ``|`` that appears after a line mentioning
    "Language coverage" is treated as a table row. The first two collected
    rows (column labels and the Markdown divider) are discarded.

    Args:
        markdown_text: Full text of the README.

    Returns:
        A list of ``{"code", "identifier", "name"}`` dictionaries, one per
        table row, in document order.
    """
    rows = []
    in_section = False
    for line in markdown_text.splitlines():
        if "Language coverage" in line:
            in_section = True
            continue
        # Blank lines and prose never contain a pipe, so one check covers both.
        if not in_section or '|' not in line:
            continue

        cells = line.split('|')
        # Indices 1..3 are read below, so at least four fields are required;
        # shorter pipe-containing lines are not table rows and are skipped.
        if len(cells) < 4:
            continue

        rows.append({
            # [1:-1] drops the first and last character of the cell
            # (presumably the Markdown backticks around the value -- TODO confirm).
            "code": cells[1].strip()[1:-1],
            "identifier": cells[2].strip()[1:-1],
            "name": cells[3].strip(),
        })

    # Omit the labels and divider rows.
    return rows[2:]


def generate_lang_code_file(
    url: str = 'https://raw.githubusercontent.com/openlanguagedata/flores/main/README.md',
    file_path: str = 'flores_language_codes.json',
    timeout: float = 30.0,
) -> None:
    """Download the Flores README and save its language table as JSON.

    Args:
        url: Location of the README containing the language codes.
        file_path: Destination of the generated JSON file.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Imported here so the lookup helpers below, which only read a local JSON
    # file, stay usable when the HTTP dependency is not installed.
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the README.
    response.raise_for_status()

    languages = _parse_language_table(response.text)
    json_data = json.dumps(languages, indent=4)

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(json_data)

    print(f"JSON data saved to {file_path}")


# To (re)create the JSON file, call generate_lang_code_file() once.


def get_language_code(
    language_name: str,
    json_file_path: str = 'flores_language_codes.json',
) -> Optional[str]:
    """Return the code of the language called *language_name*.

    The comparison is case-insensitive.

    Args:
        language_name: Name of the language to look up.
        json_file_path: JSON file produced by ``generate_lang_code_file``.

    Returns:
        The matching language code, or ``None`` if the name is not found.
    """
    with open(json_file_path, 'r', encoding='utf-8') as file:
        languages = json.load(file)

    for language in languages:
        if language['name'].lower() == language_name.lower():
            return language['code']

    return None


def get_language_list(
    json_file_path: str = 'flores_language_codes.json',
) -> List[str]:
    """Return the names of all languages stored in the JSON file.

    Args:
        json_file_path: JSON file produced by ``generate_lang_code_file``.

    Returns:
        The language names in the order they appear in the file.
    """
    with open(json_file_path, 'r', encoding='utf-8') as file:
        languages = json.load(file)

    return [language['name'] for language in languages]