Aeon-Avinash committed on
Commit
98fefd6
1 Parent(s): abf61f0

Create lang_codes.py

Browse files
Files changed (1) hide show
  1. lang_codes.py +72 -0
lang_codes.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
def _parse_language_table(content):
    """Extract the language rows that follow the "Language coverage" heading.

    Every line after the heading that contains a ``|`` and splits into at
    least four pipe-separated parts is treated as a table row. The first two
    collected rows (the column labels and the divider) are dropped.
    """
    rows = []
    in_section = False

    for line in content.split('\n'):
        if "Language coverage" in line:
            in_section = True
            continue

        # Blank lines contain no '|', so this also skips them.
        if not in_section or '|' not in line:
            continue

        parts = line.split('|')
        # parts[1], parts[2] and parts[3] are read below, so anything shorter
        # than four parts cannot be a complete row and would raise IndexError.
        if len(parts) < 4:
            continue

        rows.append({
            # [1:-1] drops the first and last character of the cell
            # (presumably the surrounding backticks; verify against the README).
            "code": parts[1].strip()[1:-1],
            "identifier": parts[2].strip()[1:-1],
            "name": parts[3].strip(),
        })

    # Omit the labels and divider
    return rows[2:]


def generate_lang_code_file(
        url='https://raw.githubusercontent.com/openlanguagedata/flores/main/README.md',
        file_path='/teamspace/studios/this_studio/multi-lang-translator/flores_language_codes.json',
        timeout=30):
    """Download the Flores README and save its language table as a JSON file.

    Args:
        url: Location of the Flores README containing the language codes.
        file_path: Destination of the generated JSON file.
        timeout: Seconds to wait for the HTTP request before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: If the request fails or times out.
        OSError: If the output file cannot be written.
    """
    # Fetch the page content; fail loudly instead of parsing an error page.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    languages = _parse_language_table(response.text)

    # Convert to JSON
    json_data = json.dumps(languages, indent=4)

    # Save the JSON data to a file
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(json_data)

    print(f"JSON data saved to {file_path}")
48
+
49
+ # generate_lang_code_file()
50
+
51
def get_language_code(language_name,
                      json_file_path='/teamspace/studios/this_studio/multi-lang-translator/flores_language_codes.json'):
    """Return the code stored for ``language_name``, or None if it is unknown.

    The comparison ignores letter case and surrounding whitespace.

    Args:
        language_name: Name of the language to look up.
        json_file_path: JSON file holding a list of objects with at least
            the keys ``"name"`` and ``"code"``.

    Returns:
        The ``"code"`` value of the first entry whose name matches, or None
        when nothing matches or ``language_name`` is not a string.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # A non-string name (e.g. None from an empty selection) cannot match.
    if not isinstance(language_name, str):
        return None

    # Normalise the query once instead of on every loop iteration.
    wanted = language_name.strip().lower()

    # Load the JSON data from the file
    with open(json_file_path, 'r', encoding='utf-8') as file:
        languages = json.load(file)

    # Search for the language code by language name
    for language in languages:
        if language['name'].strip().lower() == wanted:
            return language['code']

    return None  # Return None if the language name is not found
63
+
64
def get_language_list(
        json_file_path='/teamspace/studios/this_studio/multi-lang-translator/flores_language_codes.json'):
    """Return the ``"name"`` of every entry in the language JSON file.

    Args:
        json_file_path: JSON file holding a list of objects that each have a
            ``"name"`` key.

    Returns:
        A list of the names, in the order they appear in the file.

    Raises:
        OSError: If the JSON file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        languages = json.load(file)

    # Extract the language names
    return [language['name'] for language in languages]