LouisMonawe committed on
Commit
76f85b9
·
1 Parent(s): cc13458
Files changed (2) hide show
  1. app.py +48 -127
  2. two.py +146 -0
app.py CHANGED
@@ -1,144 +1,65 @@
1
- # import gradio as gr
2
- # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
-
4
- # # Load tokenizer and model (this will download ~3.5GB)
5
- # model_name = "facebook/nllb-200-distilled-600M"
6
- # tokenizer = AutoTokenizer.from_pretrained(model_name)
7
- # model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
8
-
9
- # # Supported South African languages codes for NLLB
10
- # LANGUAGES = {
11
- # "English → Afrikaans": "afr_Latn",
12
- # "English → Xhosa": "xho_Latn",
13
- # "English → Zulu": "zul_Latn",
14
- # "English → Sesotho": "sot_Latn",
15
- # "English → Tswana": "tsn_Latn",
16
- # "English → Northern Sotho": "nso_Latn",
17
- # "English → Swati": "ssw_Latn",
18
- # "English → Tsonga": "tso_Latn",
19
- # "English → Venda": "ven_Latn",
20
- # }
21
-
22
-
23
- # def translate(text, lang_label):
24
- # if not text.strip():
25
- # return "Please enter some text to translate."
26
-
27
- # target_lang = LANGUAGES[lang_label]
28
- # # Format input for NLLB: prefix target language token
29
- # input_text = f">>{target_lang}<< {text}"
30
-
31
- # inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
32
- # outputs = model.generate(**inputs, max_length=512)
33
- # translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
34
- # return translated_text
35
-
36
-
37
- # iface = gr.Interface(
38
- # fn=translate,
39
- # inputs=[
40
- # gr.Textbox(label="English Text"),
41
- # gr.Dropdown(list(LANGUAGES.keys()), label="Target Language"),
42
- # ],
43
- # outputs="text",
44
- # title="NLLB-200 English to South African Languages",
45
- # description="Translate English text to South African languages using Meta's NLLB-200 model locally.",
46
- # )
47
-
48
- # iface.launch()
49
-
50
- # from transformers import MarianMTModel, MarianTokenizer, pipeline
51
- # import gradio as gr
52
-
53
- # # Define supported models for South African languages
54
- # language_models = {
55
- # "Afrikaans": "Helsinki-NLP/opus-mt-en-af",
56
- # "Zulu": "Helsinki-NLP/opus-mt-en-zu",
57
- # "Xhosa": "Helsinki-NLP/opus-mt-en-xh",
58
- # "Sesotho": "Helsinki-NLP/opus-mt-en-st",
59
- # "Setswana": "Helsinki-NLP/opus-mt-en-tn",
60
- # }
61
-
62
-
63
- # # Translation function
64
- # def translate(text, target_language):
65
- # model_name = language_models[target_language]
66
- # tokenizer = MarianTokenizer.from_pretrained(model_name)
67
- # model = MarianMTModel.from_pretrained(model_name)
68
-
69
- # # Setup pipeline
70
- # translation_pipeline = pipeline("translation", model=model, tokenizer=tokenizer)
71
-
72
- # # Translate
73
- # result = translation_pipeline(text)
74
- # return result[0]["translation_text"]
75
 
 
 
76
 
77
- # # Build Gradio interface
78
- # interface = gr.Interface(
79
- # fn=translate,
80
- # inputs=[
81
- # gr.Textbox(label="Enter English Text"),
82
- # gr.Dropdown(choices=list(language_models.keys()), label="Translate to"),
83
- # ],
84
- # outputs="text",
85
- # title="African Language Translator",
86
- # description="Translate English text into Afrikaans, Zulu, Xhosa, Sesotho or Setswana",
87
- # )
88
 
89
- # # Launch the app
90
- # interface.launch()
91
 
92
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
93
- import gradio as gr
 
94
 
95
- # Load the tokenizer and model
96
- model_name = "facebook/nllb-200-distilled-600M"
97
- tokenizer = AutoTokenizer.from_pretrained(model_name)
98
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
99
 
100
- # Language code map
101
- lang_map = {
102
- "English": "eng_Latn",
103
- "Afrikaans": "afr_Latn",
104
- "Zulu": "zul_Latn",
105
- "Xhosa": "xho_Latn",
106
- "French": "fra_Latn",
107
- "Spanish": "spa_Latn",
108
- "Swahili": "swh_Latn",
109
- }
110
 
 
 
 
111
 
112
- # Translation function
113
- def translate(text, src_lang, tgt_lang):
114
- src_code = lang_map[src_lang]
115
- tgt_code = lang_map[tgt_lang]
116
 
117
- tokenizer.src_lang = src_code
118
- inputs = tokenizer(text, return_tensors="pt", padding=True)
119
 
120
- generated_tokens = model.generate(
121
- **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_code]
122
- )
123
- translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
124
- return translated
125
 
126
 
127
- # Gradio interface
128
- iface = gr.Interface(
129
  fn=translate,
130
  inputs=[
131
- gr.Textbox(label="Enter text"),
132
- gr.Dropdown(
133
- choices=list(lang_map.keys()), label="From Language", value="English"
134
- ),
135
- gr.Dropdown(
136
- choices=list(lang_map.keys()), label="To Language", value="Afrikaans"
137
- ),
138
  ],
139
- outputs="text",
140
- title="NLLB-200 Custom Language Translator",
141
- description="Translate text using Facebook's distilled NLLB-200 model with selectable languages.",
142
  )
143
 
144
- iface.launch()
 
1
+ import requests
2
+ import gradio as gr
3
+ from dotenv import load_dotenv
4
+ import os
5
+
6
+ # Load environment variables
7
+ load_dotenv()
8
+ HF_TOKEN = os.getenv("HF_TOKEN")
9
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"}
10
+
11
+ # Language to ISO 639-3 codes (used for NLLB-200)
12
+ LANGUAGES = {
13
+ "English → Afrikaans": "afr",
14
+ "English → Xhosa": "xho",
15
+ "English → Zulu": "zul",
16
+ "English → Sesotho": "sot",
17
+ "English → Tswana": "tsn",
18
+ "English → Northern Sotho": "nso",
19
+ "English → Swati": "ssw",
20
+ "English → Tsonga": "tso",
21
+ "English → Venda": "ven",
22
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ MODEL_NAME = "facebook/nllb-200-distilled-600M"
25
+ API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"
26
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ def query(payload):
29
+ response = requests.post(API_URL, headers=headers, json=payload)
30
 
31
+ if response.status_code != 200:
32
+ print(f"[ERROR] API failed: {response.status_code} - {response.text}")
33
+ return {"error": f"Request failed with {response.status_code}"}
34
 
35
+ try:
36
+ return response.json()
37
+ except requests.exceptions.JSONDecodeError:
38
+ print(f"[ERROR] Failed to parse JSON: {response.text}")
39
+ return {"error": "Invalid JSON from API"}
40
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ def translate(input_text, language_label):
43
+ language_code = LANGUAGES[language_label]
44
+ formatted_input = f">>{language_code}<< {input_text}"
45
 
46
+ response = query({"inputs": formatted_input, "options": {"wait_for_model": True}})
 
 
 
47
 
48
+ if "error" in response:
49
+ return f"Error: {response['error']}"
50
 
51
+ return response[0]["translation_text"]
 
 
 
 
52
 
53
 
54
+ translator = gr.Interface(
 
55
  fn=translate,
56
  inputs=[
57
+ gr.Textbox(label="Input Text", placeholder="Type text here..."),
58
+ gr.Dropdown(list(LANGUAGES.keys()), label="Select Language Target"),
 
 
 
 
 
59
  ],
60
+ outputs=gr.Textbox(label="Translation"),
61
+ title="Translademia",
62
+ description="Translate English text to South African languages using Meta's NLLB-200 model.",
63
  )
64
 
65
+ translator.launch()
two.py CHANGED
@@ -59,3 +59,149 @@ translator = gr.Interface(
59
  )
60
 
61
  translator.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  )
60
 
61
  translator.launch(share=True)
62
+
63
+
64
+ # import gradio as gr
65
+ # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
66
+
67
+ # # Load tokenizer and model (this will download ~3.5GB)
68
+ # model_name = "facebook/nllb-200-distilled-600M"
69
+ # tokenizer = AutoTokenizer.from_pretrained(model_name)
70
+ # model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
71
+
72
+ # # Supported South African languages codes for NLLB
73
+ # LANGUAGES = {
74
+ # "English → Afrikaans": "afr_Latn",
75
+ # "English → Xhosa": "xho_Latn",
76
+ # "English → Zulu": "zul_Latn",
77
+ # "English → Sesotho": "sot_Latn",
78
+ # "English → Tswana": "tsn_Latn",
79
+ # "English → Northern Sotho": "nso_Latn",
80
+ # "English → Swati": "ssw_Latn",
81
+ # "English → Tsonga": "tso_Latn",
82
+ # "English → Venda": "ven_Latn",
83
+ # }
84
+
85
+
86
+ # def translate(text, lang_label):
87
+ # if not text.strip():
88
+ # return "Please enter some text to translate."
89
+
90
+ # target_lang = LANGUAGES[lang_label]
91
+ # # Format input for NLLB: prefix target language token
92
+ # input_text = f">>{target_lang}<< {text}"
93
+
94
+ # inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
95
+ # outputs = model.generate(**inputs, max_length=512)
96
+ # translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
97
+ # return translated_text
98
+
99
+
100
+ # iface = gr.Interface(
101
+ # fn=translate,
102
+ # inputs=[
103
+ # gr.Textbox(label="English Text"),
104
+ # gr.Dropdown(list(LANGUAGES.keys()), label="Target Language"),
105
+ # ],
106
+ # outputs="text",
107
+ # title="NLLB-200 English to South African Languages",
108
+ # description="Translate English text to South African languages using Meta's NLLB-200 model locally.",
109
+ # )
110
+
111
+ # iface.launch()
112
+
113
+ # from transformers import MarianMTModel, MarianTokenizer, pipeline
114
+ # import gradio as gr
115
+
116
+ # # Define supported models for South African languages
117
+ # language_models = {
118
+ # "Afrikaans": "Helsinki-NLP/opus-mt-en-af",
119
+ # "Zulu": "Helsinki-NLP/opus-mt-en-zu",
120
+ # "Xhosa": "Helsinki-NLP/opus-mt-en-xh",
121
+ # "Sesotho": "Helsinki-NLP/opus-mt-en-st",
122
+ # "Setswana": "Helsinki-NLP/opus-mt-en-tn",
123
+ # }
124
+
125
+
126
+ # # Translation function
127
+ # def translate(text, target_language):
128
+ # model_name = language_models[target_language]
129
+ # tokenizer = MarianTokenizer.from_pretrained(model_name)
130
+ # model = MarianMTModel.from_pretrained(model_name)
131
+
132
+ # # Setup pipeline
133
+ # translation_pipeline = pipeline("translation", model=model, tokenizer=tokenizer)
134
+
135
+ # # Translate
136
+ # result = translation_pipeline(text)
137
+ # return result[0]["translation_text"]
138
+
139
+
140
+ # # Build Gradio interface
141
+ # interface = gr.Interface(
142
+ # fn=translate,
143
+ # inputs=[
144
+ # gr.Textbox(label="Enter English Text"),
145
+ # gr.Dropdown(choices=list(language_models.keys()), label="Translate to"),
146
+ # ],
147
+ # outputs="text",
148
+ # title="African Language Translator",
149
+ # description="Translate English text into Afrikaans, Zulu, Xhosa, Sesotho or Setswana",
150
+ # )
151
+
152
+ # # Launch the app
153
+ # interface.launch()
154
+
155
+ # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
156
+ # import gradio as gr
157
+
158
+ # # Load the tokenizer and model
159
+ # model_name = "facebook/nllb-200-distilled-600M"
160
+ # tokenizer = AutoTokenizer.from_pretrained(model_name)
161
+ # model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
162
+
163
+ # # Language code map
164
+ # lang_map = {
165
+ # "English": "eng_Latn",
166
+ # "Afrikaans": "afr_Latn",
167
+ # "Zulu": "zul_Latn",
168
+ # "Xhosa": "xho_Latn",
169
+ # "French": "fra_Latn",
170
+ # "Spanish": "spa_Latn",
171
+ # "Swahili": "swh_Latn",
172
+ # }
173
+
174
+
175
+ # # Translation function
176
+ # def translate(text, src_lang, tgt_lang):
177
+ # src_code = lang_map[src_lang]
178
+ # tgt_code = lang_map[tgt_lang]
179
+
180
+ # tokenizer.src_lang = src_code
181
+ # inputs = tokenizer(text, return_tensors="pt", padding=True)
182
+
183
+ # generated_tokens = model.generate(
184
+ # **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_code]
185
+ # )
186
+ # translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
187
+ # return translated
188
+
189
+
190
+ # # Gradio interface
191
+ # iface = gr.Interface(
192
+ # fn=translate,
193
+ # inputs=[
194
+ # gr.Textbox(label="Enter text"),
195
+ # gr.Dropdown(
196
+ # choices=list(lang_map.keys()), label="From Language", value="English"
197
+ # ),
198
+ # gr.Dropdown(
199
+ # choices=list(lang_map.keys()), label="To Language", value="Afrikaans"
200
+ # ),
201
+ # ],
202
+ # outputs="text",
203
+ # title="NLLB-200 Custom Language Translator",
204
+ # description="Translate text using Facebook's distilled NLLB-200 model with selectable languages.",
205
+ # )
206
+
207
+ # iface.launch()