jisukim8873 committed on
Commit
59db409
·
1 Parent(s): ac6d958
Files changed (1) hide show
  1. app.py +85 -38
app.py CHANGED
@@ -1,57 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
 
5
- en2ko = "KoJLabs/nllb-finetuned-en2ko"
6
- ko2en = "KoJLabs/nllb-finetuned-ko2en"
7
- style = "KoJLabs/bart-speech-style-converter"
8
 
9
- en2ko_model = AutoModelForSeq2SeqLM.from_pretrained(en2ko)
10
- ko2en_model = AutoModelForSeq2SeqLM.from_pretrained(ko2en)
11
- style_model = AutoModelForSeq2SeqLM.from_pretrained(style)
12
-
13
- en2ko_tokenizer = AutoTokenizer.from_pretrained(en2ko)
14
- ko2en_tokenizer = AutoTokenizer.from_pretrained(ko2en)
15
- style_tokenizer = AutoTokenizer.from_pretrained(style)
16
 
17
  def translate(source, target, text):
18
- formats = {"English":"eng_Latn", "Korean":"kor_Hang"}
19
- src = formats[source]
20
- tgt = formats[target]
21
-
22
- if src == "eng_Latn":
23
- translator = pipeline(
24
- 'translation',
25
- model=en2ko_model,
26
- tokenizer=ko2en_tokenizer,
27
- src_lang=src,
28
- tgt_lang=tgt,
29
- )
30
-
31
- if src == "kor_Hang":
32
- translator = pipeline(
33
- 'translation',
34
- model=ko2en_model,
35
- tokenizer=en2ko_tokenizer,
36
- src_lang=src,
37
- tgt_lang=tgt
38
- )
39
-
40
  output = translator(text)
41
  translated_text = output[0]['translation_text']
42
-
43
  return translated_text
44
 
45
- title = 'KoTAN Translator & Speech-style converter'
46
- lang = ['English','Korean']
47
-
 
48
  translator_app = gr.Interface(
49
  fn=translate,
50
  inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'), gr.inputs.Dropdown(choices=lang, label='Target Language'), gr.inputs.Textbox(lines=5, label='Text to Translate')],
51
  outputs=[gr.outputs.Textbox(label='Translated Text')],
52
  title=title,
53
- description = 'KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)',
54
- article='Jisu, Kim. Juhwan, Lee',
 
 
 
 
 
 
 
55
  enable_queue=True,
56
  )
57
 
 
1
+ # import os
2
+ # import gradio as gr
3
+ # from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
4
+
5
+ # en2ko = "KoJLabs/nllb-finetuned-en2ko"
6
+ # ko2en = "KoJLabs/nllb-finetuned-ko2en"
7
+ # style = "KoJLabs/bart-speech-style-converter"
8
+
9
+ # en2ko_model = AutoModelForSeq2SeqLM.from_pretrained(en2ko)
10
+ # ko2en_model = AutoModelForSeq2SeqLM.from_pretrained(ko2en)
11
+ # style_model = AutoModelForSeq2SeqLM.from_pretrained(style)
12
+
13
+ # en2ko_tokenizer = AutoTokenizer.from_pretrained(en2ko)
14
+ # ko2en_tokenizer = AutoTokenizer.from_pretrained(ko2en)
15
+ # style_tokenizer = AutoTokenizer.from_pretrained(style)
16
+
17
+ # def translate(source, target, text):
18
+ # formats = {"English":"eng_Latn", "Korean":"kor_Hang"}
19
+ # src = formats[source]
20
+ # tgt = formats[target]
21
+
22
+ # if src == "eng_Latn":
23
+ # translator = pipeline(
24
+ # 'translation',
25
+ # model=en2ko_model,
26
+ # tokenizer=ko2en_tokenizer,
27
+ # src_lang=src,
28
+ # tgt_lang=tgt,
29
+ # )
30
+
31
+ # if src == "kor_Hang":
32
+ # translator = pipeline(
33
+ # 'translation',
34
+ # model=ko2en_model,
35
+ # tokenizer=en2ko_tokenizer,
36
+ # src_lang=src,
37
+ # tgt_lang=tgt
38
+ # )
39
+
40
+ # output = translator(text)
41
+ # translated_text = output[0]['translation_text']
42
+
43
+ # return translated_text
44
+
45
+ # title = 'KoTAN Translator & Speech-style converter'
46
+ # lang = ['English','Korean']
47
+
48
+ # translator_app = gr.Interface(
49
+ # fn=translate,
50
+ # inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'), gr.inputs.Dropdown(choices=lang, label='Target Language'), gr.inputs.Textbox(lines=5, label='Text to Translate')],
51
+ # outputs=[gr.outputs.Textbox(label='Translated Text')],
52
+ # title=title,
53
+ # description = 'KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)',
54
+ # article='Jisu, Kim. Juhwan, Lee',
55
+ # enable_queue=True,
56
+ # )
57
+
58
+ # translator_app.launch()
59
+
60
  import os
61
  import gradio as gr
62
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
63
 
 
 
 
64
 
65
+ checkpoint = 'facebook/nllb-200-distilled-1.3B'
66
+ model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
67
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 
 
 
 
68
 
69
  def translate(source, target, text):
70
+ formats = {'English':'eng_Latn', 'Asante': 'aka_Latn', 'Akuapem': 'twi_Latn', 'Ewe':'ewe_Latn', 'Hausa': 'hau_Latn'}
71
+ source_fmt = formats[source]
72
+ target_fmt = formats[target]
73
+ translator = pipeline('translation',
74
+ model=model,
75
+ tokenizer=tokenizer,
76
+ src_lang=source_fmt,
77
+ tgt_lang=target_fmt,
78
+ max_length = 400)
79
+
 
 
 
 
 
 
 
 
 
 
 
 
80
  output = translator(text)
81
  translated_text = output[0]['translation_text']
 
82
  return translated_text
83
 
84
+ # hf_token = os.getenv('HF_TOKEN')
85
+ title = 'Ananse AI | Ghanaian Language Translator'
86
+ # hf_writer = gr.HuggingFaceDatasetSaver(hf_token, 'crowdsourced-GLT')
87
+ lang = ['Akuapem','Asante', 'English', 'Ewe','Hausa']
88
  translator_app = gr.Interface(
89
  fn=translate,
90
  inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'), gr.inputs.Dropdown(choices=lang, label='Target Language'), gr.inputs.Textbox(lines=5, label='Text to Translate')],
91
  outputs=[gr.outputs.Textbox(label='Translated Text')],
92
  title=title,
93
+ description = 'The Ghanaian Languages considered now are the Asante, Akuapem, Ewe, and Hausa. Kindly use the Flag button to rate (👍🏼, 👎🏼) your translation to help us improve. Thanks',
94
+ article='Ananse AI | hnmensah',
95
+ examples = [['English','Asante','Kwame went to Kaneshie to buy tomates.'],
96
+ ['English','Ewe','The event should be hosted at the Accra Mall.'],
97
+ ['English','Akuapem','The trader is suffering from Malaria so she did not go to work.'],
98
+ ['English','Hausa','The last person to get to the class will be sacked.']],
99
+ #allow_flagging='manual',
100
+ #flagging_options=['👍🏼','👎🏼'],
101
+ #flagging_callback=hf_writer,
102
  enable_queue=True,
103
  )
104