EricaCorral committed on
Commit
2570705
1 Parent(s): caff3d5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pypinyin import pinyin
2
+ from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
3
+ from LAC import LAC
4
+ import gradio as gr
5
+
6
# Checkpoint name shared by the translation model and its tokenizer.
_M2M100_CHECKPOINT = "facebook/m2m100_1.2B"

# Everything below is loaded once at import time and reused by every request.
tokenizer = M2M100Tokenizer.from_pretrained(_M2M100_CHECKPOINT)
model = M2M100ForConditionalGeneration.from_pretrained(_M2M100_CHECKPOINT)

# LAC instance created in "seg" mode; generate_response iterates over the
# result of lac.run() to obtain the individual pieces of the input.
lac = LAC(mode="seg")
9
+
10
def make_request(chinese_text):
    """Translate Chinese text to English with the module-level M2M100 model.

    The tokenizer's source language is set to ``"zh"`` and generation is
    forced to start with the ``"en"`` language token.

    Args:
        chinese_text: Text handed to the tokenizer as the source sentence.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated token ids,
        with special tokens skipped.
    """
    tokenizer.src_lang = "zh"
    model_inputs = tokenizer(chinese_text, return_tensors="pt")
    english_bos_id = tokenizer.get_lang_id("en")
    output_ids = model.generate(**model_inputs, forced_bos_token_id=english_bos_id)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)
15
+
16
def generatepinyin(input):
    """Return the pinyin reading of ``input`` as one space-separated string.

    ``pinyin`` yields one list per item; only the first entry of each list is
    used. Every reading is preceded by a single space, so a non-empty result
    starts with a space and an empty input gives ``""``.

    Args:
        input: Text passed straight to ``pypinyin.pinyin``.

    Returns:
        The concatenated readings, each prefixed with ``" "``.
    """
    return "".join(" " + readings[0] for readings in pinyin(input))
22
+
23
def generate_response(string_to_translate):
    """Build translation/pinyin rows for the full input and each segment.

    The first row describes the whole input string; the following rows
    describe each piece returned by ``lac.run`` for that string, in order.

    Args:
        string_to_translate: The text entered by the user.

    Returns:
        A list of ``[text, make_request(text), generatepinyin(text)]`` rows.
    """

    def build_row(text):
        # One output row: the original text, its translation, its pinyin.
        return [text, make_request(text), generatepinyin(text)]

    rows = [build_row(string_to_translate)]
    rows.extend(build_row(segment) for segment in lac.run(string_to_translate))
    return rows
30
+
31
# Gradio front end for generate_response.
# The title and description previously read "English to Chinese", but the
# pipeline translates from Chinese ("zh") into English ("en") and the input
# placeholder asks for Chinese text, so the labels now state that direction.
# NOTE(review): `gr.inputs.Textbox` is the legacy input namespace; confirm the
# pinned Gradio version before migrating to `gr.Textbox`.
iface = gr.Interface(
    fn=generate_response,
    title="Chinese to English",
    description="Chinese to English with a SOTA model",
    inputs=gr.inputs.Textbox(lines=5, placeholder="Enter text in Chinese"),
    outputs="text",
)

# Start the web app.
iface.launch()