EricaCorral committed on
Commit
0fe7247
1 Parent(s): 79ced6d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pypinyin import pinyin
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ from LAC import LAC
4
+ import gradio as gr
5
+ import torch
6
+
7
# Single checkpoint id shared by the translation model and its tokenizer.
_CHECKPOINT = "Helsinki-NLP/opus-mt-zh-en"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
model.eval()  # evaluation mode: the model is only used for generation here

# LAC instance created in "seg" mode; used to split input text into pieces.
lac = LAC(mode="seg")
11
+
12
def make_request(chinese_text):
    """Translate a Chinese string with the module-level model and tokenizer.

    Args:
        chinese_text: The text to translate.

    Returns:
        The list produced by ``tokenizer.batch_decode`` for the generated
        tokens. A single input string is submitted per call.
    """
    # Generation only, so gradient tracking is disabled.
    with torch.no_grad():
        # Call the tokenizer directly: ``prepare_seq2seq_batch`` is deprecated
        # in transformers 4.x. ``padding``/``truncation`` are passed
        # explicitly because the deprecated helper enabled them by default.
        encoded_zh = tokenizer(
            [chinese_text],
            return_tensors="pt",
            padding=True,
            truncation=True,
        )
        generated_tokens = model.generate(**encoded_zh)
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
17
+
18
def generatepinyin(input):
    """Return the pinyin of ``input`` as a single string.

    The first element of each entry returned by ``pinyin`` is taken, and
    every one is preceded by a single space, so a non-empty result starts
    with a space. An empty result from ``pinyin`` gives ``""``.
    """
    first_readings = (entry[0] for entry in pinyin(input))
    return "".join(" " + reading for reading in first_readings)
24
+
25
def generate_response(Chinese_to_translate):
    """Build translation and pinyin rows for the text and each of its segments.

    Returns:
        A list of ``[text, translation, pinyin]`` rows. The first row covers
        the whole input; the following rows cover each piece returned by
        ``lac.run`` for the input, in order.
    """

    def _row(text):
        # One output row: the source text, its translation, its pinyin.
        return [text, make_request(text), generatepinyin(text)]

    rows = [_row(Chinese_to_translate)]
    rows.extend(_row(segment) for segment in lac.run(Chinese_to_translate))
    return rows
32
+
33
# Web UI: a five-line text box feeding generate_response, with text output.
# The description names the checkpoint this file actually loads
# (Helsinki-NLP/opus-mt-zh-en); it previously advertised a different model.
# NOTE(review): gr.inputs.Textbox is the legacy Gradio namespace; newer
# releases expose gr.Textbox instead. Confirm against the pinned version.
iface = gr.Interface(
    fn=generate_response,
    title="Chinese to English",
    description="Chinese to English with the Helsinki-NLP/opus-mt-zh-en translation model",
    inputs=gr.inputs.Textbox(lines=5, placeholder="Enter text in Chinese"),
    outputs="text",
)

iface.launch()