minhdang committed on
Commit
937ca11
1 Parent(s): 27d059b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py CHANGED
@@ -16,7 +16,79 @@ MAX_MAX_NEW_TOKENS = 2048
16
  DEFAULT_MAX_NEW_TOKENS = 1024
17
  total_count=0
18
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  DESCRIPTION="""CODE"""
22
 
@@ -43,6 +115,7 @@ def gen(
43
  print(total_count)
44
  os.system("nvidia-smi")
45
  conversation = []
 
46
  if system_prompt:
47
  conversation.append({"role": "system", "content": system_prompt})
48
  for user, assistant in chat_history:
 
16
  DEFAULT_MAX_NEW_TOKENS = 1024
17
  total_count=0
18
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
19
+ import gradio as gr
20
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
21
+
22
def _build_tone_position_map():
    """Build the table that moves a tone mark from the first vowel to the second.

    For each of the vowel pairs ``oa``, ``oe`` and ``uy`` and each of the five
    tone marks, three entries are produced in this order: all-lowercase,
    capitalised first letter, and all-uppercase (e.g. ``"òa" -> "oà"``,
    ``"Òa" -> "Oà"``, ``"ÒA" -> "OÀ"``).
    """
    # The five toned forms of each vowel, always in the same tone order so
    # that position i of one string corresponds to position i of another.
    toned_forms = {
        "o": "òóỏõọ",
        "a": "àáảãạ",
        "e": "èéẻẽẹ",
        "u": "ùúủũụ",
        "y": "ỳýỷỹỵ",
    }
    table = {}
    for first, second in (("o", "a"), ("o", "e"), ("u", "y")):
        for toned_first, toned_second in zip(toned_forms[first], toned_forms[second]):
            mark_on_first = toned_first + second
            mark_on_second = first + toned_second
            table[mark_on_first] = mark_on_second
            table[toned_first.upper() + second] = first.upper() + toned_second
            table[mark_on_first.upper()] = mark_on_second.upper()
    return table


# Substring replacements applied to Vietnamese text before it is tokenized.
dict_map = _build_tone_position_map()
69
 
70
# Load the Vietnamese-to-English checkpoint once at import time; the tokenizer
# and the model come from the same checkpoint and are shared by every call.
_VI2EN_CHECKPOINT = "vinai/vinai-translate-vi2en-v2"
tokenizer_vi2en = AutoTokenizer.from_pretrained(_VI2EN_CHECKPOINT, src_lang="vi_VN")
model_vi2en = AutoModelForSeq2SeqLM.from_pretrained(_VI2EN_CHECKPOINT)
72
+
73
def _normalize_vi_text(vi_text: str) -> str:
    """Return *vi_text* in NFC form with the ``dict_map`` replacements applied."""
    import unicodedata  # local import so the block does not rely on file-level imports

    # The replacements are plain substring matches. Decomposed input (base
    # letter + combining tone mark) would never match keys written with
    # precomposed characters, so compose first.
    # NOTE(review): assumes the dict_map keys are precomposed (NFC) - confirm.
    normalized = unicodedata.normalize("NFC", vi_text)
    for mark_on_first, mark_on_second in dict_map.items():
        normalized = normalized.replace(mark_on_first, mark_on_second)
    return normalized


def translate_vi2en(vi_text: str) -> str:
    """Translate Vietnamese text to English with the module-level vi2en model.

    The text is normalized (NFC + ``dict_map`` tone-mark replacements),
    tokenized, and decoded with 5-beam search; the decoded sequences are
    joined with single spaces.

    Args:
        vi_text: Vietnamese input text.

    Returns:
        The English translation. Empty or whitespace-only input is returned
        unchanged without invoking the model.
    """
    # Nothing to translate: skip the expensive generate() call.
    if not vi_text.strip():
        return vi_text

    encoded = tokenizer_vi2en(_normalize_vi_text(vi_text), return_tensors="pt")
    output_ids = model_vi2en.generate(
        encoded.input_ids,
        # Start decoding with the English language code token.
        decoder_start_token_id=tokenizer_vi2en.lang_code_to_id["en_XX"],
        num_return_sequences=1,
        # Beam search rather than sampling.
        num_beams=5,
        early_stopping=True,
    )
    decoded = tokenizer_vi2en.batch_decode(output_ids, skip_special_tokens=True)
    return " ".join(decoded)
92
 
93
  DESCRIPTION="""CODE"""
94
 
 
115
  print(total_count)
116
  os.system("nvidia-smi")
117
  conversation = []
118
+ message = translate_vi2en(message)
119
  if system_prompt:
120
  conversation.append({"role": "system", "content": system_prompt})
121
  for user, assistant in chat_history: