potsawee committed on
Commit
ada39d6
1 Parent(s): a73088d

Add application file

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +84 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
  title: Mt5 Translate Summ
3
- emoji: 😻
4
  colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.38.0
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Mt5 Translate Summ
3
+ emoji: 🎓
4
  colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.38.0
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import random
3
+ import torch
4
+ from transformers import MT5Tokenizer, MT5ForConditionalGeneration
5
+ # Prefer GPU when available; both models are moved onto this device below.
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
6
+
7
+ # One tokenizer is loaded (from the translation checkpoint) and reused for
+ # both tasks — presumably the summarization checkpoint shares the same
+ # filtered English+Thai vocabulary; TODO confirm against the model cards.
+ tokenizer = MT5Tokenizer.from_pretrained("potsawee/mt5-english-thai-large-translation")
8
+ translator = MT5ForConditionalGeneration.from_pretrained("potsawee/mt5-english-thai-large-translation")
9
+ summarizer = MT5ForConditionalGeneration.from_pretrained("potsawee/mt5-english-thai-large-summarization")
10
+ # Inference only: switch off dropout and place weights on the target device.
+ translator.eval()
11
+ summarizer.eval()
12
+ translator.to(device)
13
+ summarizer.to(device)
14
+
15
+
16
+ # def generate_multiple_choice_question(
17
+ # context
18
+ # ):
19
+ # num_questions = 1
20
+ # question_item = question_generation_sampling(
21
+ # g1_model, g1_tokenizer,
22
+ # g2_model, g2_tokenizer,
23
+ # context, num_questions, device
24
+ # )[0]
25
+ # question = question_item['question']
26
+ # options = question_item['options']
27
+ # options[0] = f"{options[0]} [ANSWER]"
28
+ # random.shuffle(options)
29
+ # output_string = f"Question: {question}\n[A] {options[0]}\n[B] {options[1]}\n[C] {options[2]}\n[D] {options[3]}"
30
+ # return output_string
31
+ #
32
+ # demo = gr.Interface(
33
+ # fn=generate_multiple_choice_question,
34
+ # inputs=gr.Textbox(lines=8, placeholder="Context Here..."),
35
+ # outputs=gr.Textbox(lines=5, placeholder="Question: \n[A] \n[B] \n[C] \n[D] "),
36
+ # title="Multiple-choice Question Generator",
37
+ # description="Provide some context (e.g. news article or any passage) in the context box and click **Submit**. The models currently support English only. This demo is a part of MQAG - https://github.com/potsawee/mqag0.",
38
+ # allow_flagging='never'
39
+ # )
40
+
41
+ def generate_output(
42
+ task,
43
+ text,
44
+ ):
45
+ inputs = tokenizer(
46
+ [text],
47
+ padding="longest",
48
+ max_length=1024,
49
+ truncation=True,
50
+ return_tensors="pt",
51
+ ).to(device)
52
+ if task == 'Translation':
53
+ outputs = translator.generate(
54
+ **inputs,
55
+ max_new_tokens=256,
56
+ )
57
+ elif task == 'Summarization':
58
+ outputs = summarizer.generate(
59
+ **inputs,
60
+ max_new_tokens=256,
61
+ )
62
+ else:
63
+ raise ValueError("task undefined!")
64
+ gen_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
65
+ return gen_text
66
+
67
+ # Radio-button choices shown in the UI; these strings are matched verbatim
+ # inside generate_output, so keep the two lists in sync.
+ TASKS = ["Translation", "Summarization"]
68
+
69
+ # Gradio UI: a task selector plus an English text box in, Thai text out.
+ demo = gr.Interface(
70
+ fn=generate_output,
71
+ inputs=[
72
+ gr.components.Radio(label="Task", choices=TASKS, value="Translation"),
73
+ gr.components.Textbox(label="Text (in English)", lines=10),
74
+ ],
75
+ outputs=gr.Textbox(label="Text (in Thai)", lines=4),
76
+ # examples=[["Building a translation demo with Gradio is so easy!", "eng_Latn", "spa_Latn"]],
77
+ # No examples are configured above, so caching is explicitly disabled.
+ cache_examples=False,
78
+ title="English🇬🇧 to Thai🇹🇭 | Translation or Summarization",
79
+ description="Provide some text (in English) & select one of the tasks (Translation or Summarization). Note that currently the model only supports text up to 1024 tokens. The base architecture is mt5-large with the embeddings filtered to only English and Thai tokens and fine-tuned to XSum (Eng2Thai) Dataset (https://huggingface.co/datasets/potsawee/xsum_eng2thai).",
80
+ allow_flagging='never'
81
+
82
+ )
83
+
84
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ torch>=1.10
2
+ transformers>=4.11.3