Sultannn committed on
Commit
fac3c34
•
1 Parent(s): c5410dc

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +100 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ #mT5 Deployment OtherLanguage2ID
4
+
5
+ # library
6
+ import gradio as gr
7
+ import tensorflow as tf
8
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
+
10
+
11
+ # function to run
12
# Checkpoint, output language and token budget used by the demo.
_MODEL_NAME = "csebuetnlp/mT5_m2m_crossSum_enhanced"
_TARGET_LANG = "indonesian"
_MAX_TOKENS = 512

# Loaded (tokenizer, model) pairs keyed by checkpoint name, so the weights
# are loaded once per process instead of on every request.
_MODEL_CACHE = {}


def _load_model(model_name=_MODEL_NAME):
    """Return the cached ``(tokenizer, model)`` pair for *model_name*."""
    if model_name not in _MODEL_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        _MODEL_CACHE[model_name] = (tokenizer, model)
    return _MODEL_CACHE[model_name]


def run_model(input_text, min_length):
    """Summarize *input_text* into Indonesian with the mT5 CrossSum model.

    Args:
        input_text: Text to summarize. It is converted to ``str`` and runs of
            whitespace are collapsed to single spaces before tokenization.
        min_length: Minimum length passed to ``model.generate``. Cast to
            ``int`` because UI sliders may deliver a float.

    Returns:
        The decoded summary with its first space-separated token removed, or
        an empty string when the input is ``None``, empty or whitespace-only.
    """
    text = "" if input_text is None else " ".join(str(input_text).split())
    if not text:
        # Nothing to summarize: skip model loading and generation entirely.
        return ""

    tokenizer, model = _load_model()

    # The checkpoint's config maps a language name to its language-id token;
    # that token's id is used as the decoder start token to select the
    # output language.
    lang_token = model.config.task_specific_params["langid_map"][_TARGET_LANG][1]
    decoder_start_token_id = tokenizer._convert_token_to_id(lang_token)

    # Encode the input as PyTorch tensors ("pt"), padded/truncated to the
    # token budget.
    input_ids = tokenizer(
        text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=_MAX_TOKENS,
    )["input_ids"]

    # Beam search without sampling; `temperature` is intentionally not passed
    # because it only affects sampling-based decoding.
    output_ids = model.generate(
        input_ids=input_ids,
        decoder_start_token_id=decoder_start_token_id,
        min_length=int(min_length),
        max_length=_MAX_TOKENS,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        early_stopping=True,
        num_beams=4,
    )[0]

    summary = tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    # Drop the first space-separated token of the decoded text.
    # NOTE(review): presumably this strips the leading language-id token that
    # survives decoding -- confirm against the checkpoint's actual output.
    return " ".join(summary.split(" ")[1:])
57
+
58
+ # end
59
+
60
# Example inputs for the demo (currently disabled).
# contoh = [["TAMPAN"]]

# Page title shown at the top of the demo.
title = "Text Summarization ID"

# Short description shown under the title.
description = "Demo for Text Summarization ID. Models are mT5"

# Footer HTML linking to the project repository (currently disabled).
# article = "<p style='text-align: center'><a href='https://github.com/sultanbst123/Text_summarization-id2id' target='_blank'><u>Untuk penjelasan lihat di repo ku</u> 😁</a></p>"

# Build the Gradio UI: a text box and a length slider feed `run_model`, and
# its return value is shown in a single output text box.
demo = gr.Interface(
    fn=run_model,
    inputs=[
        # Top-level components replace the deprecated gr.inputs.* namespace.
        gr.Textbox(
            lines=3,
            placeholder="Ketik disini...",
            label="Text",
        ),
        # The slider value is forwarded as `min_length`, so the label says
        # "Min"; `value=` replaces the deprecated `default=` keyword.
        gr.Slider(
            minimum=100,
            maximum=400,
            step=10,
            value=150,
            label="Min Length (Minimum Summary Length)",
        ),
    ],
    outputs=gr.Textbox(
        label="Output text",
    ),
    title=title,
    description=description,
    # article=article,
    # examples=contoh,
)

# Start the web server with Gradio's debug mode enabled.
demo.launch(debug=True)
100
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==3.34.0
2
+ tensorflow==2.12.0
3
+ transformers==4.30.0