Spaces:

Sultannn
/

Text_Summarize_ID

Runtime error

App Files Files Community

Sultannn committed on Jun 9, 2023

Commit

fac3c34

•

1 Parent(s): c5410dc

Upload 2 files

Browse files

Files changed (2) hide show

app.py +100 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,100 @@

+#mT5 Deployment OtherLanguage2ID
+# library
+import gradio as gr
+import tensorflow as tf
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# function to run
+def run_model(input_text,
+              min_length):
+    #mT5 Transformer
+    model_name = "csebuetnlp/mT5_m2m_crossSum_enhanced"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+    #get id language
+    get_lang_id = lambda lang: tokenizer._convert_token_to_id(
+    model.config.task_specific_params["langid_map"][lang][1])
+    target_lang = "indonesian" # for a list of available language names see below / defaul define
+    # preprocessing text input
+    input_text = str(input_text)
+    input_text = ' '.join(input_text.split()) # hapus white space dan
+    #encode input to vector
+    input_ids = tokenizer(input_text,
+                          return_tensors="pt",
+                          padding="max_length",
+                          truncation=True,
+                          max_length=512)["input_ids"]
+    #generate input
+    output_ids = model.generate(
+        input_ids=input_ids,
+        decoder_start_token_id=get_lang_id(target_lang),
+        min_length=min_length,
+        max_length=512,
+        no_repeat_ngram_size=2,
+        repetition_penalty=1.5,
+        temperature= 0.5, # [0.8 if temper == "Creative" else 0.2 if temper == "Better" else 0.2][0],
+        early_stopping=True,
+        num_beams=4)[0]
+    #decode output to text
+    summary = tokenizer.decode(
+        output_ids,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False)
+    return ' '.join(summary.split(' ')[1:])# get output to str
+# end
+#example
+# contoh = [["TAMPAN"]]
+#judul
+title = "Text Summarization ID"
+#deskripsi
+description = "Demo for Text Summarization ID. Models are mT5"
+#footer
+# article = "<p style='text-align: center'><a href='https://github.com/sultanbst123/Text_summarization-id2id' target='_blank'><u>Untuk penjelasan lihat di repo ku</u> 😁</a></p>"
+#run gradio
+gr.Interface(
+    fn=run_model,
+    #input text
+    inputs=[
+        gr.inputs.Textbox(
+            lines=3,
+            placeholder="Ketik disini...",
+            label="Text",
+        ),
+        gr.inputs.Slider(
+           minimum=100,
+           maximum=400,
+           step=10,
+           default=150,
+           label="Max Length(Maximal Sentence Length)",
+       ),
+    ],
+    #output text
+    outputs=
+    gr.outputs.Textbox(
+            label="Output text",
+    ),
+    title=title,
+    description=description,
+    # article=article,
+    # examples=contoh
+    ).launch(debug = True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==3.34.0
+tensorflow==2.12.0
+transformers==4.30.0
+# app.py uses the PyTorch model class (AutoModelForSeq2SeqLM, return_tensors="pt")
+torch==2.0.1
+# required by the slow (use_fast=False) SentencePiece-based mT5 tokenizer
+sentencepiece==0.1.99