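# Gradio app (Hugging Face Space) that produces abstractive summaries with the
# multilingual mT5 checkpoint "csebuetnlp/mT5_multilingual_XLSum" from the
# Hugging Face Hub. The UI is a single text box, a button, and two output
# fields that both display the generated summary.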
import re

import gradio as gr
import torch  # not used directly, but the PyTorch backend must be installed for this model
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL_NAME = "csebuetnlp/mT5_multilingual_XLSum"

# Collapse newlines and runs of whitespace into single spaces before tokenizing.
WHITESPACE_HANDLER = lambda k: re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', k.strip()))

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
def summarize(text):
    # Tokenize the cleaned input, padding/truncating to the model's 512-token limit.
    input_ids = tokenizer(
        [WHITESPACE_HANDLER(text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512
    )["input_ids"]

    # Beam-search generation with bigram-repetition blocking, capped at 84 tokens.
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4
    )[0]

    summary = tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )
    # Return the summary twice: the click handler below wires two output
    # components (a Markdown view and a Textbox), one value per component.
    return summary, summary
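# The summarizer can also be called directly, without launching the UI:
#   text = "A long article or passage (the model is multilingual) ..."
#   print(summarize(text)[0])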
demo = gr.Blocks(title="⭐ Summ4rizer ⭐")
demo.encrypt = False  # legacy flag from older Gradio releases; ignored by current versions
with demo:
    gr.Markdown(f'''
<div>
<h1 style='text-align: center'>Text Summarizer</h1>
</div>
<div>
Using the summarization model <a href='https://huggingface.co/{MODEL_NAME}' target='_blank'><b>{MODEL_NAME}</b></a>.
</div>
''')
    text = gr.Textbox(label="Text here !!", lines=1, interactive=True)
    summarize_btn = gr.Button("Let's Summarize")
    summarization = gr.Textbox(label="Summary")
    html_output = gr.Markdown()
    # Wire the button: summarize returns two values, one for each output component.
    summarize_btn.click(summarize, inputs=[text], outputs=[html_output, summarization])
demo.launch()
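# To run locally (assumes gradio, transformers, torch, and sentencepiece are installed),
# execute this file with Python; launch() prints a local URL to open in the browser.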