File size: 1,242 Bytes
a5cb665
 
 
 
 
 
 
bfcb0af
a5cb665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset
import torch

# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# The seq2seq weights are loaded from SAVED_MODEL_PATH, while the tokenizer
# is loaded under the base model's name.
SAVED_MODEL_PATH = 'bart_base_full_finetune_save'
model_name = "facebook/bart-base"

model = AutoModelForSeq2SeqLM.from_pretrained(SAVED_MODEL_PATH)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the SAMSum dataset and expose its three splits as module-level names.
# NOTE(review): none of these splits is referenced by the code visible in
# this file -- confirm whether the load is still needed at startup.
dataset = load_dataset("samsum")

train_data, validation_data, test_data = (
    dataset[split] for split in ("train", "validation", "test")
)

def summarize(tokenizer, model, text):
    """Generate a summary of a dialogue with a seq2seq model.

    Args:
        tokenizer: Tokenizer used to encode the prompt and decode the
            generated ids (called as ``tokenizer(prompt, ...)`` and via
            ``tokenizer.decode``).
        model: Model exposing ``generate`` and a ``device`` attribute.
        text: The dialogue to summarize.

    Returns:
        The decoded summary of the first generated sequence, or an empty
        string when ``text`` is empty or whitespace-only.
    """
    # Nothing to summarize: skip tokenization and beam search entirely.
    if not text or not text.strip():
        return ""

    prompt = f"Summarize dialogue >>\n {text}"

    # A single sequence needs no padding; padding it to 1000 tokens only
    # makes the encoder process pad positions. Truncation still caps the
    # input length at 1000 tokens.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=1000,
        truncation=True,
    ).to(model.device)  # follow the model's device rather than a global

    # Pass the attention mask explicitly instead of relying on generate()
    # to infer it from the input ids.
    summary_ids = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        num_beams=4,
        max_length=100,
        early_stopping=True,
    )

    # Only the first generated sequence is returned, so only it is decoded.
    return tokenizer.decode(
        summary_ids[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

def summarize_dialogue(text):
    """Summarize ``text`` using the module-level tokenizer and model.

    Single-argument adapter around ``summarize`` so it can be used as the
    Gradio interface callback.
    """
    return summarize(tokenizer=tokenizer, model=model, text=text)

# Web UI: a 10-line text box for the dialogue and a text box for the summary.
# The top-level gr.Textbox component replaces the gr.inputs / gr.outputs
# namespaces, which are deprecated in Gradio 3.x and removed in Gradio 4.
iface = gr.Interface(
    fn=summarize_dialogue,
    inputs=gr.Textbox(lines=10, label="Input Dialogue"),
    outputs=gr.Textbox(label="Generated Summary"),
)

# Start the Gradio server for the interface.
iface.launch()