from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Load the fine-tuned BART tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("EE21/BART-ToSSimplify")
model = AutoModelForSeq2SeqLM.from_pretrained("EE21/BART-ToSSimplify")

# Load the BART-large-cnn tokenizer and model
tokenizer_cnn = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model_cnn = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

# Function to summarize using the fine-tuned BART model
def summarize_with_bart_ft(input_text):
    inputs = tokenizer.encode("summarize: " + input_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(inputs, max_length=300, min_length=100, num_beams=1, early_stopping=False, length_penalty=1)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Function to summarize using the bart-large-cnn model
def summarize_with_bart_cnn(input_text):
    inputs = tokenizer_cnn.encode("summarize: " + input_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model_cnn.generate(inputs, max_length=300, min_length=100, num_beams=1, early_stopping=True, length_penalty=1)
    summary = tokenizer_cnn.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Function to summarize using led-base-book-summary
def summarize_with_led(input_text):
    pipe_led = pipeline("summarization", model="pszemraj/led-base-book-summary")
    summary = pipe_led(input_text, max_length=300, min_length=100, num_beams=1, early_stopping=False, length_penalty=1)
    return summary[0]['summary_text']

# Function to summarize using long-t5-tglobal-base-sci-simplify
def summarize_with_t5(input_text):
    pipe_t5 = pipeline("summarization", model="pszemraj/long-t5-tglobal-base-sci-simplify")
    summary = pipe_t5(input_text, max_length=300, min_length=100, num_beams=1, early_stopping=False, length_penalty=1)
    return summary[0]['summary_text']
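
# --- Example usage (illustrative sketch) ---
# The clause below is a made-up placeholder, not drawn from any real Terms of
# Service; the models above are tuned for longer ToS passages, so substitute a
# full document excerpt for meaningful output.
if __name__ == "__main__":
    sample_clause = (
        "We may suspend or terminate your account at any time, with or without "
        "notice, if we believe you have violated these terms. Upon termination, "
        "your right to use the service ceases immediately and any stored data "
        "may be deleted."
    )

    print("Fine-tuned BART:", summarize_with_bart_ft(sample_clause))
    print("BART-large-cnn:", summarize_with_bart_cnn(sample_clause))
    print("LED:", summarize_with_led(sample_clause))
    print("Long-T5:", summarize_with_t5(sample_clause))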