from functools import lru_cache

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Load the fine-tuned BART tokenizer and model once at import time so that
# repeated summarization calls reuse the same weights.
tokenizer = AutoTokenizer.from_pretrained("EE21/BART-ToSSimplify")
model = AutoModelForSeq2SeqLM.from_pretrained("EE21/BART-ToSSimplify")

# Load the stock BART-large-cnn tokenizer and model.
tokenizer_cnn = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model_cnn = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")


@lru_cache(maxsize=None)
def _get_summarization_pipeline(model_name):
    """Build (once) and cache a summarization pipeline for *model_name*.

    The original code constructed a fresh ``pipeline`` — reloading the whole
    model from disk/network — on every call; caching makes repeat calls cheap
    while producing identical output.
    """
    return pipeline("summarization", model=model_name)


def summarize_with_bart_ft(input_text):
    """Summarize ``input_text`` with the fine-tuned BART model (EE21/BART-ToSSimplify).

    The input is truncated to 1024 tokens; greedy decoding (num_beams=1)
    produces a summary of 100-300 tokens.
    """
    inputs = tokenizer.encode(
        "summarize: " + input_text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    summary_ids = model.generate(
        inputs,
        max_length=300,
        min_length=100,
        num_beams=1,
        early_stopping=False,
        length_penalty=1,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def summarize_with_bart_cnn(input_text):
    """Summarize ``input_text`` with facebook/bart-large-cnn.

    Same generation settings as :func:`summarize_with_bart_ft` except
    ``early_stopping=True`` (kept from the original code).
    """
    inputs = tokenizer_cnn.encode(
        "summarize: " + input_text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    summary_ids = model_cnn.generate(
        inputs,
        max_length=300,
        min_length=100,
        num_beams=1,
        early_stopping=True,
        length_penalty=1,
    )
    # Bug fix: the original decoded with the fine-tuned model's `tokenizer`;
    # ids produced by model_cnn must be decoded with its own tokenizer.
    return tokenizer_cnn.decode(summary_ids[0], skip_special_tokens=True)


def summarize_with_led(input_text):
    """Summarize ``input_text`` with pszemraj/led-base-book-summary."""
    pipe_led = _get_summarization_pipeline("pszemraj/led-base-book-summary")
    summary = pipe_led(
        input_text,
        max_length=300,
        min_length=100,
        num_beams=1,
        early_stopping=False,
        length_penalty=1,
    )
    return summary[0]["summary_text"]


def summarize_with_t5(input_text):
    """Summarize ``input_text`` with pszemraj/long-t5-tglobal-base-sci-simplify."""
    pipe_t5 = _get_summarization_pipeline(
        "pszemraj/long-t5-tglobal-base-sci-simplify"
    )
    summary = pipe_t5(
        input_text,
        max_length=300,
        min_length=100,
        num_beams=1,
        early_stopping=False,
        length_penalty=1,
    )
    return summary[0]["summary_text"]