import gradio as gr from PyPDF2 import PdfReader from transformers import AutoModelForSeq2SeqLM, AutoTokenizer from gtts import gTTS from io import BytesIO import pydub from pydub import AudioSegment import base64 import re model_name = "ArtifactAI/led_large_16384_arxiv_summarization" model = AutoModelForSeq2SeqLM.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) def extract_first_sentence(text): """ Extracts the first sentence from the given text. """ sentences = re.split(r'(?