"""Gradio app that summarizes an uploaded PDF, DOCX, or text document."""

import functools
import traceback

import gradio as gr
import pdfplumber
import textract
from docx import Document
from langdetect import detect
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline


def detect_language(text):
    """Return the language code detected for *text*, or None on failure.

    Detection errors are printed rather than raised so the caller can show
    a friendly message instead of crashing.
    """
    try:
        return detect(text)
    except Exception as e:
        print(f"Error detecting language: {e}")
        return None


def read_document(file_path):
    """Extract the text content of the document at *file_path*.

    ``.pdf`` files are read with pdfplumber, ``.docx`` files with
    python-docx, and everything else is handed to textract. The extension
    check is case-insensitive. Pages and paragraphs are joined with
    newlines so that words at their boundaries do not run together.

    Extraction is best-effort: on error the message is printed and
    whatever text was collected so far (possibly "") is returned.
    """
    parts = []
    try:
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            with pdfplumber.open(file_path) as pdf:
                for page in pdf.pages:
                    # extract_text() may yield None for pages without a
                    # text layer (e.g. scanned images); treat that as empty.
                    parts.append(page.extract_text() or "")
        elif lowered.endswith(".docx"):
            for para in Document(file_path).paragraphs:
                parts.append(para.text)
        else:
            parts.append(textract.process(file_path).decode())
    except Exception as e:
        print(f"An error occurred: {e}")
    return "\n".join(parts)


@functools.lru_cache(maxsize=2)
def _load_summarizer(model_name):
    """Build (once per model name) a summarization pipeline for *model_name*."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return pipeline("summarization", model=model, tokenizer=tokenizer)


def get_summarizer(language):
    """Return a summarization pipeline suited to *language*.

    English ("en") uses ``facebook/bart-large-cnn``; every other language
    code uses ``facebook/mbart-large-50``. Pipelines are cached per model,
    so the weights are loaded only on the first request for each model.
    """
    model_name = (
        "facebook/bart-large-cnn" if language == "en" else "facebook/mbart-large-50"
    )
    return _load_summarizer(model_name)


def _resolve_upload_path(file_info):
    """Return the filesystem path for the value the File component passed in.

    Accepts a plain path string, a dict carrying a "name" key, or a
    file-like object exposing the path as ``.name``.
    """
    if isinstance(file_info, str):
        return file_info
    if isinstance(file_info, dict):
        return file_info["name"]
    return file_info.name


def summarize_document(file_info):
    """Summarize the uploaded document and return the summary text.

    Args:
        file_info: The upload as delivered by ``gr.File`` (see
            ``_resolve_upload_path`` for the accepted shapes), or None
            when nothing was uploaded.

    Returns:
        The summary, or a human-readable message when the upload is
        missing, the document is empty/unreadable, language detection
        fails, or any other error occurs.
    """
    if file_info is None:
        return "No file was uploaded."

    try:
        # Read the uploaded file and extract text
        file_path = _resolve_upload_path(file_info)
        text = read_document(file_path)
        if not text.strip():
            return "The document is empty or could not be read."

        # Detect the language of the text
        language = detect_language(text)
        if not language:
            return "Language detection failed."

        # Get the appropriate summarizer model
        summarizer = get_summarizer(language)

        # truncation=True clips input that exceeds the model's maximum
        # length, so only the beginning of long documents is summarized.
        summary = summarizer(text, max_length=130, min_length=30, truncation=True)
        return summary[0]['summary_text']
    except Exception as e:
        # Top-level UI boundary: log the full traceback to the console and
        # surface the error message to the user instead of crashing.
        print(f"An error occurred: {e}")
        traceback.print_exc()
        return str(e)


# Gradio app interface.
# The File component's default `type` is used on purpose: type="binary" would
# hand the callback raw bytes with no filename, making it impossible to pick
# an extractor by extension. The default delivers a path (or an object
# exposing it as `.name`), both of which summarize_document accepts.
iface = gr.Interface(
    fn=summarize_document,
    inputs=gr.File(label="Upload your document (PDF, DOCX, or TXT)"),
    outputs="text",
    title="Document Summarizer",
    description="Upload your document and get a summarized version of its content. Currently supports English and French.",
)

# Run the Gradio app only when executed as a script, so importing this module
# (e.g. to reuse the helpers) does not start a web server.
if __name__ == "__main__":
    iface.launch()