import os
import tempfile

import gradio as gr
import PyPDF2
from docx import Document
from pptx import Presentation
from transformers import pipeline

# Initialize Hugging Face models for summarization, rephrasing, and sentiment analysis
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
rephraser = pipeline("text2text-generation", model="Vamsi/T5_Paraphrase_Paws", max_length=512, truncation=True)
sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")


# Read content from different file types.
# Gradio may hand the callback either a file path (str) or a file-like object
# whose .name attribute holds the path, so normalize to a path first.
def read_file(file, file_type):
    content = ""
    file_path = getattr(file, "name", file)
    try:
        if file_type == "docx":
            doc = Document(file_path)
            for para in doc.paragraphs:
                content += para.text + "\n"
        elif file_type == "txt":
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
        elif file_type == "pdf":
            pdf_reader = PyPDF2.PdfReader(file_path)
            for page in pdf_reader.pages:
                # extract_text() can return None for image-only pages
                content += (page.extract_text() or "") + "\n"
        elif file_type == "pptx":
            prs = Presentation(file_path)
            for slide in prs.slides:
                for shape in slide.shapes:
                    if hasattr(shape, "text"):
                        content += shape.text + "\n"
    except Exception as e:
        content = f"Error reading the file: {str(e)}"
    return content


# Process the file and generate all outputs
def process_file(file, file_type, language="en"):
    content = read_file(file, file_type)

    # Bail out early if the document is empty or could not be read
    if not content.strip() or content.startswith("Error"):
        return "Error: The document is empty or in an unsupported format.", None, None, None, None, None

    # Summarize the content (truncate inputs that exceed the model's maximum length)
    try:
        summary = summarizer(content, max_length=150, min_length=50, do_sample=False, truncation=True)
        summary_text = summary[0]['summary_text']
    except Exception as e:
        summary_text = f"Summary Error: {str(e)}"

    # Rephrase the entire content in manageable chunks
    rephrased_text = ""
    try:
        chunk_size = 500
        content_chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
        for chunk in content_chunks:
            rephrased = rephraser(chunk)
            rephrased_text += rephrased[0]['generated_text'] + " "
    except Exception as e:
        rephrased_text = f"Rephrase Error: {str(e)}"

    # Sentiment analysis on the first 512 characters
    try:
        sentiment = sentiment_analyzer(content[:512])
        sentiment_text = sentiment[0]['label']
    except Exception as e:
        sentiment_text = f"Sentiment Analysis Error: {str(e)}"

    # Extract keywords (for simplicity, just the first ten words; swap in a real keyword extractor if needed)
    keywords = ' '.join(content.split()[:10])

    # Save the processed content to a temporary file for the download link
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file:
            temp_file.write(content.encode('utf-8'))
            processed_file_path = temp_file.name
    except Exception as e:
        processed_file_path = f"Error saving processed document: {str(e)}"

    return content, rephrased_text.strip(), summary_text, sentiment_text, keywords, processed_file_path


# Infer the file type ("pdf", "docx", ...) from the uploaded file's extension
def infer_file_type(file):
    file_path = getattr(file, "name", file)
    return os.path.splitext(file_path)[1].lstrip(".").lower()


# Home page: shows the original and rephrased content
def home_page():
    with gr.Blocks() as home:
        # Header
        gr.Markdown("## Upload a Document to Process")

        # Menu bar as buttons (decorative; navigation is handled by the tabs)
        with gr.Row():
            home_btn = gr.Button("Home")
            full_analysis_btn = gr.Button("Full Analysis", variant="primary")

        # Introductory text for the home page
        gr.Markdown("Welcome to the Document Processor!")
        gr.Markdown("Upload your document here and switch to the 'Full Analysis' tab for more details.")

        # File upload and content outputs
        file_input = gr.File(label="Upload Document")
        content_output = gr.Textbox(label="Original Content")
        rephrased_output = gr.Textbox(label="Rephrased Content")

        def on_file_upload(file):
            if not file:
                return "No file uploaded.", None
            # Determine the file type from the extension instead of assuming .docx
            content, rephrased, _, _, _, _ = process_file(file, infer_file_type(file))
            return content, rephrased

        # Process the file as soon as it is uploaded
        file_input.change(on_file_upload, inputs=file_input, outputs=[content_output, rephrased_output])

    return home


# Full analysis page: keywords, sentiment, and a download link
def detailed_page():
    with gr.Blocks() as detailed:
        # Header
        gr.Markdown("## Detailed Analysis Page")

        # Menu bar as buttons (decorative; navigation is handled by the tabs)
        with gr.Row():
            home_btn = gr.Button("Home", variant="primary")
            full_analysis_btn = gr.Button("Full Analysis")

        # File upload and processing components
        file_input = gr.File(label="Upload Document")
        file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type")
        keywords_output = gr.Textbox(label="Keywords")
        sentiment_output = gr.Textbox(label="Sentiment Analysis")
        download_link = gr.File(label="Download Processed Document")

        def on_file_upload(file, file_type):
            if not file:
                return "No file uploaded.", None, None
            # Fall back to the file extension if no type was selected in the dropdown
            if not file_type:
                file_type = infer_file_type(file)
            _, _, _, sentiment, keywords, download_path = process_file(file, file_type)
            return keywords, sentiment, download_path

        # Process the file as soon as it is uploaded
        file_input.change(on_file_upload, inputs=[file_input, file_type], outputs=[keywords_output, sentiment_output, download_link])

        gr.Markdown("Detailed analysis outputs will appear here after a document is uploaded.")

    return detailed


# Main application interface with tabbed navigation
iface = gr.TabbedInterface([home_page(), detailed_page()], ["Home", "Full Analysis"])
iface.launch()