Spaces:
Sleeping
Sleeping
| """ | |
| Meeting Summarizer - Applicazione Gradio | |
| Web app per l'analisi e sintesi automatica di meeting tramite GPT-4o-mini. | |
| """ | |
| import os | |
| import tempfile | |
| import shutil | |
| import gradio as gr | |
| from typing import Tuple, Optional | |
| # Import moduli locali | |
| from utils.text_extraction import extract_text, get_supported_extensions | |
| from utils.transcription import transcribe_audio, is_audio_file, get_supported_audio_extensions | |
| from utils.llm_analysis import analyze_meeting, format_analysis_for_display | |
| from utils.pdf_generator import generate_pdf, cleanup_temp_pdf | |
| from utils.data_persistence import save_meeting_to_dataset | |
| # Configurazione logging | |
| import logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
| # Variabili globali per file temporanei | |
| _temp_files = [] | |
| def process_meeting(file, api_key: str, hf_token: str = "") -> Tuple[str, str, str, str, str]: | |
| """ | |
| Processa un file di meeting e restituisce l'analisi completa. | |
| Args: | |
| file: File caricato dall'utente | |
| api_key (str): Chiave API OpenAI | |
| hf_token (str): Token Hugging Face (opzionale) | |
| Returns: | |
| Tuple[str, str, str, str, str]: (summary, topics, keywords, pdf_path, message) | |
| """ | |
| global _temp_files | |
| try: | |
| # Verifica input e debug | |
| logger.info(f"DEBUG: file ricevuto tipo={type(file)} valore={file}") | |
| if not file: | |
| return "", "", "", None, "β Error: No file uploaded" | |
| if not api_key: | |
| return "", "", "", None, "β Error: OpenAI API key required" | |
| # Gestisci l'oggetto file di Gradio | |
| # Nelle versioni recenti di Gradio, il file puΓ² essere una stringa (path) o un oggetto | |
| if isinstance(file, str): | |
| file_path = file | |
| else: | |
| # Se Γ¨ un oggetto file, estrai il path | |
| file_path = file.name if hasattr(file, 'name') else str(file) | |
| logger.info(f"DEBUG: file_path={file_path} exists={os.path.exists(file_path)} isfile={os.path.isfile(file_path) if os.path.exists(file_path) else 'N/A'}") | |
| # Verify that the file exists and is a file (not a directory) | |
| if not os.path.exists(file_path): | |
| return "", "", "", None, "β Error: File not found or invalid" | |
| if not os.path.isfile(file_path): | |
| return "", "", "", None, f"β Error: Path is a directory, not a file: {file_path}" | |
| # Estrai testo dal file | |
| text = "" | |
| file_name = os.path.basename(file_path) | |
| logger.info(f"Processamento file: {file_name}") | |
| logger.info(f"Percorso file: {file_path}") | |
| # Determine if it's an audio file | |
| if is_audio_file(file_name): | |
| logger.info("Audio file detected, starting transcription...") | |
| text = transcribe_audio(file_path) | |
| if not text: | |
| return "", "", "", None, "β Error: Transcription failed" | |
| logger.info("Transcription completed") | |
| else: | |
| # Extract text from document | |
| logger.info("Document file detected, extracting text...") | |
| text = extract_text(file_path) | |
| if not text: | |
| return "", "", "", None, "β Error: Text extraction failed" | |
| logger.info("Text extraction completed") | |
| # Verify that the text is not empty | |
| if not text.strip(): | |
| return "", "", "", None, "β Error: No text extracted from file" | |
| # Analyze with GPT-4o-mini | |
| logger.info("Starting analysis with GPT-4o-mini...") | |
| analysis = analyze_meeting(text, api_key) | |
| if not analysis: | |
| return "", "", "", None, "β Error: Analysis failed" | |
| # Formatta per display | |
| formatted_analysis = format_analysis_for_display(analysis) | |
| # Generate PDF | |
| logger.info("Generating PDF...") | |
| pdf_path = generate_pdf(analysis) | |
| # Debug: verify that pdf_path is valid | |
| if pdf_path: | |
| logger.info(f"PDF generated: {pdf_path}") | |
| logger.info(f"PDF exists: {os.path.exists(pdf_path)}") | |
| logger.info(f"PDF is a file: {os.path.isfile(pdf_path)}") | |
| # If not a valid file, set to None | |
| if not os.path.isfile(pdf_path): | |
| logger.warning(f"PDF path invalid: {pdf_path}") | |
| pdf_path = None | |
| # Save to dataset if token provided | |
| if hf_token: | |
| logger.info("Saving to Hugging Face Dataset...") | |
| meeting_data = { | |
| "file_name": file_name, | |
| "transcription": text, | |
| "summary": analysis.get("summary", ""), | |
| "topics": analysis.get("topics", []), | |
| "keywords": analysis.get("keywords", []) | |
| } | |
| if save_meeting_to_dataset(meeting_data, hf_token): | |
| logger.info("Meeting saved to HF Dataset") | |
| else: | |
| logger.warning("Saving to HF Dataset failed") | |
| # Add PDF to temporary files for cleanup | |
| if pdf_path: | |
| _temp_files.append(pdf_path) | |
| # Success message | |
| success_msg = f"β Meeting analyzed successfully!\n\nπ File: {file_name}\nπ Characters analyzed: {len(text)}\nπ Topics identified: {len(analysis.get('topics', []))}\nπ Keywords: {len(analysis.get('keywords', []))}" | |
| if hf_token: | |
| success_msg += "\nπΎ Data saved to Hugging Face Dataset" | |
| return ( | |
| formatted_analysis["summary"], | |
| formatted_analysis["topics"], | |
| formatted_analysis["keywords"], | |
| pdf_path if pdf_path else None, | |
| success_msg | |
| ) | |
| except Exception as e: | |
| logger.error(f"Error during processing: {str(e)}") | |
| return "", "", "", None, f"β Error: {str(e)}" | |
| def cleanup_temp_files(): | |
| """Clean up temporary files.""" | |
| global _temp_files | |
| for file_path in _temp_files: | |
| if os.path.exists(file_path): | |
| try: | |
| os.remove(file_path) | |
| except Exception as e: | |
| logger.warning(f"Unable to delete {file_path}: {str(e)}") | |
| _temp_files.clear() | |
| def create_interface(): | |
| """Create the Gradio interface.""" | |
| # Supported extensions | |
| supported_extensions = get_supported_extensions() + get_supported_audio_extensions() | |
| with gr.Blocks( | |
| title="Meeting Summarizer", | |
| theme=gr.themes.Soft(), | |
| css=""" | |
| .gradio-container { | |
| max-width: 1200px !important; | |
| } | |
| .success-message { | |
| background-color: #d4edda; | |
| border: 1px solid #c3e6cb; | |
| color: #155724; | |
| padding: 10px; | |
| border-radius: 5px; | |
| } | |
| """ | |
| ) as app: | |
| gr.Markdown( | |
| """ | |
| # π― Meeting Summarizer | |
| Upload a meeting file (audio, PDF, DOCX, TXT) and automatically get: | |
| - π **Complete summary** of the meeting | |
| - π·οΈ **Main topics** discussed | |
| - π **Relevant keywords** | |
| - π **Downloadable PDF** with all results | |
| --- | |
| """ | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| # Input file | |
| file_input = gr.File( | |
| label="π Upload Meeting File", | |
| file_types=supported_extensions, | |
| file_count="single" | |
| ) | |
| # API Key OpenAI | |
| api_key_input = gr.Textbox( | |
| label="π OpenAI API Key", | |
| placeholder="Enter your OpenAI API key...", | |
| type="password", | |
| info="Required for analysis with GPT-4o-mini" | |
| ) | |
| # HF Token (optional) | |
| hf_token_input = gr.Textbox( | |
| label="π€ Hugging Face Token (Optional)", | |
| placeholder="Enter your HF token to save data...", | |
| type="password", | |
| info="Optional: to save results to Hugging Face Dataset" | |
| ) | |
| # Analyze button | |
| analyze_btn = gr.Button( | |
| "π Analyze Meeting", | |
| variant="primary", | |
| size="lg" | |
| ) | |
| # Status message | |
| status_msg = gr.Textbox( | |
| label="π Status", | |
| interactive=False, | |
| visible=True | |
| ) | |
| with gr.Column(scale=2): | |
| # Output summary | |
| summary_output = gr.Markdown( | |
| label="π Meeting Summary", | |
| value="The summary will appear here after analysis..." | |
| ) | |
| # Output topics | |
| topics_output = gr.Markdown( | |
| label="π·οΈ Main Topics", | |
| value="The main topics will appear here..." | |
| ) | |
| # Output keywords | |
| keywords_output = gr.Markdown( | |
| label="π Keywords", | |
| value="The keywords will appear here..." | |
| ) | |
| # Download PDF | |
| pdf_download = gr.File( | |
| label="π Download PDF Report", | |
| visible=True | |
| ) | |
| # Footer | |
| gr.Markdown( | |
| """ | |
| --- | |
| ### βΉοΈ Information | |
| **Supported formats:** | |
| - π΅ **Audio**: MP3, WAV, M4A, FLAC, OGG | |
| - π **Documents**: PDF, DOCX, TXT | |
| **Features:** | |
| - π€ Automatic audio transcription with Whisper | |
| - π€ Intelligent analysis with GPT-4o-mini | |
| - π Topic and keyword extraction | |
| - πΎ Save to Hugging Face Datasets | |
| - π Professional PDF generation | |
| **Notes:** | |
| - Audio files are automatically transcribed | |
| - Analysis is optimized for meetings | |
| - Data is saved only if you provide an HF token | |
| """ | |
| ) | |
| # Eventi | |
| analyze_btn.click( | |
| fn=process_meeting, | |
| inputs=[file_input, api_key_input, hf_token_input], | |
| outputs=[summary_output, topics_output, keywords_output, pdf_download, status_msg], | |
| show_progress=True | |
| ) | |
| # Cleanup al chiudere | |
| app.unload(cleanup_temp_files) | |
| return app | |
| def main(): | |
| """Main function.""" | |
| logger.info("Starting Meeting Summarizer...") | |
| # Create interface | |
| app = create_interface() | |
| # Launch server | |
| app.launch( | |
| server_name="0.0.0.0", | |
| server_port=7860, | |
| share=False, | |
| show_error=True, | |
| quiet=False | |
| ) | |
| if __name__ == "__main__": | |
| main() | |