# Hugging Face Space: turanhasan/BoraAk (status: Sleeping)
# File size: 19,902 bytes

import streamlit as st
import os
import google.generativeai as genai
from google.ai.generativelanguage_v1beta.types import content
import json
from tempfile import NamedTemporaryFile
from datetime import datetime
import io
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
import smtplib
from email.mime.text import MIMEText
from streamlit_mic_recorder import mic_recorder
import wave

# Install streamlit-mic-recorder if not already installed:
# pip install streamlit-mic-recorder

# Seed every session-state slot the app relies on. A slot that already holds a
# value (from an earlier rerun of this script) is left untouched.
_session_defaults = {
    'chat_history': [],            # (role, message) tuples shown in the Q&A section
    'diarization_output': None,    # formatted transcript text
    'uploaded_file': None,         # last uploaded file that was processed
    'language': "English",         # report language chosen in the selectbox
    'num_speakers': 2,             # speaker count passed to the diarization prompt
    'summary_output': None,        # generated meeting summary
    'key_decisions_output': None,  # generated key decisions text
    'email_sent_message': "",      # status text of the last email attempt
    'recorded_audio': None,        # raw bytes of the latest microphone recording
}
for _slot, _initial_value in _session_defaults.items():
    if _slot not in st.session_state:
        st.session_state[_slot] = _initial_value


# Page-level configuration: title and wide layout.
st.set_page_config(page_title="AI Meeting Notes & Reporting", layout="wide")

# Function to generate PDF report
def generate_pdf_report(meeting_date, summary, key_decisions, transcription):
    """Render the meeting report as a PDF document and return its bytes.

    The report has a title, the meeting date, a "Summary:" section and, when
    the corresponding argument is non-empty, "Key Decisions:" and
    "Transcription:" sections. Text lines that are wider than the printable
    area are wrapped, and a new page is started before any line that would
    fall below the bottom margin.

    Args:
        meeting_date: Object with a ``strftime`` method (a date or datetime).
        summary: Summary text; newlines separate lines. ``None`` is treated
            as an empty summary.
        key_decisions: Newline-separated decisions; blank entries are skipped
            and each remaining entry is printed with a "- " prefix.
        transcription: Transcript text; newlines separate lines.

    Returns:
        The PDF file content as ``bytes``.
    """
    # Imported locally because only this function needs it.
    from reportlab.lib.utils import simpleSplit

    body_font = "Helvetica"
    heading_font = "Helvetica-Bold"
    left_margin = inch
    bottom_margin = inch
    page_top = 10.5 * inch
    # Printable width: page width minus a one-inch margin on each side.
    text_width = letter[0] - 2 * inch

    buffer = io.BytesIO()
    p = canvas.Canvas(buffer, pagesize=letter)
    p.setFont(heading_font, 16)
    p.drawString(left_margin, page_top, "Meeting Report")
    p.setFont(body_font, 12)
    p.drawString(left_margin, 10 * inch, f"Date: {meeting_date.strftime('%Y-%m-%d')}")

    y_position = 9.5 * inch

    def draw_line(text, font_name, font_size, leading):
        """Draw one already-wrapped line, breaking the page first if needed."""
        nonlocal y_position
        if y_position < bottom_margin:
            p.showPage()
            y_position = page_top
        # The font is set for every line so it is also in effect on a new
        # page (the original code re-applied it after each showPage()).
        p.setFont(font_name, font_size)
        p.drawString(left_margin, y_position, text)
        y_position -= leading

    def draw_section(title, lines, font_size, leading):
        """Draw a bold section heading followed by wrapped body lines."""
        draw_line(title, heading_font, 12, 0.3 * inch)
        for raw_line in lines:
            # simpleSplit returns no pieces for a blank line; keep the blank
            # line so paragraph spacing in the source text is preserved.
            pieces = simpleSplit(raw_line, body_font, font_size, text_width) or [""]
            for piece in pieces:
                draw_line(piece, body_font, font_size, leading)

    draw_section("Summary:", (summary or "").split('\n'), 10, 0.2 * inch)

    if key_decisions:
        decisions = [
            f"- {decision.strip()}"
            for decision in key_decisions.strip().split('\n')
            if decision.strip()
        ]
        draw_section("Key Decisions:", decisions, 10, 0.2 * inch)

    if transcription:
        # Smaller font and tighter spacing for the (usually long) transcript.
        draw_section("Transcription:", transcription.split('\n'), 8, 0.15 * inch)

    p.save()
    pdf_out = buffer.getvalue()
    buffer.close()
    return pdf_out

def send_email_report(email_address, meeting_date, summary, key_decisions, transcription):
    """Send the meeting report as a plain-text email over SMTP with STARTTLS.

    Connection settings are read from the environment variables SMTP_SERVER,
    SMTP_PORT, SMTP_USERNAME and SMTP_PASSWORD. The SMTP username is also
    used as the sender address.

    Args:
        email_address: Recipient address entered by the user.
        meeting_date: Object with a ``strftime`` method (a date or datetime).
        summary: Summary text placed in the message body.
        key_decisions: Key decisions text placed in the message body.
        transcription: Transcript text placed in the message body.

    Returns:
        A ``(success, message)`` tuple. ``success`` is ``True`` when the mail
        was handed to the server; ``message`` is a human-readable status.
        Failures are reported through the tuple rather than raised.
    """
    smtp_server = os.environ.get("SMTP_SERVER")
    smtp_port = os.environ.get("SMTP_PORT")
    smtp_username = os.environ.get("SMTP_USERNAME")
    smtp_password = os.environ.get("SMTP_PASSWORD")
    sender_email = smtp_username  # For simplicity, the sender is the SMTP username

    if not all([smtp_server, smtp_port, smtp_username, smtp_password, sender_email]):
        return False, "SMTP configuration is missing. Please set environment variables: SMTP_SERVER, SMTP_PORT, SMTP_USERNAME, SMTP_PASSWORD."

    # Environment values are strings; validate the port before connecting so a
    # typo yields a clear configuration error instead of a connection failure.
    try:
        port_number = int(smtp_port)
    except ValueError:
        return False, f"SMTP configuration is invalid: SMTP_PORT must be an integer, got {smtp_port!r}."

    # The recipient is user input that ends up in a mail header: reject line
    # breaks so it cannot be used to inject additional headers.
    recipient = (email_address or "").strip()
    if not recipient or "\r" in recipient or "\n" in recipient:
        return False, "Invalid email address."

    date_text = meeting_date.strftime('%Y-%m-%d')
    subject = f"Meeting Report - {date_text}"
    body = f"Meeting Date: {date_text}\n\nSummary:\n{summary}\n\nKey Decisions:\n{key_decisions}\n\nTranscription:\n{transcription}"

    msg = MIMEText(body)
    msg['Subject'] = subject
    msg['From'] = sender_email
    msg['To'] = recipient

    try:
        # The timeout keeps an unreachable server from blocking the app forever.
        with smtplib.SMTP(smtp_server, port_number, timeout=30) as server:
            server.starttls()
            server.login(smtp_username, smtp_password)
            server.sendmail(sender_email, recipient, msg.as_string())
        return True, "Email sent successfully!"
    except Exception as e:
        # Boundary handler: callers expect a status tuple, never an exception.
        return False, f"Email sending failed: {e}"


# Main UI: page title followed by the input widgets.
st.title("AI Meeting Notes & Reporting")

# Meeting date (defaults to today)
meeting_date_time = st.date_input("Meeting Date & Time", datetime.today())

# Number of speakers, remembered across reruns via session state
num_speakers = st.number_input(
    "Number of speakers",
    min_value=1,
    max_value=10,
    value=st.session_state.num_speakers,
)
st.session_state.num_speakers = num_speakers

# Report language, preselecting the remembered choice (first entry if unknown)
report_languages = ["English", "Turkish", "Spanish", "French", "German"]
remembered_language = st.session_state.language
if remembered_language in report_languages:
    preselected = report_languages.index(remembered_language)
else:
    preselected = 0
language = st.selectbox("Language of report", report_languages, index=preselected)
st.session_state.language = language

# Audio can come from an uploaded file ...
uploaded_file = st.file_uploader("Upload audio file", type=['mp3', 'wav'])

# ... or from the in-browser microphone recorder.
audio_bytes = mic_recorder(start_prompt="Record", stop_prompt="Stop recording", key='recorder')

if audio_bytes:
    # The recorder result is used as a dict carrying the audio under 'bytes';
    # anything else is treated as the raw audio itself (fallback).
    is_payload_dict = isinstance(audio_bytes, dict) and "bytes" in audio_bytes
    captured_audio = audio_bytes["bytes"] if is_payload_dict else audio_bytes
    st.audio(captured_audio, format="audio/wav")
    st.session_state.recorded_audio = captured_audio

# Diarization, summarization and key decisions run automatically whenever a new
# upload or a new recording is detected.

_GEMINI_MODEL_NAME = "gemini-2.0-flash-exp"
_GEMINI_SAFETY_SETTINGS = {
    'HATE': 'BLOCK_NONE',
    'HARASSMENT': 'BLOCK_NONE',
    'SEXUAL': 'BLOCK_NONE',
    'DANGEROUS': 'BLOCK_NONE'
}


def _build_json_model(temperature, response_key=None):
    """Create a Gemini model configured to answer in JSON.

    When ``response_key`` is given, the response schema is an object with
    that single required string field; otherwise no schema is imposed.
    """
    generation_config = {
        "temperature": temperature,
        "top_p": 0.95,
        "top_k": 40,
        "max_output_tokens": 8192,
        "response_mime_type": "application/json",
    }
    if response_key is not None:
        generation_config["response_schema"] = content.Schema(
            type=content.Type.OBJECT,
            enum=[],
            required=[response_key],
            properties={
                response_key: content.Schema(
                    type=content.Type.STRING,
                ),
            },
        )
    return genai.GenerativeModel(
        model_name=_GEMINI_MODEL_NAME,
        generation_config=generation_config,
        safety_settings=_GEMINI_SAFETY_SETTINGS,
    )


def _ask_about_transcript(transcript, prompt, response_key, fallback):
    """Send ``prompt`` in a chat seeded with ``transcript`` and return one JSON field.

    Returns ``fallback`` when the parsed JSON reply lacks ``response_key``.
    """
    model = _build_json_model(0.25, response_key)
    chat_session = model.start_chat(
        history=[{"role": "user", "parts": [transcript]}]
    )
    reply = json.loads(chat_session.send_message(prompt).text)
    return reply.get(response_key, fallback)


process_audio = False
audio_source = None  # "upload" or "recording" once new audio is detected
audio_source_indicator = ""

# Identify an upload by name and size rather than by comparing the file
# objects themselves, which have no value equality.
# NOTE(review): Streamlit appears to hand back a fresh object on every rerun,
# which would make an object comparison always report "changed" - confirm.
if uploaded_file:
    upload_signature = (uploaded_file.name, uploaded_file.size)
else:
    upload_signature = None
    # Forget the previous upload so re-uploading the same file is processed.
    st.session_state.uploaded_file_signature = None

latest_recording = st.session_state.recorded_audio

if uploaded_file and upload_signature != st.session_state.get('uploaded_file_signature'):
    st.session_state.uploaded_file_signature = upload_signature
    st.session_state.uploaded_file = uploaded_file  # Update session state
    st.session_state.recorded_audio = None  # Reset recorded audio
    process_audio = True
    audio_source = "upload"
    audio_source_indicator = f"Processing uploaded file: {uploaded_file.name}"
elif latest_recording and latest_recording != st.session_state.get('last_recorded_audio_hash'):
    # Compare against the stored recording directly. The previous
    # getattr(..., 'value', None) lookup always produced None, so every rerun
    # looked like a new recording and wiped the results and chat history.
    st.session_state.last_recorded_audio_hash = latest_recording
    st.session_state.uploaded_file = None  # Reset uploaded file
    process_audio = True
    audio_source = "recording"
    audio_source_indicator = "Processing recorded audio"


if process_audio:
    # Drop everything derived from the previous audio.
    st.session_state.diarization_output = None
    st.session_state.summary_output = None
    st.session_state.key_decisions_output = None
    st.session_state.chat_history = []
    st.session_state.email_sent_message = ""

    with st.spinner(f"Processing audio and generating summary and key decisions... {audio_source_indicator}"):
        temp_path = None
        try:
            # Configure Gemini, with a readable error when the key is absent.
            api_key = os.environ.get("GEMINI_API_KEY")
            if not api_key:
                raise RuntimeError("GEMINI_API_KEY environment variable is not set.")
            genai.configure(api_key=api_key)

            if audio_source == "upload":
                # Keep the real container type: the uploader accepts mp3 and wav.
                extension = os.path.splitext(uploaded_file.name)[1].lower()
                if extension == ".wav":
                    file_suffix, mime_type = ".wav", "audio/wav"
                else:
                    file_suffix, mime_type = ".mp3", "audio/mpeg"
                audio_payload = uploaded_file.getvalue()
            else:
                # Recordings are handled as WAV data.
                file_suffix, mime_type = ".wav", "audio/wav"
                audio_payload = latest_recording

            with NamedTemporaryFile(delete=False, suffix=file_suffix) as tmp_file:
                # Record the path first so cleanup happens even if the write fails.
                temp_path = tmp_file.name
                tmp_file.write(audio_payload)
            # Upload only after the handle is closed, so every byte has been
            # flushed to disk before the file is read back.
            gemini_file = genai.upload_file(temp_path, mime_type=mime_type)

            # --- Diarization ---
            diarization_model = _build_json_model(0.5)
            chat_session_diarization = diarization_model.start_chat(
                history=[{"role": "user", "parts": [gemini_file]}]
            )
            response_diarization = chat_session_diarization.send_message(
                f"Generate meeting diarization of the meeting audio record provided in the file. "
                f"The meeting may be in a foreign language, expect a mixture of words in local language "
                f"and words in english. Provided audio has {num_speakers} speakers. "
                f"Accurately name the speakers or use labels like SPEAKER_01, SPEAKER_02, SPEAKER_03 and so on. "
                f"Provide a structured JSON output. timestamp (hh:mm:ss), speaker (name only), "
                f"speech (transcription). Do not transcribe filler words."
            )

            json_data_diarization = json.loads(response_diarization.text)
            formatted_output = "".join(
                f"{item['timestamp']} - {item['speaker']}: {item['speech']}\n\n"
                for item in json_data_diarization
            )
            # Stored before the later steps so the transcript is still shown
            # if summarization or key-decision extraction fails.
            st.session_state.diarization_output = formatted_output

            # --- Summarization ---
            summary = _ask_about_transcript(
                formatted_output,
                f"Generate a detailed summarization of the meeting, provide information on "
                f"the topic of the meeting, agenda, things discussed and future plans if any mentioned. "
                f"Provide structured output with only one tag 'summary'. Generate response in {language}.",
                'summary',
                "No summary found.",
            )
            st.session_state.summary_output = summary
            st.session_state.chat_history.append(("Summary", summary))

            # --- Key Decisions ---
            key_decisions = _ask_about_transcript(
                formatted_output,
                f"Identify and list the key decisions made during the meeting. "
                f"Generate response in {language}.",
                'key_decisions',
                "No key decisions found.",
            )
            st.session_state.key_decisions_output = key_decisions

        except Exception as e:
            # UI boundary: report the failure instead of crashing the script.
            st.error(f"Error processing audio: {str(e)}")
        finally:
            # Clean up temp file if created
            if temp_path and os.path.exists(temp_path):
                os.unlink(temp_path)

# Show whichever results are currently held in session state.

# Transcript
transcript_text = st.session_state.diarization_output
if transcript_text:
    st.subheader("Diarization Output")
    st.text_area("Transcript", transcript_text, height=300)

# Summary
summary_text = st.session_state.summary_output
if summary_text:
    st.subheader("Summary")
    st.write(summary_text)

# Key decisions: one markdown bullet per non-blank line
decisions_text = st.session_state.key_decisions_output
if decisions_text:
    st.subheader("Key decisions")
    for entry in (raw.strip() for raw in decisions_text.strip().split('\n')):
        if entry:
            st.markdown(f"- {entry}")


# PDF export: build the report on demand and offer it for download.
if st.button("Generate PDF report"):
    # Order matches generate_pdf_report's summary, key_decisions, transcription.
    report_sections = (
        st.session_state.summary_output,
        st.session_state.key_decisions_output,
        st.session_state.diarization_output,
    )
    if not all(report_sections):
        # At least one result is missing, so there is nothing to export yet.
        st.warning("Please upload or record audio to generate report.")
    else:
        pdf_bytes = generate_pdf_report(meeting_date_time, *report_sections)
        st.download_button(
            label="Download PDF Report",
            data=pdf_bytes,
            file_name="meeting_report.pdf",
            mime="application/pdf",
        )

# Q&A section: answer free-form questions against the stored transcript.
if st.session_state.diarization_output:
    st.subheader("Question Answering")
    question = st.text_input("Type in your question")
    if st.button("Send"):
        # Ignore empty and whitespace-only questions.
        asked_question = question.strip()
        if asked_question:
            # Add user question to chat history
            st.session_state.chat_history.append(("User", asked_question))

            answer_stored = False
            with st.spinner("Generating response..."):
                try:
                    # Model constrained to return {"answer": "<string>"} as JSON.
                    qna_config = {
                        "temperature": 0.25,
                        "top_p": 0.95,
                        "top_k": 40,
                        "max_output_tokens": 8192,
                        "response_schema": content.Schema(
                            type=content.Type.OBJECT,
                            enum=[],
                            required=["answer"],
                            properties={
                                "answer": content.Schema(
                                    type=content.Type.STRING,
                                ),
                            },
                        ),
                        "response_mime_type": "application/json",
                    }

                    qna_model = genai.GenerativeModel(
                        model_name="gemini-2.0-flash-exp",
                        generation_config=qna_config,
                        safety_settings={
                            'HATE': 'BLOCK_NONE',
                            'HARASSMENT': 'BLOCK_NONE',
                            'SEXUAL': 'BLOCK_NONE',
                            'DANGEROUS': 'BLOCK_NONE'
                        }
                    )

                    # Each question starts a fresh chat seeded with the transcript.
                    chat_session_qna = qna_model.start_chat(
                        history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
                    )

                    # The question sits on its own line and the instructions are
                    # separated by spaces; previously the pieces ran together
                    # ("...in English.Provide...") with a stray "." after the question.
                    response_qna = chat_session_qna.send_message(
                        f"Answer the following question based on the meeting: {asked_question}\n"
                        f"Generate response in {language}. "
                        f"Provide structured output with only one tag 'answer'."
                    )

                    json_data_qna = json.loads(response_qna.text)
                    answer = json_data_qna.get('answer', "No answer found.")

                    # Add bot response to chat history
                    st.session_state.chat_history.append(("Bot", answer))
                    answer_stored = True

                except Exception as e:
                    st.error(f"Error generating answer: {str(e)}")

            # Rerun to update the chat display. Kept outside the try block so
            # the rerun signal can never be mistaken for a generation failure.
            if answer_stored:
                st.rerun()

# Render the Q&A exchange; entries with any other role (e.g. "Summary") are
# stored in the history but not shown here.
_chat_labels = {"User": "Question", "Bot": "Answer"}
for role, message in st.session_state.chat_history:
    label = _chat_labels.get(role)
    if label is not None:
        st.write(f"**{label}**: {message}")

# Share by email: recipient field plus a button that sends the report.
st.subheader("Share Report")
email_address = st.text_input("Email address:")

send_button = st.button("Send Report")
if send_button:
    # Order matches send_email_report's summary, key_decisions, transcription.
    report_sections = (
        st.session_state.summary_output,
        st.session_state.key_decisions_output,
        st.session_state.diarization_output,
    )
    if not email_address:
        st.warning("Please enter an email address.")
    elif not all(report_sections):
        st.warning("Please upload or record audio and generate report first.")
    else:
        success, message = send_email_report(
            email_address, meeting_date_time, *report_sections
        )
        # Green banner on success, red banner on failure.
        show_status = st.success if success else st.error
        show_status(message)