from faster_whisper import WhisperModel
import os
import json
import tempfile

import requests
import gradio as gr
from pydub import AudioSegment
from typing import TypedDict
from langgraph.graph import StateGraph

MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
MODEL = "mistral-large-latest"


class State(TypedDict):
    file_path: str
    transcript: str
    summary_json: dict
    final_output: str


def convert_to_wav(input_path: str) -> str:
    """Convert the uploaded audio to WAV; pydub needs ffmpeg for non-WAV inputs."""
    if input_path.lower().endswith(".wav"):
        return input_path  # No conversion needed
    audio = AudioSegment.from_file(input_path)
    temp_wav_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    temp_wav_file.close()  # Close the handle so export can reopen the path (needed on Windows).
    audio.export(temp_wav_file.name, format="wav")
    return temp_wav_file.name


def transcribe_audio(state: State) -> State:
    # Loads the model on every call; cache it at module level if latency matters.
    model = WhisperModel("base", compute_type="float32")
    segments, _ = model.transcribe(state["file_path"], beam_size=5, language="en")
    transcript = " ".join(segment.text.strip() for segment in segments)
    return {**state, "transcript": transcript}


def summarize_transcript(state: State) -> State:
    prompt = f"""
You are an AI assistant summarizing meeting transcripts. Extract the following:

1. Attendees (names only, if possible)
2. Key discussion points (bullet format)
3. Action items (with owner and deadline, if mentioned)
4. Decisions made (if any)

Respond with JSON only, in this format:
{{
  "attendees": [...],
  "key_points": [...],
  "action_items": [...],
  "decisions": [...]
}}

Transcript:
\"\"\"{state['transcript']}\"\"\"
"""
    res = requests.post(
        "https://api.mistral.ai/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {MISTRAL_API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.3,
        },
    )
    res.raise_for_status()
    content = res.json()["choices"][0]["message"]["content"]
    # The model sometimes wraps its JSON in markdown fences; extract the object
    # between the first "{" and the last "}" and parse it instead of eval().
    content = content[content.find("{"): content.rfind("}") + 1]
    return {**state, "summary_json": json.loads(content)}


def format_output(state: State) -> State:
    summary = state["summary_json"]
    # Use .get() with a default so a missing key doesn't crash the whole run.
    formatted = "📌 Attendees:\n" + "\n".join(f"- {name}" for name in summary.get("attendees", [])) + "\n\n"
    formatted += "💬 Key Points:\n" + "\n".join(f"- {pt}" for pt in summary.get("key_points", [])) + "\n\n"
    formatted += "✅ Action Items:\n" + "\n".join(f"- {item}" for item in summary.get("action_items", [])) + "\n\n"
    formatted += "📎 Decisions:\n" + "\n".join(f"- {d}" for d in summary.get("decisions", []))
    return {**state, "final_output": formatted}


# Graph setup: transcribe -> summarize -> format
builder = StateGraph(State)
builder.add_node("transcribe_audio", transcribe_audio)
builder.add_node("summarize_transcript", summarize_transcript)
builder.add_node("format_output", format_output)
builder.set_entry_point("transcribe_audio")
builder.add_edge("transcribe_audio", "summarize_transcript")
builder.add_edge("summarize_transcript", "format_output")
graph = builder.compile()


def process_meeting(file):
    if file is None:  # Gradio passes None when nothing was uploaded or recorded.
        return "Please upload or record an audio file first."
    print(f"Received file: {file}")
    wav_path = convert_to_wav(file)
    state = {"file_path": wav_path}
    print(f"State initialized: {state}")
    try:
        final_state = graph.invoke(state)
        print(f"Final State: {final_state}")
        return final_state["final_output"]
    except Exception as e:
        print(f"Error: {e}")
        return str(e)


with gr.Blocks() as demo:
    gr.Markdown("### 🤖 MeetRecap - AI-Powered Meeting Summarizer")
    gr.Markdown("""
    Upload an audio file (e.g., `.wav`, `.mp3`, `.m4a`) or record one directly.
    You'll receive a structured meeting summary including Attendees, Key Points,
    Action Items, and Decisions.
""") with gr.Row(): with gr.Column(): audio_input = gr.Audio(type="filepath", label="๐ŸŽ™๏ธ Upload or Record Audio") submit_btn = gr.Button("๐Ÿ” Summarize") clear_btn = gr.Button("๐Ÿงน Clear") with gr.Column(): summary_output = gr.Textbox(label="๐Ÿ“ Meeting Summary", lines=20, max_lines=40) submit_btn.click(fn=process_meeting, inputs=audio_input, outputs=summary_output) clear_btn.click(lambda: (None, ""), outputs=[audio_input, summary_output]) demo.launch(share=True)