from faster_whisper import WhisperModel
import os
import requests
import json
from typing import TypedDict
from langgraph.graph import StateGraph
import gradio as gr
from pydub import AudioSegment
import tempfile

# Mistral API configuration (the key is read from the environment)
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
MODEL = "mistral-large-latest"
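
# Shared state threaded through the LangGraph pipeline; each node fills in
# the fields it produces.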
class State(TypedDict):
    file_path: str
    transcript: str
    summary_json: dict
    final_output: str
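
# Gradio may hand us .mp3/.m4a uploads; pydub (backed by ffmpeg) converts
# them to WAV before transcription.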
def convert_to_wav(input_path: str) -> str:
    if input_path.lower().endswith(".wav"):
        return input_path  # No conversion needed
    audio = AudioSegment.from_file(input_path)
    temp_wav_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    audio.export(temp_wav_file.name, format="wav")
    return temp_wav_file.name
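
# Note: the Whisper "base" model is reloaded on every call; loading it once
# at module level would speed up repeated requests.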
def transcribe_audio(state: State) -> State:
    model = WhisperModel("base", compute_type="float32")
    segments, _ = model.transcribe(state["file_path"], beam_size=5, language="en")
    transcript = " ".join(segment.text.strip() for segment in segments)
    return {**state, "transcript": transcript}
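
# Ask Mistral's chat-completions endpoint for a structured JSON summary
# of the transcript.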
def summarize_transcript(state: State) -> State:
    prompt = f"""
You are an AI assistant summarizing meeting transcripts.
Extract the following:
1. Attendees (Names only, if possible)
2. Key Discussion Points (bullet format)
3. Action Items (with owner and deadline, if mentioned)
4. Decisions Made (if any)
Respond with only valid JSON in this format:
{{
    "attendees": [...],
    "key_points": [...],
    "action_items": [...],
    "decisions": [...]
}}
Transcript:
\"\"\"{state['transcript']}\"\"\"
"""
    res = requests.post(
        "https://api.mistral.ai/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {MISTRAL_API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.3,
        },
    )
    res.raise_for_status()
    content = res.json()["choices"][0]["message"]["content"]
    # json.loads (not eval) parses the reply safely; strip any markdown code
    # fence the model may have wrapped around its JSON.
    summary = json.loads(content.strip().strip("`").removeprefix("json"))
    return {**state, "summary_json": summary}
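
# Render the structured summary as a readable text block for the UI.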
def format_output(state: State) -> State:
    summary = state["summary_json"]
    formatted = "👥 Attendees:\n" + "\n".join(f"- {name}" for name in summary["attendees"]) + "\n\n"
    formatted += "💬 Key Points:\n" + "\n".join(f"- {pt}" for pt in summary["key_points"]) + "\n\n"
    formatted += "✅ Action Items:\n" + "\n".join(f"- {item}" for item in summary["action_items"]) + "\n\n"
    formatted += "📌 Decisions:\n" + "\n".join(f"- {d}" for d in summary["decisions"])
    return {**state, "final_output": formatted}

# Graph setup: a linear LangGraph pipeline, transcribe -> summarize -> format
builder = StateGraph(State)
builder.add_node("transcribe_audio", transcribe_audio)
builder.add_node("summarize_transcript", summarize_transcript)
builder.add_node("format_output", format_output)
builder.set_entry_point("transcribe_audio")
builder.add_edge("transcribe_audio", "summarize_transcript")
builder.add_edge("summarize_transcript", "format_output")
graph = builder.compile()
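
# A minimal way to smoke-test the pipeline without the UI (assumes a local
# recording at the hypothetical path "sample_meeting.wav"):
#
#   result = graph.invoke({"file_path": convert_to_wav("sample_meeting.wav")})
#   print(result["final_output"])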
def process_meeting(file):
    if file is None:
        return "Please upload or record an audio file first."
    print(f"Received file: {file}")
    wav_path = convert_to_wav(file)
    state = {"file_path": wav_path}
    print(f"State initialized: {state}")
    try:
        final_state = graph.invoke(state)
        print(f"Final State: {final_state}")
        return final_state["final_output"]
    except Exception as e:
        print(f"Error: {e}")
        return str(e)
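
# Gradio interface: upload or record audio on the left, summary on the right.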
with gr.Blocks() as demo:
    gr.Markdown("### 🎤 MeetRecap - AI-Powered Meeting Summarizer")
    gr.Markdown("""
    Upload your audio file (e.g., `.wav`, `.mp3`, `.m4a`) or record it. You'll receive a structured meeting summary including Attendees, Key Points, Action Items, and Decisions.
    """)
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="🎙️ Upload or Record Audio")
            submit_btn = gr.Button("📝 Summarize")
            clear_btn = gr.Button("🧹 Clear")
        with gr.Column():
            summary_output = gr.Textbox(label="📋 Meeting Summary", lines=20, max_lines=40)

    submit_btn.click(fn=process_meeting, inputs=audio_input, outputs=summary_output)
    clear_btn.click(lambda: (None, ""), outputs=[audio_input, summary_output])

demo.launch(share=True)