|
|
import os |
|
|
import torch |
|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Speech-to-text checkpoint, and the batch size handed to the pipeline on
# every transcription call.
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
BATCH_SIZE = 8

# Use GPU index 0 when torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# The recognition pipeline is built once at import time and shared by all
# requests. chunk_length_s=30 asks the pipeline to work on 30-second chunks.
pipe = pipeline(
    "automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    device=device,
    chunk_length_s=30,
)
|
|
|
|
|
def transcribe(audio_input):
    """Run the module-level speech-recognition pipeline and return its text.

    Args:
        audio_input: Audio to transcribe; forwarded unchanged to ``pipe``.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``audio_input`` is ``None`` (nothing was submitted).
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted!")

    # NOTE(review): timestamps are requested although only the text is
    # returned -- presumably required for long-form audio; confirm.
    asr_options = {
        "batch_size": BATCH_SIZE,
        "generate_kwargs": {"task": "transcribe"},
        "return_timestamps": True,
    }
    result = pipe(audio_input, **asr_options)
    return result["text"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Model id sent with each chat-completion request.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

# The access token comes from the environment. A missing or empty value stops
# the module at import time rather than failing later on the first request.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN not found! Add it as a secret in your Space settings.")

# Single client instance reused for every chat-completion call.
client = InferenceClient(
    base_url="https://api-inference.huggingface.co",
    token=hf_token,
)
|
|
|
|
|
def build_messages(meeting_transcript) -> list:
    """Build the chat messages that ask the model to organize a transcript.

    Args:
        meeting_transcript: Raw transcript; it is formatted into the user
            prompt below the ``Transcript:`` heading.

    Returns:
        A two-element list of ``{"role", "content"}`` dicts: the system
        message first, then the user message carrying the transcript.
    """
    # The prompt opens and closes with a newline, hence the empty first and
    # last entries; the empty entry in the middle is the blank separator line.
    prompt_lines = [
        "",
        "Take this raw meeting transcript and return an organized, sectioned version.",
        "You may include a summary at the top.",
        "",
        "Transcript:",
        f"{meeting_transcript}",
        "",
    ]
    system_message = {
        "role": "system",
        "content": "You are an assistant that organizes meeting minutes.",
    }
    user_message = {"role": "user", "content": "\n".join(prompt_lines)}
    return [system_message, user_message]
|
|
|
|
|
def organize_text(meeting_transcript):
    """Ask the chat model for an organized version of a raw transcript.

    Args:
        meeting_transcript: Raw transcript text; wrapped into chat messages
            by ``build_messages``.

    Returns:
        The message content of the first choice in the completion response.
    """
    # The reply is capped at 300 tokens; the seed is fixed, presumably so
    # repeated runs give the same output (confirm with the backend).
    completion = client.chat_completion(
        build_messages(meeting_transcript),
        model=TEXT_MODEL_NAME,
        seed=42,
        max_tokens=300,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def meeting_transcript_tool(audio_input):
    """Transcribe an audio input and return the organized transcript.

    Args:
        audio_input: Audio to process; passed straight to ``transcribe``.

    Returns:
        Whatever ``organize_text`` returns for the transcribed text.
    """
    # Two stages chained: speech-to-text first, then reorganization.
    return organize_text(transcribe(audio_input))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Web UI: one audio input (delivered to the handler as a file path) mapped to
# one text output with a copy button.
demo = gr.Interface(
    title="🪶 Meeting Transcription Tool",
    description="Upload or record an audio file. This app transcribes it using Whisper and organizes the text using Phi-3",
    fn=meeting_transcript_tool,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(show_copy_button=True, label="Organized Transcript"),
)

# Start the app as soon as the module is executed.
demo.launch()
|
|
|