# Ai-Transcript / app.py
import gradio as gr
import librosa
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
# Load the Wav2Vec 2.0 model and processor
model_id = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

# Pick the device once and reuse it for both the model and the inputs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
def transcribe(file_path):
    try:
        # Load the audio and resample to 16 kHz, the rate Wav2Vec 2.0 was trained on
        audio_input, sampling_rate = librosa.load(file_path, sr=16000)
        input_values = processor(audio_input, sampling_rate=sampling_rate, return_tensors="pt").input_values
        input_values = input_values.to(device)
        with torch.no_grad():
            logits = model(input_values).logits
        # Greedy CTC decoding: take the most likely token per frame, then collapse repeats and blanks
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        return transcription
    except Exception as e:
        print(f"Error during transcription: {e}")
        return "Transcription error"
# Gradio interface setup
with gr.Blocks() as demo:
    with gr.Tab("Transcribe Audio"):
        with gr.Row():
            audio_input = gr.Audio(label="Upload audio file or record", type="filepath")
        with gr.Row():
            audio_output = gr.Textbox(label="Transcription")
        # Run transcription whenever a new file is uploaded or a recording finishes
        audio_input.change(transcribe, inputs=audio_input, outputs=audio_output)

demo.launch()
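
# To run the app locally (assumes the dependencies used above are installed):
#   pip install gradio transformers torch librosa
#   python app.py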