import warnings
import torch
import gradio as gr
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
warnings.filterwarnings("ignore")
# Load the pretrained Wav2Vec2 processor and CTC model (trained on 960h of English speech)
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
def transcribe_audio(audio_path):
    try:
        # Load the audio and resample it to 16 kHz, the rate Wav2Vec2 expects
        audio, sr = librosa.load(audio_path, sr=16000)
        # Convert the waveform into model-ready input tensors
        input_values = processor(audio, return_tensors="pt", sampling_rate=sr).input_values
        # Run inference without tracking gradients
        with torch.no_grad():
            logits = model(input_values).logits
        # Take the most likely token at each time step and decode to text
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        return transcription
    except Exception as e:
        # Surface the error message in the UI instead of crashing
        return f"Error: {e}"
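
# Note: the function can also be called directly, without the Gradio UI,
# e.g. transcribe_audio("sample.wav") ("sample.wav" is a hypothetical file name).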
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Subtitle Generator",
    description="This tool transcribes audio files into text.",
)
demo.launch()
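
# Optionally, demo.launch(share=True) serves a temporary public URL,
# which is handy when running on a remote machine or in a notebook.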