Macedonian-ASR / app.py
cigol123's picture
Update app.py
0667772 verified
raw
history blame
1.73 kB
import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import soundfile as sf
import numpy as np
from scipy import signal
import os
# Working directories live under the current user's home folder
home_dir = os.path.expanduser("~")
cache_dir, flagged_dir = (
    os.path.join(home_dir, leaf) for leaf in ("cache", "flagged")
)

# Make sure the model cache directory exists and expose it via the environment.
# NOTE(review): transformers is imported before this assignment, so the
# environment variable may not affect it; the explicit cache_dir arguments
# below are what direct the downloads -- confirm.
os.makedirs(cache_dir, exist_ok=True)
os.environ['TRANSFORMERS_CACHE'] = cache_dir

# Load the Whisper large-v3 processor and model into the cache directory
processor = WhisperProcessor.from_pretrained(
    "openai/whisper-large-v3",
    cache_dir=cache_dir,
)
model = WhisperForConditionalGeneration.from_pretrained(
    "openai/whisper-large-v3",
    cache_dir=cache_dir,
)
def process_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper model.

    The file is read with soundfile, multi-channel audio is averaged down
    to a single channel, the signal is resampled to 16 kHz when its sample
    rate differs, and the result is passed through ``processor`` and
    ``model`` with the generation language set to ``"mk"``.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) is treated as "no audio supplied".

    Returns:
        The decoded text of the first item in the generated batch, or an
        empty string when there is no audio to transcribe.
    """
    target_sr = 16000

    # Nothing was recorded or uploaded: skip decoding instead of letting
    # sf.read fail on a missing path.
    if not audio_path:
        return ""

    waveform, sr = sf.read(audio_path)

    # A file with no samples cannot be resampled or transcribed.
    if waveform.size == 0:
        return ""

    # Average the channels of multi-channel audio into a single channel.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    if sr != target_sr:
        num_samples = int(len(waveform) * target_sr / sr)
        # Extremely short clips can round down to zero output samples.
        if num_samples == 0:
            return ""
        waveform = signal.resample(waveform, num_samples)

    # NOTE(review): the Whisper feature extractor presumably pads/truncates
    # to a fixed window by default -- confirm whether long recordings need
    # chunking before this call.
    inputs = processor(waveform, sampling_rate=target_sr, return_tensors="pt")
    predicted_ids = model.generate(**inputs, language="mk")
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Audio input accepts either a microphone recording or an uploaded file and
# hands the handler a path on disk.
audio_input = gr.Audio(type="filepath", sources=["microphone", "upload"])

# Assemble the Gradio interface; flagged samples go to the custom directory.
demo = gr.Interface(
    title="Македонско препознавање на говор / Macedonian Speech Recognition",
    description="Качете аудио или користете микрофон за транскрипција на македонски говор / Upload audio or use microphone to transcribe Macedonian speech",
    fn=process_audio,
    inputs=audio_input,
    outputs="text",
    flagging_dir=flagged_dir,
)

# Start the web server on 0.0.0.0:7860
demo.launch(server_port=7860, server_name="0.0.0.0")