# metaambod / app.py
# Last commit by unijoh: "Update app.py" (e26488d, verified) -- 2.23 kB
import subprocess
import sys
def install_package(package):
    """Install ``package`` with pip, using the interpreter running this script.

    Args:
        package: A pip requirement string; it is passed to ``pip install``
            as a single argument.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)
def install_rust():
    """Install the Rust toolchain via rustup when ``rustc`` is not usable.

    ``rustc --version`` is used as the probe.  If the executable is missing
    or exits with an error, the rustup installer script is downloaded with
    curl and executed by ``sh``.

    Raises:
        subprocess.CalledProcessError: If the download or the installer
            exits with a non-zero status.
        FileNotFoundError: If ``curl`` or ``sh`` is not available.
    """
    try:
        subprocess.check_call(["rustc", "--version"])
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError is what subprocess raises when rustc is not on
        # PATH at all, i.e. exactly the "not installed" case.
        #
        # A "|" inside an argument list is not a shell pipe (it would be
        # handed to curl as a literal argument), so fetch the script first
        # and feed it to sh on stdin instead.
        installer_script = subprocess.check_output(
            ["curl", "--proto", "=https", "--tlsv1.2", "-sSf", "https://sh.rustup.rs"]
        )
        # "-s -- -y": read the script from stdin and pass -y to it so the
        # installer accepts its defaults without prompting.
        # NOTE(review): rustup presumably installs into ~/.cargo/bin, which
        # may not be on this process's PATH afterwards -- confirm.
        subprocess.run(["sh", "-s", "--", "-y"], input=installer_script, check=True)
# Requirement strings handed to pip one at a time, in this order.
required_packages = [
    "transformers==4.10.3",
    "datasets",
    "huggingface-hub>=0.19",
    "hf-transfer>=0.1.4",
    "protobuf<4",
    "click<8.1",
    "pydantic~=1.0",
    "librosa==0.8.1",
    "torch==2.2.0",
    "torchaudio==2.2.0",
    "scipy",
    "Cython==0.29.21",
    "phonemizer==2.2.1",
    "scikit-learn",
    "matplotlib",
    "gradio==3.1.4",
    "sentencepiece",
    "sacremoses",
    "tokenizers==0.10.3",
    "resampy>=0.2.2",
    "numba>=0.43.0",
    "soundfile>=0.10.2",
    "pooch>=1.0",
    "decorator>=3.0.0",
    "joblib>=0.14",
    "audioread>=2.0.0",
]

# Runs at import time: every entry gets its own `pip install` invocation, so
# the third-party imports further down only execute after all installs finish.
for package in required_packages:
    install_package(package)
import gradio as gr
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
# Checkpoint identifier shared by the processor and the CTC model; both are
# loaded once at import time and reused by every transcribe() call.
model_name = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
def transcribe(audio):
    """Transcribe an audio file to text with the module-level wav2vec2 model.

    Args:
        audio: Path of the audio file to transcribe (the Gradio input is
            declared with ``type="filepath"``), or ``None`` when no audio
            was provided.

    Returns:
        The decoded transcription of the recording, or an empty string when
        ``audio`` is ``None``.
    """
    # Presumably Gradio hands over None when the form is submitted without a
    # recording; there is nothing to load, so answer with an empty result
    # instead of failing inside librosa.
    if audio is None:
        return ""

    # Load the file resampled to 16 kHz, the same rate declared to the
    # processor below.
    audio_input, _ = librosa.load(audio, sr=16000)

    # Convert the waveform into model-ready tensors.
    inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True)

    # The processor output does not necessarily contain an attention mask
    # (it depends on the feature-extractor configuration).  Attribute access
    # would raise AttributeError when it is absent; .get() yields None,
    # which is the model's own default for this argument.
    attention_mask = inputs.get("attention_mask")

    # Inference only -- no gradients needed.
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=attention_mask).logits

    # Pick the highest-scoring token id for every frame, then decode to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]
# Gradio UI: a microphone recording (delivered to transcribe() as a file
# path) goes in, the transcription comes out as plain text.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
)
# Script entry point: make sure a Rust toolchain exists, then serve the UI.
# NOTE(review): install_rust() runs here, i.e. only after the import-time pip
# installs earlier in this file have already completed -- confirm that this
# ordering is intended.
if __name__ == "__main__":
    install_rust()
    iface.launch()