from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import torch
import gradio as gr
import torchaudio

# load model and processor
processor = Wav2Vec2Processor.from_pretrained("maher13/arabic-iti")
model = Wav2Vec2ForCTC.from_pretrained("maher13/arabic-iti").eval()
# read an audio file and return a 16 kHz mono numpy array
def map_to_array(file):
    speech, sr = torchaudio.load(file)
    if sr != 16000:
        # resample to the 16 kHz rate the model expects
        transform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
        speech = transform(speech)
    speech = speech[0]  # keep the first channel
    return speech.numpy()
# run CTC inference on a single recording and decode the prediction to text
def transcribe(audio_file):
    input_values = processor(map_to_array(audio_file.name), sampling_rate=16000,
                             return_tensors="pt", padding="longest").input_values  # batch size 1
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    predicted_ids[predicted_ids == -100] = processor.tokenizer.pad_token_id
    return processor.tokenizer.batch_decode(predicted_ids)[0]

# transcribe whichever of the two inputs (microphone / upload) was provided
def inference(audio_file, audio_file2):
    transcription1 = transcribe(audio_file) if audio_file else "N/A"
    transcription2 = transcribe(audio_file2) if audio_file2 else "N/A"
    return transcription1, transcription2
gradio_ui = gr.Interface(
    fn=inference,
    title="Speech to Text Graduation project \n sponsored by TensorGraph",
    inputs=[
        gr.inputs.Audio(source="microphone", type="file", optional=True),
        gr.inputs.Audio(source="upload", type="file", optional=True),
    ],
    outputs=[
        gr.outputs.Textbox(label="Auto-Transcript (microphone)"),
        gr.outputs.Textbox(label="Auto-Transcript (uploaded file)"),
    ],
)

gradio_ui.launch(share=True)