Update README.md
Browse files
README.md
CHANGED
@@ -69,7 +69,7 @@ Users (both direct and downstream) should be made aware of the risks, biases and
|
|
69 |
|
70 |
Use the code below to get started with the model.
|
71 |
|
72 |
-
|
73 |
|
74 |
<details>
|
75 |
<summary> Click to expand </summary>
|
@@ -82,7 +82,6 @@ import torch
|
|
82 |
import torchaudio
|
83 |
|
84 |
model_id = "fastinom/ASR_fassy"
|
85 |
-
# Load model and processor
|
86 |
model = Wav2Vec2ForCTC.from_pretrained(model_id)
|
87 |
processor = Wav2Vec2Processor.from_pretrained(model_id)
|
88 |
|
@@ -91,18 +90,12 @@ def load_audio(file_path):
|
|
91 |
resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
|
92 |
speech = resampler(speech_array).squeeze().numpy()
|
93 |
return speech
|
94 |
-
# Example audio file path
|
95 |
audio_file = "/content/drive/MyDrive/recordings/wavefiles/1.wav"  # YOUR AUDIO PATH
|
96 |
speech = load_audio(audio_file)
|
97 |
|
98 |
-
# Preprocess the audio
|
99 |
inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
|
100 |
-
|
101 |
-
# Perform inference
|
102 |
with torch.no_grad():
|
103 |
logits = model(inputs.input_values).logits
|
104 |
-
|
105 |
-
# Decode the output
|
106 |
predicted_ids = torch.argmax(logits, dim=-1)
|
107 |
transcription = processor.batch_decode(predicted_ids)
|
108 |
print(transcription[0])
|
|
|
69 |
|
70 |
Use the code below to get started with the model.
|
71 |
|
72 |
+
### Running the model
|
73 |
|
74 |
<details>
|
75 |
<summary> Click to expand </summary>
|
|
|
82 |
import torchaudio
|
83 |
|
84 |
model_id = "fastinom/ASR_fassy"
|
|
|
85 |
model = Wav2Vec2ForCTC.from_pretrained(model_id)
|
86 |
processor = Wav2Vec2Processor.from_pretrained(model_id)
|
87 |
|
|
|
90 |
resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
|
91 |
speech = resampler(speech_array).squeeze().numpy()
|
92 |
return speech
|
|
|
93 |
audio_file = "/content/drive/MyDrive/recordings/wavefiles/1.wav"  # YOUR AUDIO PATH
|
94 |
speech = load_audio(audio_file)
|
95 |
|
|
|
96 |
inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
|
|
|
|
|
97 |
with torch.no_grad():
|
98 |
logits = model(inputs.input_values).logits
|
|
|
|
|
99 |
predicted_ids = torch.argmax(logits, dim=-1)
|
100 |
transcription = processor.batch_decode(predicted_ids)
|
101 |
print(transcription[0])
|