Update README.md
Browse files
README.md
CHANGED
@@ -69,7 +69,7 @@ Users (both direct and downstream) should be made aware of the risks, biases and
|
|
69 |
|
70 |
Use the code below to get started with the model.
|
71 |
|
72 |
-
|
73 |
|
74 |
<details>
|
75 |
<summary> Click to expand </summary>
|
@@ -82,7 +82,6 @@ import torch
|
|
82 |
import torchaudio
|
83 |
|
84 |
model_id = "fastinom/ASR_fassy"
|
85 |
-
# Load model and processor
|
86 |
model = Wav2Vec2ForCTC.from_pretrained(model_id)
|
87 |
processor = Wav2Vec2Processor.from_pretrained(model_id)
|
88 |
|
@@ -91,18 +90,12 @@ def load_audio(file_path):
|
|
91 |
resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
|
92 |
speech = resampler(speech_array).squeeze().numpy()
|
93 |
return speech
|
94 |
-
# Example audio file path
|
95 |
audio_file = "/content/drive/MyDrive/recordings/wavefiles/1.wav"  # YOUR AUDIO PATH
|
96 |
speech = load_audio(audio_file)
|
97 |
|
98 |
-
# Preprocess the audio
|
99 |
inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
|
100 |
-
|
101 |
-
# Perform inference
|
102 |
with torch.no_grad():
|
103 |
logits = model(inputs.input_values).logits
|
104 |
-
|
105 |
-
# Decode the output
|
106 |
predicted_ids = torch.argmax(logits, dim=-1)
|
107 |
transcription = processor.batch_decode(predicted_ids)
|
108 |
print(transcription[0])
|
|
|
69 |
|
70 |
Use the code below to get started with the model.
|
71 |
|
72 |
+
### Running the model
|
73 |
|
74 |
<details>
|
75 |
<summary> Click to expand </summary>
|
|
|
82 |
import torchaudio
|
83 |
|
84 |
model_id = "fastinom/ASR_fassy"
|
|
|
85 |
model = Wav2Vec2ForCTC.from_pretrained(model_id)
|
86 |
processor = Wav2Vec2Processor.from_pretrained(model_id)
|
87 |
|
|
|
90 |
resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
|
91 |
speech = resampler(speech_array).squeeze().numpy()
|
92 |
return speech
|
|
|
93 |
audio_file = "/content/drive/MyDrive/recordings/wavefiles/1.wav"  # YOUR AUDIO PATH
|
94 |
speech = load_audio(audio_file)
|
95 |
|
|
|
96 |
inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
|
|
|
|
|
97 |
with torch.no_grad():
|
98 |
logits = model(inputs.input_values).logits
|
|
|
|
|
99 |
predicted_ids = torch.argmax(logits, dim=-1)
|
100 |
transcription = processor.batch_decode(predicted_ids)
|
101 |
print(transcription[0])
|