fastinom committed on
Commit
659b110
1 Parent(s): ea3a793

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -8
README.md CHANGED
@@ -69,7 +69,7 @@ Users (both direct and downstream) should be made aware of the risks, biases and
69
 
70
  Use the code below to get started with the model.
71
 
72
- [### Running the model
73
 
74
  <details>
75
  <summary> Click to expand </summary>
@@ -82,7 +82,6 @@ import torch
82
  import torchaudio
83
 
84
  model_id = "fastinom/ASR_fassy"
85
- # Load model and processor
86
  model = Wav2Vec2ForCTC.from_pretrained(model_id)
87
  processor = Wav2Vec2Processor.from_pretrained(model_id)
88
 
@@ -91,18 +90,12 @@ def load_audio(file_path):
91
  resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
92
  speech = resampler(speech_array).squeeze().numpy()
93
  return speech
94
- # Example audio file path
95
  audio_file = "/content/drive/MyDrive/recordings/wavefiles/1.wav"  # YOUR AUDIO PATH
96
  speech = load_audio(audio_file)
97
 
98
- # Preprocess the audio
99
  inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
100
-
101
- # Perform inference
102
  with torch.no_grad():
103
  logits = model(inputs.input_values).logits
104
-
105
- # Decode the output
106
  predicted_ids = torch.argmax(logits, dim=-1)
107
  transcription = processor.batch_decode(predicted_ids)
108
  print(transcription[0])
 
69
 
70
  Use the code below to get started with the model.
71
 
72
+ ### Running the model
73
 
74
  <details>
75
  <summary> Click to expand </summary>
 
82
  import torchaudio
83
 
84
  model_id = "fastinom/ASR_fassy"
 
85
  model = Wav2Vec2ForCTC.from_pretrained(model_id)
86
  processor = Wav2Vec2Processor.from_pretrained(model_id)
87
 
 
90
  resampler = torchaudio.transforms.Resample(sampling_rate, 16000)
91
  speech = resampler(speech_array).squeeze().numpy()
92
  return speech
 
93
  audio_file = "/content/drive/MyDrive/recordings/wavefiles/1.wav"  # YOUR AUDIO PATH
94
  speech = load_audio(audio_file)
95
 
 
96
  inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
 
 
97
  with torch.no_grad():
98
  logits = model(inputs.input_values).logits
 
 
99
  predicted_ids = torch.argmax(logits, dim=-1)
100
  transcription = processor.batch_decode(predicted_ids)
101
  print(transcription[0])