pratikshahp committed on
Commit
1610c78
1 Parent(s): a006d14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -12,7 +12,10 @@ def transcribe_audio(audio_bytes):
12
  # Convert bytes to numpy array
13
  audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
14
 
15
- input_values = processor(audio_array, return_tensors="pt", sampling_rate=16000).input_values
 
 
 
16
  logits = model(input_values).logits
17
  predicted_ids = torch.argmax(logits, dim=-1)
18
  transcription = processor.decode(predicted_ids[0])
 
12
  # Convert bytes to numpy array
13
  audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
14
 
15
+ # Cast audio array to double precision and normalize
16
+ audio_tensor = torch.tensor(audio_array, dtype=torch.float64) / 32768.0
17
+
18
+ input_values = processor(audio_tensor, return_tensors="pt", sampling_rate=16000).input_values
19
  logits = model(input_values).logits
20
  predicted_ids = torch.argmax(logits, dim=-1)
21
  transcription = processor.decode(predicted_ids[0])