roman committed on
Commit 7bd33ad · 1 Parent(s): 7d4b95e

trying as for basic whisper

Files changed (1)
  1. app.py +17 -15
app.py CHANGED
@@ -54,21 +54,23 @@ if uploaded_file is not None:
 
     st.write("Transcribing audio...")
 
-    # Load audio
-    audio_input = AudioSegment.from_file(temp_wav_path).set_frame_rate(16000).set_channels(1)
-    audio_input = np.array(audio_input.get_array_of_samples())
-
-    # Normalize audio
-    audio_input = (audio_input - np.mean(audio_input)) / np.std(audio_input)
-
-    # Process the audio
-    input_features = processor(audio_input, return_tensors="pt", sampling_rate=16000).input_values
-
-    # Generate transcription
-    with torch.no_grad():
-        predicted_ids = model.generate(input_features)
-
-    transcription = processor.batch_decode(predicted_ids)[0]
+    # # Load audio
+    # audio_input = AudioSegment.from_file(temp_wav_path).set_frame_rate(16000).set_channels(1)
+    # audio_input = np.array(audio_input.get_array_of_samples())
+    #
+    # # Normalize audio
+    # audio_input = (audio_input - np.mean(audio_input)) / np.std(audio_input)
+    #
+    # # Process the audio
+    # input_features = processor(audio_input, return_tensors="pt", sampling_rate=16000).input_values
+    #
+    # # Generate transcription
+    # with torch.no_grad():
+    #     predicted_ids = model.generate(input_features)
+    #
+    # transcription = processor.batch_decode(predicted_ids)[0]
+
+    transcription = model.transcribe(temp_wav_path)
 
     st.write("Transcription:")
     st.write(transcription)
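
Note on the change: the new `model.transcribe(temp_wav_path)` call matches the API of the openai-whisper package rather than the transformers `WhisperForConditionalGeneration` used by the now commented-out path (which has no `.transcribe()` method), so the model-loading code elsewhere in app.py presumably changed as well. Below is a minimal sketch of the setup this hunk seems to assume; the "base" model size, the placeholder `temp_wav_path` value, and the `["text"]` lookup are illustrative guesses and do not appear in this diff.

    import streamlit as st
    import whisper  # openai-whisper package

    # Assumed setup: load the model once; the size actually used in app.py is not shown in the diff.
    model = whisper.load_model("base")

    # Placeholder path; in app.py this comes from the uploaded file written to disk earlier.
    temp_wav_path = "temp_audio.wav"

    # transcribe() handles loading and resampling the file itself and returns a dict
    # with "text", "segments", and "language" keys.
    result = model.transcribe(temp_wav_path)

    st.write("Transcription:")
    st.write(result["text"])  # show only the transcript text

As committed, `st.write(transcription)` receives the whole result dictionary, so Streamlit will render the segment metadata along with the text; indexing into `"text"` as sketched above would display only the transcript.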