yash-412 committed on
Commit
4f198be
1 Parent(s): 3642f6b

Create app.py

Browse files

App without streamlit_audio_recorder

Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import librosa
3
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig, AutoConfig
4
+ import requests
5
+ import io
6
+
7
# Initialize the processor and model once, outside the request-handling code.
# Streamlit re-executes this script on every widget interaction, so the
# expensive loading is wrapped in a cached resource instead of being repeated
# on each rerun.
config_url = "https://raw.githubusercontent.com/yash-412/Yash-Projects/main/marathi-enhanced/config.json"  # Location of the model's config.json


def _fetch_config_dict(url, timeout=30):
    """Return the JSON document at *url* as a dict, or None if unavailable.

    None is returned when the request fails (connection error or timeout),
    when the server answers with a status other than 200, or when the body
    is not valid JSON.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # Body was not parseable JSON; treat it like a failed download.
        return None


@st.cache_resource
def _load_processor_and_model():
    """Load the Whisper processor and the Marathi model.

    Returns:
        A ``(processor, model)`` tuple. The model's ``forced_decoder_ids``
        are set from the processor's decoder prompt for language "mr" and
        task "transcribe".
    """
    loaded_processor = WhisperProcessor.from_pretrained("openai/whisper-small")

    config_dict = _fetch_config_dict(config_url)
    # When the remote config could not be fetched, config stays None and is
    # passed through to from_pretrained as such.
    config = WhisperConfig.from_dict(config_dict) if config_dict else None

    loaded_model = WhisperForConditionalGeneration.from_pretrained("yash-412/whisper-marathi", config=config)
    loaded_model.config.forced_decoder_ids = loaded_processor.get_decoder_prompt_ids(language="mr", task="transcribe")
    return loaded_processor, loaded_model


processor, model = _load_processor_and_model()
16
+
17
def get_transcription(speech):
    """Transcribe an audio waveform with the module-level Whisper model.

    Args:
        speech: Audio samples, passed to the processor with a declared
            sampling rate of 16000 Hz.

    Returns:
        The result of ``processor.batch_decode`` on the generated token ids,
        with special tokens skipped.
    """
    # Turn the raw samples into model input features (PyTorch tensors).
    encoded = processor(speech, sampling_rate=16000, return_tensors="pt")

    # Generate token ids from the features and decode them back to text.
    token_ids = model.generate(encoded.input_features)
    return processor.batch_decode(token_ids, skip_special_tokens=True)
26
+
27
def main():
    """Render the upload page and transcribe the chosen audio file on request.

    Shows a file uploader for mp3/wav files, plays back the uploaded audio,
    and, when the "Transcribe" button is pressed, decodes the audio at
    16000 Hz and writes the first transcription returned by
    ``get_transcription``.
    """
    st.title("Marathi Enhanced-Whisper Transcription")
    st.write("Upload an audio file")

    uploaded_file = st.file_uploader("Choose an audio file", type=["mp3", "wav"])
    if uploaded_file is None:
        return

    # getvalue() yields the full upload regardless of the current stream
    # position, unlike read(), which depends on where the pointer was left.
    audio_bytes = uploaded_file.getvalue()

    # Use the upload's own MIME type so mp3 files are not labelled as WAV;
    # fall back to WAV only when no type was reported.
    st.audio(audio_bytes, format=uploaded_file.type or "audio/wav", start_time=0)

    if st.button("Transcribe"):
        # Decode only when a transcription is actually requested, so plain
        # reruns of the script do not pay for audio decoding.
        speech, _ = librosa.load(io.BytesIO(audio_bytes), sr=16000)
        transcription = get_transcription(speech)
        st.subheader("Transcription:")
        st.write(transcription[0])
43
+
44
# Start the app only when this file is executed as the entry script.
if __name__ == "__main__":
    main()