Spaces:
Sleeping
Sleeping
Create app.py
Browse filesApp without streamlit_audio_recorder
app.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig, AutoConfig
import requests
import io


@st.cache_resource
def _load_asr():
    """Load the Whisper processor, remote config, and model once per process.

    Streamlit re-executes the whole script on every user interaction; without
    caching, the model weights (and the remote config fetch) would be reloaded
    on each rerun. ``st.cache_resource`` memoizes these heavy objects.

    Returns:
        (processor, config, model) tuple; ``config`` is ``None`` when the
        remote config could not be fetched (the checkpoint's bundled config
        is used instead).
    """
    processor = WhisperProcessor.from_pretrained("openai/whisper-small")

    # Remote config for the fine-tuned Marathi checkpoint.
    config_url = "https://raw.githubusercontent.com/yash-412/Yash-Projects/main/marathi-enhanced/config.json"  # Specify the path to your config.json file
    try:
        config_response = requests.get(config_url, timeout=10)
        config_dict = config_response.json() if config_response.status_code == 200 else None
    except requests.RequestException:
        # Best-effort fetch: fall back to the checkpoint's own config rather
        # than crashing the app at startup on a network error.
        config_dict = None

    config = WhisperConfig.from_dict(config_dict) if config_dict else None
    model = WhisperForConditionalGeneration.from_pretrained("yash-412/whisper-marathi", config=config)
    # Force Marathi ("mr") transcription regardless of auto-detected language.
    model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language="mr", task="transcribe")
    return processor, config, model


# Module-level names preserved so existing references keep working.
processor, config, model = _load_asr()
17 |
+
def get_transcription(speech):
    """Transcribe a 16 kHz mono waveform to Marathi text.

    Args:
        speech: 1-D float audio samples at 16 kHz.

    Returns:
        The list of decoded strings from ``batch_decode`` (one element for a
        single input waveform).
    """
    # Turn the raw waveform into the log-mel features Whisper consumes.
    features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features

    # Autoregressively generate token ids with the fine-tuned model, then
    # strip special tokens to get plain text.
    token_ids = model.generate(features)
    return processor.batch_decode(token_ids, skip_special_tokens=True)
26 |
+
|
27 |
+
def main():
    """Streamlit UI: upload an audio file and transcribe it to Marathi."""
    st.title("Marathi Enhanced-Whisper Transcription")
    st.write("Upload an audio file")

    uploaded_file = st.file_uploader("Choose an audio file", type=["mp3", "wav"])

    if uploaded_file:
        # Play back the upload for the user.
        st.audio(uploaded_file, format='audio/wav', start_time=0)

        # Use getvalue() instead of read(): st.audio above may consume the
        # UploadedFile's stream, after which a plain .read() would return
        # b''. getvalue() returns the full buffer regardless of the current
        # file pointer.
        audio_bytes = uploaded_file.getvalue()
        # Decode and resample to 16 kHz mono — the rate Whisper expects.
        speech, _ = librosa.load(io.BytesIO(audio_bytes), sr=16000)

        if st.button("Transcribe"):
            # Generation can take several seconds; show progress feedback.
            with st.spinner("Transcribing..."):
                transcription = get_transcription(speech)
            st.subheader("Transcription:")
            st.write(transcription[0])
43 |
+
|
44 |
+
# Script entry point: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()