# NOTE(review): the three lines below appear to be a Hugging Face Spaces page
# status header captured when this file was copied; commented out so the
# script parses as Python.
# Spaces:
# Sleeping
# Sleeping
import streamlit as st
import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig, AutoConfig
import requests
import io

# Initialize the processor and model at module load. Streamlit re-executes the
# script top to bottom on every interaction, so keeping these at module level
# (rather than inside main()) is what the original code does; they are
# downloaded/constructed on each fresh process start.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

# Fetch a custom model config from GitHub. On any non-200 response we fall
# back to config=None, which makes from_pretrained() use the config bundled
# with the checkpoint instead of crashing the app at startup (best-effort).
config_url = "https://raw.githubusercontent.com/yash-412/Yash-Projects/main/marathi-enhanced/config.json"  # Specify the path to your config.json file
# timeout added: requests.get has no default timeout and would otherwise hang
# the app indefinitely if the host stalls.
config_response = requests.get(config_url, timeout=30)
config_dict = config_response.json() if config_response.status_code == 200 else None
config = WhisperConfig.from_dict(config_dict) if config_dict else None

model = WhisperForConditionalGeneration.from_pretrained("yash-412/whisper-marathi", config=config)
# Force the decoder prompt to Marathi transcription so generate() does not
# auto-detect the language per clip.
model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language="mr", task="transcribe")
def get_transcription(speech):
    """Transcribe an audio signal to Marathi text with the module-level model.

    Parameters
    ----------
    speech : array-like
        Raw audio samples at 16 kHz, as produced by
        ``librosa.load(..., sr=16000)``.

    Returns
    -------
    list[str]
        Batch of decoded transcriptions (a single-element list for a
        single input clip).
    """
    # Convert raw samples into the log-mel input features Whisper expects.
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    # Generate token ids; the model's forced_decoder_ids pin language/task
    # to Marathi transcription (set at module init).
    predicted_ids = model.generate(input_features)
    # Decode token ids back to text, dropping special tokens.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription
def main():
    """Streamlit entry point: upload an audio file and display its transcription."""
    st.title("Marathi Enhanced-Whisper Transcription")
    st.write("Upload an audio file")
    uploaded_file = st.file_uploader("Choose an audio file", type=["mp3", "wav"])
    if uploaded_file:
        # Let the user play back what they uploaded before transcribing.
        st.audio(uploaded_file, format='audio/wav', start_time=0)
        audio_bytes = uploaded_file.read()
        # Decode and resample to the 16 kHz signal the Whisper pipeline expects.
        speech, _ = librosa.load(io.BytesIO(audio_bytes), sr=16000)
        if st.button("Transcribe"):
            transcription = get_transcription(speech)
            st.subheader("Transcription:")
            # batch_decode returns a list; show the single clip's text.
            st.write(transcription[0])


if __name__ == "__main__":
    main()