"""Streamlit demo: record audio in the browser and convert it to text with
OpenAI Whisper.

NOTE(review): this uses the ``audio.translations`` endpoint, which always
returns *English* text regardless of the spoken language — confirm that is
intended, since the UI advertises Hindi/Marathi input (transcription in the
original language would use ``audio.transcriptions`` instead).
"""

import os

import streamlit as st
from audio_recorder_streamlit import audio_recorder
from openai import OpenAI
from streamlit_float import *  # provides float_init()

# Page title.
st.title(':rainbow[Audio to Text Converter ]🤖')

# Info popover: collects the OpenAI API key and shows usage notes.
with st.popover(label=":information_source:", help="Enter your API key"):
    api_key = st.text_input("Enter Open AI key?", type="password")
    multi = '''This is a demo Audio to Text app, Supported input languages are :orange[English, Hindi, Marathi, etc.]'''
    st.markdown(multi)
    st.image(
        "https://raw.githubusercontent.com/openai/whisper/main/language-breakdown.svg",
        output_format="auto",
    )
    # FIX: removed the duplicated word ("details details") and repaired the
    # broken markdown link syntax of the original note text.
    st.markdown(
        "**Note:** For more details on OpenAI-WHISPER, you can refer to the "
        "[documentation](https://github.com/openai/whisper?tab=readme-ov-file)."
    )
    st.markdown("Created with \ :heart: by [Predikly Team](XXXXXXXXXXXXXXXXXXXXXXXXXX)")

# Client is built eagerly; the key may still be empty here — Whisper is only
# called after the user has supplied a key (the recorder is gated on api_key).
client = OpenAI(api_key=api_key)


def speech_to_text(audio_data):
    """Send the audio file at path ``audio_data`` to Whisper and return the text.

    Uses the *translations* endpoint, so the returned text is English even
    when the recording is in another language.
    """
    with open(audio_data, "rb") as audio_file:
        translation = client.audio.translations.create(
            model="whisper-1",
            file=audio_file,
        )
    return translation.text


# Initialize the floating-container feature.
float_init()


def initialize_session_state():
    """Seed the chat history with a greeting message on first run."""
    if "message" not in st.session_state:
        st.session_state.message = [
            {"role": "Dipti", "content": "How I can help you?"}
        ]


initialize_session_state()

# Footer container hosts the recorder widget; it only appears once a key
# has been entered.
footer_container = st.container()
if api_key:
    with footer_container:
        audio_bytes = audio_recorder(
            energy_threshold=(-10.0, 10.0),
            pause_threshold=10.0,
            text="Click to record",
            recording_color="#e8b62c",
            neutral_color="#6aa36f",
            icon_name="microphone",
            icon_size="2x",
            key="audio_recorder",
        )
else:
    st.warning("Please enter your API key using above ℹ icon")
    audio_bytes = None  # blank value so the `if audio_bytes:` check below is safe

# Replay the chat history.
for message in st.session_state.message:
    with st.chat_message(message["role"], avatar=":material/robot_2:"):
        st.write(message["content"])

if audio_bytes:
    # Persist the recording to a temp file, transcribe it, then clean up.
    with st.spinner("Converting..."):
        audio_file_path = "audio.wav"  # renamed from webm_file_path: it is a .wav
        with open(audio_file_path, "wb") as f:
            f.write(audio_bytes)
        try:
            transcript = speech_to_text(audio_file_path)
            if transcript:
                st.session_state.message.append({"role": "user", "content": transcript})
                with st.chat_message("user", avatar=":material/record_voice_over:"):
                    st.write(transcript)
        finally:
            # FIX: the original deleted the file only when a transcript came
            # back, leaking audio.wav on failure or empty transcription.
            os.remove(audio_file_path)