# --- Scrape artifact -------------------------------------------------------
# The lines below came from the Hugging Face Spaces file viewer (page status,
# file size, commit hashes, and a line-number gutter), not from the program.
# Kept as comments for provenance so the module parses:
#   Spaces: Sleeping
#   File size: 9,783 Bytes
#   4033f3c 55f9ee0 4033f3c
import os
import streamlit as st
#from dotenv import load_dotenv
#load_dotenv()
# for audio stuff
import whisper
from pydub import AudioSegment
from io import BytesIO
from gtts import gTTS
# for langchain stuff
from langchain.llms import OpenAI
from langchain.agents import create_sql_agent,load_tools,initialize_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.agents import AgentExecutor
from langchain import SQLDatabaseChain
# for streaming stdout stuff
from contextlib import contextmanager
from io import StringIO
from streamlit.runtime.scriptrunner.script_run_context import SCRIPT_RUN_CONTEXT_ATTR_NAME
from threading import current_thread
import sys
@contextmanager
def st_redirect(src, dst):
    """Temporarily redirect writes on *src* (a stream such as ``sys.stdout``)
    into a Streamlit placeholder rendered via the element method named *dst*
    (e.g. ``"code"``, ``"error"``, ``"info"``).

    Writes made from a thread carrying a Streamlit script-run context are
    accumulated in a buffer and the placeholder is re-rendered with the full
    buffer contents on every write; writes from any other thread fall through
    to the original stream untouched.  The original ``write`` is always
    restored on exit.
    """
    placeholder = st.empty()
    # Resolve the placeholder method to render with (placeholder.code, ...).
    output_func = getattr(placeholder, dst)
    with StringIO() as buffer:
        old_write = src.write

        def new_write(b):
            if getattr(current_thread(), SCRIPT_RUN_CONTEXT_ATTR_NAME, None):
                buffer.write(b)
                output_func(buffer.getvalue())
                # BUG FIX: the original called sys.stdout.write('\n') here.
                # When src IS sys.stdout, its .write is this very function,
                # so that call recursed into new_write forever and blew the
                # stack (probably why the call site was commented out).
                # Emit the newline through the saved original write instead.
                old_write('\n')
            else:
                old_write(b)

        try:
            src.write = new_write
            yield
        finally:
            # Always restore, even if the body raised.
            src.write = old_write
@contextmanager
def st_stdout(dst):
    """Context manager: mirror everything written to ``sys.stdout`` into a
    Streamlit element of kind *dst* (e.g. ``"code"``, ``"error"``)."""
    redirector = st_redirect(sys.stdout, dst)
    with redirector:
        yield
@contextmanager
def st_stderr(dst):
    """Context manager: mirror everything written to ``sys.stderr`` into a
    Streamlit element of kind *dst* (e.g. ``"code"``, ``"error"``)."""
    redirector = st_redirect(sys.stderr, dst)
    with redirector:
        yield
# Code taken from https://github.com/prateekralhan/OpenAI_Whisper_ASR/tree/main
# Page chrome — set_page_config must be the first Streamlit call in the script.
st.set_page_config(
page_title="OpeneXplore insights",
page_icon="musical_note",
layout="wide",
initial_sidebar_state="auto",
)
# ID3 tags stamped onto every mp3 exported by to_mp3().
audio_tags = {'comments': 'Converted using pydub!'}
# Working directories, relative to the process CWD.
# NOTE(review): nothing below creates these — they are assumed to exist;
# confirm the deployment provisions uploads/, downloads/ and transcripts/.
upload_path = "uploads/"
download_path = "downloads/"
transcript_path = "transcripts/"
# @st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
def to_mp3(audio_file, output_audio_file, upload_path, download_path):
    """Re-encode an uploaded audio file to mp3.

    Parameters
    ----------
    audio_file
        Streamlit upload object; only its ``.name`` is used, both to locate
        the copy already saved under ``upload_path`` and to detect the format.
    output_audio_file : str
        Target file name (e.g. ``clip.mp3``) written under ``download_path``.
    upload_path, download_path : str
        Source and destination directories.

    Returns
    -------
    str
        ``output_audio_file``, unchanged, for caller convenience.

    Raises
    ------
    ValueError
        For an unsupported extension.  (The original fell through silently
        and returned the name of a file that was never created, which later
        surfaced as a confusing FileNotFoundError in the caller.)
    """
    # pydub's from_wav/from_mp3/from_ogg/from_flv helpers are thin wrappers
    # around AudioSegment.from_file(path, format), so a single call covers
    # every format the uploader accepts — no per-format branches needed.
    supported = {"wav", "mp3", "ogg", "wma", "aac", "flac", "flv", "mp4"}
    extension = audio_file.name.rsplit('.', 1)[-1].lower()
    if extension not in supported:
        raise ValueError(f"Unsupported audio format: {extension!r}")
    audio_data = AudioSegment.from_file(os.path.join(upload_path, audio_file.name), extension)
    audio_data.export(os.path.join(download_path, output_audio_file), format="mp3", tags=audio_tags)
    return output_audio_file
# @st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
def process_audio(filename, model_type):
    """Transcribe the audio file at *filename* with OpenAI Whisper.

    ``model_type`` is a Whisper model-size name (e.g. ``"base"``).
    Returns the transcribed text as a string.
    """
    asr_model = whisper.load_model(model_type)
    return asr_model.transcribe(filename)["text"]
# @st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
def save_transcript(transcript_data, txt_file):
    """Write *transcript_data* to ``transcripts/<txt_file>``.

    BUG FIX: the file is now written explicitly as UTF-8.  The original used
    the platform-default encoding, which raises UnicodeEncodeError on Windows
    (cp1252) whenever the transcript contains non-ASCII characters.
    """
    with open(os.path.join(transcript_path, txt_file), "w", encoding="utf-8") as f:
        f.write(transcript_data)
# generic search agent
# Deterministic (temperature=0) LLM shared by both agents below.
# NOTE(review): requires OPENAI_API_KEY in the environment (dotenv load is
# commented out at the top of the file) — confirm the deployment sets it.
llm = OpenAI(temperature=0)
# Web search (SerpAPI, needs SERPAPI_API_KEY) plus a calculator tool.
tools = load_tools(['serpapi','llm-math'], llm=llm, verbose=True)
# ReAct-style agent used for general (non-database) questions.
agent = initialize_agent(tools, llm, agent='zero-shot-react-description', verbose=True)
# --- one-off seeding script for company_database.db, kept for reference ----
# import pandas as pd
# df = pd.DataFrame({'date':pd.date_range('2023-01-01','2023-04-30'),
# 'city':[np.random.choice(['Edmonton']) for i in range(120)],
# 'sales':[np.random.randint(640000,1800000) for i in range(120)],
# 'max_temperature':[np.random.randint(25) for i in range(120)],
# 'min_temperature':[np.random.randint(-10,10) for i in range(120)],
# 'weather':[np.random.choice(['sunny','partly cloudy']) for i in range(120)]
# })
# df1 = pd.DataFrame({'date':pd.date_range('2023-01-01','2023-04-30'),
# 'city':[np.random.choice(['Kitchener']) for i in range(120)],
# 'sales':[np.random.randint(640000,1800000) for i in range(120)],
# 'max_temperature':[np.random.randint(25) for i in range(120)],
# 'min_temperature':[np.random.randint(-10,10) for i in range(120)],
# 'weather':[np.random.choice(['sunny','partly cloudy']) for i in range(120)]
# })
# df = pd.concat([df,df1])
# from sqlalchemy import create_engine
# disk_engine = create_engine('sqlite:///company_database.db')
# df.to_sql('daily_sales', disk_engine, if_exists='replace',index=False)
# db agent
# Local SQLite file produced by the commented-out seeding script above.
db = SQLDatabase.from_uri('sqlite:///company_database.db')
# NOTE(review): db_chain is constructed but never used anywhere below —
# candidate for removal.
db_chain = SQLDatabaseChain(llm=llm,database=db,verbose=True)
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
# SQL agent used for questions containing "using our own data".
agent_executor = create_sql_agent(
llm=OpenAI(temperature=0),
toolkit=toolkit,
verbose=True
)
# App header.  NOTE(review): the title string is mojibake in this copy of the
# file (emoji mangled by an encoding round-trip) — kept byte-identical here;
# confirm against the deployed app before "fixing" it.
st.title("π£π‘ OracleXplore - last mile for insights β¨")
# select journey - text or voice
input_format = st.radio(
"How would you like to ask the question?",
("Not sure","Type it","Say it"), horizontal=True
)
# The user's question; filled by either the text or the voice branch below.
transcript=''
if input_format == "Type it":
    # Text journey: the question is typed straight in.
    transcript = st.text_input("type your question here",placeholder="Example: using our own data what is the total revenue from Edmonton in Feb?")
elif input_format == "Say it":
    # Voice journey: upload audio -> persist -> re-encode to mp3 -> transcribe.
    st.info('β¨ Supports all popular audio formats - WAV, MP3, MP4, OGG, WMA, AAC, FLAC, FLV')
    col1, col2 = st.columns([3, 2])
    with col1:
        uploaded_file = st.file_uploader("Upload audio file", type=["wav","mp3","ogg","wma","aac","flac","mp4","flv"])
        audio_file = None
        if uploaded_file is not None:
            generate_text = st.button("Generate speech to text")
            audio_bytes = uploaded_file.read()
            # Persist the raw upload to uploads/ so pydub can read it from disk.
            # BUG FIX: the mp3 and transcript handles below were previously
            # opened without being closed (leaked on every Streamlit rerun);
            # all file access now goes through context managers.
            with open(os.path.join(upload_path,uploaded_file.name),"wb") as f:
                f.write(uploaded_file.getbuffer())
            with st.spinner(f"Processing Audio ... π«"):
                output_audio_file = uploaded_file.name.split('.')[0] + '.mp3'
                output_audio_file = to_mp3(uploaded_file, output_audio_file, upload_path, download_path)
                with open(os.path.join(download_path,output_audio_file), 'rb') as audio_file:
                    audio_bytes = audio_file.read()
            # transcribe the text
            if generate_text:
                with st.spinner(f"Generating Transcript... π«"):
                    transcript = process_audio(str(os.path.abspath(os.path.join(download_path,output_audio_file))), 'base')
                    st.write("Here's what our AI heard you say:")
                    output_txt_file = str(output_audio_file.split('.')[0]+".txt")
                    save_transcript(transcript, output_txt_file)
                    # Read the transcript back from disk (round-trip check).
                    with open(os.path.join(transcript_path,output_txt_file),"r") as output_file:
                        output_file_data = output_file.read()
    with col2:
        if uploaded_file is not None:
            print("Opening ",audio_file)
            st.markdown("---")
            st.markdown("Feel free to play your uploaded audio file πΌ")
            st.audio(audio_bytes)
        else:
            st.markdown("---")
            st.warning('β Please upload your audio file π―')
else:
    # "Not sure" — nothing to capture yet.
    pass
# process next steps
if transcript!='':
    st.info(f"Question: {transcript}")
    # Route the question: a "using our own data" prefix sends it to the SQL
    # agent over the local SQLite db; anything else goes to the generic
    # web-search agent.
    # BUG FIX: the original matched against transcript.lower() but then ran
    # transcript.replace("using our own data","") on the ORIGINAL casing, so
    # e.g. "Using our own data ..." took the SQL route yet kept the trigger
    # phrase inside the query.  Strip the phrase case-insensitively instead.
    trigger = "using our own data"
    lowered = transcript.lower()
    if trigger in lowered:
        start = lowered.index(trigger)
        question = transcript[:start] + transcript[start + len(trigger):]
        response = agent_executor.run(question)
    else:
        response = agent.run(transcript)
    # convert this back into audio
    sound_file = BytesIO()
    tts = gTTS(response, lang='en')
    tts.write_to_fp(sound_file)
    st.info(f"Answer: {response}")
    st.audio(sound_file)
# Footer credit line rendered as raw HTML (hence unsafe_allow_html=True).
st.markdown("<br><hr><center>Made with β€οΈ by <a href='mailto:itsvivekbharadwaj@gmail.com?subject=OracleXplore queries&body=Please specify the issue you are facing with the app.'><strong>Vivek</strong></a> with thanks to [Prateek Kralhan](https://github.com/prateekralhan/OpenAI_Whisper_ASR/tree/main) and other open source legends β¨</center><hr>", unsafe_allow_html=True)