File size: 9,783 Bytes
4033f3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55f9ee0
 
 
 
 
 
 
4033f3c
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import os
import streamlit as st
#from dotenv import load_dotenv
#load_dotenv()

# for audio stuff
import whisper
from pydub import AudioSegment
from io import BytesIO
from gtts import gTTS

# for langchain stuff
from langchain.llms import OpenAI
from langchain.agents import create_sql_agent,load_tools,initialize_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.agents import AgentExecutor
from langchain import SQLDatabaseChain

# for streaming stdout stuff
from contextlib import contextmanager
from io import StringIO
from streamlit.runtime.scriptrunner.script_run_context import SCRIPT_RUN_CONTEXT_ATTR_NAME
from threading import current_thread
import sys

@contextmanager
def st_redirect(src, dst):
    """Temporarily mirror writes on *src* (a stream such as sys.stdout)
    into a Streamlit placeholder.

    Parameters
    ----------
    src : file-like stream whose ``write`` attribute is patched for the
        duration of the context (restored on exit, even on error).
    dst : name of a placeholder method ('code', 'error', 'info', ...) used
        to render the accumulated output.
    """
    placeholder = st.empty()
    output_func = getattr(placeholder, dst)

    with StringIO() as buffer:
        old_write = src.write

        def new_write(b):
            # Only mirror output produced inside a Streamlit script run;
            # writes from unrelated threads go straight to the real stream.
            if getattr(current_thread(), SCRIPT_RUN_CONTEXT_ATTR_NAME, None):
                buffer.write(b)
                output_func(buffer.getvalue())
                # Echo through the SAVED writer. The original called
                # sys.stdout.write here, which recurses forever once
                # sys.stdout.write has been patched to this function.
                old_write(b)
            else:
                old_write(b)

        try:
            src.write = new_write
            yield
        finally:
            # Always restore the original writer, even if the body raises.
            src.write = old_write


@contextmanager
def st_stdout(dst):
    """Context manager: mirror sys.stdout into a Streamlit *dst* element."""
    redirector = st_redirect(sys.stdout, dst)
    with redirector:
        yield

@contextmanager
def st_stderr(dst):
    """Context manager: mirror sys.stderr into a Streamlit *dst* element."""
    redirector = st_redirect(sys.stderr, dst)
    with redirector:
        yield

# Code taken from https://github.com/prateekralhan/OpenAI_Whisper_ASR/tree/main

# Streamlit page-level configuration: tab title, icon and a wide layout.
st.set_page_config(
    page_title="OpeneXplore insights",
    page_icon="musical_note",
    layout="wide",
    initial_sidebar_state="auto",
)

# ID3 tags attached to every MP3 produced by to_mp3().
audio_tags = {'comments': 'Converted using pydub!'}

# Working directories: raw uploads, converted MP3s, and saved transcripts.
# NOTE(review): nothing in this file creates these directories — they are
# assumed to exist already; confirm deployment provisions them.
upload_path = "uploads/"
download_path = "downloads/"
transcript_path = "transcripts/"

# @st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
def to_mp3(audio_file, output_audio_file, upload_path, download_path):
    """Convert an uploaded audio file to MP3 in *download_path*.

    Parameters
    ----------
    audio_file : uploaded file object; only its ``.name`` is used here —
        the bytes are read from the copy already saved under *upload_path*.
    output_audio_file : target MP3 filename (no directory component).
    upload_path, download_path : source and destination directories.

    Returns
    -------
    str : *output_audio_file*, unchanged.
    """
    source_path = os.path.join(upload_path, audio_file.name)
    extension = audio_file.name.split('.')[-1].lower()

    # pydub's per-format constructors (from_wav, from_mp3, from_ogg, ...)
    # are all thin wrappers around from_file(path, format=...), so a single
    # generic call replaces the original eight-branch if/elif chain.
    supported = {"wav", "mp3", "ogg", "wma", "aac", "flac", "flv", "mp4"}
    if extension in supported:
        audio_data = AudioSegment.from_file(source_path, format=extension)
        audio_data.export(os.path.join(download_path, output_audio_file),
                          format="mp3", tags=audio_tags)
    # NOTE(review): like the original, an unrecognised extension silently
    # skips conversion yet still returns the (never-created) output name;
    # the uploader widget restricts types, but callers should not rely on it.
    return output_audio_file

# @st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
def process_audio(filename, model_type):
    """Transcribe *filename* with an OpenAI Whisper model.

    model_type is a Whisper model size name (e.g. 'base'); returns the
    transcript text.
    """
    whisper_model = whisper.load_model(model_type)
    transcription = whisper_model.transcribe(filename)
    return transcription["text"]

# @st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
def save_transcript(transcript_data, txt_file):
    """Write *transcript_data* to transcripts/<txt_file>.

    UTF-8 is specified explicitly: the platform default text encoding
    (e.g. cp1252 on Windows) would corrupt or reject the non-ASCII
    characters Whisper transcripts can contain.
    """
    with open(os.path.join(transcript_path, txt_file), "w", encoding="utf-8") as f:
        f.write(transcript_data)

# generic search agent
# Zero-shot ReAct agent with web search (SerpAPI) and a math tool, used for
# questions that are NOT about the local company database.
# NOTE(review): requires OPENAI_API_KEY and SERPAPI_API_KEY in the
# environment — confirm deployment sets these (dotenv loading is commented
# out at the top of the file).
llm = OpenAI(temperature=0)
tools = load_tools(['serpapi','llm-math'], llm=llm, verbose=True)
agent = initialize_agent(tools, llm, agent='zero-shot-react-description', verbose=True)

# One-off seeding script for company_database.db (kept for reference):
# generates random daily sales/weather rows for two cities and writes them
# to the 'daily_sales' table via SQLAlchemy.
# import pandas as pd
# df = pd.DataFrame({'date':pd.date_range('2023-01-01','2023-04-30'),
#                     'city':[np.random.choice(['Edmonton']) for i in range(120)],
#                     'sales':[np.random.randint(640000,1800000) for i in range(120)],
#                    'max_temperature':[np.random.randint(25) for i in range(120)],
#                    'min_temperature':[np.random.randint(-10,10) for i in range(120)],
#                    'weather':[np.random.choice(['sunny','partly cloudy']) for i in range(120)]
#                    })
# df1 = pd.DataFrame({'date':pd.date_range('2023-01-01','2023-04-30'),
#                     'city':[np.random.choice(['Kitchener']) for i in range(120)],
#                     'sales':[np.random.randint(640000,1800000) for i in range(120)],
#                    'max_temperature':[np.random.randint(25) for i in range(120)],
#                    'min_temperature':[np.random.randint(-10,10) for i in range(120)],
#                    'weather':[np.random.choice(['sunny','partly cloudy']) for i in range(120)]
#                    })
# df = pd.concat([df,df1])
# from sqlalchemy import create_engine
# disk_engine = create_engine('sqlite:///company_database.db')
# df.to_sql('daily_sales', disk_engine, if_exists='replace',index=False)

# db agent
# SQL agent over the local SQLite database; answers questions prefixed with
# "using our own data" further below.
db = SQLDatabase.from_uri('sqlite:///company_database.db')
db_chain = SQLDatabaseChain(llm=llm,database=db,verbose=True)  # NOTE(review): built but never used below
toolkit = SQLDatabaseToolkit(db=db, llm=llm)
agent_executor = create_sql_agent(
    llm=OpenAI(temperature=0),
    toolkit=toolkit,
    verbose=True
)

st.title("πŸ—£πŸ’‘ OracleXplore - last mile for insights ✨")

# select journey - text or voice
# The user either types the question or uploads an audio recording of it;
# both paths end with `transcript` holding the question text.
input_format = st.radio(
    "How would you like to ask the question?",
    ("Not sure","Type it","Say it"), horizontal=True
)
transcript=''
if input_format == "Type it":
    transcript = st.text_input("type your question here",placeholder="Example: using our own data what is the total revenue from Edmonton in Feb?")
elif input_format == "Say it":
    st.info('✨ Supports all popular audio formats - WAV, MP3, MP4, OGG, WMA, AAC, FLAC, FLV')
    col1, col2 = st.columns([3, 2])
    with col1:
        uploaded_file = st.file_uploader("Upload audio file", type=["wav","mp3","ogg","wma","aac","flac","mp4","flv"])
        audio_file = None

        if uploaded_file is not None:
            generate_text = st.button("Generate speech to text") 
            audio_bytes = uploaded_file.read()
            # Persist the upload to disk, then normalise it to MP3 so one
            # pipeline handles every accepted input format.
            with open(os.path.join(upload_path,uploaded_file.name),"wb") as f:
                f.write((uploaded_file).getbuffer())
            with st.spinner(f"Processing Audio ... πŸ’«"):
                output_audio_file = uploaded_file.name.split('.')[0] + '.mp3'
                output_audio_file = to_mp3(uploaded_file, output_audio_file, upload_path, download_path)
                # NOTE(review): audio_file is never closed — consider a
                # `with` block here.
                audio_file = open(os.path.join(download_path,output_audio_file), 'rb')
                audio_bytes = audio_file.read()         

            # transcribe the text
            if generate_text:
                with st.spinner(f"Generating Transcript... πŸ’«"):
                    # Whisper 'base' model transcribes the converted MP3.
                    transcript = process_audio(str(os.path.abspath(os.path.join(download_path,output_audio_file))), 'base')
                    st.write("Here's what our AI heard you say:")
                    output_txt_file = str(output_audio_file.split('.')[0]+".txt")

                    save_transcript(transcript, output_txt_file)
                    # NOTE(review): output_file is never closed and
                    # output_file_data is never displayed or used —
                    # candidate for removal.
                    output_file = open(os.path.join(transcript_path,output_txt_file),"r")
                    output_file_data = output_file.read()

    with col2:
        # Playback panel for the converted audio (or a prompt to upload).
        if uploaded_file is not None:
            print("Opening ",audio_file) 
            st.markdown("---")   
            st.markdown("Feel free to play your uploaded audio file 🎼")
            st.audio(audio_bytes) 
    
        else:
            st.markdown("---")   
            st.warning('⚠ Please upload your audio file 😯')  
else:
    # "Not sure" selected: leave transcript empty so nothing runs below.
    pass       

# process next steps
# Route the question to the right agent, show the answer, and speak it back.
if transcript!='':
    st.info(f"Question: {transcript}")

    # run agent
    # with st_stdout("error"):
    # The phrase "using our own data" routes to the SQL agent over the local
    # SQLite DB; anything else goes to the web-search agent.
    # NOTE(review): the containment check is lower-cased but the replace()
    # below is case-sensitive — a question typed as "Using our own data ..."
    # would keep the phrase in the prompt sent to the SQL agent; confirm.
    if  "using our own data" in transcript.lower():
        response = agent_executor.run(transcript.replace("using our own data",""))
        # with st.expander('Message history:'):
        #     st.info(memory.buffer)
    else:
        response = agent.run(transcript)


    # convert this back into audio
    # Text-to-speech: render the answer to an in-memory MP3 and play it.
    sound_file = BytesIO()
    tts = gTTS(response, lang='en')
    tts.write_to_fp(sound_file)
    st.info(f"Answer: {response}")
    st.audio(sound_file)                 

st.markdown("<br><hr><center>Made with ❀️ by <a href='mailto:itsvivekbharadwaj@gmail.com?subject=OracleXplore queries&body=Please specify the issue you are facing with the app.'><strong>Vivek</strong></a> with thanks to [Prateek Kralhan](https://github.com/prateekralhan/OpenAI_Whisper_ASR/tree/main) and other open source legends ✨</center><hr>", unsafe_allow_html=True)