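"""Streamlit voice chatbot.

Accepts a Hindi audio clip, transcribes it, translates the transcript to
English for a GPT-3.5 "call agent" chain, then translates the reply back
to Hindi and synthesises it as speech (the ElevenLabs API key is taken
from the sidebar).
"""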
import os
import re
import sys
import tempfile

import librosa
import pandas as pd
import streamlit as st
import torch
import torchaudio
from audiorecorder import audiorecorder
from elevenlabs import set_api_key, clone, generate, play
from langchain import HuggingFaceHub, LLMChain, PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferWindowMemory
from langchain.vectorstores import FAISS
from pydub import AudioSegment
from streamlit_chat import message
from transformers import pipeline

from helper import parse_transcription, hindi_to_english, translate_english_to_hindi, hindi_tts
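# helper.py (not shown here) is assumed to provide the four functions above:
# Hindi speech-to-text, Hindi<->English translation, and Hindi text-to-speech.
# Their signatures are inferred from how they are called below.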
def extract_text_from_html(html):
    """Strip HTML tags from the model output, returning plain text."""
    cleanr = re.compile('<.*?>')
    return re.sub(cleanr, '', html)
def conversational_chat(llm_chain, query):
    """Run one turn through the LLM chain and return the cleaned reply."""
    query = ("Behave like a customer call agent and don't include any "
             "website address, company name or any other parameter. " + query)
    output = llm_chain.predict(human_input=query)
    return extract_text_from_html(output)
def save_uploaded_file_as_mp3(uploaded_file, output_file_path):
    """Re-encode an uploaded audio file to MP3 on disk."""
    audio = AudioSegment.from_file(uploaded_file)
    audio.export(output_file_path, format="mp3")
user_api_key = st.sidebar.text_input(
    label="#### Your OpenAI API key 🔑",
    placeholder="Paste your OpenAI API key, sk-",
    type="password")
def ui():
    if user_api_key is not None and user_api_key.strip() != "":
        os.environ["OPENAI_API_KEY"] = user_api_key

    # Persona prompt: the model answers as "Jack", a telecom support agent.
    template = """
    Behave like a telecom customer service call agent and don't include any website address, company name or any other parameter in your output.
    {history}
    Me: {human_input}
    Jack:
    """
    prompt = PromptTemplate(
        input_variables=["history", "human_input"],
        template=template
    )
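    # ConversationBufferWindowMemory(k=2) keeps only the last two exchanges,
    # which is what gets substituted into {history} above.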
    llm_chain = LLMChain(
        llm=ChatOpenAI(temperature=0.0, model_name='gpt-3.5-turbo'),
        prompt=prompt,
        verbose=True,
        memory=ConversationBufferWindowMemory(k=2)
    )
    # Pre-recorded greeting clips used to seed the conversation view.
    good_morning_audio, sample_rate1 = librosa.load('./good-morning.mp3')
    hi_audio, sample_rate2 = librosa.load('./good-morning-sir.mp3')
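    # Session state holds parallel lists: audio clips for playback
    # ('past'/'generated') and their text counterparts ('past_text'/'generated_text').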
    if 'history' not in st.session_state:
        st.session_state['history'] = []
        st.session_state['history_text'] = []
    if 'generated' not in st.session_state:
        st.session_state['generated'] = [hi_audio]
        st.session_state['generated_text'] = []
    if 'past' not in st.session_state:
        st.session_state['past'] = [good_morning_audio]
        st.session_state['past_text'] = []
    if user_api_key is not None and user_api_key.strip() != "":
        eleven_labs_api_key = st.sidebar.text_input(
            label="#### Your Eleven Labs API key 🔑",
            placeholder="Paste your Eleven Labs API key",
            type="password")
        if eleven_labs_api_key:
            set_api_key(eleven_labs_api_key)
    # Container for the chat history.
    response_container = st.container()
    # Container for the user's audio input.
    container = st.container()
    with container:
        with st.form(key='my_form', clear_on_submit=True):
            audio_file = st.file_uploader("Upload an audio file", type=["wav", "mp4", "mp3"])
            submit_button = st.form_submit_button(label='Send')

        if audio_file is not None and submit_button:
            output_file_path = "./output_audio.mp3"
            save_uploaded_file_as_mp3(audio_file, output_file_path)
            hindi_input_audio, sample_rate = librosa.load(output_file_path, sr=None, mono=True)
            # Speech recognition on the Hindi audio.
            hindi_transcription = parse_transcription(output_file_path)
            st.success(f"Audio file saved as {output_file_path}")
            # Translate Hindi to English.
            english_input = hindi_to_english(hindi_transcription)
            # Feed the English text to the LLM.
            english_output = conversational_chat(llm_chain, english_input)
            # Translate the reply back to Hindi.
            hin_output = translate_english_to_hindi(str(english_output))
            # Synthesise the Hindi reply as speech.
            hindi_output_audio = hindi_tts(hin_output)
            st.text(hin_output)
            st.session_state['past'].append(hindi_input_audio)
            st.session_state['past_text'].append(english_input)
            st.session_state['generated_text'].append(english_output)
            st.session_state['generated'].append(hindi_output_audio)
    if 'generated' in st.session_state and st.session_state['generated']:
        with response_container:
            for i in range(len(st.session_state['generated'])):
                if i == 0:
                    # The first entry is the canned greeting pair.
                    st.audio(st.session_state["past"][i], sample_rate=sample_rate1, format='audio/wav')
                    st.audio(st.session_state["generated"][i], sample_rate=sample_rate2, format='audio/wav')
                else:
                    st.audio(st.session_state["generated"][i], sample_rate=sample_rate2, format='audio/wav')
if __name__ == '__main__':
    ui()
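# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py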