# ChatAI / app.py — voice-enabled chat demo (Gradio UI + LangChain conversation
# chain + OpenAI Whisper transcription + Azure neural TTS output).
# NOTE(review): this header replaces Hugging Face page furniture ("raw /
# history / blame", commit id, file size) captured by a scrape — converted to
# comments so the file parses as Python.
import openai, os
import gradio as gr
import azure.cognitiveservices.speech as speechsdk
from langchain import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
load_dotenv()
# Configuration comes from the environment / .env file (loaded above).
# Example overrides — NEVER commit real secrets to source control; the
# original file contained live-looking API keys here, redacted below:
# openai.api_base = "https://api.chatanywhere.tech/v1"
# os.environ["OPENAI_API_KEY"] = '<your-openai-api-key>'
# #openai.api_key = os.environ["OPENAI_API_KEY"]
# os.environ['AZURE_SPEECH_KEY'] = '<your-azure-speech-key>'
# os.environ['AZURE_SPEECH_REGION'] = 'eastus'
# Conversation memory: keeps recent turns verbatim and summarizes older ones
# once the buffer exceeds max_token_limit, using a ChatOpenAI instance for
# the summarization calls.
memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)
# Chat chain shared by every request in this process.
# NOTE(review): the chain uses the completion-style `OpenAI` wrapper with a
# chat model name ("gpt-3.5-turbo-0613") while the memory uses `ChatOpenAI`;
# `ChatOpenAI` is the usual wrapper for chat models — confirm this works with
# the pinned langchain version.
conversation = ConversationChain(
llm=OpenAI(model_name="gpt-3.5-turbo-0613",max_tokens=2048, temperature=0.5),
memory=memory,
)
# Azure Speech synthesis: credentials come from the environment (.env);
# missing variables yield None here and fail at the SDK level, not at import.
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('AZURE_SPEECH_KEY'), region=os.environ.get('AZURE_SPEECH_REGION'))
# Play synthesized audio through the host's default speaker.
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
# The language of the voice that speaks.
speech_config.speech_synthesis_language='zh-CN'
speech_config.speech_synthesis_voice_name='zh-CN-XiaohanNeural'
# Single synthesizer instance reused by play_voice() for every reply.
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
def play_voice(text):
    """Speak `text` aloud via the module-level Azure speech synthesizer.

    Fire-and-forget: the future returned by speak_text_async is not awaited,
    so synthesis errors are silently dropped and playback may still be in
    progress when this returns. NOTE(review): call `.get()` on the result if
    completion/error reporting is ever needed.
    """
    speech_synthesizer.speak_text_async(text)
def predict(input, history=None):
    """Run one chat turn: record the user input, get the model's reply,
    speak it aloud, and return data for the Gradio widgets.

    Args:
        input: the user's message text.
        history: flat list alternating [user, bot, user, bot, ...]; mutated
            in place when provided (Gradio passes its State object here).

    Returns:
        (responses, history): `responses` is a list of (user, bot) tuples
        for the Chatbot component; `history` is the updated flat list.
    """
    # Fix: the original used a mutable default (`history=[]`), which is
    # shared across all calls and would leak conversation state between
    # independent sessions. `None` + normalization is the safe equivalent.
    if history is None:
        history = []
    history.append(input)
    response = conversation.predict(input=input)
    history.append(response)
    # Speak the reply through Azure TTS (fire-and-forget).
    play_voice(response)
    # Pair up [user, bot, user, bot, ...] into [(user, bot), ...].
    responses = list(zip(history[::2], history[1::2]))
    return responses, history
def transcribe(audio):
    """Transcribe a recorded audio file with OpenAI's Whisper API.

    Args:
        audio: filepath of the recording produced by gr.Audio.

    Returns:
        The transcribed text (str).
    """
    # The Whisper endpoint infers the format from the filename, so give the
    # temp file a .wav extension before uploading.
    wav_path = audio + '.wav'
    os.rename(audio, wav_path)
    # Fix: the original opened the file without ever closing it (handle
    # leak); a context manager guarantees the descriptor is released.
    with open(wav_path, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript['text']
def process_audio(audio, history=None):
    """Transcribe a recorded audio clip and feed the text through `predict`.

    Args:
        audio: filepath of the recording (from gr.Audio).
        history: flat conversation history list; see `predict`.

    Returns:
        The same (responses, history) pair that `predict` returns.
    """
    # Fix: avoid the shared mutable default (`history=[]`) the original had.
    if history is None:
        history = []
    text = transcribe(audio)
    return predict(text, history)
# Build the Gradio UI: a tall chatbot pane with a text box and a microphone
# input underneath. Runs at import time (standard for Hugging Face Spaces).
with gr.Blocks(css="#chatbot{height:800px} .overflow-y-auto{height:800px}") as demo:
    chatbot = gr.Chatbot(elem_id="chatbot")
    # Flat [user, bot, ...] history carried between calls (see predict()).
    state = gr.State([])
    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter")
    with gr.Row():
        # NOTE(review): `source=` is the Gradio 3.x Audio API; 4.x renamed it
        # to `sources=[...]` — confirm the pinned gradio version.
        audio = gr.Audio(source="microphone", type="filepath")
    # Enter in the textbox sends a text turn; a finished recording sends a
    # voice turn. Both update the chatbot pane and the shared state.
    txt.submit(predict, [txt, state], [chatbot, state])
    audio.change(process_audio, [audio, state], [chatbot, state])
demo.launch()