Adamkong committed
Commit b49b56b · 1 Parent(s): b2170f8

Create app.py

Files changed (1): app.py (+66, -0)
app.py ADDED
import os

import openai
import gradio as gr
import azure.cognitiveservices.speech as speechsdk
from langchain import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv

# Load OPENAI_API_KEY, AZURE_SPEECH_KEY and AZURE_SPEECH_REGION from a .env file.
load_dotenv()
# The openai module reads OPENAI_API_KEY at import time, which happens before
# load_dotenv() runs, so set the key explicitly for the Whisper call below.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# openai.api_base = "https://api.chatanywhere.tech/v1"
# os.environ["OPENAI_API_KEY"] = 'sk-jfN3iXQqp6DGlhzr6dvxlePzbFa2juVEqQEg8Pi91PrDXNTi'
# os.environ['AZURE_SPEECH_KEY'] = '06901dd3964b4681834cfb9bf4fe9efe'
# os.environ['AZURE_SPEECH_REGION'] = 'eastus'

# Summarize older turns so the running conversation stays within the token budget.
memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)
conversation = ConversationChain(
    llm=OpenAI(model_name="gpt-3.5-turbo-0613", max_tokens=2048, temperature=0.5),
    memory=memory,
)

# Azure text-to-speech, played through the machine's default speaker.
speech_config = speechsdk.SpeechConfig(
    subscription=os.environ.get('AZURE_SPEECH_KEY'),
    region=os.environ.get('AZURE_SPEECH_REGION'),
)
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)

# The language and voice that speak the replies.
speech_config.speech_synthesis_language = 'zh-CN'
speech_config.speech_synthesis_voice_name = 'zh-CN-XiaohanNeural'

speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)


def play_voice(text):
    # Fire-and-forget: queue playback without blocking the chat response.
    speech_synthesizer.speak_text_async(text)


def predict(input, history=[]):
    history.append(input)
    response = conversation.predict(input=input)
    history.append(response)
    play_voice(response)
    # Pair alternating [user, bot, user, bot, ...] entries into (user, bot)
    # tuples for the Chatbot component.
    responses = [(u, b) for u, b in zip(history[::2], history[1::2])]
    return responses, history


def transcribe(audio):
    # The recording may arrive without a file extension; rename it so the
    # Whisper API can detect the audio format.
    os.rename(audio, audio + '.wav')
    with open(audio + '.wav', "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript['text']


def process_audio(audio, history=[]):
    text = transcribe(audio)
    return predict(text, history)


with gr.Blocks(css="#chatbot{height:800px} .overflow-y-auto{height:800px}") as demo:
    chatbot = gr.Chatbot(elem_id="chatbot")
    state = gr.State([])

    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter")

    with gr.Row():
        audio = gr.Audio(source="microphone", type="filepath")

    txt.submit(predict, [txt, state], [chatbot, state])
    audio.change(process_audio, [audio, state], [chatbot, state])

demo.launch()
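
A note on dependencies: the file targets the pre-rewrite APIs of its libraries. openai.Audio.transcribe exists only in the openai SDK before 1.0, gr.Audio(source="microphone") is Gradio 3.x syntax, and the bare from langchain import OpenAI predates the langchain 0.1 package split. A requirements.txt along these lines should therefore work; the exact version bounds are an assumption on my part, not something stated in the commit:

    openai<1
    gradio<4
    langchain<0.1
    azure-cognitiveservices-speech
    python-dotenv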
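
For completeness, load_dotenv() expects a .env file next to app.py. A minimal sketch follows; the variable names come from the code above, while the values here are placeholders:

    OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxx
    AZURE_SPEECH_KEY=xxxxxxxxxxxxxxxx
    AZURE_SPEECH_REGION=eastus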
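
With the dependencies and the .env file in place, python app.py starts the demo; demo.launch() serves the Gradio UI on http://127.0.0.1:7860 by default. Since the Azure synthesizer writes to the default speaker of the machine running the script, the spoken reply is only audible when the app runs locally.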