Spaces: Build error
Create app.py
app.py ADDED
@@ -0,0 +1,66 @@
import os

import openai
import gradio as gr
import azure.cognitiveservices.speech as speechsdk
from langchain.chains import ConversationChain
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv

# Load OPENAI_API_KEY, AZURE_SPEECH_KEY and AZURE_SPEECH_REGION from a .env file.
load_dotenv()

# openai.api_base = "https://api.chatanywhere.tech/v1"
# os.environ["OPENAI_API_KEY"] = 'sk-jfN3iXQqp6DGlhzr6dvxlePzbFa2juVEqQEg8Pi91PrDXNTi'
# openai.api_key = os.environ["OPENAI_API_KEY"]
# os.environ['AZURE_SPEECH_KEY'] = '06901dd3964b4681834cfb9bf4fe9efe'
# os.environ['AZURE_SPEECH_REGION'] = 'eastus'

# Keep a rolling summary of older turns so the prompt stays under the token limit.
memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)
conversation = ConversationChain(
    # gpt-3.5-turbo-0613 is a chat model, so it needs the ChatOpenAI wrapper
    # rather than the completion-style OpenAI class.
    llm=ChatOpenAI(model_name="gpt-3.5-turbo-0613", max_tokens=2048, temperature=0.5),
    memory=memory,
)

# Azure text-to-speech, played through the default speaker.
speech_config = speechsdk.SpeechConfig(
    subscription=os.environ.get('AZURE_SPEECH_KEY'),
    region=os.environ.get('AZURE_SPEECH_REGION'),
)
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)

# The language and voice that speak the responses.
speech_config.speech_synthesis_language = 'zh-CN'
speech_config.speech_synthesis_voice_name = 'zh-CN-XiaohanNeural'

speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

def play_voice(text):
    speech_synthesizer.speak_text_async(text)

def predict(input, history=[]):
    history.append(input)
    response = conversation.predict(input=input)
    history.append(response)
    play_voice(response)
    # Pair alternating history entries into (user, bot) tuples for the Chatbot component.
    responses = [(u, b) for u, b in zip(history[::2], history[1::2])]
    return responses, history

def transcribe(audio):
    # Gradio records to a temp file without an extension; Whisper wants one.
    os.rename(audio, audio + '.wav')
    with open(audio + '.wav', "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript['text']

def process_audio(audio, history=[]):
    text = transcribe(audio)
    return predict(text, history)

with gr.Blocks(css="#chatbot{height:800px} .overflow-y-auto{height:800px}") as demo:
    chatbot = gr.Chatbot(elem_id="chatbot")
    state = gr.State([])

    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter")

    with gr.Row():
        audio = gr.Audio(source="microphone", type="filepath")

    txt.submit(predict, [txt, state], [chatbot, state])
    audio.change(process_audio, [audio, state], [chatbot, state])

demo.launch()
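The Space's "Build error" status is consistent with missing or mismatched dependencies. A minimal requirements.txt covering the imports above might look like the sketch below; the version bounds are assumptions inferred from the APIs the code calls, not tested against this Space:

# requirements.txt (sketch; bounds are assumptions)
openai<1.0                        # openai.Audio.transcribe is the pre-1.0 API
gradio<4.0                        # gr.Audio(source=...) was removed in Gradio 4
azure-cognitiveservices-speech
langchain<0.1                     # langchain.chat_models / ConversationChain era
python-dotenv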