Spaces:
Build error
Build error
File size: 2,420 Bytes
b49b56b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import openai, os
import gradio as gr
import azure.cognitiveservices.speech as speechsdk
from langchain import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
# Configuration is read from the environment (optionally populated from a local
# .env file by load_dotenv): OPENAI_API_KEY for the OpenAI calls, and
# AZURE_SPEECH_KEY / AZURE_SPEECH_REGION for speech synthesis.
# Credentials must never be written into this file, not even in comments.
load_dotenv()
# Optional: uncomment to route OpenAI requests through an alternative endpoint.
# openai.api_base = "https://api.chatanywhere.tech/v1"

# Conversation memory backed by a chat model and capped at 2048 tokens.
memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)

# gpt-3.5-turbo-0613 is a chat model, so it is driven through the chat wrapper
# (ChatOpenAI) rather than the completion-style OpenAI class.
# NOTE(review): a 2048-token memory plus a 2048-token completion budget may
# approach the model's context window — confirm the limits are intended.
conversation = ConversationChain(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo-0613", max_tokens=2048, temperature=0.5),
    memory=memory,
)
# Azure Speech credentials are taken from the environment; a missing variable
# is passed through as None.
speech_config = speechsdk.SpeechConfig(
    subscription=os.getenv('AZURE_SPEECH_KEY'),
    region=os.getenv('AZURE_SPEECH_REGION'),
)
# The language of the voice that speaks, and the specific voice to use.
speech_config.speech_synthesis_language = 'zh-CN'
speech_config.speech_synthesis_voice_name = 'zh-CN-XiaohanNeural'

# Synthesized audio goes to the default speaker of the machine running this
# process.
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
speech_synthesizer = speechsdk.SpeechSynthesizer(
    speech_config=speech_config,
    audio_config=audio_config,
)
def play_voice(text):
    """Start speaking *text* through the module-level speech synthesizer.

    The value returned by the asynchronous speak call is discarded, so this
    function returns None without inspecting the synthesis result.
    """
    start_speaking = speech_synthesizer.speak_text_async
    start_speaking(text)
def predict(input, history=None):
    """Send one user message to the conversation chain and speak the reply.

    Args:
        input: The user's message text.
        history: Flat list alternating user messages and bot replies. It is
            extended in place with the new exchange. Defaults to a fresh
            empty list on every call.

    Returns:
        A ``(responses, history)`` tuple, where ``responses`` is a list of
        ``(user_message, bot_reply)`` pairs built from ``history``.
    """
    # A None sentinel replaces the former ``history=[]`` default, which was a
    # single list shared (and grown) across every call that omitted it.
    if history is None:
        history = []

    history.append(input)
    response = conversation.predict(input=input)
    history.append(response)
    play_voice(response)

    # Even indices hold user turns, odd indices hold the matching replies.
    responses = list(zip(history[::2], history[1::2]))
    return responses, history
def transcribe(audio):
    """Transcribe a recorded audio file to text with the ``whisper-1`` model.

    Args:
        audio: Filesystem path of the recording. The file is renamed in place
            to ``<audio>.wav`` before being uploaded.

    Returns:
        The ``'text'`` field of the transcription response.
    """
    # Presumably the rename gives the upload a recognizable audio extension —
    # TODO confirm against the transcription API's requirements.
    wav_path = audio + '.wav'
    os.rename(audio, wav_path)

    # The context manager closes the handle even if the API call raises.
    with open(wav_path, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript['text']
def process_audio(audio, history=None):
    """Transcribe a recording and feed the text into the chat.

    Args:
        audio: Filesystem path of the recording, or an empty value when no
            recording is available.
        history: Flat list alternating user messages and bot replies.
            Defaults to a fresh empty list on every call.

    Returns:
        A ``(responses, history)`` tuple as produced by ``predict``. When
        ``audio`` is empty the conversation is returned unchanged.
    """
    # A None sentinel replaces the former ``history=[]`` default, which was a
    # single list shared across calls and mutated downstream by ``predict``.
    if history is None:
        history = []

    # NOTE(review): the audio change event may deliver an empty value (e.g.
    # when the recording is cleared) — confirm for the Gradio version in use.
    # Without this guard, ``audio + '.wav'`` raises TypeError on None.
    if not audio:
        return list(zip(history[::2], history[1::2])), history

    text = transcribe(audio)
    return predict(text, history)
# Build the UI: a chat panel, a text input row and a microphone input row.
# The CSS pins the chat panel (and its scroll container) to 800px tall.
with gr.Blocks(
    css="#chatbot{height:800px} .overflow-y-auto{height:800px}"
) as demo:
    chatbot = gr.Chatbot(elem_id="chatbot")
    # Per-session conversation history, starting empty.
    state = gr.State([])

    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
        )

    with gr.Row():
        audio = gr.Audio(source="microphone", type="filepath")

    # Both inputs update the chat panel and the stored history.
    txt.submit(fn=predict, inputs=[txt, state], outputs=[chatbot, state])
    audio.change(fn=process_audio, inputs=[audio, state], outputs=[chatbot, state])

demo.launch()