import os
import uuid

import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
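# Assumed prerequisites (not pinned in the original file): the gradio,
# SpeechRecognition, gTTS, langchain-core, and langchain-community packages,
# plus a local Ollama server with the model pulled via `ollama pull llama3`.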
# Initialize the model and prompt template
chat = Ollama(model="llama3:latest")

prompt = ChatPromptTemplate.from_messages([
    ("system", """
    You are a helpful AI assistant. Your task is to engage in conversation with users,
    answer their questions, and assist them with various tasks.
    Communicate politely and maintain focus on the user's needs.
    Keep responses concise, typically two to three sentences.
    """),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
])

runnable = prompt | chat

# Keep one ChatMessageHistory per session id. The original lambda returned a
# fresh ChatMessageHistory on every call, which silently discarded the
# conversation after each turn.
message_histories = {}

def get_session_history(session_id):
    if session_id not in message_histories:
        message_histories[session_id] = ChatMessageHistory()
    return message_histories[session_id]

with_message_history = RunnableWithMessageHistory(
    runnable,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)
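# Note: chat_function below invokes this runnable with a single hard-coded
# session_id ("chat_history"), so every visitor to a shared app shares one
# conversation; per-user memory would need a unique id per connection
# (for example, a uuid kept in gr.State). Left as in the original here.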
def text_to_speech(text, file_name):
    """Synthesize text to an MP3 file with gTTS and return the file path."""
    tts = gTTS(text=text, lang='en', slow=False)
    file_path = os.path.join(os.getcwd(), file_name)
    tts.save(file_path)
    return file_path
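# Usage example: text_to_speech("Hello!", "hello.mp3") writes hello.mp3 to the
# current working directory and returns its absolute path.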
def speech_to_text(audio):
    """Transcribe a recorded audio file to text with the Google web recognizer."""
    if audio is None:
        return "No audio input received."
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
        text = recognizer.recognize_google(audio_data)
        print(f"Transcription: {text}")  # Debug information
        return text
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio."
    except sr.RequestError:
        return "Could not request results from the speech recognition service."
    except Exception as e:
        return f"Error processing audio: {str(e)}"
def chat_function(input_type, text_input=None, audio_input=None, history=None):
    """Handle one turn: collect user input, query the LLM, and voice the reply."""
    if history is None:
        history = []

    if input_type == "text":
        user_input = text_input
    elif input_type == "audio":
        if audio_input is not None:
            user_input = speech_to_text(audio_input)
        else:
            user_input = "No audio input received."
    else:
        return history, None

    print(f"User input: {user_input}")  # Debug information

    # Get LLM response (the Ollama LLM returns the completion as a plain string)
    response = with_message_history.invoke(
        {"input": user_input},
        config={"configurable": {"session_id": "chat_history"}},
    )

    # Generate audio for the LLM response; the UUID keeps file names unique
    audio_file = f"response_{uuid.uuid4()}.mp3"
    audio_path = text_to_speech(response, audio_file)

    # Append the turn to the history in Gradio's (user, bot) tuple format
    history.append((user_input, response))
    return history, audio_path
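# Note: response_*.mp3 files accumulate in the working directory across turns;
# a cleanup step (or tempfile.NamedTemporaryFile) would avoid unbounded growth.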
# Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        text_input = gr.Textbox(placeholder="Type your message here...")
        audio_input = gr.Audio(sources=['microphone'], type="filepath")
    with gr.Row():
        text_button = gr.Button("Send Text")
        audio_button = gr.Button("Send Audio")
    audio_output = gr.Audio()

    def on_audio_change(audio):
        # Preview the transcription in the textbox as soon as a recording arrives
        if audio is not None:
            return speech_to_text(audio)
        return ""

    audio_input.change(on_audio_change, inputs=[audio_input], outputs=[text_input])

    # gr.State carries the constant input_type without rendering anything visible;
    # the original instantiated gr.Textbox(value=...) inline, which Gradio
    # auto-renders inside a Blocks context, leaving stray textboxes in the layout.
    text_mode = gr.State("text")
    audio_mode = gr.State("audio")
    text_button.click(chat_function,
                      inputs=[text_mode, text_input, audio_input, chatbot],
                      outputs=[chatbot, audio_output])
    audio_button.click(chat_function,
                       inputs=[audio_mode, text_input, audio_input, chatbot],
                       outputs=[chatbot, audio_output])
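# share=True requests a temporary public gradio.live URL, server_name='0.0.0.0'
# binds the app on all network interfaces, and max_threads caps concurrent workers.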
demo.launch(server_name='0.0.0.0', share=True, max_threads=10)