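"""maichat: a Gradio chat app backed by Anthropic's Claude.

Replies come from claude-3-haiku; optional speech output is produced by the
xxxrokxxx/Multilingual-TTS Space, translating first when the text and speech
languages differ.
"""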
import gradio as gr
from huggingface_hub import InferenceClient
from gradio_client import Client
import os
import uuid
from anthropic import Anthropic
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
HF_TOKEN = os.getenv('HF_TOKEN')
anthropic = Anthropic()  # reads ANTHROPIC_API_KEY from the environment
MODEL_NAME = "claude-3-haiku-20240307"
# Generation parameters
SYSTEM_MESSAGE = "You are a friendly Chatbot, who gives short answers."
MAX_TOKENS = 512
TEMPERATURE = 0
TOP_P = 0.95  # defined but not currently passed to the API call below
def get_completion(prompt):
    """Send a single-turn prompt to Claude and return the reply text."""
    return anthropic.messages.create(
        model=MODEL_NAME,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        messages=[{"role": "user", "content": prompt}],
    ).content[0].text
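# Example (illustrative output only):
#   get_completion("Say hello in one short sentence.")  # -> "Hello there!"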
#chat_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token="")
tts_client = Client("xxxrokxxx/Multilingual-TTS", hf_token=HF_TOKEN)
languages = [
    "English", "Spanish", "French", "German", "Italian",
    "Chinese", "Japanese", "Slovenian", "Russian", "Vietnamese",
]
language_flags = {
    "English": "🇬🇧", "Spanish": "🇪🇸", "French": "🇫🇷", "German": "🇩🇪", "Italian": "🇮🇹",
    "Chinese": "🇨🇳", "Japanese": "🇯🇵", "Slovenian": "🇸🇮", "Russian": "🇷🇺", "Vietnamese": "🇻🇳",
}
def translate_text(text, from_lang, to_lang):
    prompt = f"Translate the following text from {from_lang} to {to_lang}:\n\n{text}\n\nTranslation:"
    return get_completion(prompt)
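# Example (illustrative output only):
#   translate_text("Good morning", "English", "Spanish")  # -> "Buenos días"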
def get_speakers(language):
    try:
        result = tts_client.predict(language=language, api_name="/get_speakers")
        # result[0] is a dropdown update whose "choices" hold (label, value) pairs.
        speakers = result[0]["choices"]
        return [speaker[0] for speaker in speakers]
    except Exception as e:
        print(f"Error getting speakers for {language}: {e}")
        return ["Default"]
def generate_unique_filename(extension=".wav"):
    return str(uuid.uuid4()) + extension
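# Produces names like "1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed.wav" (random per call).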
def to_voice(text, language, speaker):
    try:
        _, audio_path = tts_client.predict(
            text=text,
            language_code=language,
            speaker=speaker,
            tashkeel_checkbox=False,  # Arabic diacritization; not needed here
            api_name="/text_to_speech_edge"
        )
        # Rename the generated file to a shorter, unique name.
        new_filename = generate_unique_filename()
        new_path = os.path.join(os.path.dirname(audio_path), new_filename)
        os.rename(audio_path, new_path)
        print(f"Audio file renamed to: {new_path}")
        return new_path
    except Exception as e:
        print(f"Error generating voice: {e}")
        return None
def respond(message, chat_history, language):
    # Build one flat prompt: system message, prior turns, then the new message.
    language_instruction = f"Please respond in {language}."
    full_prompt = f"{SYSTEM_MESSAGE} {language_instruction}"
    for human, ai in chat_history:
        full_prompt += f"\n\nUser: {human}\nAssistant: {ai}"
    full_prompt += f"\n\nUser: {message}\nAssistant:"
    ai_message = get_completion(full_prompt)
    return ai_message
def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)
    return x.liked
def add_message(history, message, response_type):
    # response_type is supplied by the event wiring but not needed here.
    history.append((message, None))
    return history, gr.Textbox(value="", interactive=True)
def bot(history, response_type, language1, language2, speaker):
    human_message = history[-1][0]
    ai_message = respond(human_message, history[:-1], language1)
    audio_path = None
    if response_type == "text + audio":
        # If the speech language differs from the text language, translate first.
        if language1 != language2:
            audio_text = translate_text(ai_message, language1, language2)
        else:
            audio_text = ai_message
        audio_path = to_voice(audio_text, language2, speaker)
    history[-1] = (human_message, ai_message)
    return history, audio_path, response_type
def update_speakers(language):
    language = language.split(" ")[-1]  # tolerate values prefixed with a flag emoji
    speakers = get_speakers(language)
    return gr.Dropdown(choices=speakers, value=speakers[0] if speakers else None, label="Speaker")
custom_css = """
.submit-btn, .play-btn {
    background-color: transparent !important;
    border: none !important;
    padding: 0 !important;
}
.submit-btn:hover, .play-btn:hover {
    background-color: transparent !important;
}
"""
with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,
        height=400,
        show_label=False,
    )
    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Enter your message...",
            scale=9
        )
        submit_btn = gr.Button(
            "➤",
            elem_classes=["submit-btn"],
            scale=1
        )
    response_type = gr.Radio(
        ["text", "text + audio"],
        value="text",
        label="Response Type",
    )
    with gr.Accordion("Language Selection", open=False):
        with gr.Column():
            gr.Markdown("### Text to Speech")
            with gr.Row():
                language1 = gr.Dropdown(choices=languages, label="Text Language", value="English")
                language2 = gr.Dropdown(choices=languages, label="Speech Language", value="English")
                initial_speakers = get_speakers("English")
                speaker = gr.Dropdown(choices=initial_speakers, value=initial_speakers[0] if initial_speakers else None, label="Speaker")
            # gr.Markdown("### Speech to Text")
            # with gr.Row():
            #     language3 = gr.Dropdown(choices=languages, label="Speech Language", value="English")
            #     language4 = gr.Dropdown(choices=languages, label="Text Language", value="English")
    # The player starts hidden; process_response() reveals it once audio exists.
    audio_player = gr.Audio(label="Response Audio", visible=False, elem_id="audio-player", autoplay=True)
    play_btn = gr.Button("🔊", elem_classes=["play-btn"], visible=False)
    # Hidden components to store state
    audio_path_state = gr.State()
    response_type_state = gr.State()
    # Event handlers
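    # Pipeline per message: add_message echoes the user's text into the chat,
    # bot generates Claude's reply (and, for "text + audio", a TTS file), and
    # process_response pushes the result into the chatbot and audio player.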
    language1.change(update_speakers, inputs=[language1], outputs=[speaker])
    language2.change(update_speakers, inputs=[language2], outputs=[speaker])
    def process_response(history, audio_path, response_type):
        # Show the player only when audio mode is on and a file was produced.
        audio_visible = response_type == "text + audio" and audio_path is not None
        return (
            history,
            gr.update(value=audio_path if audio_visible else None, visible=audio_visible),
            audio_path,
            response_type,
        )
    def play_audio(audio_path):
        return gr.update(value=audio_path, visible=True, autoplay=True), gr.update(visible=True)
    msg.submit(add_message, [chatbot, msg, response_type], [chatbot, msg]).then(
        bot, [chatbot, response_type, language1, language2, speaker], [chatbot, audio_path_state, response_type_state]
    ).then(
        process_response, [chatbot, audio_path_state, response_type_state], [chatbot, audio_player, audio_path_state, response_type_state]
    )
    submit_btn.click(add_message, [chatbot, msg, response_type], [chatbot, msg]).then(
        bot, [chatbot, response_type, language1, language2, speaker], [chatbot, audio_path_state, response_type_state]
    ).then(
        process_response, [chatbot, audio_path_state, response_type_state], [chatbot, audio_player, audio_path_state, response_type_state]
    )
    play_btn.click(play_audio, inputs=[audio_path_state], outputs=[audio_player, play_btn])
    chatbot.like(print_like_dislike, None, None)
if __name__ == "__main__":
    demo.launch()