|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
from gradio_client import Client |
|
import os |
|
import uuid |
|
from anthropic import Anthropic |
|
|
|
from dotenv import load_dotenv, find_dotenv |
|
_ = load_dotenv(find_dotenv()) |
|
|
|
|
|
# Hugging Face token for the remote TTS Space client below (from env / .env).
HF_TOKEN = os.getenv('HF_TOKEN')

# Anthropic() reads ANTHROPIC_API_KEY from the environment loaded via dotenv.
anthropic = Anthropic()

# Claude model used for both chat replies and translations.
MODEL_NAME = "claude-3-haiku-20240307"

# Generation settings applied to every completion request.
SYSTEM_MESSAGE = "You are a friendly Chatbot, who gives short answers."
MAX_TOKENS = 512
TEMPERATURE = 0  # deterministic replies

TOP_P = 0.95  # NOTE(review): defined but never passed to messages.create in this file — confirm if intended
|
|
|
def get_completion(prompt):
    """Send a single-turn user prompt to Claude and return the reply text."""
    response = anthropic.messages.create(
        model=MODEL_NAME,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.content[0].text
|
|
|
|
|
# Client for the multilingual TTS Hugging Face Space used to synthesize speech.
tts_client = Client("xxxrokxxx/Multilingual-TTS", hf_token=HF_TOKEN)

# Languages offered in the UI dropdowns (text language and speech language).
languages = [
    "English", "Spanish", "French", "German", "Italian",
    "Chinese", "Japanese", "Slovenian", "Russian", "Vietnamese"
]
|
|
|
# Flag emoji per language.
# Fix: the previous literals were mojibake — the UTF-8 bytes of the flag
# emoji had been decoded through a Thai single-byte codepage (e.g. "๐ฌ๐ง"
# is what remains of the bytes of the GB regional-indicator pair). Restored
# the intended regional-indicator flag emoji for each language.
language_flags = {
    "English": "🇬🇧", "Spanish": "🇪🇸", "French": "🇫🇷", "German": "🇩🇪", "Italian": "🇮🇹",
    "Chinese": "🇨🇳", "Japanese": "🇯🇵", "Slovenian": "🇸🇮", "Russian": "🇷🇺", "Vietnamese": "🇻🇳",
}
|
|
|
def translate_text(text, from_lang, to_lang):
    """Translate *text* between languages via the LLM and return the translation."""
    prompt = (
        f"Translate the following text from {from_lang} to {to_lang}:"
        f"\n\n{text}\n\nTranslation:"
    )
    return get_completion(prompt)
|
|
|
def get_speakers(language):
    """Return the TTS speaker names available for *language* from the Space.

    Falls back to ["Default"] when the remote call fails or the payload
    does not have the expected shape.
    """
    try:
        result = tts_client.predict(language=language, api_name="/get_speakers")
        choices = result[0]['choices']
        names = [entry[0] for entry in choices]
        return names
    except Exception as exc:
        print(f"Error getting speakers for {language}: {exc}")
        return ["Default"]
|
|
|
def generate_unique_filename(extension=".wav"):
    """Return a collision-resistant random filename: '<uuid4><extension>'."""
    return f"{uuid.uuid4()}{extension}"
|
|
|
def to_voice(text, language, speaker):
    """Synthesize *text* with the remote TTS Space and return the audio path.

    The generated file is renamed to a unique name in the same directory so
    repeated requests don't collide. Returns None on any failure.
    """
    try:
        _, generated_path = tts_client.predict(
            text=text,
            language_code=language,
            speaker=speaker,
            tashkeel_checkbox=False,
            api_name="/text_to_speech_edge",
        )
        unique_name = generate_unique_filename()
        renamed_path = os.path.join(os.path.dirname(generated_path), unique_name)
        os.rename(generated_path, renamed_path)
        print(f"Audio file renamed to: {renamed_path}")
        return renamed_path
    except Exception as exc:
        print(f"Error generating voice: {exc}")
        return None
|
|
|
def respond(message, chat_history, language):
    """Build a chat transcript prompt and return Claude's reply text.

    Args:
        message: The user's newest message.
        chat_history: Prior (user_text, assistant_text) pairs.
        language: Language the assistant is asked to reply in.

    Returns:
        The assistant's reply text.

    Bug fix: the previous version embedded `message` in the prompt *before*
    the history and then appended it again at the end, so the transcript was
    out of order and contained a duplicated user turn. The prompt now reads:
    system instruction, then history in order, then the new message once.
    """
    language_instruction = f"Please respond in {language}."
    full_prompt = f"{SYSTEM_MESSAGE} {language_instruction}"

    # Replay the conversation so far, oldest first.
    for human, ai in chat_history:
        full_prompt += f"\n\nUser: {human}\nAssistant: {ai}"

    # Current user turn, left open for the assistant to complete.
    full_prompt += f"\n\nUser: {message}\nAssistant:"

    return get_completion(full_prompt)
|
|
|
def print_like_dislike(x: gr.LikeData):
    """Log a chatbot like/dislike event to stdout and return the reaction flag."""
    reaction = x.liked
    print(x.index, x.value, reaction)
    return reaction
|
|
|
def add_message(history, message, response_type):
    """Append the pending user turn (reply not generated yet) and clear the textbox."""
    history.append((message, None))
    cleared_box = gr.Textbox(value="", interactive=True)
    return history, cleared_box
|
|
|
def bot(history, response_type, language1, language2, speaker):
    """Produce the assistant reply for the last user turn, plus optional audio.

    Fills in the (user, None) pair appended by add_message. When the user
    asked for "text + audio", the reply is translated to the speech language
    if it differs from the text language, then synthesized to a file.
    Returns the updated history, the audio path (or None), and response_type.
    """
    user_text = history[-1][0]
    reply = respond(user_text, history[:-1], language1)

    audio_path = None
    if response_type == "text + audio":
        if language1 == language2:
            speech_text = reply
        else:
            # Speak in a different language than the written reply.
            speech_text = translate_text(reply, language1, language2)
        audio_path = to_voice(speech_text, language2, speaker)

    history[-1] = (user_text, reply)
    return history, audio_path, response_type
|
|
|
|
|
|
|
def update_speakers(language):
    """Rebuild the speaker dropdown for a (possibly flag-prefixed) language name."""
    # Keep only the last word so a "<flag> English"-style label still resolves.
    lang_name = language.split(" ")[-1]
    available = get_speakers(lang_name)
    default = available[0] if available else None
    return gr.Dropdown(choices=available, value=default, label="Speaker")
|
|
|
# CSS that strips the default button chrome (background, border, padding)
# from the icon-only send and play buttons, including their hover state.
custom_css = """
.submit-btn, .play-btn {
    background-color: transparent !important;
    border: none !important;
    padding: 0 !important;
}
.submit-btn:hover, .play-btn:hover {
    background-color: transparent !important;
}
"""
|
|
|
|
|
|
|
# ---- UI layout and language wiring -------------------------------------
with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
    # Conversation history display.
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,  # NOTE(review): deprecated in newer Gradio releases — confirm target version
        height=400,
        show_label=False,
    )

    with gr.Row():
        # Message input plus an icon-only send button.
        msg = gr.Textbox(
            show_label=False,
            placeholder="Enter your message...",
            scale=9
        )
        submit_btn = gr.Button(
            "โค",  # NOTE(review): label bytes look mojibake-corrupted (likely an arrow glyph such as "➤") — restore intended character
            elem_classes=["submit-btn"],
            scale=1
        )

    # Whether replies are text-only or text plus synthesized speech.
    response_type = gr.Radio(
        ["text", "text + audio"],
        value="text",
        label="Response Type",
    )

    with gr.Accordion("Language Selection", open=False):
        with gr.Column():
            gr.Markdown("### Text to Speech")
            with gr.Row():
                language1 = gr.Dropdown(choices=languages, label="Text Language", value="English")
                language2 = gr.Dropdown(choices=languages, label="Speech Language", value="English")
            # Pre-populate speakers for the default speech language.
            initial_speakers = get_speakers("English")
            speaker = gr.Dropdown(choices=initial_speakers, value=initial_speakers[0] if initial_speakers else None, label="Speaker")

    # NOTE(review): `response_type` here is the Radio *component* object, not
    # its string value, so this membership test is always False and the audio
    # player always starts hidden regardless of the selected option.
    audio_visible = response_type in ["text + audio"]
    print("Audio Visible", audio_visible)
    audio_player = gr.Audio(label="Response Audio", visible=audio_visible, elem_id="audio-player", autoplay=True)
    # Hidden replay button; play_audio (defined below) reveals it.
    play_btn = gr.Button("๐", elem_classes=["play-btn"], visible=False)  # NOTE(review): label bytes look mojibake-corrupted (likely a speaker emoji such as "🔊") — restore intended character

    # Cross-callback state: path of the last generated clip and the response
    # type that was active when it was generated.
    audio_path_state = gr.State()
    response_type_state = gr.State()

    # Refresh the speaker list whenever either language selection changes.
    language1.change(update_speakers, inputs=[language1], outputs=[speaker])
    language2.change(update_speakers, inputs=[language2], outputs=[speaker])
|
|
|
def process_response(history, audio_path, response_type): |
|
|
|
audio_visible = response_type in ["text + audio"] and audio_path is not None |
|
return ( |
|
history, |
|
audio_path if audio_visible else None, |
|
audio_visible, |
|
audio_path, |
|
response_type |
|
) |
|
def play_audio(audio_path): |
|
return gr.update(value=audio_path, visible=True, autoplay=True), gr.update(visible=True) |
|
|
|
    # Enter key in the textbox: record the user turn, run the bot, then route
    # the bot's outputs to the UI components and state.
    msg.submit(add_message, [chatbot, msg, response_type], [chatbot, msg]).then(
        bot, [chatbot, response_type, language1, language2, speaker], [chatbot, audio_path_state, response_type_state]
    ).then(
        process_response, [chatbot, audio_path_state, response_type_state], [chatbot, audio_player, audio_path_state, response_type_state]
    )

    # Send button: identical pipeline to the Enter-key handler above.
    submit_btn.click(add_message, [chatbot, msg, response_type], [chatbot, msg]).then(
        bot, [chatbot, response_type, language1, language2, speaker], [chatbot, audio_path_state, response_type_state]
    ).then(
        process_response, [chatbot, audio_path_state, response_type_state], [chatbot, audio_player, audio_path_state, response_type_state]
    )

    # Manual replay of the last generated audio clip.
    play_btn.click(play_audio, inputs=[audio_path_state], outputs=[audio_player, play_btn])

    # Log like/dislike feedback on chatbot messages (no UI outputs).
    chatbot.like(print_like_dislike, None, None)


if __name__ == "__main__":
    demo.launch()