import gradio as gr
from huggingface_hub import InferenceClient
from gradio_client import Client
import os
import uuid
from anthropic import Anthropic
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())  # read local .env file
HF_TOKEN = os.getenv('HF_TOKEN')

anthropic = Anthropic()
MODEL_NAME = "claude-3-haiku-20240307"

# Define model parameters here
SYSTEM_MESSAGE = "You are a friendly Chatbot, who gives short answers."
MAX_TOKENS = 512
TEMPERATURE = 0
TOP_P = 0.95


def get_completion(prompt):
    return anthropic.messages.create(
        model=MODEL_NAME,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        messages=[{"role": 'user', "content": prompt}]
    ).content[0].text


#chat_client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token="")
tts_client = Client("xxxrokxxx/Multilingual-TTS", hf_token=HF_TOKEN)

languages = [
    "English", "Spanish", "French", "German", "Italian",
    "Chinese", "Japanese", "Slovenian", "Russian", "Vietnamese"
]

language_flags = {
    "English": "🇬🇧", "Spanish": "🇪🇸", "French": "🇫🇷", "German": "🇩🇪",
    "Italian": "🇮🇹", "Chinese": "🇨🇳", "Japanese": "🇯🇵",
    "Slovenian": "🇸🇮", "Russian": "🇷🇺", "Vietnamese": "🇻🇳"
}


def translate_text(text, from_lang, to_lang):
    prompt = f"Translate the following text from {from_lang} to {to_lang}:\n\n{text}\n\nTranslation:"
    return get_completion(prompt)


def get_speakers(language):
    try:
        result = tts_client.predict(language=language, api_name="/get_speakers")
        speakers = result[0]['choices']
        return [speaker[0] for speaker in speakers]
    except Exception as e:
        print(f"Error getting speakers for {language}: {e}")
        return ["Default"]


def generate_unique_filename(extension=".wav"):
    return str(uuid.uuid4()) + extension


def to_voice(text, language, speaker):
    try:
        #print(f"Generating voice for text: {text[:50]}... in language: {language} with speaker: {speaker}")
        _, audio_path = tts_client.predict(
            text=text,
            language_code=language,
            speaker=speaker,
            tashkeel_checkbox=False,
            api_name="/text_to_speech_edge"
        )
        #print(f"Audio generated at path: {audio_path}")
        # Create a new file with a shorter, unique name
        new_filename = generate_unique_filename()
        new_path = os.path.join(os.path.dirname(audio_path), new_filename)
        os.rename(audio_path, new_path)
        print(f"Audio file renamed to: {new_path}")
        return new_path
    except Exception as e:
        print(f"Error generating voice: {e}")
        return None


def respond(message, chat_history, language):
    language_instruction = f"Please respond in {language}."
    # Build the prompt: system message, then prior turns, then the new user message
    full_prompt = f"{SYSTEM_MESSAGE} {language_instruction}"
    for human, ai in chat_history:
        full_prompt += f"\n\nUser: {human}\nAssistant: {ai}"
    full_prompt += f"\n\nUser: {message}\nAssistant:"
    ai_message = get_completion(full_prompt)
    return ai_message


def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)
    return x.liked


def add_message(history, message, response_type):
    history.append((message, None))
    return history, gr.Textbox(value="", interactive=True)


def bot(history, response_type, language1, language2, speaker):
    human_message = history[-1][0]
    ai_message = respond(human_message, history[:-1], language1)
    audio_path = None
    if response_type in ["text + audio"]:
        #print(f"Attempting to generate audio for response type: {response_type}")
        # If languages are different, translate before generating audio
        if language1 != language2:
            #print(f"Translating from {language1} to {language2}")
            translated_message = translate_text(ai_message, language1, language2)
            audio_text = translated_message
        else:
            audio_text = ai_message
        audio_path = to_voice(audio_text, language2, speaker)
        #print(f"Audio path after generation: {audio_path}")
    history[-1] = (human_message, ai_message)
    return history, audio_path, response_type


def update_speakers(language):
    language = language.split(" ")[-1]  # Remove flag from language
    speakers = get_speakers(language)
    return gr.Dropdown(choices=speakers, value=speakers[0] if speakers else None, label="Speaker")


custom_css = """
.submit-btn, .play-btn {
    background-color: transparent !important;
    border: none !important;
    padding: 0 !important;
}
.submit-btn:hover, .play-btn:hover {
    background-color: transparent !important;
}
"""

with gr.Blocks(css=custom_css, theme=gr.themes.Base()) as demo:
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,
        height=400,
        show_label=False,
    )

    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Enter your message...",
            scale=9
        )
        submit_btn = gr.Button(
            "➤",
            elem_classes=["submit-btn"],
            scale=1
        )

    response_type = gr.Radio(
        ["text", "text + audio"],
        value="text",
        label="Response Type",
    )

    with gr.Accordion("Language Selection", open=False):
        with gr.Column():
            gr.Markdown("### Text to Speech")
            with gr.Row():
                language1 = gr.Dropdown(choices=languages, label="Text Language", value="English")
                language2 = gr.Dropdown(choices=languages, label="Speech Language", value="English")
                initial_speakers = get_speakers("English")
                speaker = gr.Dropdown(choices=initial_speakers, value=initial_speakers[0] if initial_speakers else None, label="Speaker")
            #gr.Markdown("### Speech to Text")
            #with gr.Row():
            #    language3 = gr.Dropdown(choices=languages, label="Speech Language", value="English")
            #    language4 = gr.Dropdown(choices=languages, label="Text Language", value="English")

    # The audio player starts hidden unless the default response type already includes audio
    audio_visible = response_type.value in ["text + audio"]
    print("Audio Visible", audio_visible)
    audio_player = gr.Audio(label="Response Audio", visible=audio_visible, elem_id="audio-player", autoplay=True)
    play_btn = gr.Button("🔊", elem_classes=["play-btn"], visible=False)

    # Hidden components to store state
    audio_path_state = gr.State()
    response_type_state = gr.State()

    # Event handlers
    language1.change(update_speakers, inputs=[language1], outputs=[speaker])
    language2.change(update_speakers, inputs=[language2], outputs=[speaker])

    def process_response(history, audio_path, response_type):
        #print(f"Processing response. Audio path: {audio_path}, Response type: {response_type}")
        # Show the audio player only when an audio response was actually generated
        audio_visible = response_type in ["text + audio"] and audio_path is not None
        return (
            history,
            gr.update(value=audio_path if audio_visible else None, visible=audio_visible),
            audio_path,
            response_type
        )

    def play_audio(audio_path):
        return gr.update(value=audio_path, visible=True, autoplay=True), gr.update(visible=True)

    msg.submit(add_message, [chatbot, msg, response_type], [chatbot, msg]).then(
        bot, [chatbot, response_type, language1, language2, speaker],
        [chatbot, audio_path_state, response_type_state]
    ).then(
        process_response, [chatbot, audio_path_state, response_type_state],
        [chatbot, audio_player, audio_path_state, response_type_state]
    )

    submit_btn.click(add_message, [chatbot, msg, response_type], [chatbot, msg]).then(
        bot, [chatbot, response_type, language1, language2, speaker],
        [chatbot, audio_path_state, response_type_state]
    ).then(
        process_response, [chatbot, audio_path_state, response_type_state],
        [chatbot, audio_player, audio_path_state, response_type_state]
    )

    play_btn.click(play_audio, inputs=[audio_path_state], outputs=[audio_player, play_btn])

    chatbot.like(print_like_dislike, None, None)

if __name__ == "__main__":
    demo.launch()