John Langley
committed on
Commit d405851
1 Parent(s): e0145dc
Initial Checkin
- app.py +113 -0
- requirements.txt +8 -0
- utils.py +114 -0
app.py
ADDED
@@ -0,0 +1,113 @@
import gradio as gr
import edge_tts
import asyncio

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from faster_whisper import WhisperModel

from utils import get_sentence, tts_interface

# The device to load the models onto.
#
# Available device types:
# "cuda" - NVIDIA GPU
# "cpu"  - plain CPU
# "mps"  - Apple silicon
device = "cpu"

# Load the Mistral LLM (quantized GGUF, run through llama.cpp)
print("Loading Mistral LLM")
hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    local_dir=".",
    filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf",
)
mistral_model_path = "./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
# Generation length and stop tokens are set per call in utils.py
mistral_llm = Llama(
    model_path=mistral_model_path,
    n_gpu_layers=35,
    n_ctx=4096,
    n_batch=128,
    verbose=False,
)

# Load the Whisper ASR model
print("Loading Whisper ASR")
whisper_model = WhisperModel("large-v3", device=device, compute_type="float32")


# Get all available voices from edge_tts, keyed by a human-readable label
async def get_voices():
    voices = await edge_tts.list_voices()
    return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
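# Illustrative only: the resulting mapping looks roughly like
#   {"en-US-GuyNeural - en-US (Male)": "en-US-GuyNeural", ...}
# (the actual voice list is fetched from the edge-tts service at runtime)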

# Triggered on text submit: appends the user turn to the chat history
# (the LLM response is generated in the chained respond() call)
def add_text(chatbot_history, text):
    chatbot_history = [] if chatbot_history is None else chatbot_history
    chatbot_history = chatbot_history + [(text, None)]
    return chatbot_history, gr.update(value="", interactive=True)


# Triggered on voice submit: transcribes the recording, then hands off to respond()
def add_audio(chatbot_history, audio):
    chatbot_history = [] if chatbot_history is None else chatbot_history
    # Get the result from Whisper and strip leading/trailing whitespace
    segments, _ = whisper_model.transcribe(audio)
    text = list(segments)[0].text.strip()
    print("Transcribed text:", text)
    chatbot_history = chatbot_history + [(text, None)]
    return chatbot_history, gr.update(value="", interactive=True)


# Gets a response from the LLM; the chained call turns it into an audio clip via TTS
def respond(chat_history, voice):
    if not voice:
        gr.Warning("Please select a voice.")
        return chat_history, None

    history, response = get_sentence(chat_history, mistral_llm)
    return history, response


# Gradio interface
async def create_demo():
    voices = await get_voices()

    # Interface code
    with gr.Blocks(title="Chat with LLM - POC") as demo:
        DESCRIPTION = """# Chat with LLM - POC"""
        gr.Markdown(DESCRIPTION)

        with gr.Row():
            with gr.Column(scale=1, min_width=300):
                user_msg = gr.Textbox(placeholder="Enter text here or speak into your microphone")
                audio_record = gr.Audio(sources=["microphone"], type="filepath", scale=4)
                ai_response = gr.Label(show_label=True)
                submit_button = gr.Button("Submit")
                speech_button = gr.Button("Test Speech")
                audio_playback = gr.Audio(
                    value=None,
                    label="Generated audio response",
                    streaming=True,
                    autoplay=True,
                    interactive=False,
                    show_label=True,
                )

            with gr.Column(scale=1, min_width=300):
                voice = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
                # Define the chatbot component with an initial greeting
                chatbot = gr.Chatbot(
                    value=[(None, "Hi, I'm an AI training assistant. Let's get going, how should we start?")],
                    elem_id="chatbot",
                    bubble_full_width=False,
                )

        speech_button.click(fn=tts_interface, inputs=[user_msg, voice], outputs=[audio_playback])

        audio_record.stop_recording(
            fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, user_msg], queue=False
        ).then(
            fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]
        ).then(
            fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback]
        )

        submit_button.click(
            fn=add_text, inputs=[chatbot, user_msg], outputs=[chatbot, user_msg], queue=False
        ).then(
            fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]
        ).then(
            fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback]
        )

    return demo


# Run the application
demo = asyncio.run(create_demo())
demo.launch()
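For reference, a minimal sketch of the faster-whisper call used by add_audio above, assuming a local sample.wav file; transcribe() returns a lazy generator of segments plus a TranscriptionInfo object:

from faster_whisper import WhisperModel

model = WhisperModel("large-v3", device="cpu", compute_type="float32")
segments, info = model.transcribe("sample.wav")
# Each segment carries a .text field; join them for the full transcript
print(" ".join(segment.text.strip() for segment in segments))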
requirements.txt
ADDED
@@ -0,0 +1,8 @@
huggingface_hub==0.24.5
TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
gradio_client
asyncio
faster-whisper==1.0.1
edge-tts==6.1.12
nltk==3.8.1
llama_cpp_python==0.2.88
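# Note: asyncio ships with the Python standard library, so the PyPI "asyncio"
# pin above is likely redundant on any modern Python.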
utils.py
ADDED
@@ -0,0 +1,114 @@
import gradio as gr
import nltk
import edge_tts
import tempfile
import asyncio

# Download the 'punkt' tokenizer for the NLTK library
nltk.download("punkt")


# Build a Mistral-instruct prompt from the system message, the chat history, and the new user message
def format_prompt(message, history):
    system_message = """
    You are an empathetic, insightful, and supportive training coach who helps people deal with challenges and celebrate achievements.
    You help people feel better by asking questions to reflect on and evoke feelings of positivity, gratitude, joy, and love.
    You show radical candor and tough love.
    Respond in a casual and friendly tone.
    Sprinkle in filler words, contractions, idioms, and other casual speech that we use in conversation.
    Emulate the user's speaking style and be concise in your response.
    """
    prompt = "<s>[INST]" + system_message + "[/INST]"
    for user_prompt, bot_response in history:
        if user_prompt is not None:
            prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "

    if message == "":
        message = "Hello"
    prompt += f"[INST] {message} [/INST]"
    return prompt
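# Illustrative only: for history = [["How are you?", "Doing great!"]] and
# message = "Nice", the prompt comes out roughly as
#   <s>[INST]<system message>[/INST][INST] How are you? [/INST] Doing great!</s> [INST] Nice [/INST]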

def generate_llm_output(
    prompt,
    history,
    llm,
    temperature=0.8,
    max_tokens=256,
    top_p=0.95,
    stop_words=["<s>", "[/INST]", "</s>"]
):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stop=stop_words
    )
    formatted_prompt = format_prompt(prompt, history)
    try:
        print("LLM Input:", formatted_prompt)
        # Local GGUF model, streamed token by token
        output = ""
        stream = llm(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
        )
        for r in stream:
            character = r["choices"][0]["text"]
            print(character)
            if character in stop_words:
                # End of context: stop accumulating but still return what we have
                break
            output += character

    except Exception as e:
        print("Unhandled Exception:", str(e))
        gr.Warning("Unfortunately Mistral is unable to process")
        output = "I do not know what happened, but I could not understand you."
    return output


# TTS interface function: wraps the async text_to_speech in a synchronous call for Gradio
def tts_interface(text, voice):
    audio = asyncio.run(text_to_speech(text, voice))
    return audio


# Text-to-speech function
async def text_to_speech(text, voice):
    rate = 10
    pitch = 10
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"

    # The dropdown label is "<ShortName> - <Locale> (<Gender>)"; edge-tts needs only the short name
    voice_short_name = voice.split(" - ")[0]
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    # Write the MP3 after the handle is closed so this also works on Windows
    await communicate.save(tmp_path)
    return tmp_path


def get_sentence(history, llm):
    history = [["", None]] if history is None else history
    history[-1][1] = ""

    text_to_generate = generate_llm_output(history[-1][0], history[:-1], llm)

    history.append([None, text_to_generate])
    return (history, text_to_generate)
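A hypothetical end-to-end sketch (not part of the commit) exercising get_sentence and tts_interface directly; it assumes the GGUF model has already been downloaded as in app.py and that edge-tts can reach the network:

from llama_cpp import Llama
from utils import get_sentence, tts_interface

# Load the same quantized Mistral model that app.py downloads
llm = Llama(model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf", n_ctx=4096, verbose=False)

# One pending user turn; get_sentence fills in the assistant reply
history, reply = get_sentence([["Hello there", None]], llm)
print(reply)

# Voice labels follow the "<ShortName> - <Locale> (<Gender>)" format used by the dropdown
mp3_path = tts_interface(reply, "en-US-GuyNeural - en-US (Male)")
print(mp3_path)  # path to a temporary .mp3 file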