John Langley
committed on
Commit d405851
1 Parent(s): e0145dc
Initial Checkin
- app.py +113 -0
- requirements.txt +8 -0
- utils.py +114 -0
app.py
ADDED
@@ -0,0 +1,113 @@
import gradio as gr
import edge_tts
import asyncio

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from faster_whisper import WhisperModel

from utils import get_sentence, tts_interface

# The device to load the models onto.
#
# Available device types:
# "cuda" - NVIDIA GPU
# "cpu"  - plain CPU
# "mps"  - Apple silicon
device = "cpu"

# Load the Mistral LLM (quantized GGUF, run through llama.cpp)
print("Loading Mistral LLM")
hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    local_dir=".",
    filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf",
)
mistral_model_path = "./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
# Generation length and stop tokens are set per call in utils.py
mistral_llm = Llama(
    model_path=mistral_model_path,
    n_gpu_layers=35,
    n_ctx=4096,
    n_batch=128,
    verbose=False,
)

# Load the Whisper ASR model
print("Loading Whisper ASR")
whisper_model = WhisperModel("large-v3", device=device, compute_type="float32")


# Get all available voices from edge_tts, keyed by a human-readable label
async def get_voices():
    voices = await edge_tts.list_voices()
    return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
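# Illustrative only: the resulting mapping looks roughly like
#   {"en-US-GuyNeural - en-US (Male)": "en-US-GuyNeural", ...}
# (the actual voice list is fetched from the edge-tts service at runtime)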

# Triggered on text submit: appends the user turn to the chat history
# (the LLM response is generated in the chained respond() call)
def add_text(chatbot_history, text):
    chatbot_history = [] if chatbot_history is None else chatbot_history
    chatbot_history = chatbot_history + [(text, None)]
    return chatbot_history, gr.update(value="", interactive=True)


# Triggered on voice submit: transcribes the recording, then hands off to respond()
def add_audio(chatbot_history, audio):
    chatbot_history = [] if chatbot_history is None else chatbot_history
    # Get the result from Whisper and strip leading/trailing whitespace
    segments, _ = whisper_model.transcribe(audio)
    text = list(segments)[0].text.strip()
    print("Transcribed text:", text)
    chatbot_history = chatbot_history + [(text, None)]
    return chatbot_history, gr.update(value="", interactive=True)


# Gets a response from the LLM; the chained call turns it into an audio clip via TTS
def respond(chat_history, voice):
    if not voice:
        gr.Warning("Please select a voice.")
        return chat_history, None

    history, response = get_sentence(chat_history, mistral_llm)
    return history, response


# Gradio interface
async def create_demo():
    voices = await get_voices()

    # Interface code
    with gr.Blocks(title="Chat with LLM - POC") as demo:
        DESCRIPTION = """# Chat with LLM - POC"""
        gr.Markdown(DESCRIPTION)

        with gr.Row():
            with gr.Column(scale=1, min_width=300):
                user_msg = gr.Textbox(placeholder="Enter text here or speak into your microphone")
                audio_record = gr.Audio(sources=["microphone"], type="filepath", scale=4)
                ai_response = gr.Label(show_label=True)
                submit_button = gr.Button("Submit")
                speech_button = gr.Button("Test Speech")
                audio_playback = gr.Audio(
                    value=None,
                    label="Generated audio response",
                    streaming=True,
                    autoplay=True,
                    interactive=False,
                    show_label=True,
                )

            with gr.Column(scale=1, min_width=300):
                voice = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
                # Define the chatbot component with an initial greeting
                chatbot = gr.Chatbot(
                    value=[(None, "Hi, I'm an AI training assistant. Let's get going, how should we start?")],
                    elem_id="chatbot",
                    bubble_full_width=False,
                )

        speech_button.click(fn=tts_interface, inputs=[user_msg, voice], outputs=[audio_playback])

        audio_record.stop_recording(
            fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, user_msg], queue=False
        ).then(
            fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]
        ).then(
            fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback]
        )

        submit_button.click(
            fn=add_text, inputs=[chatbot, user_msg], outputs=[chatbot, user_msg], queue=False
        ).then(
            fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]
        ).then(
            fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback]
        )

    return demo


# Run the application
demo = asyncio.run(create_demo())
demo.launch()
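For reference, a minimal sketch of the faster-whisper call used by add_audio above, assuming a local sample.wav file; transcribe() returns a lazy generator of segments plus a TranscriptionInfo object:

from faster_whisper import WhisperModel

model = WhisperModel("large-v3", device="cpu", compute_type="float32")
segments, info = model.transcribe("sample.wav")
# Each segment carries a .text field; join them for the full transcript
print(" ".join(segment.text.strip() for segment in segments))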
requirements.txt
ADDED
@@ -0,0 +1,8 @@
huggingface_hub==0.24.5
TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
gradio_client
asyncio
faster-whisper==1.0.1
edge-tts==6.1.12
nltk==3.8.1
llama_cpp_python==0.2.88
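# Note: asyncio ships with the Python standard library, so the PyPI "asyncio"
# pin above is likely redundant on any modern Python.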
utils.py
ADDED
@@ -0,0 +1,114 @@
import gradio as gr
import nltk
import edge_tts
import tempfile
import asyncio

# Download the 'punkt' tokenizer for the NLTK library
nltk.download("punkt")


# Build a Mistral-instruct prompt from the system message, the chat history, and the new user message
def format_prompt(message, history):
    system_message = """
    You are an empathetic, insightful, and supportive training coach who helps people deal with challenges and celebrate achievements.
    You help people feel better by asking questions to reflect on and evoke feelings of positivity, gratitude, joy, and love.
    You show radical candor and tough love.
    Respond in a casual and friendly tone.
    Sprinkle in filler words, contractions, idioms, and other casual speech that we use in conversation.
    Emulate the user's speaking style and be concise in your response.
    """
    prompt = "<s>[INST]" + system_message + "[/INST]"
    for user_prompt, bot_response in history:
        if user_prompt is not None:
            prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "

    if message == "":
        message = "Hello"
    prompt += f"[INST] {message} [/INST]"
    return prompt
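# Illustrative only: for history = [["How are you?", "Doing great!"]] and
# message = "Nice", the prompt comes out roughly as
#   <s>[INST]<system message>[/INST][INST] How are you? [/INST] Doing great!</s> [INST] Nice [/INST]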

def generate_llm_output(
    prompt,
    history,
    llm,
    temperature=0.8,
    max_tokens=256,
    top_p=0.95,
    stop_words=["<s>", "[/INST]", "</s>"]
):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stop=stop_words
    )
    formatted_prompt = format_prompt(prompt, history)
    try:
        print("LLM Input:", formatted_prompt)
        # Local GGUF model, streamed token by token
        output = ""
        stream = llm(
            formatted_prompt,
            **generate_kwargs,
            stream=True,
        )
        for r in stream:
            character = r["choices"][0]["text"]
            print(character)
            if character in stop_words:
                # End of context: stop accumulating but still return what we have
                break
            output += character

    except Exception as e:
        print("Unhandled Exception:", str(e))
        gr.Warning("Unfortunately Mistral is unable to process")
        output = "I do not know what happened, but I could not understand you."
    return output


# TTS interface function: wraps the async text_to_speech in a synchronous call for Gradio
def tts_interface(text, voice):
    audio = asyncio.run(text_to_speech(text, voice))
    return audio


# Text-to-speech function
async def text_to_speech(text, voice):
    rate = 10
    pitch = 10
    rate_str = f"{rate:+d}%"
    pitch_str = f"{pitch:+d}Hz"

    # The dropdown label is "<ShortName> - <Locale> (<Gender>)"; edge-tts needs only the short name
    voice_short_name = voice.split(" - ")[0]
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    # Write the MP3 after the handle is closed so this also works on Windows
    await communicate.save(tmp_path)
    return tmp_path


def get_sentence(history, llm):
    history = [["", None]] if history is None else history
    history[-1][1] = ""

    text_to_generate = generate_llm_output(history[-1][0], history[:-1], llm)

    history.append([None, text_to_generate])
    return (history, text_to_generate)
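A hypothetical end-to-end sketch (not part of the commit) exercising get_sentence and tts_interface directly; it assumes the GGUF model has already been downloaded as in app.py and that edge-tts can reach the network:

from llama_cpp import Llama
from utils import get_sentence, tts_interface

# Load the same quantized Mistral model that app.py downloads
llm = Llama(model_path="./mistral-7b-instruct-v0.1.Q5_K_M.gguf", n_ctx=4096, verbose=False)

# One pending user turn; get_sentence fills in the assistant reply
history, reply = get_sentence([["Hello there", None]], llm)
print(reply)

# Voice labels follow the "<ShortName> - <Locale> (<Gender>)" format used by the dropdown
mp3_path = tts_interface(reply, "en-US-GuyNeural - en-US (Male)")
print(mp3_path)  # path to a temporary .mp3 file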