John Langley committed on
Commit d405851
1 Parent(s): e0145dc

Initial Checkin

Files changed (3)
  1. app.py +113 -0
  2. requirements.txt +8 -0
  3. utils.py +114 -0
app.py ADDED
@@ -0,0 +1,113 @@
+ import gradio as gr
+ import edge_tts
+ import asyncio
+
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+ from faster_whisper import WhisperModel
+
+ from utils import get_sentence, tts_interface
+
+ # The device to load the model onto.
+ #
+ # Available device types:
+ #   "cuda" - NVIDIA GPU
+ #   "cpu"  - Plain CPU
+ #   "mps"  - Apple silicon
+ device = "cpu"
+
+ # Load Mistral LLM
+ print("Loading Mistral LLM")
+ hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", local_dir=".", filename="mistral-7b-instruct-v0.1.Q5_K_M.gguf")
+ mistral_model_path = "./mistral-7b-instruct-v0.1.Q5_K_M.gguf"
+ # n_ctx sets the context window; per-call generation length is capped via
+ # max_tokens in utils.generate_llm_output.
+ mistral_llm = Llama(model_path=mistral_model_path, n_gpu_layers=35, n_ctx=4096, n_batch=128, verbose=False)
+
+ # Load Whisper ASR model
+ print("Loading Whisper ASR")
+ whisper_model = WhisperModel("large-v3", device="cpu", compute_type="float32")
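+ # Note: "large-v3" with float32 on CPU is accurate but slow; a smaller checkpoint
+ # or compute_type="int8" is the usual CPU latency trade-off in faster-whisper.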
+
+
+ # Get all available voices from edge_tts
+ async def get_voices():
+     voices = await edge_tts.list_voices()
+     return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
+
+
+ # Triggered on text submit: appends the user message to the chat history and
+ # clears the textbox (the LLM request itself happens in the chained `respond` step).
+ def add_text(chatbot_history, text):
+     chatbot_history = [] if chatbot_history is None else chatbot_history
+     chatbot_history = chatbot_history + [(text, None)]
+     return chatbot_history, gr.update(value="", interactive=True)
+
+
+ # Triggered when recording stops: transcribes the audio and appends the text
+ # to the chat history.
+ def add_audio(chatbot_history, audio):
+     chatbot_history = [] if chatbot_history is None else chatbot_history
+     # Join all Whisper segments and strip leading/trailing whitespace.
+     segments, _ = whisper_model.transcribe(audio)
+     text = " ".join(segment.text for segment in segments).strip()
+     print("Transcribed text:", text)
+     chatbot_history = chatbot_history + [(text, None)]
+     return chatbot_history, gr.update(value="", interactive=True)
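+
+ # Note: transcribe() returns a lazy generator of segments plus an info object;
+ # consuming every segment (rather than only the first) keeps long recordings intact.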
+
+
+ # Gets a response from the LLM; the audio clip is produced by the chained TTS step.
+ def respond(chat_history, voice):
+     if not voice:
+         gr.Warning("Please select a voice.")
+         return chat_history, None
+
+     history, response = get_sentence(chat_history, mistral_llm)
+     return history, response
+
+
+ # Gradio interface
+ async def create_demo():
+
+     voices = await get_voices()
+
+     # Interface code
+     with gr.Blocks(title="Chat with LLM - POC") as demo:
+
+         DESCRIPTION = """# Chat with LLM - POC"""
+         gr.Markdown(DESCRIPTION)
+
+         with gr.Row():
+
+             with gr.Column(scale=1, min_width=300):
+                 user_msg = gr.Textbox(placeholder="Enter text here or speak into your microphone")
+                 audio_record = gr.Audio(sources=["microphone"], type="filepath", scale=4)
+                 ai_response = gr.Label(show_label=True)
+                 submit_button = gr.Button("Submit")
+                 speech_button = gr.Button("Test Speech")
+                 audio_playback = gr.Audio(
+                     value=None,
+                     label="Generated audio response",
+                     streaming=True,
+                     autoplay=True,
+                     interactive=False,
+                     show_label=True,
+                 )
+
+             with gr.Column(scale=1, min_width=300):
+                 voice = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
+                 # Define chatbot component
+                 chatbot = gr.Chatbot(
+                     value=[(None, "Hi, I'm an AI training assistant. Let's get going, how should we start?")],  # Initial greeting from the chatbot
+                     elem_id="chatbot",
+                     bubble_full_width=False,
+                 )
+
+         speech_button.click(fn=tts_interface, inputs=[user_msg, voice], outputs=[audio_playback])
+
+         audio_record.stop_recording(
+             fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, user_msg], queue=False
+         ).then(
+             fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]
+         ).then(
+             fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback]
+         )
+
+         submit_button.click(
+             fn=add_text, inputs=[chatbot, user_msg], outputs=[chatbot, user_msg], queue=False
+         ).then(
+             fn=respond, inputs=[chatbot, voice], outputs=[chatbot, ai_response]
+         ).then(
+             fn=tts_interface, inputs=[ai_response, voice], outputs=[audio_playback]
+         )
+
+     return demo
+
+
+ # Run the application
+ demo = asyncio.run(create_demo())
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ huggingface_hub==0.24.5
+ TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
+ gradio_client
+ # Note: asyncio is part of the Python standard library and needs no pip install.
+ faster-whisper==1.0.1
+ edge-tts==6.1.12
+ nltk==3.8.1
+ llama_cpp_python==0.2.88
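+ # gradio itself is presumably provided by the hosting runtime (e.g. a Hugging
+ # Face Space's gradio SDK), which would explain why it is not pinned here.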
utils.py ADDED
@@ -0,0 +1,114 @@
+ import gradio as gr
+ import nltk
+ import edge_tts
+ import tempfile
+ import asyncio
+
+ # Download the 'punkt' tokenizer for the NLTK library
+ nltk.download("punkt")
+
+ def format_prompt(message, history):
+     system_message = """
+     You are an empathetic, insightful, and supportive training coach who helps people deal with challenges and celebrate achievements.
+     You help people feel better by asking questions to reflect on and evoke feelings of positivity, gratitude, joy, and love.
+     You show radical candor and tough love.
+     Respond in a casual and friendly tone.
+     Sprinkle in filler words, contractions, idioms, and other casual speech that we use in conversation.
+     Emulate the user's speaking style and be concise in your response.
+     """
+     prompt = "<s>[INST]" + system_message + "[/INST]"
+     for user_prompt, bot_response in history:
+         if user_prompt is not None:
+             prompt += f"[INST] {user_prompt} [/INST]"
+         prompt += f" {bot_response}</s> "
+
+     if message == "":
+         message = "Hello"
+     prompt += f"[INST] {message} [/INST]"
+     return prompt
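+
+ # Worked example (hypothetical values, not from the original code): with history
+ # [("Hi", "Hello!")] and message "How are you?", format_prompt returns roughly
+ #   <s>[INST]<system message>[/INST][INST] Hi [/INST] Hello!</s> [INST] How are you? [/INST]
+ # i.e. the instruction template the Mistral-7B-Instruct GGUF expects.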
+
+
+ def generate_llm_output(
+     prompt,
+     history,
+     llm,
+     temperature=0.8,
+     max_tokens=256,
+     top_p=0.95,
+     stop_words=["<s>", "[/INST]", "</s>"],
+ ):
+     temperature = float(temperature)
+     if temperature < 1e-2:
+         temperature = 1e-2
+     top_p = float(top_p)
+
+     generate_kwargs = dict(
+         temperature=temperature,
+         max_tokens=max_tokens,
+         top_p=top_p,
+         stop=stop_words,
+     )
+     formatted_prompt = format_prompt(prompt, history)
+     try:
+         print("LLM Input:", formatted_prompt)
+         # Stream completions from the local GGUF model.
+         output = ""
+         stream = llm(
+             formatted_prompt,
+             **generate_kwargs,
+             stream=True,
+         )
+         for r in stream:
+             character = r["choices"][0]["text"]
+             print(character)
+             if character in stop_words:
+                 # Stop token reached: return the text generated so far.
+                 return output
+             output += character
+     except Exception as e:
+         print("Unhandled Exception:", str(e))
+         gr.Warning("Unfortunately Mistral is unable to process")
+         output = "I do not know what happened, but I could not understand you."
+     return output
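+
+ # Note: each streamed chunk holds one token's text, so the stop-word check above
+ # only fires on an exact match; the `stop=` kwarg passed to llama-cpp is what
+ # actually terminates generation inside the model call.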
+
+
+ # TTS interface function
+ def tts_interface(text, voice):
+     audio = asyncio.run(text_to_speech(text, voice))
+     return audio
+
+
+ # Text-to-speech function
+ async def text_to_speech(text, voice):
+     rate = 10
+     pitch = 10
+     rate_str = f"{rate:+d}%"
+     pitch_str = f"{pitch:+d}Hz"
+
+     voice_short_name = voice.split(" - ")[0]
+     communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
+
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
+         tmp_path = tmp_file.name
+         await communicate.save(tmp_path)
+     return tmp_path
+
+
+
+ def get_sentence(history, llm):
+     history = [["", None]] if history is None else history
+     history[-1][1] = ""
+
+     text_to_generate = generate_llm_output(history[-1][0], history[:-1], llm)
+
+     history.append([None, text_to_generate])
+     return (history, text_to_generate)
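+
+
+ # Minimal manual check of the prompt builder (hypothetical usage sketch, not
+ # part of the app's runtime path): run `python utils.py`.
+ if __name__ == "__main__":
+     demo_history = [(None, "Hi, I'm an AI training assistant.")]
+     print(format_prompt("How should we start?", demo_history))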