syedmudassir16 committed on
Commit
3a96389
·
verified ·
1 Parent(s): 726d5a9

audio chat

Browse files
Files changed (1) hide show
  1. app.py +208 -170
app.py CHANGED
@@ -1,235 +1,273 @@
1
  from huggingface_hub import InferenceClient
 
2
  import gradio as gr
3
- import speech_recognition as sr
4
-
 
 
 
 
 
 
5
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
6
- recognizer = sr.Recognizer()
7
-
8
- def transcript_audio(audio_data):
9
- with sr.AudioFile(audio_data) as source:
10
- audio = recognizer.record(source)
11
- try:
12
- # Using Google's speech recognition service
13
- return recognizer.recognize_google(audio)
14
- except sr.UnknownValueError:
15
- return "Sorry, I could not understand the audio."
16
- except sr.RequestError as e:
17
- return "Could not request results from the speech recognition service; {0}".format(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def format_prompt(message, history):
20
- # Your fixed prompt and history logic here, unchanged
21
- if history is None:
22
- history = []
23
  fixed_prompt = """
24
- You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single response in the format "Playing [mood] playlist" where [mood] is one of the options if you have classified. Suppose you classify a sentence as happy, then respond with "Playing happy playlist".
25
 
26
- Note: Do not write anything else other than the classified playlist message if classified.
27
 
28
- Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
29
 
30
- Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
31
 
32
- Note: if user asks something like I need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
33
 
34
- Examples:
35
- User: What is C programming?
36
- LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?
37
 
38
- User: Can I get a coffee?
39
- LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
40
- User: I feel like rocking
41
- LLM Response: Playing party playlist
42
 
43
- User: I'm feeling so energetic today!
44
- LLM Response: Playing happy playlist
45
 
46
- User: I'm feeling down today.
47
- LLM Response: Playing sad playlist
48
 
49
- User: I'm ready to have some fun tonight!
50
- LLM Response: Playing party playlist
51
 
52
- User: I need some background music while I am stuck in traffic.
53
- LLM Response: Playing instrumental playlist
54
 
55
- User: Hi
56
- LLM Response: Hi, how are you doing?
57
 
58
- User: Feeling okay only.
59
- LLM Response: Are you having a good day?
60
- User: I don't know
61
- LLM Response: Do you want to listen to some relaxing music?
62
- User: No
63
- LLM Response: How about listening to some rock and roll music?
64
- User: Yes
65
- LLM Response: Playing party playlist
66
 
67
- User: Where do I find an encyclopedia?
68
- LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?
69
 
70
- User: I need a coffee
71
- LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
72
 
73
- User: I just got promoted at work!
74
- LLM Response: Playing happy playlist
75
 
76
- User: Today is my birthday!
77
- LLM Response: Playing happy playlist
78
 
79
- User: I won a prize in the lottery.
80
- LLM Response: Playing happy playlist
81
 
82
- User: I am so excited about my vacation next week!
83
- LLM Response: Playing happy playlist
84
 
85
- User: I aced my exams!
86
- LLM Response: Playing happy playlist
87
 
88
- User: I had a wonderful time with my family today.
89
- LLM Response: Playing happy playlist
90
 
91
- User: I just finished a great workout!
92
- LLM Response: Playing happy playlist
93
 
94
- User: I am feeling really good about myself today.
95
- LLM Response: Playing happy playlist
96
 
97
- User: I finally finished my project and it was a success!
98
- LLM Response: Playing happy playlist
99
 
100
- User: I just heard my favorite song on the radio.
101
- LLM Response: Playing happy playlist
102
 
103
- User: My pet passed away yesterday.
104
- LLM Response: Playing sad playlist
105
 
106
- User: I lost my job today.
107
- LLM Response: Playing sad playlist
108
 
109
- User: I'm feeling really lonely.
110
- LLM Response: Playing sad playlist
111
 
112
- User: I didn't get the results I wanted.
113
- LLM Response: Playing sad playlist
114
 
115
- User: I had a fight with my best friend.
116
- LLM Response: Playing sad playlist
117
 
118
- User: I'm feeling really overwhelmed with everything.
119
- LLM Response: Playing sad playlist
120
 
121
- User: I just got some bad news.
122
- LLM Response: Playing sad playlist
123
 
124
- User: I'm missing my family.
125
- LLM Response: Playing sad playlist
126
 
127
- User: I am feeling really down today.
128
- LLM Response: Playing sad playlist
129
 
130
- User: Nothing seems to be going right.
131
- LLM Response: Playing sad playlist
132
 
133
- User: I need some music while I study.
134
- LLM Response: Playing instrumental playlist
135
 
136
- User: I want to listen to something soothing while I work.
137
- LLM Response: Playing instrumental playlist
138
 
139
- User: Do you have any recommendations for background music?
140
- LLM Response: Playing instrumental playlist
141
 
142
- User: I'm looking for some relaxing tunes.
143
- LLM Response: Playing instrumental playlist
144
 
145
- User: I need some music to focus on my tasks.
146
- LLM Response: Playing instrumental playlist
147
 
148
- User: Can you suggest some ambient music for meditation?
149
- LLM Response: Playing instrumental playlist
150
 
151
- User: What's good for background music during reading?
152
- LLM Response: Playing instrumental playlist
153
 
154
- User: I need some calm music to help me sleep.
155
- LLM Response: Playing instrumental playlist
156
 
157
- User: I prefer instrumental music while cooking.
158
- LLM Response: Playing instrumental playlist
159
 
160
- User: What's the best music to play while doing yoga?
161
- LLM Response: Playing instrumental playlist
162
 
163
- User: Let's have a blast tonight!
164
- LLM Response: Playing party playlist
165
 
166
- User: I'm in the mood to dance!
167
- LLM Response: Playing party playlist
168
 
169
- User: I want to celebrate all night long!
170
- LLM Response: Playing party playlist
171
 
172
- User: Time to hit the club!
173
- LLM Response: Playing party playlist
174
 
175
- User: I feel like partying till dawn.
176
- LLM Response: Playing party playlist
177
 
178
- User: Let's get this party started!
179
- LLM Response: Playing party playlist
180
 
181
- User: I'm ready to party hard tonight.
182
- LLM Response: Playing party playlist
183
 
184
- User: I'm in the mood for some loud music and dancing!
185
- LLM Response: Playing party playlist
186
 
187
- User: Tonight's going to be epic!
188
- LLM Response: Playing party playlist
189
 
190
- User: Lets turn up the music and have some fun!
191
- LLM Response: Playing party playlist
192
- """
193
- prompt = f"<s>{fixed_prompt}"
194
  for user_prompt, bot_response in history:
195
  prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
196
  prompt += f"\nUser: {message}\nLLM Response:"
197
  return prompt
198
 
199
- def classify_mood(output):
200
- mood_words = {"happy", "sad", "instrumental", "party"}
201
- for word in mood_words:
202
- if word in output.lower():
203
- return f"Playing {word} playlist", True
204
- return output, False
205
-
206
- def generate(audio, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
207
- transcription = transcript_audio(audio)
208
- formatted_prompt = format_prompt(transcription, history)
209
-
210
- generate_kwargs = {
211
- "temperature": temperature,
212
- "max_new_tokens": max_new_tokens,
213
- "top_p": top_p,
214
- "repetition_penalty": repetition_penalty,
215
- "do_sample": True,
216
- "seed": 42,
217
- }
218
- stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
219
- output = ""
220
- for response in stream:
221
- output += response.token.text
222
- output, is_classified = classify_mood(output)
223
- if is_classified:
224
- return output, history + [(transcription, output)]
225
- return output, history + [(transcription, output)]
226
-
227
- demo = gr.Interface(
228
- fn=generate,
229
- inputs=[gr.Audio(sources="microphone", type="filepath"), gr.State()],
230
- outputs=["text", "state"],
231
- title="Mood-Based Music Recommender",
232
- description="Hi! I'm your mood analyser. Speak into the microphone to tell me how you're feeling or what type of music you'd like!"
233
- )
234
-
235
- demo.launch()
 
1
  from huggingface_hub import InferenceClient
2
+ from transformers import pipeline
3
  import gradio as gr
4
+ import edge_tts
5
+ import tempfile
6
+ import os
7
+ from streaming_stt_nemo import Model
8
+ import torch
9
+ import random
10
+
11
+ # Initialize the inference client for the Mistral-7B-Instruct model (no token is passed explicitly here)
12
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
13
+ # Initialize the ASR pipeline
14
+ asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
15
+
16
def speech_to_text(speech):
    """Transcribe audio by running it through the module-level ``asr`` pipeline.

    Args:
        speech: Audio input forwarded unchanged to ``asr``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    transcription = asr(speech)
    return transcription["text"]
20
+
21
+
22
def classify_mood(input_string):
    """Find the first supported mood mentioned as a whole word in a text.

    The text is lower-cased, every non-alphabetic character is treated as a
    separator, and the resulting words are scanned left to right. Scanning the
    text (instead of iterating over the unordered set of moods) makes the
    result deterministic when several moods are mentioned, and comparing whole
    words stops "sad" from matching inside "crusade" or "happy" inside
    "unhappy".

    Args:
        input_string: Text to inspect. ``None`` or an empty string is treated
            as containing no mood.

    Returns:
        A ``(mood, found)`` tuple: the lower-case mood name and ``True`` when
        one of "happy", "sad", "instrumental" or "party" occurs as a word,
        otherwise ``(None, False)``.
    """
    if not input_string:
        return None, False

    mood_words = {"happy", "sad", "instrumental", "party"}
    # Turn punctuation, digits and whitespace into spaces so that "Happy." or
    # "party!" still yield the bare word.
    normalized = "".join(
        ch if ch.isalpha() else " " for ch in input_string.lower()
    )
    for word in normalized.split():
        if word in mood_words:
            return word, True
    return None, False
30
+
31
def generate(
    prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0,
):
    """Stream a model reply for ``prompt`` and turn it into a playlist message.

    The prompt is built with ``format_prompt(prompt, history)`` and sent to the
    module-level ``client``. Tokens are accumulated as they arrive; as soon as
    ``classify_mood`` reports a mood in the accumulated text, the stream is
    abandoned and a playlist announcement is returned instead of the raw text.

    Args:
        prompt: Latest user message.
        history: Earlier turns, passed through to ``format_prompt``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Generation length limit forwarded to the client.
        top_p: Nucleus-sampling parameter, converted to ``float``.
        repetition_penalty: Forwarded to the client unchanged.

    Returns:
        ``"Playing <Mood> playlist for you!"`` when a mood was detected,
        otherwise the complete generated text.
    """
    temperature = float(temperature)
    temperature = 1e-2 if temperature < 1e-2 else temperature
    top_p = float(top_p)

    sampling_options = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": top_p,
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,
    }

    token_stream = client.text_generation(
        format_prompt(prompt, history),
        **sampling_options,
        stream=True,
        details=True,
        return_full_text=False,
    )

    reply = ""
    for chunk in token_stream:
        reply += chunk.token.text
        # NOTE(review): classification runs on the partial reply, so a
        # follow-up question that merely mentions a mood word is also treated
        # as a final classification - confirm this is intended.
        mood, is_classified = classify_mood(reply)
        if not is_classified:
            continue
        # Log the detected mood to stdout.
        print("Chatbot:", mood.capitalize())
        return f"Playing {mood.capitalize()} playlist for you!"
    return reply
64
 
65
  def format_prompt(message, history):
66
+ """Formats the prompt including fixed instructions and conversation history."""
 
 
67
  fixed_prompt = """
68
+ You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
69
 
70
+ Note: Do not write anything else other than the classified mood if classified.
71
 
72
+ Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
73
 
74
+ Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
75
 
76
+ Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
77
 
78
+ Examples
79
+ User: What is C programming?
80
+ LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?
81
 
82
+ User: Can I get a coffee?
83
+ LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
84
+ User: I feel like rocking
85
+ LLM Response: Party
86
 
87
+ User: I'm feeling so energetic today!
88
+ LLM Response: Happy
89
 
90
+ User: I'm feeling down today.
91
+ LLM Response: Sad
92
 
93
+ User: I'm ready to have some fun tonight!
94
+ LLM Response: Party
95
 
96
+ User: I need some background music while I am stuck in traffic.
97
+ LLM Response: Instrumental
98
 
99
+ User: Hi
100
+ LLM Response: Hi, how are you doing?
101
 
102
+ User: Feeling okay only.
103
+ LLM Response: Are you having a good day?
104
+ User: I don't know
105
+ LLM Response: Do you want to listen to some relaxing music?
106
+ User: No
107
+ LLM Response: How about listening to some rock and roll music?
108
+ User: Yes
109
+ LLM Response: Party
110
 
111
+ User: Where do I find an encyclopedia?
112
+ LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?
113
 
114
+ User: I need a coffee
115
+ LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
116
 
117
+ User: I just got promoted at work!
118
+ LLM Response: Happy
119
 
120
+ User: Today is my birthday!
121
+ LLM Response: Happy
122
 
123
+ User: I won a prize in the lottery.
124
+ LLM Response: Happy
125
 
126
+ User: I am so excited about my vacation next week!
127
+ LLM Response: Happy
128
 
129
+ User: I aced my exams!
130
+ LLM Response: Happy
131
 
132
+ User: I had a wonderful time with my family today.
133
+ LLM Response: Happy
134
 
135
+ User: I just finished a great workout!
136
+ LLM Response: Happy
137
 
138
+ User: I am feeling really good about myself today.
139
+ LLM Response: Happy
140
 
141
+ User: I finally finished my project and it was a success!
142
+ LLM Response: Happy
143
 
144
+ User: I just heard my favorite song on the radio.
145
+ LLM Response: Happy
146
 
147
+ User: My pet passed away yesterday.
148
+ LLM Response: Sad
149
 
150
+ User: I lost my job today.
151
+ LLM Response: Sad
152
 
153
+ User: I'm feeling really lonely.
154
+ LLM Response: Sad
155
 
156
+ User: I didn't get the results I wanted.
157
+ LLM Response: Sad
158
 
159
+ User: I had a fight with my best friend.
160
+ LLM Response: Sad
161
 
162
+ User: I'm feeling really overwhelmed with everything.
163
+ LLM Response: Sad
164
 
165
+ User: I just got some bad news.
166
+ LLM Response: Sad
167
 
168
+ User: I'm missing my family.
169
+ LLM Response: Sad
170
 
171
+ User: I am feeling really down today.
172
+ LLM Response: Sad
173
 
174
+ User: Nothing seems to be going right.
175
+ LLM Response: Sad
176
 
177
+ User: I need some music while I study.
178
+ LLM Response: Instrumental
179
 
180
+ User: I want to listen to something soothing while I work.
181
+ LLM Response: Instrumental
182
 
183
+ User: Do you have any recommendations for background music?
184
+ LLM Response: Instrumental
185
 
186
+ User: I'm looking for some relaxing tunes.
187
+ LLM Response: Instrumental
188
 
189
+ User: I need some music to focus on my tasks.
190
+ LLM Response: Instrumental
191
 
192
+ User: Can you suggest some ambient music for meditation?
193
+ LLM Response: Instrumental
194
 
195
+ User: What's good for background music during reading?
196
+ LLM Response: Instrumental
197
 
198
+ User: I need some calm music to help me sleep.
199
+ LLM Response: Instrumental
200
 
201
+ User: I prefer instrumental music while cooking.
202
+ LLM Response: Instrumental
203
 
204
+ User: What's the best music to play while doing yoga?
205
+ LLM Response: Instrumental
206
 
207
+ User: Let's have a blast tonight!
208
+ LLM Response: Party
209
 
210
+ User: I'm in the mood to dance!
211
+ LLM Response: Party
212
 
213
+ User: I want to celebrate all night long!
214
+ LLM Response: Party
215
 
216
+ User: Time to hit the club!
217
+ LLM Response: Party
218
 
219
+ User: I feel like partying till dawn.
220
+ LLM Response: Party
221
 
222
+ User: Let's get this party started!
223
+ LLM Response: Party
224
 
225
+ User: I'm ready to party hard tonight.
226
+ LLM Response: Party
227
 
228
+ User: I'm in the mood for some loud music and dancing!
229
+ LLM Response: Party
230
 
231
+ User: Tonight's going to be epic!
232
+ LLM Response: Party
233
 
234
+ User: Lets turn up the music and have some fun!
235
+ LLM Response: Party
236
+ """ # Include your fixed prompt and instructions here
237
+ prompt = f"{fixed_prompt}"
238
  for user_prompt, bot_response in history:
239
  prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
240
  prompt += f"\nUser: {message}\nLLM Response:"
241
  return prompt
242
 
243
async def process_speech(speech_file):
    """Turn a spoken request into a spoken reply.

    The recording is transcribed with ``speech_to_text``, the transcript is
    answered by ``generate`` (called with an empty string as history), and the
    answer is synthesised with ``edge_tts`` into a temporary file that is left
    on disk (``delete=False``).

    Args:
        speech_file: Audio input handed to ``speech_to_text``.

    Yields:
        Path of the temporary file the synthesised reply was saved to.
    """
    transcript = speech_to_text(speech_file)
    answer = generate(transcript, history="")
    synthesiser = edge_tts.Communicate(answer)
    # NOTE(review): the file is named *.wav; confirm that matches the audio
    # format edge_tts actually writes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio_file:
        reply_path = audio_file.name
        await synthesiser.save(reply_path)
    yield reply_path
252
+ DESCRIPTION = """ # <center><b>Mood-Based Music Recommender⚡</b></center>
253
+ ### <center>Hi! I'm a music recommender app.
254
+ ### <center>What kind of music do you want to listen to, or how are you feeling today?</center>
255
+ """
256
+ # Gradio UI: microphone recording ("User") -> process_speech -> auto-played audio reply ("AI")
257
+ with gr.Blocks(css="style.css") as demo:
258
+ gr.Markdown(DESCRIPTION)
259
+ with gr.Row():
260
# NOTE(review): `input` shadows the Python builtin of the same name; consider renaming.
+ input = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
261
+ output = gr.Audio(label="AI", type="filepath",
262
+ interactive=False,
263
+ autoplay=True,
264
+ elem_classes="audio")
265
+ gr.Interface(
266
+ batch=True,
267
+ max_batch_size=10,
268
+ fn=process_speech,
269
+ inputs=[input],
270
+ outputs=[output], live=True)
271
+
272
+ if __name__ == "__main__":
273
+ demo.queue(max_size=200).launch()