Prathamesh1420 committed on
Commit 321e683 · verified · 1 Parent(s): 8ab4f78

Update app.py

Files changed (1)
  1. app.py +128 -251
app.py CHANGED
@@ -1,252 +1,129 @@
-import pyttsx3
-import speech_recognition as sr
-from playsound import playsound
-import random
-import datetime
-hour = datetime.datetime.now().strftime('%H:%M')
-#print(hour)
-date = datetime.date.today().strftime('%d/%B/%Y')
-#print(date)
-date = date.split('/')
-#print(date)
-import webbrowser as wb
-import tensorflow as tf
-import numpy as np
-import librosa
-import matplotlib.pyplot as plt
-import seaborn as sns
-sns.set()
-from modules import commands_answers, load_agenda
-commands = commands_answers.commands
-answers = commands_answers.answers
-#print(commands)
-#print(answers)
-
-my_name = 'Bob'
-
-# MacOS
-chrome_path = 'open -a /Applications/Google\ Chrome.app %s'
-# Windows
-#chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s'
-# Linux
-# chrome_path = '/usr/bin/google-chrome %s'
-
-def search(sentence):
-    wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)
-
-#search('python programming language')
-
-MODEL_TYPES = ['EMOTION']
-def load_model_by_name(model_type):
-    if model_type == MODEL_TYPES[0]:
-        model = tf.keras.models.load_model('models/speech_emotion_recognition.hdf5')
-        model_dict = list(['calm', 'happy', 'fear', 'nervous', 'neutral', 'disgust', 'surprise', 'sad'])
-        SAMPLE_RATE = 48000
-    return model, model_dict, SAMPLE_RATE
-
-#print(load_model_by_name('EMOTION'))
-#print(load_model_by_name('EMOTION')[0].summary())
-
-model_type = 'EMOTION'
-loaded_model = load_model_by_name(model_type)
-
-def predict_sound(AUDIO, SAMPLE_RATE, plot = True):
-    results = []
-    wav_data, sample_rate = librosa.load(AUDIO, sr = SAMPLE_RATE)
-    #print(wav_data.shape)
-    #print(sample_rate)
-    #print(wav_data)
-    # ' librosa ' -> 'librosa'
-    # https://librosa.org/doc/main/generated/librosa.effects.trim.html
-    clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
-    splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end = True, pad_value = 0)
-    for i, data in enumerate(splitted_audio_data.numpy()):
-        #print('Audio split: ', i)
-        #print(data.shape)
-        #print(data)
-        # Mel frequency: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
-        # PCA
-        mfccs_features = librosa.feature.mfcc(y = data, sr = sample_rate, n_mfcc=40)
-        #print(mfccs_features.shape)
-        #print(mfccs_features)
-        mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
-        mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)
-        #print(mfccs_scaled_features.shape)
-        mfccs_scaled_features = mfccs_scaled_features[:, :, np.newaxis]
-        # batch
-        #print(mfccs_scaled_features.shape)
-        predictions = loaded_model[0].predict(mfccs_scaled_features)
-        #print(predictions)
-        #print(predictions.sum())
-        if plot:
-            plt.figure(figsize=(len(splitted_audio_data), 5))
-            plt.barh(loaded_model[1], predictions[0])
-            plt.tight_layout()
-            plt.show()
-
-        predictions = predictions.argmax(axis = 1)
-        #print(predictions)
-        predictions = predictions.astype(int).flatten()
-        predictions = loaded_model[1][predictions[0]]
-        results.append(predictions)
-        #print(results)
-
-        result_str = 'PART ' + str(i) + ': ' + str(predictions).upper()
-        #print(result_str)
-
-    count_results = [[results.count(x), x] for x in set(results)]
-    #print(count_results)
-
-    #print(max(count_results))
-    return max(count_results)
-
-#playsound('sad.wav')
-#predict_sound('sad.wav', loaded_model[2], plot=False)
-
-def play_music_youtube(emotion):
-    play = False
-    if emotion == 'sad' or emotion == 'fear':
-        wb.get(chrome_path).open('https://www.youtube.com/watch?v=k32IPg4dbz0&ab_channel=Amelhorm%C3%BAsicainstrumental')
-        play = True
-    if emotion == 'nervous' or emotion == 'surprise':
-        wb.get(chrome_path).open('https://www.youtube.com/watch?v=pWjmpSD-ph0&ab_channel=CassioToledo')
-        play = True
-    return play
-
-#play_music_youtube('sad')
-#play_music_youtube('surprise')
-#emotion = predict_sound('sad.wav', loaded_model[2], plot=False)
-#print(emotion)
-#play_music_youtube(emotion[1])
-
-def speak(text):
-    engine = pyttsx3.init()
-    engine.setProperty('rate', 90) # number of words per second
-    engine.setProperty('volume', 1) # min: 0, max: 1
-    engine.say(text)
-    engine.runAndWait()
-
-#speak("Testing the Assistant's Speech Synthesizer")
-
-def listen_microphone():
-    microphone = sr.Recognizer()
-    with sr.Microphone() as source:
-        microphone.adjust_for_ambient_noise(source, duration=0.8)
-        print('Listening: ')
-        audio = microphone.listen(source)
-        with open('recordings/speech.wav', 'wb') as f:
-            f.write(audio.get_wav_data())
    try:
-        # https://pypi.org/project/SpeechRecognition/
-        sentence = microphone.recognize_google(audio, language='en-US')
-        print('You said: ' + sentence)
-    except sr.UnknownValueError:
-        sentence = ''
-        print('Not understood')
-    return sentence
-
-#playsound('recordings/speech.wav')
-#listen_microphone()
-
-def test_models():
-    audio_source = '/Users/jonesgranatyr/Documents/Ensino/IA Expert/Cursos/Virtual assistent/virtual_assistant/recordings/speech.wav'
-    prediction = predict_sound(audio_source, loaded_model[2], plot = False)
-    return prediction
-
-#print(test_models())
-
-playing = False
-mode_control = False
-print('[INFO] Ready to start!')
-playsound('n1.mp3')
-
-while (1):
-    result = listen_microphone()
-
-    if my_name in result:
-        result = str(result.split(my_name + ' ')[1])
-        result = result.lower()
-        #print('The assistant has been activacted!')
-        #print('After processing: ', result)
-
-        if result in commands[0]:
-            playsound('n2.mp3')
-            speak('I will read my list of functionalities: ' + answers[0])
-
-        if result in commands[3]:
-            playsound('n2.mp3')
-            speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))
-
-        if result in commands[4]:
-            playsound('n2.mp3')
-            speak('Today is ' + date[0] + ' of ' + date[1])
-
-        if result in commands[1]:
-            playsound('n2.mp3')
-            speak('Please, tell me the activity!')
-            result = listen_microphone()
-            annotation = open('annotation.txt', mode='a+', encoding='utf-8')
-            annotation.write(result + '\n')
-            annotation.close()
-            speak(''.join(random.sample(answers[1], k = 1)))
-            speak('Want me to read the notes?')
-            result = listen_microphone()
-            if result == 'yes' or result == 'sure':
-                with open('annotation.txt') as file_source:
-                    lines = file_source.readlines()
-                    for line in lines:
-                        speak(line)
-            else:
-                speak('Ok!')
-
-        if result in commands[2]:
-            playsound('n2.mp3')
-            speak(''.join(random.sample(answers[2], k = 1)))
-            result = listen_microphone()
-            search(result)
-
-        if result in commands[6]:
-            playsound('n2.mp3')
-            if load_agenda.load_agenda():
-                speak('These are the events for today:')
-                for i in range(len(load_agenda.load_agenda()[1])):
-                    speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' schedule for ' + str(load_agenda.load_agenda()[2][i]))
-            else:
-                speak('There are not events for today considering the current time!')
-
-        if result in commands[5]:
-            mode_control = True
-            playsound('n1.mp3')
-            speak('Emotion analysis mode has been activacted!')
-
-        if mode_control:
-            analyse = test_models()
-            print(f'I heard {analyse} in your voice!')
-            if not playing:
-                playing = play_music_youtube(analyse[1])
-
-        if result == 'turn off':
-            playsound('n2.mp3')
-            speak(''.join(random.sample(answers[4], k = 1)))
-            break
-    else:
-        playsound('n3.mp3')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
+import os
+
+import gradio as gr
+from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
+from llama_index.llms.groq import Groq
+from llama_parse import LlamaParse
+
+# API keys
+llama_cloud_key = os.environ.get("LLAMA_CLOUD_API_KEY")
+groq_key = os.environ.get("GROQ_API_KEY")
+mxbai_key = os.environ.get("MXBAI_API_KEY")
+if not (llama_cloud_key and groq_key and mxbai_key):
+    raise ValueError(
+        "API Keys not found! Ensure they are passed to the Docker container."
+    )
+
+# Model names
+llm_model_name = "llama-3.1-70b-versatile"
+embed_model_name = "mixedbread-ai/mxbai-embed-large-v1"
+
+# Initialize the parser
+parser = LlamaParse(api_key=llama_cloud_key, result_type="markdown")
+
+# Define file extractor with various common extensions
+file_extractor = {
+    ".pdf": parser,
+    ".docx": parser,
+    ".doc": parser,
+    ".txt": parser,
+    ".csv": parser,
+    ".xlsx": parser,
+    ".pptx": parser,
+    ".html": parser,
+    ".jpg": parser,
+    ".jpeg": parser,
+    ".png": parser,
+    ".webp": parser,
+    ".svg": parser,
+}
+
+# Initialize the embedding model
+embed_model = MixedbreadAIEmbedding(api_key=mxbai_key, model_name=embed_model_name)
+
+# Initialize the LLM
+llm = Groq(model=llm_model_name, api_key=groq_key)
+
+
+# File processing function
+def load_files(file_path: str):
+    global vector_index
+    if not file_path:
+        return "No file path provided. Please upload a file."
+
+    valid_extensions = ', '.join(file_extractor.keys())
+    if not any(file_path.endswith(ext) for ext in file_extractor):
+        return f"The parser can only parse the following file types: {valid_extensions}"
+
+    document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
+    vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
+    print(f"Parsing completed for: {file_path}")
+    filename = os.path.basename(file_path)
+    return f"Ready to provide responses based on: {filename}"
+
+
+# Respond function
+def respond(message, history):
    try:
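+        # respond() is a generator: each yield hands gr.ChatInterface the answer accumulated so far, so the reply streams in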
+        # Use the preloaded LLM
+        query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
+        streaming_response = query_engine.query(message)
+        partial_text = ""
+        for new_text in streaming_response.response_gen:
+            partial_text += new_text
+            # Yield the accumulated text so the chat window updates as tokens stream in
+            yield partial_text
+    except (AttributeError, NameError):
+        print("An error occurred while processing your request.")
+        yield "Please upload the file to begin chat."
+
+
+# Clear function
+def clear_state():
+    global vector_index
+    vector_index = None
+    return [None, None]  # clears the file input and the status textbox
+
+
+# UI Setup
+with gr.Blocks(
+    theme=gr.themes.Default(
+        primary_hue="green",
+        secondary_hue="blue",
+        font=[gr.themes.GoogleFont("Poppins")],
+    ),
+    css="footer {visibility: hidden}",
+) as demo:
+    gr.Markdown("# DataCamp Doc Q&A 🤖📃")
+    with gr.Row():
+        with gr.Column(scale=1):
+            file_input = gr.File(
+                file_count="single", type="filepath", label="Upload Document"
+            )
+            with gr.Row():
+                btn = gr.Button("Submit", variant="primary")
+                clear = gr.Button("Clear")
+            output = gr.Textbox(label="Status")
+        with gr.Column(scale=3):
+            chatbot = gr.ChatInterface(
+                fn=respond,
+                chatbot=gr.Chatbot(height=300),
+                theme="soft",
+                show_progress="full",
+                textbox=gr.Textbox(
+                    placeholder="Ask questions about the uploaded document!",
+                    container=False,
+                ),
+            )
+
+    # Set up Gradio interactions
+    btn.click(fn=load_files, inputs=file_input, outputs=output)
+    clear.click(
+        fn=clear_state,  # Use the clear_state function
+        outputs=[file_input, output],
+    )
+
+# Launch the demo
+if __name__ == "__main__":
+    demo.launch()
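
For reviewers who want to exercise the new pipeline outside the Gradio UI, here is a minimal smoke-test sketch, not part of the commit. It assumes the file above is saved as app.py, that the three API keys are already exported, and that sample.pdf is a hypothetical stand-in for any supported document.

import os

# Fail fast if the keys the app expects are missing (app.py raises on import otherwise).
for key in ("LLAMA_CLOUD_API_KEY", "GROQ_API_KEY", "MXBAI_API_KEY"):
    if not os.environ.get(key):
        raise SystemExit(f"{key} must be set before importing app")

from app import load_files, respond  # the module added in this commit

print(load_files("sample.pdf"))  # parses the file and builds the global vector index
answer = ""
for partial in respond("What is this document about?", history=[]):
    answer = partial  # respond() yields the growing answer as tokens stream in
print(answer)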