Prathamesh1420 committed on
Commit 321e683 · verified · 1 Parent(s): 8ab4f78

Update app.py

Files changed (1)
  1. app.py +128 -251
app.py CHANGED
@@ -1,252 +1,129 @@
-import pyttsx3
-import speech_recognition as sr
-from playsound import playsound
-import random
-import datetime
-hour = datetime.datetime.now().strftime('%H:%M')
-#print(hour)
-date = datetime.date.today().strftime('%d/%B/%Y')
-#print(date)
-date = date.split('/')
-#print(date)
-import webbrowser as wb
-import tensorflow as tf
-import numpy as np
-import librosa
-import matplotlib.pyplot as plt
-import seaborn as sns
-sns.set()
-from modules import commands_answers, load_agenda
-commands = commands_answers.commands
-answers = commands_answers.answers
-#print(commands)
-#print(answers)
-
-my_name = 'Bob'
-
-# MacOS
-chrome_path = 'open -a /Applications/Google\ Chrome.app %s'
-# Windows
-#chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s'
-# Linux
-# chrome_path = '/usr/bin/google-chrome %s'
-
-def search(sentence):
-    wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)
-
-#search('python programming language')
-
-MODEL_TYPES = ['EMOTION']
-def load_model_by_name(model_type):
-    if model_type == MODEL_TYPES[0]:
-        model = tf.keras.models.load_model('models/speech_emotion_recognition.hdf5')
-        model_dict = list(['calm', 'happy', 'fear', 'nervous', 'neutral', 'disgust', 'surprise', 'sad'])
-        SAMPLE_RATE = 48000
-    return model, model_dict, SAMPLE_RATE
-
-#print(load_model_by_name('EMOTION'))
-#print(load_model_by_name('EMOTION')[0].summary())
-
-model_type = 'EMOTION'
-loaded_model = load_model_by_name(model_type)
-
-def predict_sound(AUDIO, SAMPLE_RATE, plot = True):
-    results = []
-    wav_data, sample_rate = librosa.load(AUDIO, sr = SAMPLE_RATE)
-    #print(wav_data.shape)
-    #print(sample_rate)
-    #print(wav_data)
-    # ' librosa ' -> 'librosa'
-    # https://librosa.org/doc/main/generated/librosa.effects.trim.html
-    clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
-    splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end = True, pad_value = 0)
-    for i, data in enumerate(splitted_audio_data.numpy()):
-        #print('Audio split: ', i)
-        #print(data.shape)
-        #print(data)
-        # Mel frequency: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum
-        # PCA
-        mfccs_features = librosa.feature.mfcc(y = data, sr = sample_rate, n_mfcc=40)
-        #print(mfccs_features.shape)
-        #print(mfccs_features)
-        mfccs_scaled_features = np.mean(mfccs_features.T, axis = 0)
-        mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)
-        #print(mfccs_scaled_features.shape)
-        mfccs_scaled_features = mfccs_scaled_features[:, :, np.newaxis]
-        # batch
-        #print(mfccs_scaled_features.shape)
-        predictions = loaded_model[0].predict(mfccs_scaled_features)
-        #print(predictions)
-        #print(predictions.sum())
-        if plot:
-            plt.figure(figsize=(len(splitted_audio_data), 5))
-            plt.barh(loaded_model[1], predictions[0])
-            plt.tight_layout()
-            plt.show()
-
-        predictions = predictions.argmax(axis = 1)
-        #print(predictions)
-        predictions = predictions.astype(int).flatten()
-        predictions = loaded_model[1][predictions[0]]
-        results.append(predictions)
-        #print(results)
-
-        result_str = 'PART ' + str(i) + ': ' + str(predictions).upper()
-        #print(result_str)
-
-    count_results = [[results.count(x), x] for x in set(results)]
-    #print(count_results)
-
-    #print(max(count_results))
-    return max(count_results)
-
-#playsound('sad.wav')
-#predict_sound('sad.wav', loaded_model[2], plot=False)
-
-def play_music_youtube(emotion):
-    play = False
-    if emotion == 'sad' or emotion == 'fear':
-        wb.get(chrome_path).open('https://www.youtube.com/watch?v=k32IPg4dbz0&ab_channel=Amelhorm%C3%BAsicainstrumental')
-        play = True
-    if emotion == 'nervous' or emotion == 'surprise':
-        wb.get(chrome_path).open('https://www.youtube.com/watch?v=pWjmpSD-ph0&ab_channel=CassioToledo')
-        play = True
-    return play
-
-#play_music_youtube('sad')
-#play_music_youtube('surprise')
-#emotion = predict_sound('sad.wav', loaded_model[2], plot=False)
-#print(emotion)
-#play_music_youtube(emotion[1])
-
-def speak(text):
-    engine = pyttsx3.init()
-    engine.setProperty('rate', 90) # number of words per second
-    engine.setProperty('volume', 1) # min: 0, max: 1
-    engine.say(text)
-    engine.runAndWait()
-
-#speak("Testing the Assistant's Speech Synthesizer")
-
-def listen_microphone():
-    microphone = sr.Recognizer()
-    with sr.Microphone() as source:
-        microphone.adjust_for_ambient_noise(source, duration=0.8)
-        print('Listening: ')
-        audio = microphone.listen(source)
-        with open('recordings/speech.wav', 'wb') as f:
-            f.write(audio.get_wav_data())
    try:
-        # https://pypi.org/project/SpeechRecognition/
-        sentence = microphone.recognize_google(audio, language='en-US')
-        print('You said: ' + sentence)
-    except sr.UnknownValueError:
-        sentence = ''
-        print('Not understood')
-    return sentence
-
-#playsound('recordings/speech.wav')
-#listen_microphone()
-
-def test_models():
-    audio_source = '/Users/jonesgranatyr/Documents/Ensino/IA Expert/Cursos/Virtual assistent/virtual_assistant/recordings/speech.wav'
-    prediction = predict_sound(audio_source, loaded_model[2], plot = False)
-    return prediction
-
-#print(test_models())
-
-playing = False
-mode_control = False
-print('[INFO] Ready to start!')
-playsound('n1.mp3')
-
-while (1):
-    result = listen_microphone()
-
-    if my_name in result:
-        result = str(result.split(my_name + ' ')[1])
-        result = result.lower()
-        #print('The assistant has been activacted!')
-        #print('After processing: ', result)
-
-        if result in commands[0]:
-            playsound('n2.mp3')
-            speak('I will read my list of functionalities: ' + answers[0])
-
-        if result in commands[3]:
-            playsound('n2.mp3')
-            speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))
-
-        if result in commands[4]:
-            playsound('n2.mp3')
-            speak('Today is ' + date[0] + ' of ' + date[1])
-
-        if result in commands[1]:
-            playsound('n2.mp3')
-            speak('Please, tell me the activity!')
-            result = listen_microphone()
-            annotation = open('annotation.txt', mode='a+', encoding='utf-8')
-            annotation.write(result + '\n')
-            annotation.close()
-            speak(''.join(random.sample(answers[1], k = 1)))
-            speak('Want me to read the notes?')
-            result = listen_microphone()
-            if result == 'yes' or result == 'sure':
-                with open('annotation.txt') as file_source:
-                    lines = file_source.readlines()
-                    for line in lines:
-                        speak(line)
-            else:
-                speak('Ok!')
-
-        if result in commands[2]:
-            playsound('n2.mp3')
-            speak(''.join(random.sample(answers[2], k = 1)))
-            result = listen_microphone()
-            search(result)
-
-        if result in commands[6]:
-            playsound('n2.mp3')
-            if load_agenda.load_agenda():
-                speak('These are the events for today:')
-                for i in range(len(load_agenda.load_agenda()[1])):
-                    speak(load_agenda.load_agenda()[1][i] + ' ' + load_agenda.load_agenda()[0][i] + ' schedule for ' + str(load_agenda.load_agenda()[2][i]))
-            else:
-                speak('There are not events for today considering the current time!')
-
-        if result in commands[5]:
-            mode_control = True
-            playsound('n1.mp3')
-            speak('Emotion analysis mode has been activacted!')
-
-        if mode_control:
-            analyse = test_models()
-            print(f'I heard {analyse} in your voice!')
-            if not playing:
-                playing = play_music_youtube(analyse[1])
-
-        if result == 'turn off':
-            playsound('n2.mp3')
-            speak(''.join(random.sample(answers[4], k = 1)))
-            break
-    else:
-        playsound('n3.mp3')
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
+import os
+
+import gradio as gr
+from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+from llama_index.embeddings.mixedbreadai import MixedbreadAIEmbedding
+from llama_index.llms.groq import Groq
+from llama_parse import LlamaParse
+
+# API keys
+llama_cloud_key = os.environ.get("LLAMA_CLOUD_API_KEY")
+groq_key = os.environ.get("GROQ_API_KEY")
+mxbai_key = os.environ.get("MXBAI_API_KEY")
+if not (llama_cloud_key and groq_key and mxbai_key):
+    raise ValueError(
+        "API Keys not found! Ensure they are passed to the Docker container."
+    )
+
+# Model names
+llm_model_name = "llama-3.1-70b-versatile"
+embed_model_name = "mixedbread-ai/mxbai-embed-large-v1"
+
+# Initialize the parser
+parser = LlamaParse(api_key=llama_cloud_key, result_type="markdown")
+
+# Define file extractor with various common extensions
+file_extractor = {
+    ".pdf": parser,
+    ".docx": parser,
+    ".doc": parser,
+    ".txt": parser,
+    ".csv": parser,
+    ".xlsx": parser,
+    ".pptx": parser,
+    ".html": parser,
+    ".jpg": parser,
+    ".jpeg": parser,
+    ".png": parser,
+    ".webp": parser,
+    ".svg": parser,
+}
+
+# Initialize the embedding model
+embed_model = MixedbreadAIEmbedding(api_key=mxbai_key, model_name=embed_model_name)
+
+# Initialize the LLM
+llm = Groq(model=llm_model_name, api_key=groq_key)
+
+
+# File processing function
+def load_files(file_path: str):
+    global vector_index
+    if not file_path:
+        return "No file path provided. Please upload a file."
+
+    valid_extensions = ', '.join(file_extractor.keys())
+    if not any(file_path.endswith(ext) for ext in file_extractor):
+        return f"The parser can only parse the following file types: {valid_extensions}"
+
+    document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
+    vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
+    print(f"Parsing completed for: {file_path}")
+    filename = os.path.basename(file_path)
+    return f"Ready to provide responses based on: {filename}"
+
+
+# Respond function
+def respond(message, history):
    try:
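+        # respond() is a generator: each yield hands gr.ChatInterface the answer accumulated so far, so the reply streams in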
+        # Use the preloaded LLM
+        query_engine = vector_index.as_query_engine(streaming=True, llm=llm)
+        streaming_response = query_engine.query(message)
+        partial_text = ""
+        for new_text in streaming_response.response_gen:
+            partial_text += new_text
+            # Yield the accumulated text so the chat window updates as tokens stream in
+            yield partial_text
+    except (AttributeError, NameError):
+        print("An error occurred while processing your request.")
+        yield "Please upload the file to begin chat."
+
+
+# Clear function
+def clear_state():
+    global vector_index
+    vector_index = None
+    return [None, None]  # clears the file input and the status textbox
+
+
+# UI Setup
+with gr.Blocks(
+    theme=gr.themes.Default(
+        primary_hue="green",
+        secondary_hue="blue",
+        font=[gr.themes.GoogleFont("Poppins")],
+    ),
+    css="footer {visibility: hidden}",
+) as demo:
+    gr.Markdown("# DataCamp Doc Q&A 🤖📃")
+    with gr.Row():
+        with gr.Column(scale=1):
+            file_input = gr.File(
+                file_count="single", type="filepath", label="Upload Document"
+            )
+            with gr.Row():
+                btn = gr.Button("Submit", variant="primary")
+                clear = gr.Button("Clear")
+            output = gr.Textbox(label="Status")
+        with gr.Column(scale=3):
+            chatbot = gr.ChatInterface(
+                fn=respond,
+                chatbot=gr.Chatbot(height=300),
+                theme="soft",
+                show_progress="full",
+                textbox=gr.Textbox(
+                    placeholder="Ask questions about the uploaded document!",
+                    container=False,
+                ),
+            )
+
+    # Set up Gradio interactions
+    btn.click(fn=load_files, inputs=file_input, outputs=output)
+    clear.click(
+        fn=clear_state,  # Use the clear_state function
+        outputs=[file_input, output],
+    )
+
+# Launch the demo
+if __name__ == "__main__":
+    demo.launch()
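
For reviewers who want to exercise the new pipeline outside the Gradio UI, here is a minimal smoke-test sketch, not part of the commit. It assumes the file above is saved as app.py, that the three API keys are already exported, and that sample.pdf is a hypothetical stand-in for any supported document.

import os

# Fail fast if the keys the app expects are missing (app.py raises on import otherwise).
for key in ("LLAMA_CLOUD_API_KEY", "GROQ_API_KEY", "MXBAI_API_KEY"):
    if not os.environ.get(key):
        raise SystemExit(f"{key} must be set before importing app")

from app import load_files, respond  # the module added in this commit

print(load_files("sample.pdf"))  # parses the file and builds the global vector index
answer = ""
for partial in respond("What is this document about?", history=[]):
    answer = partial  # respond() yields the growing answer as tokens stream in
print(answer)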