# File size: 2,512 Bytes

import speech_recognition as sr
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import UnstructuredFileLoader

import os
import torch

import soundfile as sf
from playsound import playsound
from TTS.api import TTS
from langchain.llms import Ollama


# Load the knowledge-base PDF that backs retrieval
loader = UnstructuredFileLoader("Foduu_KnowledgeBase.pdf")
documents = loader.load()

# Embed the loaded documents with an open-source sentence-transformers model
# and index them in a FAISS vector store.
# NOTE(review): documents are indexed exactly as the loader returns them; there
# is no text-splitting step here — confirm the chunk granularity suits retrieval.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
vectorstore = FAISS.from_documents(documents, embeddings)

# LLM reached through Ollama on localhost, wired into a "stuff" retrieval-QA chain
ollama = Ollama(
    base_url='http://localhost:11434',
    model="llama3",
)
qa = RetrievalQA.from_chain_type(
    llm=ollama,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Speech recognizer used by listen()
r = sr.Recognizer()

# Text-to-speech engine used by speak()
tts = TTS(model_name="tts_models/en/ljspeech/glow-tts")
 


def speak(text):
    """
    Synthesize ``text`` to speech, play it, and delete the temporary audio file.

    The audio is rendered by the module-level ``tts`` engine into ``output.wav``
    in the current working directory, played with ``playsound``, and then
    removed. The file is removed even when synthesis or playback fails, so a
    failed call does not leave a stale recording behind.

    Args:
        text: The text to speak.

    Returns:
        None. Failures are printed rather than raised, so a speech problem
        never interrupts the caller.
    """
    output_file = "output.wav"
    try:
        try:
            # Generate speech
            tts.tts_to_file(text=text, file_path=output_file)

            # Play the speech
            playsound(output_file)
        finally:
            # Clean up on every path; the file may be absent if synthesis
            # failed before anything was written.
            if os.path.exists(output_file):
                os.remove(output_file)
        print(f"Speech played and file {output_file} removed.")

    except Exception as e:
        print(f"Error: {e}")


def listen():
    """
    Capture one utterance from the microphone and transcribe it.

    Returns the recognized text, or None when the audio could not be
    understood (this is also announced via ``speak``) or when the request to
    the Google Speech Recognition service fails.
    """
    with sr.Microphone() as mic:
        print("Listening...")
        captured = r.listen(mic)

    # Only the recognition call can raise the errors handled below.
    try:
        transcript = r.recognize_google(captured)
    except sr.UnknownValueError:
        print("Could not understand audio")
        speak('could not understand audio')
        return None
    except sr.RequestError as err:
        print(f"Could not request results from Google Speech Recognition service; {err}")
        return None

    print(f"You said: {transcript}")
    return transcript


def process_audio(text):
    """
    Answer a transcribed utterance with the QA chain and speak the reply.

    Args:
        text: The recognized utterance, or None when nothing was recognized.
            None and blank (empty or whitespace-only) strings are ignored.

    Returns:
        None. Any error raised while answering is printed and an apology is
        spoken instead of the answer.
    """
    # Nothing to answer: recognition failed (None) or produced no words.
    # A blank query would only trigger a pointless chain call and reply.
    if text is None or (isinstance(text, str) and not text.strip()):
        return

    try:
        response = qa.run(text)
        print(response)
        speak(response)
    except Exception as e:
        print(f"An error occurred: {e}")
        speak("Sorry, I'm having trouble processing that right now.")


def main():
    """
    Run the voice assistant: capture an utterance, answer it, and repeat forever.
    """
    while True:
        # listen() yields the transcript (or None); process_audio() handles both.
        process_audio(listen())


# Start the assistant loop only when this file is executed as a script.
if __name__ == "__main__":
    main()