# Provenance: Hugging Face Space by Shabdobhedi — "Update app.py", commit a8604aa (verified).
# RAG question-answering over PDF documents: Groq Llama3 LLM, HuggingFace
# MiniLM sentence embeddings, FAISS vector store.
import os
from langchain_groq import ChatGroq
#from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv
load_dotenv()
def download_hugging_face_embeddings():
    """Return a HuggingFace embedding model (all-MiniLM-L6-v2).

    The model weights are fetched from the Hugging Face hub on first use.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
# Load the Groq API key from the environment (.env was loaded above).
# NOTE: the original code referenced an undefined name `groq_api_key`,
# which raised NameError at import time; read it explicitly here instead.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise EnvironmentError(
        "GROQ_API_KEY is not set; add it to your environment or .env file."
    )

# Initialize the LLM used by the retrieval chain.
llm = ChatGroq(groq_api_key=groq_api_key,
               model_name="Llama3-8b-8192")
# Prompt for the stuff-documents chain.  `ChatPromptTemplate` is already
# imported at the top of the file, so the duplicate import that was here
# has been removed.  `{context}` is filled with the retrieved chunks and
# `{input}` with the user's question by `create_retrieval_chain`.
prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {input}
Only return the helpful answer below and nothing else.
Helpful answer:"""
prompt = ChatPromptTemplate.from_template(prompt_template)
def vector_embedding(pdf_dir="/kaggle/input/book-pdf-1", max_docs=20,
                     chunk_size=1000, chunk_overlap=200):
    """Build a FAISS vector store from the PDFs in *pdf_dir*.

    Parameters
    ----------
    pdf_dir : str
        Directory containing the PDF files to ingest.  Defaults to the
        original hard-coded Kaggle dataset path.
    max_docs : int | None
        Embed only the first *max_docs* loaded pages (``None`` = all).
        Defaults to 20, matching the original ``docs[:20]`` cap.
    chunk_size, chunk_overlap : int
        Passed to ``RecursiveCharacterTextSplitter``.

    Returns
    -------
    FAISS
        Vector store over the embedded document chunks.
    """
    # The original comment claimed "OpenAI embeddings"; these are in fact
    # HuggingFace MiniLM sentence embeddings.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    docs = PyPDFDirectoryLoader(pdf_dir).load()  # data ingestion
    if max_docs is not None:
        docs = docs[:max_docs]  # keep the demo run small
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = splitter.split_documents(docs)
    return FAISS.from_documents(chunks, embeddings)
import time  # hoisted from mid-flow; used to time the chain invocation

# --- Interactive driver -----------------------------------------------

# Get the user's question before building the index so the (slow)
# embedding step runs while the question is already captured.
prompt1 = input("Enter Your Question From Documents: ")

# Embed the documents and build the FAISS store.
vectors = vector_embedding()
print("Vector Store DB Is Ready")

if prompt1:
    # Stuff the retrieved chunks into the prompt and answer with the LLM.
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    start = time.process_time()
    response = retrieval_chain.invoke({'input': prompt1})
    print("Response time :", time.process_time() - start)
    print(response['answer'])

    # Show the retrieved context chunks that backed the answer.
    print("\nDocument Similarity Search:")
    for doc in response["context"]:
        print(doc.page_content)
        print("--------------------------------")