Dua Rajper committed on
Commit
8bd7428
·
verified ·
1 Parent(s): bafb802

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
- from langchain_community.embeddings import HuggingFaceEmbeddings # Updated import
8
  from langchain.vectorstores import FAISS
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
@@ -22,25 +22,24 @@ def get_pdf_text(pdf_docs):
22
  for pdf in pdf_docs:
23
  pdf_reader = PdfReader(pdf)
24
  for page in pdf_reader.pages:
25
- text += page.extract_text() or ""
26
  return text
27
 
28
  # Function to split extracted text into chunks
29
  def get_text_chunks(text):
30
  text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
31
- chunks = text_splitter.split_text(text)
32
- return chunks
33
 
34
  # Function to create a FAISS vectorstore using Hugging Face Embeddings
35
  def get_vectorstore(text_chunks):
36
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
37
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
38
  return vectorstore
39
 
40
  # Function to set up the conversational retrieval chain
41
  def get_conversation_chain(vectorstore):
42
  try:
43
- llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
44
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
45
 
46
  conversation_chain = ConversationalRetrievalChain.from_llm(
 
4
  import streamlit as st
5
  from PyPDF2 import PdfReader
6
  from langchain.text_splitter import CharacterTextSplitter
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings # Fixed Import
8
  from langchain.vectorstores import FAISS
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain.chains import ConversationalRetrievalChain
 
22
  for pdf in pdf_docs:
23
  pdf_reader = PdfReader(pdf)
24
  for page in pdf_reader.pages:
25
+ text += page.extract_text() or "" # Ensure it doesn't break if extract_text() returns None
26
  return text
27
 
28
  # Function to split extracted text into chunks
29
  def get_text_chunks(text):
30
  text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
31
+ return text_splitter.split_text(text)
 
32
 
33
  # Function to create a FAISS vectorstore using Hugging Face Embeddings
34
  def get_vectorstore(text_chunks):
35
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") # ✅ Open-source model
36
  vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
37
  return vectorstore
38
 
39
  # Function to set up the conversational retrieval chain
40
  def get_conversation_chain(vectorstore):
41
  try:
42
+ llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5) # ✅ Uses GROQ LLaMA model
43
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
44
 
45
  conversation_chain = ConversationalRetrievalChain.from_llm(