Chris4K committed on
Commit
b54046d
1 Parent(s): a9f4491

Update vector_store_retriever.py

Browse files
Files changed (1) hide show
  1. vector_store_retriever.py +6 -1
vector_store_retriever.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from langchain.vectorstores import Chroma
3
  from langchain.document_loaders import PyPDFLoader
4
  from langchain.embeddings import HuggingFaceInstructEmbeddings
 
5
 
6
  # Initialize the HuggingFaceInstructEmbeddings
7
  hf = HuggingFaceInstructEmbeddings(
@@ -14,8 +15,12 @@ hf = HuggingFaceInstructEmbeddings(
14
  loader = PyPDFLoader('./new_papers/new_papers/', glob="./*.pdf")
15
  documents = loader.load()
16
 
 
 
 
 
17
  # Create a Chroma vector store from the PDF documents
18
- db = Chroma.from_documents(documents, hf, collection_name="my-collection")
19
 
20
  class VectoreStoreRetrievalTool:
21
  def __init__(self):
 
2
  from langchain.vectorstores import Chroma
3
  from langchain.document_loaders import PyPDFLoader
4
  from langchain.embeddings import HuggingFaceInstructEmbeddings
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
 
7
  # Initialize the HuggingFaceInstructEmbeddings
8
  hf = HuggingFaceInstructEmbeddings(
 
15
  loader = PyPDFLoader('./new_papers/new_papers/', glob="./*.pdf")
16
  documents = loader.load()
17
 
18
+ # Split the loaded documents into overlapping chunks
19
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
20
+ texts = text_splitter.split_documents(documents)
21
+
22
  # Create a Chroma vector store from the PDF documents
23
+ db = Chroma.from_documents(texts, hf, collection_name="my-collection")
24
 
25
  class VectoreStoreRetrievalTool:
26
  def __init__(self):