Add overlap and reduce search count.
Browse files
app.py
CHANGED
@@ -9,10 +9,13 @@ from langchain.vectorstores import FAISS
|
|
9 |
|
10 |
|
11 |
# Number of search results to query from the vector database.
|
12 |
-
SIMILARITY_SEARCH_COUNT =
|
13 |
|
14 |
# Size of each document chunk in number of characters.
|
15 |
-
CHUNK_SIZE =
|
|
|
|
|
|
|
16 |
|
17 |
# Maximum number of output tokens.
|
18 |
MODEL_MAX_LENGTH = 500
|
@@ -23,7 +26,7 @@ loader = PyMuPDFLoader("rdna3-shader-instruction-set-architecture-feb-2023_0.pdf
|
|
23 |
documents = loader.load()
|
24 |
|
25 |
print("Creating chunks")
|
26 |
-
splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=
|
27 |
chunks = splitter.split_documents(documents)
|
28 |
|
29 |
print("Creating database")
|
|
|
9 |
|
10 |
|
11 |
# Number of search results to query from the vector database.
|
12 |
+
SIMILARITY_SEARCH_COUNT = 3
|
13 |
|
14 |
# Size of each document chunk in number of characters.
|
15 |
+
CHUNK_SIZE = 800
|
16 |
+
|
17 |
+
# Chunk overlap in number of characters.
|
18 |
+
CHUNK_OVERLAP = 50
|
19 |
|
20 |
# Maximum number of output tokens.
|
21 |
MODEL_MAX_LENGTH = 500
|
|
|
26 |
documents = loader.load()
|
27 |
|
28 |
print("Creating chunks")
|
29 |
+
splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
|
30 |
chunks = splitter.split_documents(documents)
|
31 |
|
32 |
print("Creating database")
|