3gg committed on
Commit
074f5a4
1 Parent(s): 6fcd382

Add overlap and reduce search count.

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -9,10 +9,13 @@ from langchain.vectorstores import FAISS
9
 
10
 
11
  # Number of search results to query from the vector database.
12
- SIMILARITY_SEARCH_COUNT = 7
13
 
14
  # Size of each document chunk in number of characters.
15
- CHUNK_SIZE = 500
 
 
 
16
 
17
  # Maximum number of output tokens.
18
  MODEL_MAX_LENGTH = 500
@@ -23,7 +26,7 @@ loader = PyMuPDFLoader("rdna3-shader-instruction-set-architecture-feb-2023_0.pdf
23
  documents = loader.load()
24
 
25
  print("Creating chunks")
26
- splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=0)
27
  chunks = splitter.split_documents(documents)
28
 
29
  print("Creating database")
 
9
 
10
 
11
  # Number of search results to query from the vector database.
12
+ SIMILARITY_SEARCH_COUNT = 3
13
 
14
  # Size of each document chunk in number of characters.
15
+ CHUNK_SIZE = 800
16
+
17
+ # Chunk overlap in number of characters.
18
+ CHUNK_OVERLAP = 50
19
 
20
  # Maximum number of output tokens.
21
  MODEL_MAX_LENGTH = 500
 
26
  documents = loader.load()
27
 
28
  print("Creating chunks")
29
+ splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
30
  chunks = splitter.split_documents(documents)
31
 
32
  print("Creating database")