riteshraut
committed on
Commit
·
a743656
1
Parent(s):
56d49b0
fix/update
Browse files
app.py
CHANGED
|
@@ -141,7 +141,9 @@ def upload_files():
|
|
| 141 |
try:
|
| 142 |
print("Starting RAG pipeline setup...")
|
| 143 |
|
| 144 |
-
parent_splitter =RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=300
|
|
|
|
|
|
|
| 145 |
child_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=100)
|
| 146 |
|
| 147 |
parent_docs = parent_splitter.split_documents(all_docs)
|
|
|
|
| 141 |
try:
|
| 142 |
print("Starting RAG pipeline setup...")
|
| 143 |
|
| 144 |
+
parent_splitter =RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=300,
|
| 145 |
+
separators=["\n\n", "\n", ". ", " ", ""], # Prioritize natural breaks
|
| 146 |
+
length_function=len)
|
| 147 |
child_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=100)
|
| 148 |
|
| 149 |
parent_docs = parent_splitter.split_documents(all_docs)
|