Spaces:
Running
Running
Update components/student_flow.py
Browse files
components/student_flow.py
CHANGED
|
@@ -297,6 +297,9 @@ def chunk_content(content, max_chunk_size=8000):
|
|
| 297 |
if len(content) <= max_chunk_size:
|
| 298 |
return [content.strip()]
|
| 299 |
|
|
|
|
|
|
|
|
|
|
| 300 |
# First try: split by paragraphs
|
| 301 |
paragraphs = re.split(r'\n\s*\n', content)
|
| 302 |
paragraphs = [p.strip() for p in paragraphs if p.strip()]
|
|
@@ -351,7 +354,7 @@ def chunk_content(content, max_chunk_size=8000):
|
|
| 351 |
# Final validation - merge undersized chunks into the previous one and check for oversized chunks
|
| 352 |
validated_chunks = []
|
| 353 |
for chunk in chunks:
|
| 354 |
-
if len(chunk) < min_chunk_size and validated_chunks:
|
| 355 |
# Merge with previous chunk if too small (unless it's the first chunk)
|
| 356 |
validated_chunks[-1] = validated_chunks[-1] + "\n\n" + chunk
|
| 357 |
elif len(chunk) > max_chunk_size * 1.2: # Allow 20% overflow
|
|
|
|
| 297 |
if len(content) <= max_chunk_size:
|
| 298 |
return [content.strip()]
|
| 299 |
|
| 300 |
+
# Minimum chunk size (adjustable): chunks shorter than this are merged into the previous chunk during final validation
|
| 301 |
+
min_chunk_size = 500 # Add this line to define the variable
|
| 302 |
+
|
| 303 |
# First try: split by paragraphs
|
| 304 |
paragraphs = re.split(r'\n\s*\n', content)
|
| 305 |
paragraphs = [p.strip() for p in paragraphs if p.strip()]
|
|
|
|
| 354 |
# Final validation - merge undersized chunks into the previous one and check for oversized chunks
|
| 355 |
validated_chunks = []
|
| 356 |
for chunk in chunks:
|
| 357 |
+
if len(chunk) < min_chunk_size and validated_chunks: # Now min_chunk_size is defined
|
| 358 |
# Merge with previous chunk if too small (unless it's the first chunk)
|
| 359 |
validated_chunks[-1] = validated_chunks[-1] + "\n\n" + chunk
|
| 360 |
elif len(chunk) > max_chunk_size * 1.2: # Allow 20% overflow
|