ibraheem007 committed on
Commit
37d62b3
·
verified ·
1 Parent(s): 742a2dc

Update components/student_flow.py

Browse files
Files changed (1) hide show
  1. components/student_flow.py +4 -1
components/student_flow.py CHANGED
@@ -297,6 +297,9 @@ def chunk_content(content, max_chunk_size=8000):
297
  if len(content) <= max_chunk_size:
298
  return [content.strip()]
299
 
 
 
 
300
  # First try: split by paragraphs
301
  paragraphs = re.split(r'\n\s*\n', content)
302
  paragraphs = [p.strip() for p in paragraphs if p.strip()]
@@ -351,7 +354,7 @@ def chunk_content(content, max_chunk_size=8000):
351
  # Final validation - ensure no chunk is too large
352
  validated_chunks = []
353
  for chunk in chunks:
354
- if len(chunk) < min_chunk_size and validated_chunks:
355
  # Merge with previous chunk if too small (unless it's the first chunk)
356
  validated_chunks[-1] = validated_chunks[-1] + "\n\n" + chunk
357
  elif len(chunk) > max_chunk_size * 1.2: # Allow 20% overflow
 
297
  if len(content) <= max_chunk_size:
298
  return [content.strip()]
299
 
300
+ # Define minimum chunk size (you can adjust this value)
301
+ min_chunk_size = 500 # Add this line to define the variable
302
+
303
  # First try: split by paragraphs
304
  paragraphs = re.split(r'\n\s*\n', content)
305
  paragraphs = [p.strip() for p in paragraphs if p.strip()]
 
354
  # Final validation - ensure no chunk is too large
355
  validated_chunks = []
356
  for chunk in chunks:
357
+ if len(chunk) < min_chunk_size and validated_chunks: # Now min_chunk_size is defined
358
  # Merge with previous chunk if too small (unless it's the first chunk)
359
  validated_chunks[-1] = validated_chunks[-1] + "\n\n" + chunk
360
  elif len(chunk) > max_chunk_size * 1.2: # Allow 20% overflow