Spaces:
Runtime error
Runtime error
def chunk_document_to_dict(doc: str, doc_name: str, desired_chunk_size: int = 400, max_chunk_size: int = 500):
    """Split *doc* into text chunks keyed by ``"{doc_name}_{n}"`` (n starts at 1).

    Lines of *doc* are accumulated (each re-terminated with ``"\\n"``) until the
    buffer reaches ``desired_chunk_size`` characters, at which point a chunk is
    emitted. A chunk never exceeds ``max_chunk_size`` characters: any overflow
    is carried into the following chunk instead of being discarded, so
    concatenating the returned values in order reproduces every line of *doc*.

    Args:
        doc: The document text to split.
        doc_name: Prefix used to build each chunk's key.
        desired_chunk_size: Buffer length (in characters) that triggers a chunk.
        max_chunk_size: Hard upper bound on the length of any single chunk.

    Returns:
        A dict mapping chunk IDs to chunk text, in document order. Empty when
        *doc* contains no lines.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    if max_chunk_size < 1:
        # A non-positive limit could never consume the buffer below.
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = {}
    chunk = ''
    chunk_number = 1
    for line in doc.splitlines():
        chunk += line + '\n'
        # Emit while the buffer is large enough; a single very long line may
        # yield several chunks. The `chunk` truthiness check keeps this loop
        # finite even when desired_chunk_size <= 0.
        while chunk and len(chunk) >= desired_chunk_size:
            chunks[f"{doc_name}_{chunk_number}"] = chunk[:max_chunk_size]
            # Keep the overflow for the next chunk rather than dropping it.
            chunk = chunk[max_chunk_size:]
            chunk_number += 1
    # Whatever is left over goes into the final chunk(s), still capped at
    # max_chunk_size (only matters when desired_chunk_size > max_chunk_size).
    while chunk:
        chunks[f"{doc_name}_{chunk_number}"] = chunk[:max_chunk_size]
        chunk = chunk[max_chunk_size:]
        chunk_number += 1
    return chunks
def chunk_documents_to_dict(docs: dict, desired_chunk_size: int = 400, max_chunk_size: int = 500):
    """Chunk every document in *docs* and merge the results into one dict.

    Args:
        docs: Mapping of document name to document text.
        desired_chunk_size: Forwarded to ``chunk_document_to_dict``.
        max_chunk_size: Forwarded to ``chunk_document_to_dict``.

    Returns:
        A single dict holding the chunks of all documents, in the iteration
        order of *docs*. If two documents produce the same chunk ID, the
        later document's chunk replaces the earlier one.
    """
    return {
        chunk_id: chunk_text
        for name, text in docs.items()
        for chunk_id, chunk_text in chunk_document_to_dict(
            text, name, desired_chunk_size, max_chunk_size
        ).items()
    }