Spaces:
Runtime error
Runtime error
def chunk_document_to_dict(doc: str, doc_name: str, desired_chunk_size: int = 400, max_chunk_size: int = 500) -> dict:
    """Split *doc* into line-aligned text chunks keyed by ``"<doc_name>_<n>"``.

    Lines are accumulated (each re-terminated with ``'\\n'``) until the
    buffer reaches ``desired_chunk_size`` characters, at which point it is
    emitted as a chunk. No chunk is ever longer than ``max_chunk_size``;
    text beyond that limit is carried over into the following chunk(s)
    instead of being discarded, so concatenating the values in order
    reproduces the newline-normalised document.

    Args:
        doc: Full document text.
        doc_name: Prefix used to build chunk ids.
        desired_chunk_size: Buffer length at which a chunk is emitted.
        max_chunk_size: Hard upper bound on the length of a single chunk.

    Returns:
        Dict mapping chunk id to chunk text, numbered from 1 in document
        order. An empty document yields an empty dict.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    if max_chunk_size < 1:
        raise ValueError("max_chunk_size must be at least 1")

    chunks = {}
    buffer = ''
    chunk_number = 1
    for line in doc.splitlines():
        buffer += line + '\n'
        # Emit pieces while the buffer is "full". Slicing by max_chunk_size
        # keeps the overflow for the next chunk rather than dropping it.
        # The `buffer` truthiness check stops the loop once everything has
        # been emitted, which matters when desired_chunk_size <= 0.
        while buffer and len(buffer) >= desired_chunk_size:
            chunks[f"{doc_name}_{chunk_number}"] = buffer[:max_chunk_size]
            buffer = buffer[max_chunk_size:]
            chunk_number += 1
    # Put whatever is left over into the final chunk(s); still honour the
    # cap in case desired_chunk_size is larger than max_chunk_size.
    while buffer:
        chunks[f"{doc_name}_{chunk_number}"] = buffer[:max_chunk_size]
        buffer = buffer[max_chunk_size:]
        chunk_number += 1
    return chunks
def chunk_documents_to_dict(docs: dict, desired_chunk_size: int = 400, max_chunk_size: int = 500):
    """Chunk every document in *docs* and merge the results into one dict.

    Args:
        docs: Mapping of document name to document text.
        desired_chunk_size: Forwarded to ``chunk_document_to_dict``.
        max_chunk_size: Forwarded to ``chunk_document_to_dict``.

    Returns:
        A single dict containing the chunks of all documents, in the
        iteration order of *docs*. If two documents produce the same
        chunk id, the later one overwrites the earlier value.
    """
    return {
        chunk_id: chunk_text
        for name, text in docs.items()
        for chunk_id, chunk_text in chunk_document_to_dict(
            text, name, desired_chunk_size, max_chunk_size
        ).items()
    }