def chunk_document_to_dict(
    doc: str,
    doc_name: str,
    desired_chunk_size: int = 400,
    max_chunk_size: int = 500,
) -> dict:
    """Split *doc* into line-based chunks keyed by ``"<doc_name>_<n>"``.

    Lines are accumulated (each re-terminated with ``'\\n'``, so line endings
    are normalised) until the buffer holds at least ``desired_chunk_size``
    characters; the buffer is then emitted as a chunk. No chunk is ever
    longer than ``max_chunk_size``: text beyond the cap is carried over
    into the following chunk(s) rather than being dropped, so joining the
    returned values in order reproduces the normalised document.

    Args:
        doc: Text to split.
        doc_name: Prefix for the generated chunk ids.
        desired_chunk_size: Buffer length at which a chunk is emitted.
        max_chunk_size: Hard upper bound on the length of any chunk.

    Returns:
        Dict mapping ``f"{doc_name}_{n}"`` (``n`` starting at 1) to chunk
        text, in document order. Empty when *doc* has no lines.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    if max_chunk_size < 1:
        # A non-positive cap can never be satisfied and would make the
        # carry-over loops below spin forever.
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = {}
    buffer = ''
    chunk_number = 1

    for line in doc.splitlines():
        buffer += line + '\n'
        # Emit while the threshold is reached. Anything past the cap stays
        # in the buffer (instead of being discarded) and is re-checked, so
        # one very long line may yield several chunks.
        while buffer and len(buffer) >= desired_chunk_size:
            chunks[f"{doc_name}_{chunk_number}"] = buffer[:max_chunk_size]
            buffer = buffer[max_chunk_size:]
            chunk_number += 1

    # Flush whatever is left into the final chunk(s), still honouring the cap
    # (only relevant when desired_chunk_size exceeds max_chunk_size).
    while buffer:
        chunks[f"{doc_name}_{chunk_number}"] = buffer[:max_chunk_size]
        buffer = buffer[max_chunk_size:]
        chunk_number += 1

    return chunks


def chunk_documents_to_dict(
    docs: dict,
    desired_chunk_size: int = 400,
    max_chunk_size: int = 500,
) -> dict:
    """Chunk every document in *docs* and merge the results into one dict.

    Args:
        docs: Mapping of document name to document text.
        desired_chunk_size: Passed through to ``chunk_document_to_dict``.
        max_chunk_size: Passed through to ``chunk_document_to_dict``.

    Returns:
        A single dict containing the chunks of all documents, keyed by
        ``"<doc_name>_<n>"``. If two documents produce the same key, the
        later one wins.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1 and *docs* is
            not empty.
    """
    all_chunks = {}
    for doc_name, doc_text in docs.items():
        all_chunks.update(
            chunk_document_to_dict(
                doc_text, doc_name, desired_chunk_size, max_chunk_size
            )
        )
    return all_chunks