Spaces:
Runtime error
Runtime error
| import asyncio | |
| import json | |
| import uuid | |
| from typing import Any, Sequence | |
| from langchain_core.documents import BaseDocumentTransformer, Document | |
| from langchain.tools.nuclia.tool import NucliaUnderstandingAPI | |
| class NucliaTextTransformer(BaseDocumentTransformer): | |
| """ | |
| The Nuclia Understanding API splits into paragraphs and sentences, | |
| identifies entities, provides a summary of the text and generates | |
| embeddings for all sentences. | |
| """ | |
| def __init__(self, nua: NucliaUnderstandingAPI): | |
| self.nua = nua | |
| def transform_documents( | |
| self, documents: Sequence[Document], **kwargs: Any | |
| ) -> Sequence[Document]: | |
| raise NotImplementedError | |
| async def atransform_documents( | |
| self, documents: Sequence[Document], **kwargs: Any | |
| ) -> Sequence[Document]: | |
| tasks = [ | |
| self.nua.arun( | |
| { | |
| "action": "push", | |
| "id": str(uuid.uuid4()), | |
| "text": doc.page_content, | |
| "path": None, | |
| } | |
| ) | |
| for doc in documents | |
| ] | |
| results = await asyncio.gather(*tasks) | |
| for doc, result in zip(documents, results): | |
| obj = json.loads(result) | |
| metadata = { | |
| "file": obj["file_extracted_data"][0], | |
| "metadata": obj["field_metadata"][0], | |
| } | |
| doc.metadata["nuclia"] = metadata | |
| return documents | |