Spaces:
Runtime error
Runtime error
import logging | |
import os | |
import tempfile | |
from typing import Any, Iterator, List | |
from langchain.docstore.document import Document | |
from langchain.document_loaders.base import BaseLoader | |
from langchain.document_loaders.unstructured import UnstructuredFileLoader | |
logger = logging.getLogger(__name__) | |
class BaiduBOSFileLoader(BaseLoader):
    """Load from `Baidu Cloud BOS` file.

    The object identified by ``bucket``/``key`` is downloaded into a
    temporary directory and parsed with ``UnstructuredFileLoader``.
    """

    def __init__(self, conf: Any, bucket: str, key: str):
        """Initialize with BOS config, bucket and key name.

        :param conf(BceClientConfiguration): BOS config, passed unchanged to
            ``BosClient``.
        :param bucket(str): BOS bucket.
        :param key(str): BOS file key.
        """
        self.conf = conf
        self.bucket = bucket
        self.key = key

    def load(self) -> List[Document]:
        """Load the BOS object and return its documents as a list."""
        return list(self.lazy_load())

    def lazy_load(self) -> Iterator[Document]:
        """Load documents.

        Downloads the object to a temporary file, parses it, and returns an
        iterator over the resulting documents.

        :return: Iterator over the parsed documents. If parsing the
            downloaded file fails, the error is logged and an iterator over
            a single ``Document`` with empty ``page_content`` is returned.
        :raises ImportError: If the ``bce-python-sdk`` package is not
            installed.
        """
        try:
            from baidubce.services.bos.bos_client import BosClient
        except ImportError as exc:
            # Chain the original error so the underlying import failure
            # stays visible in the traceback.
            raise ImportError(
                "Please using `pip install bce-python-sdk`"
                + " before import bos related package."
            ) from exc

        # Initialize BOS Client
        client = BosClient(self.conf)
        with tempfile.TemporaryDirectory() as temp_dir:
            # Keep the bucket/key layout under the temporary directory; the
            # key may contain "/" separators, so create parent directories.
            file_path = f"{temp_dir}/{self.bucket}/{self.key}"
            os.makedirs(os.path.dirname(file_path), exist_ok=True)

            # Download the file to a destination
            logger.debug("get object key %s to file %s", self.key, file_path)
            client.get_object_to_file(self.bucket, self.key, file_path)

            # Parse while the temporary directory still exists; the file is
            # removed as soon as the ``with`` block exits.
            try:
                documents = UnstructuredFileLoader(file_path).load()
            except Exception as ex:
                # Deliberate best-effort: a file that cannot be parsed yields
                # one empty document instead of aborting the caller. Log with
                # the traceback so the failure is diagnosable.
                logger.exception("load document error = %s", ex)
                return iter([Document(page_content="")])
            return iter(documents)