jfeng1115's picture
init commit
58d33f0
raw
history blame contribute delete
No virus
1.08 kB
"""Loading logic for loading documents from an s3 directory."""
from typing import List
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.s3_file import S3FileLoader
class S3DirectoryLoader(BaseLoader):
"""Loading logic for loading documents from s3."""
def __init__(self, bucket: str, prefix: str = ""):
"""Initialize with bucket and key name."""
self.bucket = bucket
self.prefix = prefix
def load(self) -> List[Document]:
"""Load documents."""
try:
import boto3
except ImportError:
raise ValueError(
"Could not import boto3 python package. "
"Please it install it with `pip install boto3`."
)
s3 = boto3.resource("s3")
bucket = s3.Bucket(self.bucket)
docs = []
for obj in bucket.objects.filter(Prefix=self.prefix):
loader = S3FileLoader(self.bucket, obj.key)
docs.extend(loader.load())
return docs