MLhouse-RAG / input_reader.py
acumplid
base app
a8bf50c
raw
history blame contribute delete
765 Bytes
from typing import List
from llama_index.core.constants import DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.schema import Document
from llama_index.core import Settings
class InputReader:
    """Thin wrapper that loads documents from a directory.

    The wrapped ``SimpleDirectoryReader`` is kept on ``self.reader`` so that
    ``parse_documents`` can be called after construction.
    """

    def __init__(self, input_dir: str) -> None:
        """Build the underlying directory reader.

        Args:
            input_dir: Directory path forwarded to ``SimpleDirectoryReader``
                as its ``input_dir`` argument.
        """
        directory_reader = SimpleDirectoryReader(input_dir=input_dir)
        self.reader = directory_reader

    def parse_documents(
        self,
        show_progress: bool = True,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
    ) -> List[Document]:
        """Record the chunking settings and load the documents.

        Args:
            show_progress: Forwarded to ``load_data`` as ``show_progress``.
            chunk_size: Value assigned to ``Settings.chunk_size``.
            chunk_overlap: Value assigned to ``Settings.chunk_overlap``.

        Returns:
            Whatever ``self.reader.load_data`` returns (annotated as a list
            of ``Document`` objects).
        """
        # The chunking values are written onto the imported ``Settings``
        # object, not onto this instance.
        # NOTE(review): this looks like process-wide configuration that
        # outlives the call -- confirm that callers expect that.
        Settings.chunk_size = chunk_size
        Settings.chunk_overlap = chunk_overlap

        return self.reader.load_data(show_progress=show_progress)