jeffrey
init commit
37c1830
raw
history blame
590 Bytes
from typing import Callable, List
from autorag.data.parse import langchain_parse
from autorag.data.parse.base import _add_last_modified_datetime
from autorag.utils import result_to_dataframe
@result_to_dataframe(["texts", "path", "page", "last_modified_datetime"])
def original_parse(fn: Callable, **kwargs):
    """Invoke a parse callable and post-process its output.

    ``fn`` is called with ``**kwargs`` and whatever it returns is handed to
    ``_add_last_modified_datetime``; the value that helper returns is what
    this function body returns.

    The ``result_to_dataframe`` decorator is configured with the names
    ``texts``, ``path``, ``page`` and ``last_modified_datetime``.
    NOTE(review): presumably the decorator turns the returned value into a
    DataFrame with those columns -- confirm against ``autorag.utils``.

    :param fn: Callable that performs the actual parsing.
    :param kwargs: Keyword arguments forwarded unchanged to ``fn``.
    :return: Output of ``_add_last_modified_datetime`` (before the decorator
        is applied).
    """
    return _add_last_modified_datetime(fn(**kwargs))
def parse_pdf(file_lists: List[str], parse_method: str = "pdfminer"):
    """Parse the given files through ``original_parse`` and ``langchain_parse``.

    ``langchain_parse.__wrapped__`` is passed as the parse callable.
    NOTE(review): ``__wrapped__`` is presumably the undecorated function
    (the ``functools.wraps`` convention), so that only ``original_parse``'s
    own decorator is applied -- confirm against ``langchain_parse``.

    :param file_lists: Paths forwarded as ``data_path_list``.
    :param parse_method: Parser name forwarded as ``parse_method``;
        defaults to ``"pdfminer"``.
    :return: Whatever ``original_parse`` returns for these arguments.
    """
    return original_parse(
        langchain_parse.__wrapped__,
        data_path_list=file_lists,
        parse_method=parse_method,
    )