Spaces:
Runtime error
Runtime error
File size: 1,203 Bytes
06bca0c 6aad21a 06bca0c 6aad21a 06bca0c 25a0d11 06bca0c 6aad21a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import pandas as pd
from buster.retriever.base import ALL_SOURCES, Retriever
class PickleRetriever(Retriever):
    """Retriever that serves documents from a pickled pandas DataFrame.

    The pickle is read once when the retriever is constructed and the
    resulting DataFrame is kept on ``self.documents``. ``get_documents``
    never modifies that cached frame; it hands out filtered copies.
    """

    def __init__(self, filepath: str):
        """Load the documents stored at ``filepath``.

        Args:
            filepath: Path to a pickle file readable by ``pandas.read_pickle``.
        """
        self.filepath = filepath
        # NOTE(security): unpickling can execute arbitrary code. Only point
        # this at files from a trusted origin.
        self.documents = pd.read_pickle(filepath)

    def get_documents(self, source: str) -> pd.DataFrame:
        """Get all current documents from a given source.

        Args:
            source: Name of the source to keep. ``None`` or ``""`` disables
                the source filter. The filter is also skipped when the
                documents have no ``source`` column.

        Returns:
            A new DataFrame, independent of the cached one. When a
            ``current`` column is present, only rows where it equals ``1``
            are kept and the column itself is removed from the result.

        Raises:
            FileNotFoundError: If ``self.documents`` is ``None``.
        """
        if self.documents is None:
            raise FileNotFoundError(f"No documents found at {self.filepath}. Are you sure this is the correct path?")

        documents = self.documents

        # The `current` column exists when multiple versions of a document exist
        if "current" in documents.columns:
            # Use a non-inplace drop: dropping in place on the result of a
            # boolean selection triggers pandas' SettingWithCopyWarning.
            documents = documents[documents["current"] == 1].drop(columns=["current"])

        if source not in [None, ""] and "source" in documents.columns:
            documents = documents[documents["source"] == source]

        # Always return an independent frame so callers can mutate it freely
        # without touching (or being warned about) the cached documents.
        return documents.copy()

    def get_source_display_name(self, source: str) -> str:
        """Get the display name of a source.

        Args:
            source: Source name, or ``None`` to mean "no specific source".

        Returns:
            ``ALL_SOURCES`` when ``source`` is ``None``; otherwise ``source``
            unchanged.
        """
        return ALL_SOURCES if source is None else source
|