File size: 1,203 Bytes
06bca0c
 
6aad21a
06bca0c
 
 
 
 
 
 
 
6aad21a
06bca0c
 
 
 
 
 
 
 
 
 
 
25a0d11
06bca0c
 
 
6aad21a
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas as pd

from buster.retriever.base import ALL_SOURCES, Retriever


class PickleRetriever(Retriever):
    """Retriever that serves documents from a pickled pandas DataFrame.

    The pickle is read once in ``__init__`` and kept in ``self.documents``;
    every query filters that in-memory frame and hands back an independent
    copy, so callers can never mutate the retriever's stored data.
    """

    def __init__(self, filepath: str):
        """Load the pickled documents stored at ``filepath``.

        Args:
            filepath: Path passed straight to ``pd.read_pickle``.
        """
        self.filepath = filepath
        # NOTE(security): unpickling can execute arbitrary code embedded in
        # the file. Only point this retriever at pickles from a trusted source.
        self.documents = pd.read_pickle(filepath)

    @staticmethod
    def _is_all_sources(source) -> bool:
        """Return True when ``source`` is a "no filter" sentinel (``None`` or ``""``)."""
        return source in (None, "")

    def get_documents(self, source: str) -> pd.DataFrame:
        """Get all current documents from a given source.

        Args:
            source: Name of the source to keep. ``None`` or ``""`` disables
                the source filter.

        Returns:
            A new DataFrame (never a reference to ``self.documents``). When a
            ``current`` column exists, only rows with ``current == 1`` are
            kept and the column is removed. When a ``source`` column exists
            and a source was requested, only matching rows are kept.

        Raises:
            FileNotFoundError: If ``self.documents`` is ``None``.
        """
        # NOTE(review): pd.read_pickle is expected to raise on a missing file
        # by itself, so this guard presumably only fires when the attribute
        # ends up as None some other way (e.g. the pickle held None).
        if self.documents is None:
            raise FileNotFoundError(f"No documents found at {self.filepath}. Are you sure this is the correct path?")

        documents = self.documents

        # The `current` column exists when multiple versions of a document exist
        if "current" in documents.columns:
            # Non-inplace drop: mutating a boolean-indexed slice in place is
            # what triggers pandas' SettingWithCopyWarning.
            documents = documents[documents["current"] == 1].drop(columns=["current"])

        if not self._is_all_sources(source) and "source" in documents.columns:
            documents = documents[documents["source"] == source]

        # Copy last: cheaper than copying the whole frame up front, and it
        # guarantees the result is detached from the stored documents even
        # when no filter was applied.
        return documents.copy()

    def get_source_display_name(self, source: str) -> str:
        """Get the display name of a source.

        Args:
            source: Name of the source. ``None`` or ``""`` mean "all sources",
                matching how ``get_documents`` interprets them.

        Returns:
            ``ALL_SOURCES`` for the "all sources" sentinels, otherwise
            ``source`` unchanged.
        """
        if self._is_all_sources(source):
            return ALL_SOURCES
        return source