import os

import pandas as pd

from .sourcer import search_web

# Directory holding the dataset files, relative to the working directory.
root_dir = 'data/datasets'

# Loaded once at import time; gen_corpus reads its ``text`` column.
pira_df = pd.read_csv(os.path.join(root_dir, 'pira_simplified.csv'))


def gen_corpus(query: str, pira: bool = True, ONU: bool = True, web: bool = True) -> list:
    """Build a corpus by concatenating the selected sources.

    Args:
        query: Search string forwarded to ``search_web`` when ``web`` is true.
        pira: Include every entry of the ``text`` column of ``pira_df``.
        ONU: Reserved for a PDF-based source that is not implemented yet;
            the flag currently contributes nothing to the corpus.
        web: Include whatever ``search_web(query)`` returns.

    Returns:
        A list with the PIRA texts first (if requested), followed by the
        web search results (if requested).

    Raises:
        ValueError: If ``pira``, ``ONU`` and ``web`` are all false, since no
            source is left to build a corpus from.
    """
    if not (pira or ONU or web):
        # Previously this fell through and silently returned an empty list.
        raise ValueError(
            "gen_corpus requires at least one source: pira, ONU or web"
        )

    corpus = []

    if pira:
        corpus.extend(pira_df.text.to_list())

    if ONU:
        # TODO: implement PDFs
        pass

    if web:
        # NOTE(review): assumes search_web returns an iterable of documents
        # -- confirm against .sourcer.
        corpus.extend(search_web(query))

    return corpus