Spaces:
Runtime error
Runtime error
from .sourcer import search_web | |
import pandas as pd | |
import os | |
import glob | |
# Static corpora are loaded once at import time and shared by every
# gen_corpus() call.
root_dir = 'data/datasets'

# Simplified Pira dataset: one corpus entry per row of its `text` column.
pira_df = pd.read_csv(os.path.join(root_dir, 'pira_simplified.csv'))
pira_corpus = pira_df['text'].to_list()

# ONU documents: one corpus entry per .txt file under <root_dir>/onu.
txt_path = os.path.join(root_dir, 'onu')
# glob.glob() returns paths in arbitrary (filesystem) order; sorting keeps
# the corpus order reproducible across machines and runs.
filenames = sorted(glob.glob(os.path.join(txt_path, '*.txt')))

onu_corpus = []
for filename in filenames:
    # Explicit encoding so decoding does not depend on the host locale.
    # NOTE(review): assumes the .txt files are UTF-8 encoded - confirm.
    with open(filename, 'r', encoding='utf-8') as f:
        onu_corpus.append(f.read())
def gen_corpus(query: str, pira: bool = True, ONU: bool = True, web: bool = True) -> list:
    """Build the list of documents to search for ``query``.

    Args:
        query: Search string. It is only used for the web source, where it
            is passed to ``search_web``.
        pira: Include the entries of the module-level ``pira_corpus``.
        ONU: Include the entries of the module-level ``onu_corpus``.
        web: Include the results of ``search_web(query)``.
            NOTE(review): presumably an iterable of text passages - confirm
            against ``sourcer.search_web``.

    Returns:
        A new list holding the selected sources concatenated in the order
        pira, ONU, web. The module-level corpora are not modified.

    Raises:
        ValueError: If ``pira``, ``ONU`` and ``web`` are all false, since
            there would be nothing to build a corpus from.
    """
    # Fail fast instead of silently handing back an empty corpus.
    if not (pira or ONU or web):
        raise ValueError(
            "gen_corpus needs at least one source enabled (pira, ONU or web)"
        )

    corpus = []
    if pira:
        corpus.extend(pira_corpus)
    if ONU:
        corpus.extend(onu_corpus)
    if web:
        corpus.extend(search_web(query))
    return corpus