Spaces:
Runtime error
Runtime error
File size: 733 Bytes
e539b70 8883a1c e539b70 8883a1c e539b70 8883a1c e539b70 8883a1c e539b70 8883a1c e539b70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from .sourcer import search_web
import pandas as pd
import os
import glob
# Directory holding the static datasets that are loaded once, at import time.
root_dir = 'data/datasets'

# pira corpus: the `text` column of pira_simplified.csv, one list entry per row.
pira_df = pd.read_csv(os.path.join(root_dir, 'pira_simplified.csv'))
pira_corpus = pira_df.text.to_list()

# ONU corpus: the full contents of every *.txt file found under <root_dir>/onu.
txt_path = os.path.join(root_dir, 'onu')
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the order of onu_corpus reproducible across runs and machines.
filenames = sorted(glob.glob(txt_path + '/*.txt'))
onu_corpus = []
for filename in filenames:
    # Decode explicitly instead of relying on the platform default encoding.
    # NOTE(review): assumes the .txt files are UTF-8 (pd.read_csv above also
    # defaults to UTF-8) -- confirm against the dataset.
    with open(filename, 'r', encoding='utf-8') as f:
        onu_corpus.append(f.read())
def gen_corpus(query: str, pira: bool = True, ONU: bool = True, web: bool = True) -> list:
    """Assemble the list of documents to use for ``query``.

    Sources are appended in a fixed order: pira, then ONU, then web.

    Args:
        query: Search string; only forwarded to ``search_web`` when ``web``
            is enabled.
        pira: Include the entries of the module-level ``pira_corpus``.
        ONU: Include the entries of the module-level ``onu_corpus``.
        web: Include whatever ``search_web(query)`` yields (presumably a
            list of text snippets -- verify against ``sourcer.search_web``).

    Returns:
        A new list holding the selected documents. It is empty when every
        source is disabled.
    """
    # TODO: raise error when no source is selected (pira, ONU and web all
    # falsy); for now that case simply falls through and yields an empty list.
    documents = []
    if pira:
        documents.extend(pira_corpus)
    if ONU:
        documents.extend(onu_corpus)
    if web:
        documents.extend(search_web(query))
    return documents