Spaces:
Runtime error
Runtime error
File size: 971 Bytes
283e21a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
import util
import pandas as pd
import os
class Library(object):
    """Container for a table of paper records and a cleaned copy of it.

    The raw table is stored on ``self.raw_lib`` by one of the ``load_*``
    methods; ``clean_library`` then derives ``self.clean_lib`` from it.
    """

    def load_from_file(self, library_name):
        """Read the parquet file ``./data/<library_name>`` into ``self.raw_lib``."""
        parquet_path = os.path.join('./data', library_name)
        self.raw_lib = pd.read_parquet(parquet_path)

    def load_from_query(self, query_string, max_results):
        """Store the result of ``util.query_to_df`` in ``self.raw_lib``.

        Both arguments are forwarded unchanged to ``util.query_to_df``.
        """
        self.raw_lib = util.query_to_df(query_string, max_results)

    def clean_library(self):
        """Build ``self.clean_lib`` from ``self.raw_lib``.

        Requires ``self.raw_lib`` to have been set by a ``load_*`` method.
        The exact transformations are defined by the ``util`` helpers
        (``cleanse``, ``find_hyph``, ``find_msc``, ``msc_to_eng``), which
        are not visible here.
        """
        kept_columns = ['title', 'summary', 'authors', 'primary_category', 'categories']
        # Work on a copy restricted to these columns so raw_lib is left untouched.
        frame = self.raw_lib[kept_columns].copy()

        # Cleanse the text columns in place (title first, then summary).
        for text_col in ('title', 'summary'):
            frame[text_col] = frame[text_col].apply(util.cleanse)

        # The hyphen columns are computed from the already-cleansed text.
        hyphen_columns = (
            ('hyph_in_summary', 'summary'),
            ('hyph_in_title', 'title'),
        )
        for new_col, source_col in hyphen_columns:
            frame[new_col] = frame[source_col].apply(util.find_hyph)

        # Two-step mapping: find_msc on each categories entry, then msc_to_eng
        # on each result.
        msc_found = frame['categories'].apply(util.find_msc)
        frame['msc_tags'] = msc_found.apply(util.msc_to_eng)

        self.clean_lib = frame