Spaces:
Sleeping
Sleeping
from src.scrape_char_details import scrape_char_details | |
from src.scraper_chap_appearance import scrape_chap_appearances | |
from src.scraper_char_list import scrape_char_links | |
from configparser import ConfigParser, ExtendedInterpolation | |
import argparse | |
import pandas as pd | |
import os.path | |
def main_scraper(path_to_config):
    """Run the scraping stages enabled in the ``[SCRAPER]`` config section.

    The config file is parsed with ``ExtendedInterpolation``. The
    ``[SCRAPER]`` section supplies:

    * ``end_chap`` (int) -- passed to the scrapers as ``end_chap`` after
      being incremented by one.
    * ``char_link_fp``, ``chap_appearance_fp``, ``char_details_fp`` --
      file paths read/written by the stages below.
    * ``char_link``, ``chap_appearance``, ``char_details`` (booleans) --
      switches that enable each stage.

    Stages run in this order, each only when its switch is true:

    1. ``char_link``: calls ``scrape_char_links`` with a fresh dict and
       writes that dict to ``char_link_fp`` as CSV (index column plus a
       ``Link`` column).
    2. ``chap_appearance``: loads ``chap_appearance_fp`` if it exists
       (otherwise starts from an empty DataFrame), passes it to
       ``scrape_chap_appearances`` and writes the returned frame back to
       the same path.
    3. ``char_details``: reads ``char_link_fp`` and hands the frame to
       ``scrape_char_details`` together with ``char_details_fp``.

    Args:
        path_to_config: Path of the INI file to load.

    Raises:
        FileNotFoundError: If the config file could not be read.
        KeyError: If the file has no ``[SCRAPER]`` section.
    """
    pl_config = ConfigParser(interpolation=ExtendedInterpolation())
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did parse; without this check a bad path would only
    # show up later as an unhelpful KeyError on the 'SCRAPER' section.
    if not pl_config.read(path_to_config):
        raise FileNotFoundError(
            "could not read config file: {}".format(path_to_config)
        )

    scraper_cfg = pl_config['SCRAPER']

    # NOTE(review): the +1 presumably turns an inclusive last chapter into an
    # exclusive upper bound for the scrapers -- confirm against src/.
    end_chap = scraper_cfg.getint('end_chap') + 1
    char_link_fp = scraper_cfg.get('char_link_fp')
    chap_appearance_fp = scraper_cfg.get('chap_appearance_fp')
    char_details_fp = scraper_cfg.get('char_details_fp')

    if scraper_cfg.getboolean('char_link'):
        print("scraping char links")
        # The return value of scrape_char_links is ignored; the dict passed in
        # is what gets saved, so it is presumably filled in place.
        char_dict = {}
        scrape_char_links(char_dict, end_chap=end_chap)
        df = pd.DataFrame.from_dict(char_dict, orient='index', columns=['Link'])
        # Move the dict keys out of the index so they are saved as a column.
        df = df.reset_index()
        df.to_csv(char_link_fp, index=False)

    if scraper_cfg.getboolean('chap_appearance'):
        print("scraping char appearance")
        # Hand any previously saved results to the scraper; start from an
        # empty frame on the first run.
        if os.path.exists(chap_appearance_fp):
            df = pd.read_csv(chap_appearance_fp)
        else:
            df = pd.DataFrame()
        newdf = scrape_chap_appearances(df=df, end_chap=end_chap)
        newdf.to_csv(chap_appearance_fp, index=False)

    if scraper_cfg.getboolean('char_details'):
        print("scraping character details")
        # Reads the CSV written by the char_link stage (in this run or an
        # earlier one).
        char_link_df = pd.read_csv(char_link_fp)
        scrape_char_details(char_link_df, save_file_name=char_details_fp)
if __name__ == '__main__':
    # Script entry point: take the config location from the command line
    # (falling back to cfg/cfg.ini) and run the scraper with it.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--path_to_config',
        default='cfg/cfg.ini',
        help='path to config file',
    )
    cli_args = cli.parse_args()
    main_scraper(path_to_config=cli_args.path_to_config)