one_dash / src /main.py
tappyness1
initial commit
cb22296
from src.scrape_char_details import scrape_char_details
from src.scraper_chap_appearance import scrape_chap_appearances
from src.scraper_char_list import scrape_char_links
from configparser import ConfigParser, ExtendedInterpolation
import argparse
import pandas as pd
import os.path
def main_scraper(path_to_config):
    """Run the scraping stages enabled in the ``[SCRAPER]`` config section.

    The config file is parsed with ``ExtendedInterpolation``. The ``SCRAPER``
    section supplies:

    * ``end_chap`` (int): one is added to it before it is handed to the
      scrapers (presumably so the scrapers can treat it as an exclusive
      upper bound -- confirm against the scraper modules).
    * ``char_link_fp``, ``chap_appearance_fp``, ``char_details_fp``: paths of
      the CSV files used by the stages below.
    * ``char_link``, ``chap_appearance``, ``char_details`` (bool): switches
      for the three stages, evaluated in that order.

    Args:
        path_to_config: Path to the INI file (anything accepted by
            ``ConfigParser.read``).

    Raises:
        FileNotFoundError: If the config file could not be read at all.
        KeyError: If the file has no ``SCRAPER`` section.
    """
    pl_config = ConfigParser(interpolation=ExtendedInterpolation())
    # ConfigParser.read() silently skips files it cannot open and only returns
    # the names it managed to parse. Without this check a wrong path shows up
    # later as a confusing KeyError on 'SCRAPER'.
    if not pl_config.read(path_to_config):
        raise FileNotFoundError(
            "could not read config file: {}".format(path_to_config)
        )

    scraper_cfg = pl_config['SCRAPER']
    end_chap = scraper_cfg.getint('end_chap') + 1
    char_link_fp = scraper_cfg.get('char_link_fp')
    chap_appearance_fp = scraper_cfg.get('chap_appearance_fp')
    char_details_fp = scraper_cfg.get('char_details_fp')

    if scraper_cfg.getboolean('char_link'):
        print ("scraping char links")
        # The scraper's return value is not used; it is expected to fill
        # char_dict in place, keyed by what becomes the 'index' column.
        char_dict = {}
        scrape_char_links(char_dict, end_chap=end_chap)
        df = pd.DataFrame.from_dict(char_dict, orient='index', columns=['Link'])
        df = df.reset_index()
        df.to_csv(char_link_fp, index=False)

    if scraper_cfg.getboolean('chap_appearance'):
        print ("scraping char appearance")
        # Hand any previously saved table to the scraper; start from an empty
        # frame when no file exists yet.
        if os.path.exists(chap_appearance_fp):
            df = pd.read_csv(chap_appearance_fp)
        else:
            df = pd.DataFrame()
        newdf = scrape_chap_appearances(df=df, end_chap=end_chap)
        newdf.to_csv(chap_appearance_fp, index=False)

    if scraper_cfg.getboolean('char_details'):
        print ("scraping character details")
        # This stage reads the CSV written by the char_link stage, so that
        # file must already exist (from this run or an earlier one).
        char_link_df = pd.read_csv(char_link_fp)
        scrape_char_details(char_link_df, save_file_name=char_details_fp)
if __name__ == '__main__':
    # Command-line entry point: the only option is the config file location,
    # which falls back to cfg/cfg.ini when not given.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--path_to_config',
        default='cfg/cfg.ini',
        help='path to config file',
    )
    cli_args = cli.parse_args()
    main_scraper(path_to_config=cli_args.path_to_config)