one_dash / src /scrape_char_details.py
tappyness1
initial commit
cb22296
raw
history blame contribute delete
No virus
1.89 kB
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlite3
from sqlite3 import Error
def create_connection(db_file):
    """Create (and immediately close) a connection to a SQLite database.

    Acts as a connectivity check; as a side effect, SQLite creates the
    database file at ``db_file`` if it does not already exist.

    Args:
        db_file: Path to the SQLite database file.

    Returns:
        None.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
    except Error as e:
        # Report the failure instead of propagating — callers treat this
        # as a best-effort setup step (see commented-out usage in __main__).
        print(e)
    finally:
        # Only close if connect() succeeded; conn stays None otherwise.
        if conn:
            conn.close()
def scrape_char_details(char_link_df, save_file_name):
    """Scrape character infobox details from the One Piece fandom wiki.

    For each relative URL in ``char_link_df['Link']``, fetch the character
    page, parse the portable-infobox ``<aside role="region">``, extract the
    name plus a fixed set of detail fields, and write all collected rows to
    ``save_file_name`` as CSV. Pages that fail to fetch or parse are skipped
    with a message.

    Args:
        char_link_df: DataFrame with a 'Link' column of relative page URLs.
        save_file_name: Path of the output CSV file.

    Returns:
        None. Writes the CSV as a side effect.
    """
    char_links = char_link_df['Link'].tolist()
    # Infobox fields, keyed by the wiki's data-source attribute (hoisted
    # out of the loop — it is constant).
    det_list = ['first', 'affiliation', 'occupation', 'residence',
                'epithet', 'status', 'age', 'bounty', 'dfname']
    # Accumulate plain dicts and build the DataFrame once at the end:
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
    # and appending row-by-row is quadratic anyway.
    rows = []
    for char_link in char_links:
        try:
            URL = f'https://onepiece.fandom.com{char_link}'
            page = requests.get(URL)
            soup = BeautifulSoup(page.content, 'html.parser')
            table = soup.find('aside', {'role': 'region'})
            name = table.find("h2", {"data-source": "name"}).text
            char_det_dict = {"Name": name}
            for det in det_list:
                det_div = table.find("div", {"data-source": det})
                if det_div is None:
                    continue
                text_value = det_div.find("div", {"class": "pi-data-value pi-font"}).text
                if text_value is not None:
                    # NOTE(review): Tag.text is always a str, so this branch
                    # always runs; the link-title fallback below is preserved
                    # from the original but is effectively unreachable.
                    char_det_dict[det] = text_value
                else:
                    char_det_dict[det] = [a.get("title")
                                          for a in det_div.find("div").find_all("a")]
            rows.append(char_det_dict)
        except Exception:
            # Narrowed from a bare except (which would also swallow
            # KeyboardInterrupt/SystemExit); any network or parse failure
            # skips this character and moves on.
            print(f'Unable to process: {char_link}')
            continue
    # Building from a list of dicts unions the keys across rows, matching
    # the old append(..., ignore_index=True) column behavior.
    pd.DataFrame(rows).to_csv(save_file_name, index=False)
if __name__ == '__main__':
    # Optional one-time DB setup, currently disabled:
    # dbname = r"data/OPdash.db"
    # create_connection(dbname)
    # Load the previously scraped character links, then fetch and save
    # the per-character details.
    links_df = pd.read_csv('data/char_link.csv')
    scrape_char_details(links_df, save_file_name="data/char_details.csv")