Spaces:
Sleeping
Sleeping
File size: 1,029 Bytes
cb22296 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import requests
from bs4 import BeautifulSoup
import pandas as pd
def scrape_char_links(char_dict, start_chap=1, end_chap=5000, continue_last=True):
    """Collect character wiki links from One Piece chapter pages.

    For every chapter number in ``range(start_chap, end_chap)`` the page
    ``https://onepiece.fandom.com/wiki/Chapter_<n>`` is downloaded, the table
    with CSS class ``CharTable`` is located, and for each ``<li>`` in it the
    first ``<a>`` element's ``title`` -> ``href`` pair is recorded.

    Args:
        char_dict: Mapping of link title -> href. It is updated in place;
            titles already present keep their existing value.
        start_chap: First chapter number to fetch (inclusive).
        end_chap: Chapter number to stop at (exclusive, as with ``range``).
        continue_last: Accepted for backward compatibility; it currently has
            no effect on the scrape.

    Returns:
        The same ``char_dict`` object that was passed in.
    """
    for chapter in range(start_chap, end_chap):
        # Lightweight progress indicator for long runs.
        if chapter % 100 == 0:
            print(chapter)

        URL = f'https://onepiece.fandom.com/wiki/Chapter_{chapter}'
        page = requests.get(URL)
        soup = BeautifulSoup(page.content, 'html.parser')

        table = soup.find('table', class_='CharTable')
        if table is None:
            # No such table on this page (e.g. the chapter does not exist):
            # skip the page instead of crashing on ``None.find_all``.
            continue

        for item in table.find_all('li'):
            link = item.find('a')
            if link is None:
                # List entry without a link; nothing to record.
                continue
            title = link.get('title')
            if title is None:
                # Untitled anchors cannot be keyed by title.
                continue
            # Keep the first href seen for each title.
            char_dict.setdefault(title, link.get('href'))

    return char_dict