Spaces:

tappyness1
/

one_dash

Sleeping

one_dash / src /scraper_char_list.py

tappyness1

initial commit

cb22296 about 1 year ago

No virus

1.03 kB

	import requests
	from bs4 import BeautifulSoup
	import pandas as pd

	def scrape_char_links(char_dict, start_chap = 1, end_chap =5000, continue_last = True):
	    """Collect character links from One Piece wiki chapter pages.

	    For each chapter number in ``range(start_chap, end_chap)`` the page
	    ``https://onepiece.fandom.com/wiki/Chapter_{i}`` is fetched, the first
	    ``<table class="CharTable">`` is located, and every ``<li>`` in it that
	    contains an ``<a>`` with a ``title`` contributes one entry to
	    ``char_dict`` (``title`` -> ``href``).

	    Args:
	        char_dict: Mapping that is updated in place. Titles already present
	            keep their existing value (first occurrence wins).
	        start_chap: First chapter number to fetch (inclusive).
	        end_chap: Chapter number to stop at (exclusive).
	        continue_last: Currently ignored; kept so existing callers that
	            pass it keep working.

	    Returns:
	        The same ``char_dict`` object that was passed in.

	    Raises:
	        requests.RequestException: If a page cannot be fetched (including
	            a timeout); network errors are not swallowed.
	    """
	    for i in range(start_chap, end_chap):
	        # Coarse progress indicator for long runs.
	        if i % 100 == 0:
	            print (i)

	        URL = f'https://onepiece.fandom.com/wiki/Chapter_{i}'
	        # A timeout keeps one stalled connection from hanging the whole run.
	        page = requests.get(URL, timeout=30)

	        soup = BeautifulSoup(page.content, 'html.parser')
	        table = soup.find('table', class_='CharTable')
	        if table is None:
	            # Page has no character table (for example the chapter page
	            # does not exist): skip it instead of crashing on None.
	            continue

	        for elem in table.find_all('li'):
	            anchor = elem.find('a')
	            if anchor is None:
	                # List item without a link: nothing to record.
	                continue
	            title = anchor.get('title')
	            if title is None:
	                # Without a title there is no usable dictionary key.
	                continue
	            # setdefault keeps the first href seen for a given title.
	            char_dict.setdefault(title, anchor.get('href'))
	    return char_dict