Spaces:

dsfd-assignment-3
/

witches-exploration

Sleeping

App Files Files Community

witches-exploration / data.py

nickil

Initial Commit

e1a22ca over 1 year ago

raw

history blame contribute delete

2.52 kB

	import sys
	import pandas as pd

	from SPARQLWrapper import SPARQLWrapper, JSON


	# SPARQL endpoint of the Wikidata Query Service.
	endpoint_url = "https://query.wikidata.org/sparql"

	# Query text sent to the endpoint. It selects humans (P31 = Q5) that have a
	# P4478 identifier and a residence (P551) with coordinates (P625). Every
	# other property is wrapped in OPTIONAL, so the corresponding variables may
	# be unbound for a given result row. The "...Label" variables are filled in
	# by the wikibase:label SERVICE. The first line inside the string is a
	# SPARQL comment and is transmitted as part of the query.
	query = """
	# Places of residence of accused witches in Scotland 1563-1736
	SELECT ?accusedurl ?item ?itemLabel ?residenceLabel ?genderLabel ?occupationLabel ?classLabel ?manner_of_inhumane_treatmentLabel ?place_of_detentionLabel ?cause_of_deathLabel ?fatherLabel ?motherLabel ?siblingLabel ?childLabel ?spouseLabel ?coords WHERE {
	?item wdt:P31 wd:Q5;
	wdt:P4478 ?accused.
	wd:P4478 wdt:P1630 ?formatterurl.
	BIND(IRI(REPLACE(?accused, "^(.+)$", ?formatterurl)) AS ?accusedurl)
	?item wdt:P551 ?residence.
	?residence wdt:P625 ?coords.

	OPTIONAL { ?item wdt:P21 ?gender. }
	OPTIONAL { ?item wdt:P106 ?occupation. }
	OPTIONAL { ?item wdt:P3716 ?class. }
	SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
	OPTIONAL { ?item wdt:P7160 ?manner_of_inhumane_treatment. }
	OPTIONAL { ?item wdt:P2632 ?place_of_detention. }
	OPTIONAL { ?item wdt:P509 ?cause_of_death. }
	OPTIONAL { ?item wdt:P22 ?father. }
	OPTIONAL { ?item wdt:P25 ?mother. }
	OPTIONAL { ?item wdt:P3373 ?sibling. }
	OPTIONAL { ?item wdt:P40 ?child. }
	OPTIONAL { ?item wdt:P26 ?spouse. }
	OPTIONAL { ?item wdt:P551 ?residence. }
	}
	"""


	def get_results(endpoint_url, query):
	    """
	    Run a SPARQL query against an endpoint and return the converted result.

	    Args:
	        endpoint_url: URL of the SPARQL endpoint to query.
	        query: SPARQL query text to execute.

	    Returns:
	        The JSON response as converted by ``SPARQLWrapper``.
	    """
	    # TODO adjust user agent; see https://w.wiki/CX6
	    major_minor = sys.version_info[:2]
	    agent_string = "WDQS-example Python/%s.%s" % major_minor

	    client = SPARQLWrapper(endpoint_url, agent=agent_string)
	    client.setReturnFormat(JSON)
	    client.setQuery(query)
	    response = client.query()
	    return response.convert()


	def load_data():
	    """
	    Obtain data for accused witches charged with witchcraft.

	    Runs the module-level ``query`` against ``endpoint_url`` and flattens the
	    result bindings into a DataFrame with one row per binding.

	    Returns:
	        pandas.DataFrame: the rows that have at least one of sibling, spouse,
	        child, father or mother. The ``coords`` value is replaced by float
	        ``longitude`` and ``latitude`` columns, and the ``Label`` suffix is
	        removed from every column name except ``itemLabel``.
	    """
	    relative_columns = ['siblingLabel', 'spouseLabel', 'childLabel', 'fatherLabel', 'motherLabel']

	    bindings = get_results(endpoint_url, query)["results"]["bindings"]
	    rows = [{name: cell['value'] for name, cell in binding.items()} for binding in bindings]
	    data = pd.DataFrame(rows)

	    # A variable that is unbound in every binding produces no column at all
	    # (and an empty result set produces no columns whatsoever). Create the
	    # columns used below so dropna/extract do not fail with a KeyError.
	    for column in relative_columns + ['coords']:
	        if column not in data.columns:
	            data[column] = None

	    # Keep only the people for whom at least one relative is recorded.
	    data.dropna(subset=relative_columns, how="all", inplace=True)

	    # coords look like "Point(<longitude> <latitude>)"; pull both numbers out
	    # in a single pass. Values that do not match this shape become NaN.
	    lon_lat = data['coords'].str.extract(r"\(\s*([^\s()]+)\s+([^\s()]+)\s*\)")
	    data['longitude'] = lon_lat[0].astype(float)
	    data['latitude'] = lon_lat[1].astype(float)
	    data.drop(['coords'], axis=1, inplace=True)

	    # "itemLabel" keeps its suffix because an "item" column already exists.
	    return data.rename(columns=lambda col: col if col == "itemLabel" else col.replace("Label", ""))


	# Module-level DataFrame built once when this module is imported; note that
	# this runs the SPARQL query through load_data() at import time.
	df = load_data()