# nickil's picture
# Initial Commit
# e1a22ca
import sys
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
# SPARQL endpoint that get_results() sends the query to.
endpoint_url = "https://query.wikidata.org/sparql"
# SPARQL query text, sent verbatim. Required patterns: the item is an instance
# of wd:Q5, has a wdt:P4478 identifier, a residence (wdt:P551) and that
# residence has coordinates (wdt:P625). Every other attribute is wrapped in
# OPTIONAL, so those variables can be unbound for a given result row.
# NOTE(review): the last OPTIONAL on wdt:P551 repeats the required residence
# pattern earlier in the query and looks redundant -- confirm before removing.
query = """
# Places of residence of accused witches in Scotland 1563-1736
SELECT ?accusedurl ?item ?itemLabel ?residenceLabel ?genderLabel ?occupationLabel ?classLabel ?manner_of_inhumane_treatmentLabel ?place_of_detentionLabel ?cause_of_deathLabel ?fatherLabel ?motherLabel ?siblingLabel ?childLabel ?spouseLabel ?coords WHERE {
?item wdt:P31 wd:Q5;
wdt:P4478 ?accused.
wd:P4478 wdt:P1630 ?formatterurl.
BIND(IRI(REPLACE(?accused, "^(.+)$", ?formatterurl)) AS ?accusedurl)
?item wdt:P551 ?residence.
?residence wdt:P625 ?coords.
OPTIONAL { ?item wdt:P21 ?gender. }
OPTIONAL { ?item wdt:P106 ?occupation. }
OPTIONAL { ?item wdt:P3716 ?class. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
OPTIONAL { ?item wdt:P7160 ?manner_of_inhumane_treatment. }
OPTIONAL { ?item wdt:P2632 ?place_of_detention. }
OPTIONAL { ?item wdt:P509 ?cause_of_death. }
OPTIONAL { ?item wdt:P22 ?father. }
OPTIONAL { ?item wdt:P25 ?mother. }
OPTIONAL { ?item wdt:P3373 ?sibling. }
OPTIONAL { ?item wdt:P40 ?child. }
OPTIONAL { ?item wdt:P26 ?spouse. }
OPTIONAL { ?item wdt:P551 ?residence. }
}
"""
def get_results(endpoint_url, query):
    """
    Run ``query`` against the SPARQL endpoint at ``endpoint_url``.

    The request asks for JSON output and the converted response is
    returned unchanged.
    """
    # TODO adjust user agent; see https://w.wiki/CX6
    py_major, py_minor = sys.version_info[0], sys.version_info[1]
    agent_string = "WDQS-example Python/%s.%s" % (py_major, py_minor)

    client = SPARQLWrapper(endpoint_url, agent=agent_string)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    response = client.query()
    return response.convert()
def load_data():
    """
    Obtain data for accused witches charged with witchcraft.

    Runs the module-level ``query`` against ``endpoint_url`` and flattens the
    returned JSON bindings into a DataFrame with one row per binding.

    Returns:
        pandas.DataFrame: the rows that have at least one family relation
        (sibling, spouse, child, father or mother). The ``coords`` column is
        replaced by float ``longitude`` and ``latitude`` columns, and the
        ``Label`` suffix is removed from every column name except
        ``itemLabel``. An empty DataFrame is returned when the query yields
        no bindings.
    """
    relation_columns = ['siblingLabel', 'spouseLabel', 'childLabel', 'fatherLabel', 'motherLabel']

    bindings = get_results(endpoint_url, query)["results"]["bindings"]
    rows = [{name: cell['value'] for name, cell in binding.items()} for binding in bindings]
    if not rows:
        # Nothing to parse; avoid a KeyError on the missing 'coords' column.
        return pd.DataFrame()

    data = pd.DataFrame(rows)

    # A relation that is unbound in every binding produces no column at all,
    # and dropna(subset=...) raises KeyError for labels that do not exist.
    # Add such columns as all-NaN so the filter below is always well-defined.
    absent = [col for col in relation_columns if col not in data.columns]
    if absent:
        data = data.reindex(columns=[*data.columns, *absent])
    data.dropna(subset=relation_columns, how="all", inplace=True)

    # coords is text of the form "Point(<first> <second>)"; the first number
    # is stored as longitude and the last as latitude. Tokenise it once.
    tokens = data['coords'].str.replace("Point", "", regex=False).str.split()
    data['longitude'] = tokens.str[0].str.lstrip("(").astype(float)
    data['latitude'] = tokens.str[-1].str.rstrip(")").astype(float)
    data.drop(['coords'], axis=1, inplace=True)

    # Keep "itemLabel" as-is so it does not collide with the "item" column.
    data.columns = [col if col == "itemLabel" else col.replace("Label", "") for col in data.columns]
    return data
# Module-level dataset: load_data() runs (and queries the endpoint) as soon as
# this statement executes.
df = load_data()