import sys

import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"

# NOTE: the first line of the query is a SPARQL comment. It has to stay on a
# line of its own; if the query text is ever joined onto one line, the "#"
# comments out the entire SELECT.
query = """
# Places of residence of accused witches in Scotland 1563-1736
SELECT ?accusedurl ?item ?itemLabel ?residenceLabel ?genderLabel
       ?occupationLabel ?classLabel ?manner_of_inhumane_treatmentLabel
       ?place_of_detentionLabel ?cause_of_deathLabel ?fatherLabel
       ?motherLabel ?siblingLabel ?childLabel ?spouseLabel ?coords
WHERE {
  ?item wdt:P31 wd:Q5;
        wdt:P4478 ?accused.
  wd:P4478 wdt:P1630 ?formatterurl.
  BIND(IRI(REPLACE(?accused, "^(.+)$", ?formatterurl)) AS ?accusedurl)
  ?item wdt:P551 ?residence.
  ?residence wdt:P625 ?coords.
  OPTIONAL { ?item wdt:P21 ?gender. }
  OPTIONAL { ?item wdt:P106 ?occupation. }
  OPTIONAL { ?item wdt:P3716 ?class. }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  OPTIONAL { ?item wdt:P7160 ?manner_of_inhumane_treatment. }
  OPTIONAL { ?item wdt:P2632 ?place_of_detention. }
  OPTIONAL { ?item wdt:P509 ?cause_of_death. }
  OPTIONAL { ?item wdt:P22 ?father. }
  OPTIONAL { ?item wdt:P25 ?mother. }
  OPTIONAL { ?item wdt:P3373 ?sibling. }
  OPTIONAL { ?item wdt:P40 ?child. }
  OPTIONAL { ?item wdt:P26 ?spouse. }
  OPTIONAL { ?item wdt:P551 ?residence. }
}
"""

# Relationship columns: a row is kept only if at least one of them is set.
_FAMILY_COLUMNS = (
    "siblingLabel",
    "spouseLabel",
    "childLabel",
    "fatherLabel",
    "motherLabel",
)

# Matches the "Point(<first> <second>)" coordinate literal; the first number
# is used as the longitude and the second as the latitude.
_POINT_PATTERN = r"Point\(\s*(?P<longitude>\S+)\s+(?P<latitude>[^\s)]+)\s*\)"


def get_results(endpoint_url, query):
    """Run a SPARQL query and return the decoded JSON response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to send the query to.
        query: SPARQL query text.

    Returns:
        The converted JSON result of the query, as produced by
        ``SPARQLWrapper.query().convert()``.
    """
    # TODO adjust user agent; see https://w.wiki/CX6
    user_agent = "WDQS-example Python/%s.%s" % (
        sys.version_info[0],
        sys.version_info[1],
    )
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def load_data():
    """Obtain data for accused witches charged with witchcraft.

    Runs the module-level ``query`` against ``endpoint_url`` and turns the
    bindings into a DataFrame with one row per binding:

    * rows with none of the family columns (sibling, spouse, child, father,
      mother) set are dropped;
    * the ``coords`` literal is split into float ``longitude`` and
      ``latitude`` columns and then removed;
    * the ``Label`` suffix is stripped from every column name except
      ``itemLabel``.

    Returns:
        The cleaned DataFrame. If the endpoint returns no bindings, an empty
        DataFrame without columns is returned.
    """
    bindings = get_results(endpoint_url, query)["results"]["bindings"]
    # Each binding maps a variable name to a dict; only its "value" is kept.
    rows = [
        {name: cell["value"] for name, cell in binding.items()}
        for binding in bindings
    ]
    if not rows:
        # Nothing to clean; without this the column lookups below would fail.
        return pd.DataFrame()

    data = pd.DataFrame(rows)

    # Variables that are unbound in every row never become columns, so make
    # sure the family columns exist before filtering on them.
    missing = [col for col in _FAMILY_COLUMNS if col not in data.columns]
    if missing:
        data = data.reindex(columns=list(data.columns) + missing)
    data = data.dropna(subset=list(_FAMILY_COLUMNS), how="all")

    # Parse both coordinates in one pass; literals that do not match the
    # pattern yield NaN instead of aborting the whole load.
    point = data["coords"].str.extract(_POINT_PATTERN).astype(float)
    data = data.assign(
        longitude=point["longitude"],
        latitude=point["latitude"],
    ).drop(columns=["coords"])

    data.columns = [
        col if col == "itemLabel" else col.replace("Label", "")
        for col in data.columns
    ]
    return data


# Loaded once at import time so that ``df`` is available to importers.
df = load_data()