import sys
import pandas as pd

from SPARQLWrapper import SPARQLWrapper, JSON


# SPARQL endpoint the query below is sent to; get_results() hands this URL to
# SPARQLWrapper.
endpoint_url = "https://query.wikidata.org/sparql"

# SPARQL text submitted by load_data().
#
# Mandatory patterns: ?item must be wdt:P31 wd:Q5, carry a wdt:P4478 value and
# have a wdt:P551 residence that itself has wdt:P625 coordinates. ?accusedurl is
# built by substituting the P4478 value into that property's wdt:P1630 value.
#
# Every other property sits in an OPTIONAL clause, so the matching *Label
# variable can be unbound in a given row (or in every row, in which case it is
# absent from the response bindings altogether).
#
# NOTE(review): the last OPTIONAL (wdt:P551 ?residence) repeats the mandatory
# residence pattern above and looks redundant - confirm before removing.
query = """
# Places of residence of accused witches in Scotland 1563-1736
SELECT ?accusedurl ?item ?itemLabel ?residenceLabel ?genderLabel ?occupationLabel ?classLabel ?manner_of_inhumane_treatmentLabel ?place_of_detentionLabel ?cause_of_deathLabel ?fatherLabel ?motherLabel ?siblingLabel ?childLabel ?spouseLabel ?coords WHERE {
  ?item wdt:P31 wd:Q5;
    wdt:P4478 ?accused.
  wd:P4478 wdt:P1630 ?formatterurl.
  BIND(IRI(REPLACE(?accused, "^(.+)$", ?formatterurl)) AS ?accusedurl)
  ?item wdt:P551 ?residence.
  ?residence wdt:P625 ?coords.
  
  OPTIONAL { ?item wdt:P21 ?gender. }
  OPTIONAL { ?item wdt:P106 ?occupation. }
  OPTIONAL { ?item wdt:P3716 ?class. }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  OPTIONAL { ?item wdt:P7160 ?manner_of_inhumane_treatment. }
  OPTIONAL { ?item wdt:P2632 ?place_of_detention. }
  OPTIONAL { ?item wdt:P509 ?cause_of_death. }
  OPTIONAL { ?item wdt:P22 ?father. }
  OPTIONAL { ?item wdt:P25 ?mother. }
  OPTIONAL { ?item wdt:P3373 ?sibling. }
  OPTIONAL { ?item wdt:P40 ?child. }
  OPTIONAL { ?item wdt:P26 ?spouse. }
  OPTIONAL { ?item wdt:P551 ?residence. }
}
"""


def get_results(endpoint_url, query):
  """
  Send a SPARQL query to an endpoint and return the converted response.

  endpoint_url -- address of the SPARQL endpoint to contact.
  query        -- SPARQL query text to execute.

  The request asks for the JSON return format and identifies itself with a
  user agent string that embeds the running interpreter's major and minor
  version numbers. The value returned is whatever ``convert()`` yields for
  that response.
  """
  major = sys.version_info[0]
  minor = sys.version_info[1]
  # TODO adjust user agent; see https://w.wiki/CX6
  agent_string = "WDQS-example Python/%s.%s" % (major, minor)

  client = SPARQLWrapper(endpoint_url, agent=agent_string)
  client.setQuery(query)
  client.setReturnFormat(JSON)

  response = client.query()
  return response.convert()


def _parse_point(wkt):
  """
  Split a ``Point(<first> <last>)`` literal into a pair of floats.

  The first whitespace-separated token (minus its leading parenthesis) and
  the last token (minus its trailing parenthesis) are converted to float.
  """
  tokens = wkt.replace("Point", "").split()
  return float(tokens[0].lstrip("(")), float(tokens[-1].rstrip(")"))


def load_data(results=None):
  """
  Build a DataFrame of accused witches that have at least one known relative.

  results -- optional, already-fetched SPARQL JSON response (a mapping with
             ``results["results"]["bindings"]``). When omitted, the module
             level ``query`` is run against ``endpoint_url`` via
             ``get_results``.

  Processing steps:
    * each binding becomes one row holding the ``value`` of every variable;
    * rows where sibling, spouse, child, father and mother are all missing
      are dropped;
    * ``coords`` is replaced by float ``longitude`` / ``latitude`` columns,
      taken from the first and last token of the point literal;
    * the ``Label`` suffix is stripped from every column name except
      ``itemLabel`` (which would otherwise collide with ``item``).

  Returns the resulting pandas DataFrame; it is empty when no row qualifies.
  """
  relative_columns = ['siblingLabel', 'spouseLabel', 'childLabel', 'fatherLabel', 'motherLabel']

  if results is None:
    results = get_results(endpoint_url, query)

  rows = [
    {name: cell['value'] for name, cell in binding.items()}
    for binding in results["results"]["bindings"]
  ]
  data = pd.DataFrame(rows)

  # A variable that is unbound in every binding never becomes a column, and an
  # empty response yields no columns at all. Add the columns used below as
  # all-missing so the filtering and parsing steps do not raise KeyError.
  missing = [col for col in relative_columns + ['coords'] if col not in data.columns]
  if missing:
    data = data.reindex(columns=[*data.columns, *missing])

  data.dropna(subset=relative_columns, how="all", inplace=True)

  # Parse each point literal once; pop() also removes the raw column.
  points = [_parse_point(value) for value in data.pop('coords')]
  data['longitude'] = pd.Series([lon for lon, _ in points], index=data.index, dtype=float)
  data['latitude'] = pd.Series([lat for _, lat in points], index=data.index, dtype=float)

  data.columns = [col if col == "itemLabel" else col.replace("Label", "") for col in data.columns]
  return data


# Runs at module level, so the query is issued (through load_data) whenever
# this file is executed or imported; the resulting DataFrame is kept in `df`.
df = load_data()