# IMPORTING TOOLS import streamlit as st from rdflib import Graph, Literal from rdflib.plugins.sparql import prepareQuery import pandas as pd import plotly.express as px import numpy as np # SET PAGE SETTINGS st.set_page_config(page_title='Amusement Accidents', layout="centered") # METHOD TO LOAD THE RDF @st.cache(persist=True) def importRDF(filename, format): graph = Graph().parse(filename, format) return graph # IMPORTING THE RDF with st.spinner('Loading all the stuffs...'): graph = importRDF("rdf-dataset.ttl", "ttl") # METHOD TO CONVERT THE QUERY RESULT INTO A DATAFRAME def sparql_results_to_df(results): return pd.DataFrame( data=([None if x is None else x.toPython() for x in row] for row in results), columns=[str(x) for x in results.vars], ) # METHOD TO EXECUTE A GENERIC QUERY def computeQuery(query, executor): result = executor.query(query) res_df = sparql_results_to_df(result) return res_df # METHOD TO EXECUTE A PARAMETRIC QUERY def rideAccidentDescription(ride_name, executor): ride_name = Literal(ride_name) query = """ PREFIX ride_type: PREFIX acc: PREFIX ride: SELECT (?manuf AS ?Manufacturer) (?description AS ?Accident_Description) WHERE { ?instance acc:description ?description ; acc:ref-ride_id ?ride_id . ?ride_id ride:name ?name ; ride:manufacturer ?manuf . FILTER (?name = ?ride_name) } """ prep_query = prepareQuery(query) r = executor.query(prep_query, initBindings={'ride_name': ride_name}) return sparql_results_to_df(r), query # PROCESSING & DISPLAY def display(): with st.container(): st.write("#### What are the months with the highest number of accidents?") res = computeQuery(query_5, graph) fig = px.bar(res, x="mon", y="count", color="count", labels={"mon":"Month", "count":"Num. of Accidents"}, text_auto="True") fig.update_xaxes(type="category") fig.update_yaxes(showticklabels=False) st.plotly_chart(fig, use_container_width=True) with st.expander("Show query"): st.code(query_5, language="sparql") st.markdown("---") with st.container(): st.write("#### Which cities and states have recorded the most accidents?") res = computeQuery(query_8, graph) fig = px.treemap(res, path=[px.Constant("U.S"), "state", "city"], values="count", hover_data=["state", "city","count"], color="count", color_continuous_scale='tealrose', color_continuous_midpoint=np.average(res['count'], weights=res['count'])) st.plotly_chart(fig, use_container_width=True) with st.expander("Show query"): st.code(query_8, language="sparql") st.markdown("---") with st.container(): st.write("#### What incidents have occurred on your favorite ride?") ride_names = computeQuery(query_0, graph) option = st.selectbox("Select a Ride", options=ride_names) res, query = rideAccidentDescription(option, graph) res_count = res.count()[0] if (res_count < 3): st.table(res) else: limit = st.slider("Num. of Accidents to Visualize", 1, int(res_count), 2, 1) st.table(res[:limit]) with st.expander("Show query"): st.code(query, language="sparql") st.markdown("---") with st.container(): st.write("#### What Are the Most Common Categories of Accidents?") res = computeQuery(query_4, graph) fig = px.treemap(res, path=[px.Constant("Accident Category"), "category_name"], values="count", hover_data=["category_name","count"]) st.plotly_chart(fig, use_container_width=True) with st.expander("Show query"): st.code(query_4, language="sparql") st.markdown("---") with st.container(): st.write("#### What are the Most Dangerous Ride Categories?") res = computeQuery(query_6, graph) fig = px.pie(res, names="amus_cat_name", values="count", hole=.4) st.plotly_chart(fig, use_container_width=True) with st.expander("Show query"): st.code(query_6, language="sparql") st.markdown("---") with st.container(): st.write("#### What are the Most Dangerous Ride Types?") res = computeQuery(query_3, graph) fig = px.bar(res, x="type_name", y="count", labels={"type_name":"Ride Type", "count":"Num. of Accidents"}, text_auto=True) fig.update_xaxes(tickangle=45) st.plotly_chart(fig, use_container_width=True) with st.expander("Show query"): st.code(query_3, language="sparql") st.markdown("---") with st.container(): st.write("#### How many people are generally involved in an accident?") res = computeQuery(query_1, graph) fig = px.bar(res, x="num_inj", y="count", labels={"num_inj":"Injured People", "count":"Num. of Accidents"}, text_auto=True) fig.update_xaxes(type="category") st.plotly_chart(fig, use_container_width=True) with st.expander("Show query"): st.code(query_1, language="sparql") st.markdown("---") return None # ANALYTICAL QUERIES DEFINITION # get the names of all the rides query_0 = """ PREFIX ride: SELECT DISTINCT ?name WHERE { ?ride ride:name ?name . } """ # num of accidents per injured people query_1 = """ PREFIX r: PREFIX a: SELECT ?num_inj (COUNT(?num_inj) AS ?count) WHERE { ?acc a:num_injured ?num_inj . } GROUP BY ?num_inj ORDER BY (?num_inj) """ # manufacturers of the rides subjected to most accidents query_2 = """ PREFIX acc: PREFIX ride: SELECT ?ride_manuf (COUNT(?ride_manuf) AS ?count) WHERE { ?instance acc:ref-ride_id ?ride_id . ?ride_id ride:manufacturer ?ride_manuf } GROUP BY ?ride_manuf ORDER BY DESC(?count) """ # Top n types of rides most subjected to accidents query_3 = """ PREFIX ride_type: PREFIX acc: PREFIX ride: SELECT ?type_name (COUNT(?type_name) AS ?count) WHERE { ?instance acc:ref-ride_id ?ride_id . ?ride_id ride:ref-ride_type_id ?type_id . ?type_id ride_type:type ?type_name . } GROUP BY ?type_name ORDER BY DESC(?count) LIMIT 7 """ # Top 6 categories of rides most subjected to accidents query_6 = """ PREFIX amusement_cat: PREFIX ride_type: PREFIX acc: PREFIX ride: SELECT ?amus_cat_name (COUNT(?amus_cat_name) AS ?count) WHERE { ?instance acc:ref-ride_id ?ride_id . ?ride_id ride:ref-ride_type_id ?type_id . ?type_id ride_type:ref-amusement_category_id ?amus_cat_id . ?amus_cat_id amusement_cat:amusement_category ?amus_cat_name . } GROUP BY ?amus_cat_name ORDER BY DESC(?count) LIMIT 6 """ # most common categories of accidents query_4 = """ PREFIX acc_cat: PREFIX acc: SELECT ?category_name (COUNT(?category_name) AS ?count) WHERE { ?instance acc:ref-accident_category_id ?category_id . ?category_id acc_cat:accident_category ?category_name . } GROUP BY ?category_name ORDER BY DESC(?count) """ # months with the ngher num of accidents query_5 = """ PREFIX acc: SELECT ?mon (COUNT(?mon) AS ?count) WHERE { ?instance acc:date ?date . } GROUP BY (month(?date) AS ?mon) ORDER BY (?mon) """ # cities with the higher num of accidents query_8 = """ PREFIX location: PREFIX acc: SELECT ?city (COUNT(?city) AS ?count) ?state WHERE { ?instance acc:ref-location_id ?location_id . ?location_id location:city ?city ; location:state ?state } GROUP BY ?city ORDER BY DESC(?count) """ # TITLE st.header("Theme Park Ride Accidents") st.markdown("""There are **thousands of amusement parks** around the world that welcome **millions of visitors** each year. Children, families, and teenagers are ready to spend days of adrenaline and fun. Unfortunately, **accidents sometimes occur**. This raises some questions: **Are amusement parks safe? Which rides are the most accident-prone? What accidents happen most often? At what time of year are accidents most common?** Let's try to find out in this **RDF data exploration** using **SPARQL** and **Plotly**.""") st.markdown("---") display() # WRITE & RUN YOUR OWN QUERY st.write("#### Write & Run your Custom Query") pers_query = st.text_area('', """ PREFIX ride: SELECT ?name WHERE { ?ride ride:manufacturer "Vekoma" ; ride:name ?name } """, height=200) with st.container(): try: res = computeQuery(pers_query, graph) st.table(res) except: st.error("Ooops! Check you query syntax...") st.markdown("---") # SIDEBAR with st.sidebar: st.write(""" This App proposes some visualization about theme park ride accidents. The original dataset comes from "Saferparks", an organization that reports and collects data about theme park ride accidents in the US. The original dataset covers years from 2010 to 2017 and comes in CSV or Excel format. I used python to split the dataset and convert it into the Third Normal Form (3NF) of Database. I uploaded the data into a PostgreSQL database and I used the Ontop tool to get the final RDF dataset. Queries are expressed in SPARQL, and charts are generated with Plotly Express. """) st.markdown("---") st.markdown("## Dataset Resources:") st.markdown(""" Saferparks Original Dataset: https://ridesdatabase.org/saferparks/data/ Saferparks Dataset Description: https://ridesdatabase.org/wp-content/uploads/2020/02/Saferparks-data-description.pdf """)