Spaces:

EdBianchi
/

ThemeParksAccidents_RDF-SPARQL

Runtime error

App Files Files Community

EdBianchi commited on Aug 29, 2022

Commit

32e5892

•

1 Parent(s): 7398f8e

Create app.py

Browse files

Files changed (1) hide show

app.py +261 -0

app.py ADDED Viewed

	@@ -0,0 +1,261 @@

+# IMPORTING TOOLS
+import streamlit as st
+from rdflib import Graph
+import pandas as pd
+import plotly.express as px
+import numpy as np
+# SET PAGE SETTINGS
+st.set_page_config(page_title='Amusement Accidents')
+# CACHED METHOD TO LOAD THE RDF
+@st.cache(persist=True)
+def importRDF(filename, format):
+    graph = Graph().parse(filename, format)
+    return graph
+# IMPORTING THE RDF
+with st.spinner('Loading all the stuffs...'):
+    graph = importRDF("rdf-dataset.ttl", "ttl")
+# MOTHOD TO CONVERT THE QUERY RESULT INTO A DATAFRAME
+def sparql_results_to_df(results):
+    return pd.DataFrame(
+        data=([None if x is None else x.toPython() for x in row] for row in results),
+        columns=[str(x) for x in results.vars],
+    )
+# METHOD TO EXECUTE A QUERY (and return a pandas dataframe)
+def computeQuery(query, executor):
+    result = executor.query(query)
+    res_df = sparql_results_to_df(result)
+    return res_df
+# PROCESSING & DISPLAY
+def display():
+    with st.container():
+        st.write("#### What are the months with the highest number of accidents?")
+        res = computeQuery(query_5, graph)
+        fig = px.bar(res, x="mon", y="count", color="count", labels={"mon":"Month", "count":"Num. of Accidents"}, text_auto="True")
+        fig.update_xaxes(type="category")
+        fig.update_yaxes(showticklabels=False)
+        st.plotly_chart(fig)
+        with st.expander("Show query"):
+            st.code(query_5, language="sparql")
+        st.markdown("---")
+    with st.container():
+        st.write("#### Which cities have recorded the most accidents?")
+        res = computeQuery(query_8, graph)
+        fig = px.treemap(res, path=[px.Constant("U.S"), "state", "city"], values="count", hover_data=["state", "city","count"],
+                color="count",
+                color_continuous_scale='tealrose',
+                color_continuous_midpoint=np.average(res['count'], weights=res['count']))
+        st.plotly_chart(fig)
+        with st.expander("Show query"):
+            st.code(query_8, language="sparql")
+        st.markdown("---")
+    with st.container():
+        st.write("#### What Are the Most Common Categories of Accidents?")
+        res = computeQuery(query_4, graph)
+        fig = px.treemap(res, path=[px.Constant("Accident Category"), "category_name"], values="count", hover_data=["category_name","count"])
+        st.plotly_chart(fig)
+        with st.expander("Show query"):
+            st.code(query_4, language="sparql")
+        st.markdown("---")
+    with st.container():
+        st.write("#### What are the Most Dangerous Ride Categories?")
+        res = computeQuery(query_6, graph)
+        fig = px.pie(res, names="amus_cat_name", values="count", hole=.4)
+        st.plotly_chart(fig)
+        with st.expander("Show query"):
+            st.code(query_6, language="sparql")
+        st.markdown("---")
+    with st.container():
+        st.write("#### What are the Most Dangerous Ride Types?")
+        res = computeQuery(query_3, graph)
+        fig = px.bar(res, x="type_name", y="count", labels={"type_name":"Ride Type", "count":"Num. of Accidents"}, text_auto=True)
+        fig.update_xaxes(tickangle=45)
+        st.plotly_chart(fig)
+        with st.expander("Show query"):
+            st.code(query_3, language="sparql")
+        st.markdown("---")
+    with st.container():
+        st.write("#### Which manufacturers produced the rides with the most accidents?")
+        res = computeQuery(query_2, graph)
+        fig = px.treemap(res, path=[px.Constant("Manufacturers"), "ride_manuf"], values="count", hover_data=["count"])
+        st.plotly_chart(fig)
+        with st.expander("Show query"):
+            st.code(query_2, language="sparql")
+        st.markdown("---")
+    with st.container():
+        st.write("#### How many people are generally involved in an accident?")
+        res = computeQuery(query_1, graph)
+        fig = px.bar(res, x="num_inj", y="count", labels={"num_inj":"Injured People", "count":"Num. of Accidents"}, text_auto=True)
+        fig.update_xaxes(type="category")
+        st.plotly_chart(fig, use_container_width=True)
+        with st.expander("Show query"):
+            st.code(query_1, language="sparql")
+        st.markdown("---")
+    return None
+# ANALYTICAL QUERIES DEFINITION
+# num of accidents per injured people
+query_1 = """
+    PREFIX r:<http://example.org/ride#>
+    PREFIX a:<http://example.org/accident#>
+    SELECT ?num_inj (COUNT(?num_inj) AS ?count)
+    WHERE {
+        ?acc a:num_injured ?num_inj .
+    }
+    GROUP BY ?num_inj
+    ORDER BY (?num_inj)
+"""
+# manufacturers of the rides subjected to most accidents
+query_2 = """
+    PREFIX acc: <http://example.org/accident#>
+    PREFIX ride: <http://example.org/ride#>
+    SELECT ?ride_manuf (COUNT(?ride_manuf) AS ?count)
+    WHERE {
+        ?instance acc:ref-ride_id ?ride_id .
+        ?ride_id ride:manufacturer ?ride_manuf
+    }
+    GROUP BY ?ride_manuf
+    ORDER BY DESC(?count)
+"""
+# Top n types of rides most subjected to accidents
+query_3 = """
+    PREFIX ride_type: <http://example.org/ride_type#>
+    PREFIX acc: <http://example.org/accident#>
+    PREFIX ride: <http://example.org/ride#>
+    SELECT ?type_name (COUNT(?type_name) AS ?count)
+    WHERE {
+        ?instance acc:ref-ride_id ?ride_id .
+        ?ride_id ride:ref-ride_type_id ?type_id .
+        ?type_id ride_type:type ?type_name .
+    }
+    GROUP BY ?type_name
+    ORDER BY DESC(?count)
+    LIMIT 7
+"""
+# Top 6 categories of rides most subjected to accidents
+query_6 = """
+    PREFIX amusement_cat: <http://example.org/amusement_category#>
+    PREFIX ride_type: <http://example.org/ride_type#>
+    PREFIX acc: <http://example.org/accident#>
+    PREFIX ride: <http://example.org/ride#>
+    SELECT ?amus_cat_name (COUNT(?amus_cat_name) AS ?count)
+    WHERE {
+        ?instance acc:ref-ride_id ?ride_id .
+        ?ride_id ride:ref-ride_type_id ?type_id .
+        ?type_id ride_type:ref-amusement_category_id ?amus_cat_id .
+        ?amus_cat_id amusement_cat:amusement_category ?amus_cat_name .
+    }
+    GROUP BY ?amus_cat_name
+    ORDER BY DESC(?count)
+    LIMIT 6
+"""
+# most common categories of accidents
+query_4 = """
+    PREFIX acc_cat: <http://example.org/accident_category#>
+    PREFIX acc: <http://example.org/accident#>
+    SELECT ?category_name (COUNT(?category_name) AS ?count)
+    WHERE {
+        ?instance acc:ref-accident_category_id ?category_id .
+        ?category_id acc_cat:accident_category ?category_name .
+    }
+    GROUP BY ?category_name
+    ORDER BY DESC(?count)
+"""
+# months with the ngher num of accidents
+query_5 = """
+    PREFIX acc: <http://example.org/accident#>
+    SELECT ?mon (COUNT(?mon) AS ?count)
+    WHERE {
+        ?instance acc:date ?date .
+    }
+    GROUP BY (month(?date) AS ?mon)
+    ORDER BY (?mon)
+"""
+# cities with the higher num of accidents
+query_8 = """
+    PREFIX location: <http://example.org/location#>
+    PREFIX acc: <http://example.org/accident#>
+    SELECT ?city (COUNT(?city) AS ?count) ?state
+    WHERE {
+        ?instance acc:ref-location_id ?location_id .
+        ?location_id location:city ?city ;
+                     location:state ?state
+    }
+    GROUP BY ?city
+    ORDER BY DESC(?count)
+"""
+# TITLE
+st.header("Theme Parks Rides Accidents")
+st.markdown("""There are **thousands of amusement parks** around the world that welcome **millions of visitors** each year. Children, families and teenagers ready to spend days of adrenaline and fun.
+    Unfortunately, **accidents sometimes occur**. This raises some questions: **Are amusement parks safe? Which rides are the most accident-prone? What accidents happen most often? At what time of year are accidents most common?**
+    Let's try to find out in this **RDF data exploration** using **SPARQL** and **Plotly**.""")
+st.markdown("---")
+display()
+# WRITE & RUN YOUR OWN QUERY
+st.write("#### Write & Run your Custom Query")
+pers_query = st.text_area('', """
+    PREFIX ride:<http://example.org/ride#>
+    SELECT ?name
+    WHERE {
+        ?ride ride:manufacturer "Vekoma" ;
+              ride:name ?name
+    }
+""", height=200)
+with st.container():
+    res = computeQuery(pers_query, graph)
+    st.dataframe(res)
+    st.markdown("---")
+# SIDEBAR
+with st.sidebar:
+    st.write("""
+        This App propose some visualization about theme parks rides accidents. The original dataset comes from "Saferparks", an organization that reports and collects
+        data about theme parks rides accidents, in the US.
+        The original dataset covers years from 2010 to 2017 and comes in CSV or Excel format. I used python to split the dataset and convert it in the Third Normal Form (3NF)
+        of Database.
+        I uploaded the data into a PostgreSQL database and I used the Ontop tool to get the final RDF dataset.
+        """)
+    st.markdown("---")
+    st.markdown("## Resources:")
+    st.markdown("""
+        Saferparks dataset: https://ridesdatabase.org/saferparks/data/
+        Saferparks dataset description: https://ridesdatabase.org/wp-content/uploads/2020/02/Saferparks-data-description.pdf
+        Ontop Tool: https://ontop-vkg.org
+        Ontop Tool on GitHub: https://github.com/ontop/ontop
+        """)