EdBianchi committed on
Commit
32e5892
1 Parent(s): 7398f8e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +261 -0
app.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # IMPORTING TOOLS
2
+ import streamlit as st
3
+ from rdflib import Graph
4
+ import pandas as pd
5
+ import plotly.express as px
6
+ import numpy as np
7
+
8
+ # SET PAGE SETTINGS: give the Streamlit page its title
9
+ st.set_page_config(page_title='Amusement Accidents')
10
+
11
# CACHED METHOD TO LOAD THE RDF
# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in favour
# of `st.cache_resource` / `st.cache_data` -- confirm the pinned version before
# migrating.
@st.cache(persist=True)
def importRDF(filename, format):
    """Parse an RDF file into an rdflib ``Graph`` and return it.

    Args:
        filename: Path (or other source accepted by ``Graph.parse``) of the
            RDF document to load.
        format: Name of the rdflib parser to use, e.g. ``"ttl"``.

    Returns:
        The value returned by ``Graph.parse`` for the freshly created graph.
    """
    # `format` has to be passed by keyword: the second positional parameter of
    # Graph.parse is `publicID`, so a positional call silently ignored the
    # requested format and used it as the base IRI instead.
    return Graph().parse(filename, format=format)
16
+
17
+ # IMPORTING THE RDF: load the Turtle dataset via importRDF while a spinner is shown
18
+ with st.spinner('Loading all the stuffs...'):
19
+ graph = importRDF("rdf-dataset.ttl", "ttl")
20
+
21
# METHOD TO CONVERT THE QUERY RESULT INTO A DATAFRAME
def sparql_results_to_df(results):
    """Turn a SPARQL result set into a pandas DataFrame.

    Args:
        results: Iterable of result rows that also exposes a ``vars``
            attribute listing the projected variables.  Each cell is either
            ``None`` or an object with a ``toPython()`` method.

    Returns:
        A DataFrame with one column per entry of ``results.vars`` (converted
        with ``str``) and one row per result row; cells hold the
        ``toPython()`` value, or ``None`` for missing cells.
    """
    column_names = [str(variable) for variable in results.vars]
    table = []
    for result_row in results:
        converted = []
        for term in result_row:
            converted.append(term.toPython() if term is not None else None)
        table.append(converted)
    return pd.DataFrame(data=table, columns=column_names)
27
+
28
# METHOD TO EXECUTE A QUERY (and return a pandas dataframe)
def computeQuery(query, executor):
    """Run ``query`` through ``executor.query`` and return the rows as a DataFrame.

    Args:
        query: The query text handed to ``executor.query``.
        executor: Any object with a ``query`` method whose result is accepted
            by ``sparql_results_to_df``.

    Returns:
        The DataFrame produced by ``sparql_results_to_df``.
    """
    return sparql_results_to_df(executor.query(query))
33
+
34
# PROCESSING & DISPLAY
def _render_section(title, query, build_figure, **chart_kwargs):
    """Render one section: heading, chart, collapsible query text, divider.

    Args:
        title: Markdown heading written at the top of the section.
        query: SPARQL text executed against the module-level ``graph`` and
            shown inside the "Show query" expander.
        build_figure: Callable that receives the query result DataFrame and
            returns the Plotly figure to display.
        **chart_kwargs: Extra keyword arguments forwarded to
            ``st.plotly_chart``.
    """
    # NOTE(review): the original indentation was not available; the divider is
    # assumed to sit inside the container, right after the expander.
    with st.container():
        st.write(title)
        res = computeQuery(query, graph)
        st.plotly_chart(build_figure(res), **chart_kwargs)
        with st.expander("Show query"):
            st.code(query, language="sparql")
        st.markdown("---")


def _months_figure(res):
    """Bar chart of accident counts per month, coloured by count."""
    fig = px.bar(
        res,
        x="mon",
        y="count",
        color="count",
        labels={"mon": "Month", "count": "Num. of Accidents"},
        # Boolean True, consistent with the other bar charts; the string
        # "True" would be read by Plotly as a number-format specifier.
        text_auto=True,
    )
    fig.update_xaxes(type="category")
    fig.update_yaxes(showticklabels=False)
    return fig


def _cities_figure(res):
    """Treemap of accident counts nested as U.S > state > city."""
    return px.treemap(
        res,
        path=[px.Constant("U.S"), "state", "city"],
        values="count",
        hover_data=["state", "city", "count"],
        color="count",
        color_continuous_scale='tealrose',
        # Colour midpoint is the count-weighted average of the counts.
        color_continuous_midpoint=np.average(res['count'], weights=res['count']),
    )


def _accident_categories_figure(res):
    """Treemap of accident counts per accident category."""
    return px.treemap(
        res,
        path=[px.Constant("Accident Category"), "category_name"],
        values="count",
        hover_data=["category_name", "count"],
    )


def _ride_categories_figure(res):
    """Donut chart of accident counts per amusement (ride) category."""
    return px.pie(res, names="amus_cat_name", values="count", hole=.4)


def _ride_types_figure(res):
    """Bar chart of accident counts per ride type, with slanted tick labels."""
    fig = px.bar(
        res,
        x="type_name",
        y="count",
        labels={"type_name": "Ride Type", "count": "Num. of Accidents"},
        text_auto=True,
    )
    fig.update_xaxes(tickangle=45)
    return fig


def _manufacturers_figure(res):
    """Treemap of accident counts per ride manufacturer."""
    return px.treemap(
        res,
        path=[px.Constant("Manufacturers"), "ride_manuf"],
        values="count",
        hover_data=["count"],
    )


def _injured_figure(res):
    """Bar chart of accident counts per number of injured people."""
    fig = px.bar(
        res,
        x="num_inj",
        y="count",
        labels={"num_inj": "Injured People", "count": "Num. of Accidents"},
        text_auto=True,
    )
    fig.update_xaxes(type="category")
    return fig


def display():
    """Render all predefined analysis sections, in page order.

    Each section runs one of the module-level ``query_N`` strings against the
    module-level ``graph`` and plots the result.

    Returns:
        None.
    """
    _render_section(
        "#### What are the months with the highest number of accidents?",
        query_5,
        _months_figure,
    )
    _render_section(
        "#### Which cities have recorded the most accidents?",
        query_8,
        _cities_figure,
    )
    _render_section(
        "#### What Are the Most Common Categories of Accidents?",
        query_4,
        _accident_categories_figure,
    )
    _render_section(
        "#### What are the Most Dangerous Ride Categories?",
        query_6,
        _ride_categories_figure,
    )
    _render_section(
        "#### What are the Most Dangerous Ride Types?",
        query_3,
        _ride_types_figure,
    )
    _render_section(
        "#### Which manufacturers produced the rides with the most accidents?",
        query_2,
        _manufacturers_figure,
    )
    _render_section(
        "#### How many people are generally involved in an accident?",
        query_1,
        _injured_figure,
        use_container_width=True,
    )
    return None
109
+
110
+ # ANALYTICAL QUERIES DEFINITION
111
+ # Number of accidents for each distinct a:num_injured value, ordered by that value
+ # NOTE(review): the r: prefix is declared but not used by this query
112
+ query_1 = """
113
+ PREFIX r:<http://example.org/ride#>
114
+ PREFIX a:<http://example.org/accident#>
115
+
116
+ SELECT ?num_inj (COUNT(?num_inj) AS ?count)
117
+ WHERE {
118
+ ?acc a:num_injured ?num_inj .
119
+ }
120
+ GROUP BY ?num_inj
121
+ ORDER BY (?num_inj)
122
+ """
123
+
124
+ # Number of accidents per ride manufacturer (accident -> ride -> manufacturer), highest count first
125
+ query_2 = """
126
+ PREFIX acc: <http://example.org/accident#>
127
+ PREFIX ride: <http://example.org/ride#>
128
+
129
+ SELECT ?ride_manuf (COUNT(?ride_manuf) AS ?count)
130
+ WHERE {
131
+ ?instance acc:ref-ride_id ?ride_id .
132
+ ?ride_id ride:manufacturer ?ride_manuf
133
+ }
134
+ GROUP BY ?ride_manuf
135
+ ORDER BY DESC(?count)
136
+ """
137
+
138
+ # Top 7 ride types by number of accidents (accident -> ride -> ride type), highest count first
139
+ query_3 = """
140
+ PREFIX ride_type: <http://example.org/ride_type#>
141
+ PREFIX acc: <http://example.org/accident#>
142
+ PREFIX ride: <http://example.org/ride#>
143
+
144
+ SELECT ?type_name (COUNT(?type_name) AS ?count)
145
+ WHERE {
146
+ ?instance acc:ref-ride_id ?ride_id .
147
+ ?ride_id ride:ref-ride_type_id ?type_id .
148
+ ?type_id ride_type:type ?type_name .
149
+ }
150
+ GROUP BY ?type_name
151
+ ORDER BY DESC(?count)
152
+ LIMIT 7
153
+ """
154
+
155
+ # Top 6 amusement categories by number of accidents (accident -> ride -> ride type -> amusement category)
156
+ query_6 = """
157
+ PREFIX amusement_cat: <http://example.org/amusement_category#>
158
+ PREFIX ride_type: <http://example.org/ride_type#>
159
+ PREFIX acc: <http://example.org/accident#>
160
+ PREFIX ride: <http://example.org/ride#>
161
+
162
+ SELECT ?amus_cat_name (COUNT(?amus_cat_name) AS ?count)
163
+ WHERE {
164
+ ?instance acc:ref-ride_id ?ride_id .
165
+ ?ride_id ride:ref-ride_type_id ?type_id .
166
+ ?type_id ride_type:ref-amusement_category_id ?amus_cat_id .
167
+ ?amus_cat_id amusement_cat:amusement_category ?amus_cat_name .
168
+ }
169
+ GROUP BY ?amus_cat_name
170
+ ORDER BY DESC(?count)
171
+ LIMIT 6
172
+
173
+ """
174
+
175
+ # Number of accidents per accident category, most common category first
176
+ query_4 = """
177
+ PREFIX acc_cat: <http://example.org/accident_category#>
178
+ PREFIX acc: <http://example.org/accident#>
179
+
180
+ SELECT ?category_name (COUNT(?category_name) AS ?count)
181
+ WHERE {
182
+ ?instance acc:ref-accident_category_id ?category_id .
183
+ ?category_id acc_cat:accident_category ?category_name .
184
+ }
185
+ GROUP BY ?category_name
186
+ ORDER BY DESC(?count)
187
+ """
188
+
189
+ # Number of accidents per month (month extracted from acc:date), ordered by month
190
+ query_5 = """
191
+ PREFIX acc: <http://example.org/accident#>
192
+
193
+ SELECT ?mon (COUNT(?mon) AS ?count)
194
+ WHERE {
195
+ ?instance acc:date ?date .
196
+ }
197
+ GROUP BY (month(?date) AS ?mon)
198
+ ORDER BY (?mon)
199
+ """
200
+
201
# Number of accidents per (city, state) pair, highest count first.
# Both ?city and ?state are grouping keys: SPARQL 1.1 only allows grouped or
# aggregated variables in the projection, and grouping by city alone would
# merge same-named cities that belong to different states.
query_8 = """
PREFIX location: <http://example.org/location#>
PREFIX acc: <http://example.org/accident#>

SELECT ?city (COUNT(?city) AS ?count) ?state
WHERE {
    ?instance acc:ref-location_id ?location_id .
    ?location_id location:city ?city ;
                 location:state ?state
}
GROUP BY ?city ?state
ORDER BY DESC(?count)

"""
216
+
217
+
218
+ # TITLE: page header and introduction, followed by the predefined analysis sections
219
+ st.header("Theme Parks Rides Accidents")
220
+ st.markdown("""There are **thousands of amusement parks** around the world that welcome **millions of visitors** each year. Children, families and teenagers ready to spend days of adrenaline and fun.
221
+ Unfortunately, **accidents sometimes occur**. This raises some questions: **Are amusement parks safe? Which rides are the most accident-prone? What accidents happen most often? At what time of year are accidents most common?**
222
+ Let's try to find out in this **RDF data exploration** using **SPARQL** and **Plotly**.""")
223
+ st.markdown("---")
224
+
225
+ display()
226
+
227
# WRITE & RUN YOUR OWN QUERY
# The text area is pre-filled with a sample query; whatever it currently holds
# is executed against the loaded graph and the rows are shown as a table.
st.write("#### Write & Run your Custom Query")
pers_query = st.text_area('', """
PREFIX ride:<http://example.org/ride#>
SELECT ?name
WHERE {
    ?ride ride:manufacturer "Vekoma" ;
          ride:name ?name
}
""", height=200)
with st.container():
    # The query text is typed by the user, so it may fail to parse or may not
    # be a SELECT query; report the failure on the page instead of letting the
    # exception abort the rest of the script.
    try:
        res = computeQuery(pers_query, graph)
    except Exception as exc:  # top-level UI boundary for user-supplied input
        st.error(f"The query could not be executed: {exc}")
    else:
        st.dataframe(res)
    st.markdown("---")
241
+
242
+ # SIDEBAR: short description of the app and its data pipeline, then a list of resource links
243
+ with st.sidebar:
244
+ st.write("""
245
+ This App propose some visualization about theme parks rides accidents. The original dataset comes from "Saferparks", an organization that reports and collects
246
+ data about theme parks rides accidents, in the US.
247
+ The original dataset covers years from 2010 to 2017 and comes in CSV or Excel format. I used python to split the dataset and convert it in the Third Normal Form (3NF)
248
+ of Database.
249
+ I uploaded the data into a PostgreSQL database and I used the Ontop tool to get the final RDF dataset.
250
+ """)
251
+ st.markdown("---")
252
+ st.markdown("## Resources:")
253
+ st.markdown("""
254
+ Saferparks dataset: https://ridesdatabase.org/saferparks/data/
255
+
256
+ Saferparks dataset description: https://ridesdatabase.org/wp-content/uploads/2020/02/Saferparks-data-description.pdf
257
+
258
+ Ontop Tool: https://ontop-vkg.org
259
+
260
+ Ontop Tool on GitHub: https://github.com/ontop/ontop
261
+ """)