File size: 2,277 Bytes
bfef21c
 
 
 
 
 
 
 
 
 
 
 
 
24373d3
158c444
 
 
bfef21c
24373d3
158c444
bfef21c
24373d3
158c444
bfef21c
24373d3
158c444
23c17f5
bfef21c
158c444
bfef21c
 
158c444
bfef21c
 
158c444
bfef21c
 
158c444
 
 
 
bfef21c
158c444
bfef21c
158c444
bfef21c
 
ac6bae5
 
bfef21c
ac6bae5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import pandas as pd
import datasets
import seaborn as sns
import matplotlib.pyplot as plt

# Load the "train" split of the supersoaker-failures dataset as a pandas
# DataFrame, discard every row that has a missing value, and remove the
# "id" column.
df = (
    datasets.load_dataset("merve/supersoaker-failures")["train"]
    .to_pandas()
    .dropna(axis=0)
    .drop(columns=["id"])
)

def _save_scatter(df, x_num, y_num, path):
  """Save a scatter of measurement_<x_num> vs measurement_<y_num>, coloured by `failure`."""
  # A dedicated figure per chart keeps points, titles and axis labels from
  # earlier charts out of this one.
  fig, ax = plt.subplots()
  ax.scatter(
    df[f"measurement_{x_num}"],
    df[f"measurement_{y_num}"],
    c=df["failure"],
    alpha=0.5,
  )
  ax.set_title(f"Measurement {x_num} vs {y_num} with Failure")
  ax.set_xlabel(f"Measurement {x_num}")
  ax.set_ylabel(f"Measurement {y_num}")
  fig.savefig(path)
  plt.close(fig)  # release the figure so repeated calls do not accumulate them


def _save_distribution(df, column, title, path):
  """Save a density histogram with a KDE curve for one column of `df`."""
  fig, ax = plt.subplots()
  # histplot(kde=True, stat="density") is the documented replacement for the
  # deprecated sns.distplot.
  sns.histplot(df[column], kde=True, stat="density", ax=ax)
  ax.set_title(title)
  fig.savefig(path)
  plt.close(fig)


def plot(df):
  """Render the profiling charts for `df` and return the saved image paths.

  Each chart is drawn on its own figure, written as a PNG into the current
  working directory, and the figure is closed afterwards.

  Args:
    df: DataFrame providing the columns `failure`, `loading`, `attribute_3`,
      `measurement_3`, `measurement_4`, `measurement_10`, `measurement_13`,
      `measurement_14`, `measurement_15` and `measurement_16`.

  Returns:
    List of PNG file names, correlation heatmap first.

  Note:
    The output file names are fixed, so every call overwrites the images
    written by the previous call.
  """
  # Four measurements plotted against measurement 15.
  _save_scatter(df, 13, 15, "scatter.png")
  _save_scatter(df, 10, 15, "scatter_2.png")
  _save_scatter(df, 14, 15, "scatter_3.png")
  _save_scatter(df, 16, 15, "scatter_4.png")

  # Count of rows per `failure` value.
  fig, ax = plt.subplots()
  df["failure"].value_counts().plot(kind="bar", ax=ax)
  ax.set_title("Number of failed vs successful products")
  fig.savefig("bar.png")
  plt.close(fig)

  _save_distribution(df, "loading", "Distribution of Loading Variable", "loading_dist.png")
  _save_distribution(df, "attribute_3", "Distribution of Attribute 3", "attribute_3.png")

  # catplot is figure-level: it creates its own figure, so title, labels,
  # saving and closing all go through the grid's axes/figure.
  grid = sns.catplot(x="measurement_3", y="measurement_4", hue="failure", data=df, kind="violin")
  violin_ax = grid.ax
  violin_ax.set_title("Violin Plot of Measurement 3 vs Measurement 4 with Failures")
  violin_ax.set_xlabel("Measurement 3")
  violin_ax.set_ylabel("Measurement 4")
  violin_fig = violin_ax.figure
  violin_fig.savefig("violinplot.png")
  plt.close(violin_fig)

  # Pairwise correlation of the numeric columns only.
  fig, ax = plt.subplots()
  sns.heatmap(df.select_dtypes(include="number").corr(), ax=ax)
  ax.set_title("Correlation Between Numerical Variables")
  fig.savefig("corr.png")
  plt.close(fig)

  return [
    "corr.png",
    "scatter.png",
    "scatter_2.png",
    "scatter_3.png",
    "scatter_4.png",
    "bar.png",
    "loading_dist.png",
    "attribute_3.png",
    "violinplot.png",
  ]
  
# Components: a dataframe as the single input, and a gallery (styled with
# grid=(3,3)) that displays the image files returned by `plot`.
inputs = [gr.Dataframe(label="Supersoaker Production Data")]
outputs = [gr.Gallery(label="Profiling Dashboard").style(grid=(3,3))]

# Wire `plot` into an Interface and launch it; the first 100 rows of the
# cleaned dataset are supplied as the example input.
gr.Interface(
    fn=plot,
    inputs=inputs,
    outputs=outputs,
    examples=[df.head(100)],
    title="Supersoaker Failures Analysis Dashboard",
    description="This is a data analysis & visualization dashboard based on supersoaker failures data.",
).launch()