cmpatino committed on
Commit
b7b95ef
1 Parent(s): 39ac7ff

Include ROC-AUC table and improve 2D plots

Browse files
Files changed (1) hide show
  1. app.py +76 -34
app.py CHANGED
@@ -8,29 +8,33 @@ from datasets import load_dataset
8
 
9
  import histos
10
 
11
-
12
  dataset = load_dataset("cmpatino/optimal_observables", "train")
13
  dataset_df = dataset["train"].to_pandas()
14
  dataset_df["target"] = dataset_df["target"].map({0: "spin-OFF", 1: "spin-ON"})
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def get_plot(features, n_bins):
18
  plotting_df = dataset_df.copy()
19
  if len(features) == 1:
20
  fig, ax = plt.subplots()
21
  pos_samples = plotting_df[plotting_df["target"] == "spin-ON"][features[0]]
22
  neg_samples = plotting_df[plotting_df["target"] == "spin-OFF"][features[0]]
23
- y_score = np.concatenate([pos_samples, neg_samples], axis=0)
24
- if pos_samples.mean() >= neg_samples.mean():
25
- y_true = np.concatenate(
26
- [np.ones_like(pos_samples), np.zeros_like(neg_samples)], axis=0
27
- )
28
- roc_auc_score = metrics.roc_auc_score(y_true, y_score)
29
- else:
30
- y_true = np.concatenate(
31
- [np.zeros_like(pos_samples), np.ones_like(neg_samples)], axis=0
32
- )
33
- roc_auc_score = metrics.roc_auc_score(y_true, y_score)
34
  values = [
35
  pos_samples,
36
  neg_samples,
@@ -46,35 +50,73 @@ def get_plot(features, n_bins):
46
  )
47
  return fig
48
  if len(features) == 2:
49
- return sns.displot(
50
- plotting_df,
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  x=features[0],
52
  y=features[1],
53
- hue="target",
54
  bins=n_bins,
55
- height=8,
56
- aspect=1,
57
- ).fig
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
 
60
  with gr.Blocks() as demo:
61
- with gr.Column():
62
- with gr.Row():
63
- features = gr.Dropdown(
64
- choices=dataset_df.columns.to_list(),
65
- label="Feature",
66
- value="m_tt",
67
- multiselect=True,
68
- )
69
- n_bins = gr.Slider(
70
- label="Number of Bins for Histogram",
71
- value=10,
72
- minimum=10,
73
- maximum=100,
74
- step=10,
75
- )
 
76
 
77
- feature_plot = gr.Plot(label="Feature's Plot")
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  features.change(
80
  get_plot,
 
8
 
9
  import histos
10
 
 
11
  dataset = load_dataset("cmpatino/optimal_observables", "train")
12
  dataset_df = dataset["train"].to_pandas()
13
  dataset_df["target"] = dataset_df["target"].map({0: "spin-OFF", 1: "spin-ON"})
14
 
15
 
16
def get_roc_auc_scores(pos_samples, neg_samples):
    """Return the ROC-AUC obtained by using a feature's raw values as scores.

    The two sample sets are pooled into a single score vector. The set with
    the larger mean is labelled 1 and the other 0; when the means are equal,
    ``pos_samples`` keeps the label 1.

    Args:
        pos_samples: Feature values of the first class (the visible callers
            pass the "spin-ON" rows). Must provide ``.mean()``.
        neg_samples: Feature values of the second class (the visible callers
            pass the "spin-OFF" rows). Must provide ``.mean()``.

    Returns:
        The result of ``metrics.roc_auc_score`` for the pooled labels/scores.
    """
    scores = np.concatenate([pos_samples, neg_samples], axis=0)

    # Decide which class plays the "positive" role from the ordering of means.
    if pos_samples.mean() >= neg_samples.mean():
        first_labels = np.ones_like(pos_samples)
        second_labels = np.zeros_like(neg_samples)
    else:
        first_labels = np.zeros_like(pos_samples)
        second_labels = np.ones_like(neg_samples)

    # Labels are concatenated in the same order as the scores above.
    labels = np.concatenate([first_labels, second_labels], axis=0)
    return metrics.roc_auc_score(labels, scores)
29
+
30
+
31
  def get_plot(features, n_bins):
32
  plotting_df = dataset_df.copy()
33
  if len(features) == 1:
34
  fig, ax = plt.subplots()
35
  pos_samples = plotting_df[plotting_df["target"] == "spin-ON"][features[0]]
36
  neg_samples = plotting_df[plotting_df["target"] == "spin-OFF"][features[0]]
37
+ roc_auc_score = get_roc_auc_scores(pos_samples, neg_samples)
 
 
 
 
 
 
 
 
 
 
38
  values = [
39
  pos_samples,
40
  neg_samples,
 
50
  )
51
  return fig
52
  if len(features) == 2:
53
+ fig, ax = plt.subplots(ncols=2, figsize=(12, 6))
54
+ pos_samples = plotting_df[plotting_df["target"] == "spin-ON"][features]
55
+ neg_samples = plotting_df[plotting_df["target"] == "spin-OFF"][features]
56
+ x_lims = (
57
+ min(pos_samples[features[0]].min(), neg_samples[features[0]].min()),
58
+ max(pos_samples[features[0]].max(), neg_samples[features[0]].max()),
59
+ )
60
+ y_lims = (
61
+ min(pos_samples[features[1]].min(), neg_samples[features[1]].min()),
62
+ max(pos_samples[features[1]].max(), neg_samples[features[1]].max()),
63
+ )
64
+ ranges = (x_lims, y_lims)
65
+
66
+ sns.histplot(
67
+ pos_samples,
68
  x=features[0],
69
  y=features[1],
 
70
  bins=n_bins,
71
+ ax=ax[0],
72
+ color="C0",
73
+ binrange=ranges,
74
+ )
75
+ sns.histplot(
76
+ neg_samples,
77
+ x=features[0],
78
+ y=features[1],
79
+ bins=n_bins,
80
+ ax=ax[1],
81
+ color="C1",
82
+ binrange=ranges,
83
+ )
84
+ ax[0].set_title("spin-ON")
85
+ ax[1].set_title("spin-OFF")
86
+ return fig
87
 
88
 
89
  with gr.Blocks() as demo:
90
+ with gr.Tab("Plots"):
91
+ with gr.Column():
92
+ with gr.Row():
93
+ features = gr.Dropdown(
94
+ choices=dataset_df.columns.to_list(),
95
+ label="Feature",
96
+ value="m_tt",
97
+ multiselect=True,
98
+ )
99
+ n_bins = gr.Slider(
100
+ label="Number of Bins for Histogram",
101
+ value=10,
102
+ minimum=10,
103
+ maximum=100,
104
+ step=10,
105
+ )
106
 
107
+ feature_plot = gr.Plot(label="Feature's Plot")
108
+ with gr.Tab("ROC-AUC Table"):
109
+ roc_auc_values = []
110
+ for feature in dataset_df.columns.to_list():
111
+ if feature in ["target", "reco_weight"]:
112
+ continue
113
+ pos_samples = dataset_df[dataset_df["target"] == "spin-ON"][feature]
114
+ neg_samples = dataset_df[dataset_df["target"] == "spin-OFF"][feature]
115
+ roc_auc_score = get_roc_auc_scores(pos_samples, neg_samples)
116
+ roc_auc_values.append([feature, roc_auc_score])
117
+ roc_auc_table = gr.Dataframe(
118
+ label="ROC-AUC Table", headers=["Feature", "ROC-AUC"], value=roc_auc_values
119
+ )
120
 
121
  features.change(
122
  get_plot,