tpierrot committed on
Commit
0ad2349
•
1 Parent(s): 561f8a4

feat: organising tasks into categories

Browse files
Files changed (1) hide show
  1. app.py +63 -26
app.py CHANGED
@@ -7,7 +7,32 @@ import pandas as pd
7
  _ORIGINAL_DF = pd.read_csv("./data/benchmark.csv")
8
  _METRICS = ["MCC", "F1", "ACC"]
9
  _AGGREGATION_METHODS = ["mean", "max", "min", "median"]
10
- _DATASETS = list(set(_ORIGINAL_DF["Dataset"]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  _BIBTEX = """@article{DallaTorre2023TheNT,
13
  title={The Nucleotide Transformer: Building and Evaluating Robust Foundation Models for Human Genomics},
@@ -34,8 +59,13 @@ def format_number(x):
34
 
35
 
36
  def get_dataset(
37
- tasks: List[str], target_metric: str = "MCC", aggregation_method: str = "mean"
 
 
 
 
38
  ):
 
39
 
40
  aggr_fn = getattr(np, aggregation_method)
41
  scores = _ORIGINAL_DF[target_metric].apply(retrieve_array_from_text).apply(aggr_fn)
@@ -80,8 +110,22 @@ with gr.Blocks() as demo:
80
  )
81
 
82
  with gr.Row():
83
- selected_tasks = gr.CheckboxGroup(
84
- choices=_DATASETS, value=_DATASETS, label="Tasks", info="Downstream tasks."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  )
86
 
87
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
@@ -93,23 +137,6 @@ with gr.Blocks() as demo:
93
  with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
94
  gr.Markdown("Hey hey hey", elem_classes="markdown-text")
95
 
96
- # with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table",
97
- # id=2):
98
- # with gr.Column():
99
- # gr.Markdown("# ✉️✨ Request results for a new model here!",
100
- # elem_classes="markdown-text")
101
- # with gr.Column():
102
- # gr.Markdown("Select a dataset:", elem_classes="markdown-text")
103
- # with gr.Column():
104
- # model_name_textbox = gr.Textbox(
105
- # label="Model name (user_name/model_name)")
106
- # chb_coco2017 = gr.Checkbox(label="COCO validation 2017 dataset",
107
- # visible=False, value=True,
108
- # interactive=False)
109
- # with gr.Column():
110
- # mdw_submission_result = gr.Markdown()
111
- # btn_submitt = gr.Button(value="🚀 Request")
112
-
113
  gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")
114
 
115
  with gr.Row():
@@ -121,24 +148,34 @@ with gr.Blocks() as demo:
121
  elem_id="citation-button",
122
  ).style(show_copy_button=True)
123
 
124
- selected_tasks.change(
 
 
 
 
 
 
 
 
 
 
125
  get_dataset,
126
- inputs=[selected_tasks, metric_choice, aggr_choice],
127
  outputs=dataframe,
128
  )
129
  metric_choice.change(
130
  get_dataset,
131
- inputs=[selected_tasks, metric_choice, aggr_choice],
132
  outputs=dataframe,
133
  )
134
  aggr_choice.change(
135
  get_dataset,
136
- inputs=[selected_tasks, metric_choice, aggr_choice],
137
  outputs=dataframe,
138
  )
139
  demo.load(
140
  fn=get_dataset,
141
- inputs=[selected_tasks, metric_choice, aggr_choice],
142
  outputs=dataframe,
143
  )
144
 
 
7
  _ORIGINAL_DF = pd.read_csv("./data/benchmark.csv")
8
  _METRICS = ["MCC", "F1", "ACC"]
9
  _AGGREGATION_METHODS = ["mean", "max", "min", "median"]
10
+ _TASKS = {
11
+ "histone_marks": [
12
+ "H4",
13
+ "H3",
14
+ "H3K14ac",
15
+ "H3K4me1",
16
+ "H3K4me3",
17
+ "H3K4me2",
18
+ "H3K36me3",
19
+ "H4ac",
20
+ "H3K79me3",
21
+ "H3K9ac",
22
+ ],
23
+ "regulatory_elements": [
24
+ "promoter_no_tata",
25
+ "enhancers",
26
+ "enhancers_types",
27
+ "promoter_all",
28
+ "promoter_tata",
29
+ ],
30
+ "RNA_production": [
31
+ "splice_sites_donors",
32
+ "splice_sites_all",
33
+ "splice_sites_acceptors",
34
+ ],
35
+ }
36
 
37
  _BIBTEX = """@article{DallaTorre2023TheNT,
38
  title={The Nucleotide Transformer: Building and Evaluating Robust Foundation Models for Human Genomics},
 
59
 
60
 
61
  def get_dataset(
62
+ histone_tasks: List[str],
63
+ regulatory_tasks: List[str],
64
+ rna_tasks: List[str],
65
+ target_metric: str = "MCC",
66
+ aggregation_method: str = "mean",
67
  ):
68
+ tasks = histone_tasks + regulatory_tasks + rna_tasks
69
 
70
  aggr_fn = getattr(np, aggregation_method)
71
  scores = _ORIGINAL_DF[target_metric].apply(retrieve_array_from_text).apply(aggr_fn)
 
110
  )
111
 
112
  with gr.Row():
113
+ regulatory_tasks = gr.CheckboxGroup(
114
+ choices=_TASKS["regulatory_elements"],
115
+ value=_TASKS["regulatory_elements"],
116
+ label="Regulatory Elements Downstream Tasks",
117
+ info="Human data.",
118
+ )
119
+ rna_tasks = gr.CheckboxGroup(
120
+ choices=_TASKS["RNA_production"],
121
+ value=_TASKS["RNA_production"],
122
+ label="RNA Production Downstream tasks.",
123
+ info="Human data.",
124
+ )
125
+ histone_tasks = gr.CheckboxGroup(
126
+ choices=_TASKS["histone_marks"],
127
+ label="Histone Modification Downstream Tasks",
128
+ info="Yeast data.",
129
  )
130
 
131
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
 
137
  with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
138
  gr.Markdown("Hey hey hey", elem_classes="markdown-text")
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")
141
 
142
  with gr.Row():
 
148
  elem_id="citation-button",
149
  ).style(show_copy_button=True)
150
 
151
+ histone_tasks.change(
152
+ get_dataset,
153
+ inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
154
+ outputs=dataframe,
155
+ )
156
+ regulatory_tasks.change(
157
+ get_dataset,
158
+ inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
159
+ outputs=dataframe,
160
+ )
161
+ rna_tasks.change(
162
  get_dataset,
163
+ inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
164
  outputs=dataframe,
165
  )
166
  metric_choice.change(
167
  get_dataset,
168
+ inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
169
  outputs=dataframe,
170
  )
171
  aggr_choice.change(
172
  get_dataset,
173
+ inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
174
  outputs=dataframe,
175
  )
176
  demo.load(
177
  fn=get_dataset,
178
+ inputs=[histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice],
179
  outputs=dataframe,
180
  )
181