evijit HF staff committed on
Commit
6301ef2
1 Parent(s): a82a162

Upload 5 files

Browse files
app.py CHANGED
@@ -99,6 +99,153 @@ def create_category_summary(category_data):
99
  html += "</div>"
100
  return html
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def get_coverage_class(has_feature):
103
  """Return CSS class based on feature presence"""
104
  return 'covered' if has_feature else 'not-covered'
@@ -271,35 +418,32 @@ def create_category_chart(selected_models, selected_categories):
271
 
272
  def update_detailed_scorecard(model, selected_categories):
273
  if not model:
274
- return [
275
- gr.update(value="Please select a model to view details.", visible=True),
276
- gr.update(visible=False),
277
- gr.update(visible=False)
278
- ]
279
-
280
- print("Selected categories:", selected_categories)
281
- print("Available categories in model:", list(models[model]['scores'].keys()))
282
 
283
- # Sort categories before processing
284
  selected_categories = sort_categories(selected_categories)
285
  metadata_html = create_metadata_card(models[model]['metadata'])
 
286
 
287
-
288
- # metadata_md = f"## Model Metadata for {model}\n\n"
289
- # for key, value in models[model]['metadata'].items():
290
- # metadata_md += f"**{key}:** {value}\n\n"
291
 
292
  total_yes = 0
293
  total_no = 0
294
  total_na = 0
 
295
 
 
296
  all_cards_content = "<div class='container'>"
297
  for category_name in selected_categories:
298
  if category_name in models[model]['scores']:
299
  category_data = models[model]['scores'][category_name]
300
  card_content = f"<div class='card'><div class='card-title'>{category_name}</div>"
301
-
302
- # Add summary section at the top of each card
303
  card_content += create_category_summary(category_data)
304
 
305
  # Sort sections within each category
@@ -312,6 +456,8 @@ def update_detailed_scorecard(model, selected_categories):
312
 
313
  for section, details in sorted_sections:
314
  status = details['status']
 
 
315
  sources = details.get('sources', [])
316
  questions = details.get('questions', {})
317
 
@@ -374,23 +520,26 @@ def update_detailed_scorecard(model, selected_categories):
374
 
375
  if category_yes + category_no > 0:
376
  category_score = category_yes / (category_yes + category_no) * 100
377
- card_content += f"<div class='category-score'>Category Score: {category_score:.2f}% (Yes: {category_yes}, No: {category_no}, N/A: {category_na})</div>"
378
  elif category_na > 0:
379
- card_content += f"<div class='category-score'>Category Score: N/A (All {category_na} items not applicable)</div>"
380
 
381
  card_content += "</div>"
382
  all_cards_content += card_content
383
 
384
  all_cards_content += "</div>"
385
 
386
- if total_yes + total_no > 0:
 
 
 
387
  total_score = total_yes / (total_yes + total_no) * 100
388
  total_score_md = f"<div class='total-score'>Total Score: {total_score:.2f}% (Yes: {total_yes}, No: {total_no}, N/A: {total_na})</div>"
389
  else:
390
  total_score_md = "<div class='total-score'>No applicable scores (all items N/A)</div>"
391
 
392
  return [
393
- gr.update(value=metadata_html, visible=True),
394
  gr.update(value=all_cards_content, visible=True),
395
  gr.update(value=total_score_md, visible=True)
396
  ]
@@ -823,6 +972,130 @@ css = """
823
  color: #999;
824
  border-color: #444;
825
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
826
  """
827
 
828
  first_model = next(iter(models.values()))
 
99
  html += "</div>"
100
  return html
101
 
102
def create_overall_summary(model_data, selected_categories):
    """Build the HTML "overall summary" card for one model.

    Args:
        model_data: Mapping with a 'scores' entry. ``scores`` maps a category
            name to a mapping of section name -> section dict. Each section
            dict must have a 'status' key and may have a 'questions' mapping
            of question text -> answer (counted as completed when truthy).
        selected_categories: Collection of category names to include in the
            summary. ``None`` is treated as an empty selection.

    Returns:
        str: An HTML fragment containing overall completion metrics, the
        evaluation-type coverage grid, and one completion bar per selected
        category.

    Raises:
        KeyError: If ``model_data`` has no 'scores' key or a section has no
            'status' key.
    """
    scores = model_data['scores']
    # Build the lookup once; also tolerates None instead of raising TypeError.
    selected = set(selected_categories or ())

    # Keywords (matched case-insensitively against question text) that mark a
    # question as belonging to each evaluation type.
    eval_keywords = {
        'human': ('human',),
        'quantitative': ('quantitative', 'metric', 'benchmark'),
        'documentation': ('documentation',),
        'monitoring': ('monitoring',),
        'transparency': ('transparency',),
    }
    eval_icons = {
        'human': '👥',
        'quantitative': '📊',
        'documentation': '📝',
        'monitoring': '📡',
        'transparency': '🔍',
    }
    evaluation_types = dict.fromkeys(eval_keywords, 0)

    total_sections = 0
    completed_sections = 0
    total_questions = 0
    completed_questions = 0
    category_completion = {}

    for category, category_data in scores.items():
        if category not in selected:
            continue  # Skip unselected categories

        category_questions = 0
        category_completed = 0
        category_na = 0

        for section in category_data.values():
            total_sections += 1
            status = section['status']
            if status == 'N/A':
                # N/A sections contribute neither questions nor coverage.
                category_na += 1
                continue
            if status == 'Yes':
                completed_sections += 1

            questions = section.get('questions', {})
            section_total = len(questions)
            section_completed = sum(1 for answer in questions.values() if answer)

            total_questions += section_total
            completed_questions += section_completed
            category_questions += section_total
            category_completed += section_completed

            # NOTE(review): a question counts toward coverage whenever its
            # text matches, regardless of its answer -- confirm this is the
            # intended meaning of "coverage".
            for question in questions:
                text = question.lower()
                for eval_type, keywords in eval_keywords.items():
                    if any(keyword in text for keyword in keywords):
                        evaluation_types[eval_type] += 1

        # A category is N/A when every section is N/A (this is also true for
        # a category that has no sections at all).
        is_na = category_na == len(category_data)
        if category_questions > 0 and not is_na:
            category_rate = category_completed / category_questions * 100
        else:
            category_rate = 0

        category_completion[category] = {
            'completion_rate': category_rate,
            'is_na': is_na,
        }

    overall_rate = (completed_questions / total_questions * 100) if total_questions > 0 else 0

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        "<div class='card overall-summary-card'>",
        "<div class='card-title'>📊 Overall Model Evaluation Summary</div>",
        # Key metrics section
        "<div class='summary-grid'>",
        # Overall completion metrics
        "<div class='summary-section'>",
        "<div class='summary-subtitle'>📈 Overall Completion</div>",
        f"<div class='metric-row'><span class='metric-label'>Overall Completion Rate:</span> <span class='metric-value'>{overall_rate:.1f}%</span></div>",
        f"<div class='metric-row'><span class='metric-label'>Sections Completed:</span> <span class='metric-value'>{completed_sections}/{total_sections}</span></div>",
        f"<div class='metric-row'><span class='metric-label'>Questions Completed:</span> <span class='metric-value'>{completed_questions}/{total_questions}</span></div>",
        "</div>",
        # Evaluation coverage
        "<div class='summary-section'>",
        "<div class='summary-subtitle'>🎯 Evaluation Types Coverage</div>",
        "<div class='coverage-grid'>",
    ]
    for eval_type, count in evaluation_types.items():
        icon = eval_icons.get(eval_type, '❓')
        has_coverage = count > 0
        parts.append(
            f"<div class='coverage-item {get_coverage_class(has_coverage)}'>{icon} {eval_type.title()}</div>"
        )
    parts.append("</div>")
    parts.append("</div>")
    parts.append("</div>")  # End summary-grid

    # Category breakdown
    parts.append("<div class='summary-section'>")
    parts.append("<div class='summary-subtitle'>📋 Category Completion Breakdown</div>")
    parts.append("<div class='category-completion-grid'>")

    # Sort and filter categories
    sorted_categories = [cat for cat in sort_categories(scores.keys()) if cat in selected]

    for category in sorted_categories:
        info = category_completion[category]
        # Drop the leading "N. " numbering if present.
        category_name = category.split('. ', 1)[1] if '. ' in category else category
        # Drop the trailing word of the name, but never reduce a one-word
        # name to an empty label.
        words = category_name.split(' ')
        if len(words) > 1:
            category_name = ' '.join(words[:-1])

        # Determine display text and style
        if info['is_na']:
            completion_text = "N/A"
            bar_width = "0"
            style_class = "na"
        else:
            completion_text = f"{info['completion_rate']:.1f}%"
            bar_width = f"{info['completion_rate']}"
            style_class = "active"

        parts.append(
            "\n<div class='category-completion-item'>\n"
            f"<div class='category-name'>{category_name}</div>\n"
            f"<div class='completion-bar-container {style_class}'>\n"
            f"<div class='completion-bar' style='width: {bar_width}%;'></div>\n"
            f"<span class='completion-text'>{completion_text}</span>\n"
            "</div>\n"
            "</div>\n"
        )

    parts.append("</div></div>")
    parts.append("</div>")  # End overall-summary-card
    return "".join(parts)
248
+
249
  def get_coverage_class(has_feature):
250
  """Return CSS class based on feature presence"""
251
  return 'covered' if has_feature else 'not-covered'
 
418
 
419
  def update_detailed_scorecard(model, selected_categories):
420
  if not model:
421
+ return [
422
+ gr.update(value="Please select a model to view details.", visible=True),
423
+ gr.update(visible=False),
424
+ gr.update(visible=False)
425
+ ]
 
 
 
426
 
 
427
  selected_categories = sort_categories(selected_categories)
428
  metadata_html = create_metadata_card(models[model]['metadata'])
429
+ overall_summary_html = create_overall_summary(models[model], selected_categories)
430
 
431
+ # Combine metadata and overall summary
432
+ combined_header = metadata_html + overall_summary_html
 
 
433
 
434
  total_yes = 0
435
  total_no = 0
436
  total_na = 0
437
+ has_non_na = False
438
 
439
+ # Create category cards
440
  all_cards_content = "<div class='container'>"
441
  for category_name in selected_categories:
442
  if category_name in models[model]['scores']:
443
  category_data = models[model]['scores'][category_name]
444
  card_content = f"<div class='card'><div class='card-title'>{category_name}</div>"
445
+
446
+ # Add category-specific summary at the top of each card
447
  card_content += create_category_summary(category_data)
448
 
449
  # Sort sections within each category
 
456
 
457
  for section, details in sorted_sections:
458
  status = details['status']
459
+ if status != 'N/A':
460
+ has_non_na = True
461
  sources = details.get('sources', [])
462
  questions = details.get('questions', {})
463
 
 
520
 
521
  if category_yes + category_no > 0:
522
  category_score = category_yes / (category_yes + category_no) * 100
523
+ card_content += f"<div class='category-score'>Completion Score Breakdown: {category_score:.2f}% Yes: {category_yes}, No: {category_no}, N/A: {category_na}</div>"
524
  elif category_na > 0:
525
+ card_content += f"<div class='category-score'>Completion Score Breakdown: N/A (All {category_na} items not applicable)</div>"
526
 
527
  card_content += "</div>"
528
  all_cards_content += card_content
529
 
530
  all_cards_content += "</div>"
531
 
532
+ # Create total score
533
+ if not has_non_na:
534
+ total_score_md = "<div class='total-score'>No applicable scores (all items N/A)</div>"
535
+ elif total_yes + total_no > 0:
536
  total_score = total_yes / (total_yes + total_no) * 100
537
  total_score_md = f"<div class='total-score'>Total Score: {total_score:.2f}% (Yes: {total_yes}, No: {total_no}, N/A: {total_na})</div>"
538
  else:
539
  total_score_md = "<div class='total-score'>No applicable scores (all items N/A)</div>"
540
 
541
  return [
542
+ gr.update(value=combined_header, visible=True),
543
  gr.update(value=all_cards_content, visible=True),
544
  gr.update(value=total_score_md, visible=True)
545
  ]
 
972
  color: #999;
973
  border-color: #444;
974
  }
975
+
976
+ .overall-summary-card {
977
+ width: 100% !important;
978
+ margin-bottom: 30px;
979
+ }
980
+
981
+ .summary-grid {
982
+ display: grid;
983
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
984
+ gap: 20px;
985
+ margin-bottom: 20px;
986
+ }
987
+
988
+ .category-completion-grid {
989
+ display: grid;
990
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
991
+ gap: 16px;
992
+ margin-top: 12px;
993
+ }
994
+
995
+ .category-completion-item {
996
+ background-color: #f8f9fa;
997
+ border-radius: 8px;
998
+ padding: 12px;
999
+ }
1000
+
1001
+ .category-name {
1002
+ font-size: 0.9em;
1003
+ font-weight: 500;
1004
+ margin-bottom: 8px;
1005
+ color: #555;
1006
+ }
1007
+
1008
+ .completion-bar-container {
1009
+ height: 24px;
1010
+ background-color: #eee;
1011
+ border-radius: 12px;
1012
+ position: relative;
1013
+ overflow: hidden;
1014
+ }
1015
+
1016
+ .completion-bar {
1017
+ height: 100%;
1018
+ background-color: #4CAF50;
1019
+ transition: width 0.3s ease;
1020
+ }
1021
+
1022
+ .completion-text {
1023
+ position: absolute;
1024
+ right: 8px;
1025
+ top: 50%;
1026
+ transform: translateY(-50%);
1027
+ font-size: 0.8em;
1028
+ font-weight: 600;
1029
+ color: #333;
1030
+ }
1031
+
1032
+ .dark .category-completion-item {
1033
+ background-color: #2a2a2a;
1034
+ }
1035
+
1036
+ .dark .category-name {
1037
+ color: #ccc;
1038
+ }
1039
+
1040
+ .dark .completion-bar-container {
1041
+ background-color: #333;
1042
+ }
1043
+
1044
+ .dark .completion-bar {
1045
+ background-color: #2e7d32;
1046
+ }
1047
+
1048
+ .dark .completion-text {
1049
+ color: #fff;
1050
+ }
1051
+ .completion-bar-container.not-selected {
1052
+ opacity: 0.5;
1053
+ background-color: #f0f0f0;
1054
+ }
1055
+
1056
+ .completion-bar-container.na {
1057
+ background-color: #f0f0f0;
1058
+ }
1059
+
1060
+ .completion-bar-container.na .completion-bar {
1061
+ background-color: #999;
1062
+ width: 0 !important; /* Ensure no bar shows for N/A */
1063
+ }
1064
+
1065
+ .dark .completion-bar-container.na {
1066
+ background-color: #2d2d2d;
1067
+ }
1068
+
1069
+ .dark .completion-bar-container.na .completion-bar {
1070
+ background-color: #666;
1071
+ }
1072
+
1073
+ .category-completion-item {
1074
+ background-color: #f8f9fa;
1075
+ border-radius: 8px;
1076
+ padding: 12px;
1077
+ height: 80px;
1078
+ display: grid;
1079
+ grid-template-rows: 1fr auto;
1080
+ gap: 8px;
1081
+ }
1082
+
1083
+ .category-name {
1084
+ font-size: 0.9em;
1085
+ font-weight: 500;
1086
+ color: #555;
1087
+ align-self: start;
1088
+ line-height: 1.3;
1089
+ }
1090
+
1091
+ .completion-bar-container {
1092
+ height: 24px;
1093
+ background-color: #eee;
1094
+ border-radius: 12px;
1095
+ position: relative;
1096
+ overflow: hidden;
1097
+ align-self: end;
1098
+ }
1099
  """
1100
 
1101
  first_model = next(iter(models.values()))
model_data/.DS_Store ADDED
Binary file (6.15 kB). View file
 
model_data/model_a_data.json CHANGED
@@ -144,7 +144,7 @@
144
  }
145
  }
146
  },
147
- "3. Disparate Performance": {
148
  "3.1 Disparate Performance Overview": {
149
  "status": "N/A",
150
  "sources": [],
 
144
  }
145
  }
146
  },
147
+ "3. Disparate Performance Evaluation": {
148
  "3.1 Disparate Performance Overview": {
149
  "status": "N/A",
150
  "sources": [],
model_data/model_b_data.json CHANGED
@@ -144,7 +144,7 @@
144
  }
145
  }
146
  },
147
- "3. Disparate Performance": {
148
  "3.1 Disparate Performance Overview": {
149
  "status": "N/A",
150
  "sources": [],
 
144
  }
145
  }
146
  },
147
+ "3. Disparate Performance Evaluation": {
148
  "3.1 Disparate Performance Overview": {
149
  "status": "N/A",
150
  "sources": [],
model_data/model_c_data.json CHANGED
@@ -144,7 +144,7 @@
144
  }
145
  }
146
  },
147
- "3. Disparate Performance": {
148
  "3.1 Disparate Performance Overview": {
149
  "status": "N/A",
150
  "sources": [],
 
144
  }
145
  }
146
  },
147
+ "3. Disparate Performance Evaluation": {
148
  "3.1 Disparate Performance Overview": {
149
  "status": "N/A",
150
  "sources": [],