Spaces:
Running
Running
Upload 5 files
Browse files
- app.py +292 -19
- model_data/.DS_Store +0 -0
- model_data/model_a_data.json +1 -1
- model_data/model_b_data.json +1 -1
- model_data/model_c_data.json +1 -1
app.py
CHANGED
@@ -99,6 +99,153 @@ def create_category_summary(category_data):
|
|
99 |
html += "</div>"
|
100 |
return html
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
def get_coverage_class(has_feature):
    """Translate a feature-presence flag into the matching CSS class name.

    Any truthy ``has_feature`` yields ``'covered'``; any falsy value yields
    ``'not-covered'``.
    """
    if has_feature:
        return 'covered'
    return 'not-covered'
|
@@ -271,35 +418,32 @@ def create_category_chart(selected_models, selected_categories):
|
|
271 |
|
272 |
def update_detailed_scorecard(model, selected_categories):
|
273 |
if not model:
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
print("Selected categories:", selected_categories)
|
281 |
-
print("Available categories in model:", list(models[model]['scores'].keys()))
|
282 |
|
283 |
-
# Sort categories before processing
|
284 |
selected_categories = sort_categories(selected_categories)
|
285 |
metadata_html = create_metadata_card(models[model]['metadata'])
|
|
|
286 |
|
287 |
-
|
288 |
-
|
289 |
-
# for key, value in models[model]['metadata'].items():
|
290 |
-
# metadata_md += f"**{key}:** {value}\n\n"
|
291 |
|
292 |
total_yes = 0
|
293 |
total_no = 0
|
294 |
total_na = 0
|
|
|
295 |
|
|
|
296 |
all_cards_content = "<div class='container'>"
|
297 |
for category_name in selected_categories:
|
298 |
if category_name in models[model]['scores']:
|
299 |
category_data = models[model]['scores'][category_name]
|
300 |
card_content = f"<div class='card'><div class='card-title'>{category_name}</div>"
|
301 |
-
|
302 |
-
# Add summary
|
303 |
card_content += create_category_summary(category_data)
|
304 |
|
305 |
# Sort sections within each category
|
@@ -312,6 +456,8 @@ def update_detailed_scorecard(model, selected_categories):
|
|
312 |
|
313 |
for section, details in sorted_sections:
|
314 |
status = details['status']
|
|
|
|
|
315 |
sources = details.get('sources', [])
|
316 |
questions = details.get('questions', {})
|
317 |
|
@@ -374,23 +520,26 @@ def update_detailed_scorecard(model, selected_categories):
|
|
374 |
|
375 |
if category_yes + category_no > 0:
|
376 |
category_score = category_yes / (category_yes + category_no) * 100
|
377 |
-
card_content += f"<div class='category-score'>
|
378 |
elif category_na > 0:
|
379 |
-
card_content += f"<div class='category-score'>
|
380 |
|
381 |
card_content += "</div>"
|
382 |
all_cards_content += card_content
|
383 |
|
384 |
all_cards_content += "</div>"
|
385 |
|
386 |
-
|
|
|
|
|
|
|
387 |
total_score = total_yes / (total_yes + total_no) * 100
|
388 |
total_score_md = f"<div class='total-score'>Total Score: {total_score:.2f}% (Yes: {total_yes}, No: {total_no}, N/A: {total_na})</div>"
|
389 |
else:
|
390 |
total_score_md = "<div class='total-score'>No applicable scores (all items N/A)</div>"
|
391 |
|
392 |
return [
|
393 |
-
gr.update(value=
|
394 |
gr.update(value=all_cards_content, visible=True),
|
395 |
gr.update(value=total_score_md, visible=True)
|
396 |
]
|
@@ -823,6 +972,130 @@ css = """
|
|
823 |
color: #999;
|
824 |
border-color: #444;
|
825 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
826 |
"""
|
827 |
|
828 |
first_model = next(iter(models.values()))
|
|
|
99 |
html += "</div>"
|
100 |
return html
|
101 |
|
102 |
+
def create_overall_summary(model_data, selected_categories):
    """Create a comprehensive HTML summary of all selected categories.

    Args:
        model_data: Mapping with a ``'scores'`` entry shaped as
            ``{category: {section: {'status': str, 'questions': {...}}}}``.
            ``'questions'`` is optional per section; ``'status'`` is required.
        selected_categories: Iterable of category names to include. ``None``
            is treated as an empty selection instead of raising.

    Returns:
        An HTML string containing the overall completion metrics, the
        evaluation-type coverage badges and one completion bar per selected
        category.

    Raises:
        KeyError: If ``model_data`` has no ``'scores'`` entry or a section
            has no ``'status'`` entry.
    """
    scores = model_data['scores']
    # Materialise once: tolerates None and one-shot iterables, and gives
    # constant-time membership tests for the two passes below.
    selected = set(selected_categories or ())

    # Keywords whose presence in a question title marks an evaluation type.
    # Insertion order is the order in which the badges are rendered.
    keyword_map = {
        'human': ('human',),
        'quantitative': ('quantitative', 'metric', 'benchmark'),
        'documentation': ('documentation',),
        'monitoring': ('monitoring',),
        'transparency': ('transparency',),
    }
    icons = {
        'human': '👥',
        'quantitative': '📊',
        'documentation': '📝',
        'monitoring': '📡',
        'transparency': '🔍',
    }

    total_sections = 0
    completed_sections = 0
    total_questions = 0
    completed_questions = 0
    evaluation_types = dict.fromkeys(keyword_map, 0)
    category_completion = {}

    for category, category_data in scores.items():
        if category not in selected:
            continue  # Skip unselected categories

        category_questions = 0
        category_completed = 0
        category_na = 0

        for section in category_data.values():
            total_sections += 1
            status = section['status']
            if status == 'Yes':
                completed_sections += 1
            elif status == 'N/A':
                # N/A sections contribute neither questions nor coverage.
                category_na += 1
                continue

            questions = section.get('questions', {})
            section_total = len(questions)
            section_completed = sum(1 for answer in questions.values() if answer)

            total_questions += section_total
            completed_questions += section_completed
            category_questions += section_total
            category_completed += section_completed

            # A question counts at most once per evaluation type.
            for question in questions:
                lowered = question.lower()
                for eval_type, keywords in keyword_map.items():
                    if any(keyword in lowered for keyword in keywords):
                        evaluation_types[eval_type] += 1

        # A category is N/A when every one of its sections is N/A (an empty
        # category therefore also counts as N/A, as before).
        is_na = category_na == len(category_data)
        if category_questions > 0 and not is_na:
            category_rate = category_completed / category_questions * 100
        else:
            category_rate = 0

        category_completion[category] = {
            'completion_rate': category_rate,
            'is_na': is_na,
        }

    completion_rate = (completed_questions / total_questions * 100) if total_questions > 0 else 0

    # Collect fragments and join once instead of repeated string +=.
    parts = [
        "<div class='card overall-summary-card'>",
        "<div class='card-title'>📊 Overall Model Evaluation Summary</div>",
        # Key metrics section
        "<div class='summary-grid'>",
        # Overall completion metrics
        "<div class='summary-section'>",
        "<div class='summary-subtitle'>📈 Overall Completion</div>",
        f"<div class='metric-row'><span class='metric-label'>Overall Completion Rate:</span> <span class='metric-value'>{completion_rate:.1f}%</span></div>",
        f"<div class='metric-row'><span class='metric-label'>Sections Completed:</span> <span class='metric-value'>{completed_sections}/{total_sections}</span></div>",
        f"<div class='metric-row'><span class='metric-label'>Questions Completed:</span> <span class='metric-value'>{completed_questions}/{total_questions}</span></div>",
        "</div>",
        # Evaluation coverage
        "<div class='summary-section'>",
        "<div class='summary-subtitle'>🎯 Evaluation Types Coverage</div>",
        "<div class='coverage-grid'>",
    ]

    for eval_type, count in evaluation_types.items():
        icon = icons.get(eval_type, '❓')
        has_coverage = count > 0
        parts.append(
            f"<div class='coverage-item {get_coverage_class(has_coverage)}'>{icon} {eval_type.title()}</div>"
        )
    parts.append("</div>")
    parts.append("</div>")

    parts.append("</div>")  # End summary-grid

    # Category breakdown
    parts.append("<div class='summary-section'>")
    parts.append("<div class='summary-subtitle'>📋 Category Completion Breakdown</div>")
    parts.append("<div class='category-completion-grid'>")

    # Sort and filter categories
    sorted_categories = [cat for cat in sort_categories(scores.keys()) if cat in selected]

    for category in sorted_categories:
        info = category_completion[category]
        # Strip the leading "N. " numbering prefix when present.
        category_name = category.split('. ', 1)[1] if '. ' in category else category
        # Drop the trailing word of the name to keep the label short, but
        # never reduce a single-word name to an empty label.
        words = category_name.split(' ')
        if len(words) > 1:
            category_name = ' '.join(words[:-1])

        # Determine display text and style
        if info['is_na']:
            completion_text = "N/A"
            bar_width = "0"
            style_class = "na"
        else:
            completion_text = f"{info['completion_rate']:.1f}%"
            bar_width = f"{info['completion_rate']}"
            style_class = "active"

        parts.append(f"""
            <div class='category-completion-item'>
                <div class='category-name'>{category_name}</div>
                <div class='completion-bar-container {style_class}'>
                    <div class='completion-bar' style='width: {bar_width}%;'></div>
                    <span class='completion-text'>{completion_text}</span>
                </div>
            </div>
        """)

    parts.append("</div></div>")
    parts.append("</div>")  # End overall-summary-card
    return "".join(parts)
|
248 |
+
|
249 |
def get_coverage_class(has_feature):
    """Pick the CSS class for a coverage badge.

    Returns ``'covered'`` when ``has_feature`` is truthy and
    ``'not-covered'`` otherwise.
    """
    css_classes = ('not-covered', 'covered')
    # bool() normalises any truthy/falsy value to index 1 or 0.
    return css_classes[bool(has_feature)]
|
|
|
418 |
|
419 |
def update_detailed_scorecard(model, selected_categories):
|
420 |
if not model:
|
421 |
+
return [
|
422 |
+
gr.update(value="Please select a model to view details.", visible=True),
|
423 |
+
gr.update(visible=False),
|
424 |
+
gr.update(visible=False)
|
425 |
+
]
|
|
|
|
|
|
|
426 |
|
|
|
427 |
selected_categories = sort_categories(selected_categories)
|
428 |
metadata_html = create_metadata_card(models[model]['metadata'])
|
429 |
+
overall_summary_html = create_overall_summary(models[model], selected_categories)
|
430 |
|
431 |
+
# Combine metadata and overall summary
|
432 |
+
combined_header = metadata_html + overall_summary_html
|
|
|
|
|
433 |
|
434 |
total_yes = 0
|
435 |
total_no = 0
|
436 |
total_na = 0
|
437 |
+
has_non_na = False
|
438 |
|
439 |
+
# Create category cards
|
440 |
all_cards_content = "<div class='container'>"
|
441 |
for category_name in selected_categories:
|
442 |
if category_name in models[model]['scores']:
|
443 |
category_data = models[model]['scores'][category_name]
|
444 |
card_content = f"<div class='card'><div class='card-title'>{category_name}</div>"
|
445 |
+
|
446 |
+
# Add category-specific summary at the top of each card
|
447 |
card_content += create_category_summary(category_data)
|
448 |
|
449 |
# Sort sections within each category
|
|
|
456 |
|
457 |
for section, details in sorted_sections:
|
458 |
status = details['status']
|
459 |
+
if status != 'N/A':
|
460 |
+
has_non_na = True
|
461 |
sources = details.get('sources', [])
|
462 |
questions = details.get('questions', {})
|
463 |
|
|
|
520 |
|
521 |
if category_yes + category_no > 0:
|
522 |
category_score = category_yes / (category_yes + category_no) * 100
|
523 |
+
card_content += f"<div class='category-score'>Completion Score Breakdown: {category_score:.2f}% Yes: {category_yes}, No: {category_no}, N/A: {category_na}</div>"
|
524 |
elif category_na > 0:
|
525 |
+
card_content += f"<div class='category-score'>Completion Score Breakdown: N/A (All {category_na} items not applicable)</div>"
|
526 |
|
527 |
card_content += "</div>"
|
528 |
all_cards_content += card_content
|
529 |
|
530 |
all_cards_content += "</div>"
|
531 |
|
532 |
+
# Create total score
|
533 |
+
if not has_non_na:
|
534 |
+
total_score_md = "<div class='total-score'>No applicable scores (all items N/A)</div>"
|
535 |
+
elif total_yes + total_no > 0:
|
536 |
total_score = total_yes / (total_yes + total_no) * 100
|
537 |
total_score_md = f"<div class='total-score'>Total Score: {total_score:.2f}% (Yes: {total_yes}, No: {total_no}, N/A: {total_na})</div>"
|
538 |
else:
|
539 |
total_score_md = "<div class='total-score'>No applicable scores (all items N/A)</div>"
|
540 |
|
541 |
return [
|
542 |
+
gr.update(value=combined_header, visible=True),
|
543 |
gr.update(value=all_cards_content, visible=True),
|
544 |
gr.update(value=total_score_md, visible=True)
|
545 |
]
|
|
|
972 |
color: #999;
|
973 |
border-color: #444;
|
974 |
}
|
975 |
+
|
976 |
+
.overall-summary-card {
|
977 |
+
width: 100% !important;
|
978 |
+
margin-bottom: 30px;
|
979 |
+
}
|
980 |
+
|
981 |
+
.summary-grid {
|
982 |
+
display: grid;
|
983 |
+
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
984 |
+
gap: 20px;
|
985 |
+
margin-bottom: 20px;
|
986 |
+
}
|
987 |
+
|
988 |
+
.category-completion-grid {
|
989 |
+
display: grid;
|
990 |
+
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
991 |
+
gap: 16px;
|
992 |
+
margin-top: 12px;
|
993 |
+
}
|
994 |
+
|
995 |
+
.category-completion-item {
|
996 |
+
background-color: #f8f9fa;
|
997 |
+
border-radius: 8px;
|
998 |
+
padding: 12px;
|
999 |
+
}
|
1000 |
+
|
1001 |
+
.category-name {
|
1002 |
+
font-size: 0.9em;
|
1003 |
+
font-weight: 500;
|
1004 |
+
margin-bottom: 8px;
|
1005 |
+
color: #555;
|
1006 |
+
}
|
1007 |
+
|
1008 |
+
.completion-bar-container {
|
1009 |
+
height: 24px;
|
1010 |
+
background-color: #eee;
|
1011 |
+
border-radius: 12px;
|
1012 |
+
position: relative;
|
1013 |
+
overflow: hidden;
|
1014 |
+
}
|
1015 |
+
|
1016 |
+
.completion-bar {
|
1017 |
+
height: 100%;
|
1018 |
+
background-color: #4CAF50;
|
1019 |
+
transition: width 0.3s ease;
|
1020 |
+
}
|
1021 |
+
|
1022 |
+
.completion-text {
|
1023 |
+
position: absolute;
|
1024 |
+
right: 8px;
|
1025 |
+
top: 50%;
|
1026 |
+
transform: translateY(-50%);
|
1027 |
+
font-size: 0.8em;
|
1028 |
+
font-weight: 600;
|
1029 |
+
color: #333;
|
1030 |
+
}
|
1031 |
+
|
1032 |
+
.dark .category-completion-item {
|
1033 |
+
background-color: #2a2a2a;
|
1034 |
+
}
|
1035 |
+
|
1036 |
+
.dark .category-name {
|
1037 |
+
color: #ccc;
|
1038 |
+
}
|
1039 |
+
|
1040 |
+
.dark .completion-bar-container {
|
1041 |
+
background-color: #333;
|
1042 |
+
}
|
1043 |
+
|
1044 |
+
.dark .completion-bar {
|
1045 |
+
background-color: #2e7d32;
|
1046 |
+
}
|
1047 |
+
|
1048 |
+
.dark .completion-text {
|
1049 |
+
color: #fff;
|
1050 |
+
}
|
1051 |
+
.completion-bar-container.not-selected {
|
1052 |
+
opacity: 0.5;
|
1053 |
+
background-color: #f0f0f0;
|
1054 |
+
}
|
1055 |
+
|
1056 |
+
.completion-bar-container.na {
|
1057 |
+
background-color: #f0f0f0;
|
1058 |
+
}
|
1059 |
+
|
1060 |
+
.completion-bar-container.na .completion-bar {
|
1061 |
+
background-color: #999;
|
1062 |
+
width: 0 !important; /* Ensure no bar shows for N/A */
|
1063 |
+
}
|
1064 |
+
|
1065 |
+
.dark .completion-bar-container.na {
|
1066 |
+
background-color: #2d2d2d;
|
1067 |
+
}
|
1068 |
+
|
1069 |
+
.dark .completion-bar-container.na .completion-bar {
|
1070 |
+
background-color: #666;
|
1071 |
+
}
|
1072 |
+
|
1073 |
+
.category-completion-item {
|
1074 |
+
background-color: #f8f9fa;
|
1075 |
+
border-radius: 8px;
|
1076 |
+
padding: 12px;
|
1077 |
+
height: 80px;
|
1078 |
+
display: grid;
|
1079 |
+
grid-template-rows: 1fr auto;
|
1080 |
+
gap: 8px;
|
1081 |
+
}
|
1082 |
+
|
1083 |
+
.category-name {
|
1084 |
+
font-size: 0.9em;
|
1085 |
+
font-weight: 500;
|
1086 |
+
color: #555;
|
1087 |
+
align-self: start;
|
1088 |
+
line-height: 1.3;
|
1089 |
+
}
|
1090 |
+
|
1091 |
+
.completion-bar-container {
|
1092 |
+
height: 24px;
|
1093 |
+
background-color: #eee;
|
1094 |
+
border-radius: 12px;
|
1095 |
+
position: relative;
|
1096 |
+
overflow: hidden;
|
1097 |
+
align-self: end;
|
1098 |
+
}
|
1099 |
"""
|
1100 |
|
1101 |
first_model = next(iter(models.values()))
|
model_data/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
model_data/model_a_data.json
CHANGED
@@ -144,7 +144,7 @@
|
|
144 |
}
|
145 |
}
|
146 |
},
|
147 |
-
"3. Disparate Performance": {
|
148 |
"3.1 Disparate Performance Overview": {
|
149 |
"status": "N/A",
|
150 |
"sources": [],
|
|
|
144 |
}
|
145 |
}
|
146 |
},
|
147 |
+
"3. Disparate Performance Evaluation": {
|
148 |
"3.1 Disparate Performance Overview": {
|
149 |
"status": "N/A",
|
150 |
"sources": [],
|
model_data/model_b_data.json
CHANGED
@@ -144,7 +144,7 @@
|
|
144 |
}
|
145 |
}
|
146 |
},
|
147 |
-
"3. Disparate Performance": {
|
148 |
"3.1 Disparate Performance Overview": {
|
149 |
"status": "N/A",
|
150 |
"sources": [],
|
|
|
144 |
}
|
145 |
}
|
146 |
},
|
147 |
+
"3. Disparate Performance Evaluation": {
|
148 |
"3.1 Disparate Performance Overview": {
|
149 |
"status": "N/A",
|
150 |
"sources": [],
|
model_data/model_c_data.json
CHANGED
@@ -144,7 +144,7 @@
|
|
144 |
}
|
145 |
}
|
146 |
},
|
147 |
-
"3. Disparate Performance": {
|
148 |
"3.1 Disparate Performance Overview": {
|
149 |
"status": "N/A",
|
150 |
"sources": [],
|
|
|
144 |
}
|
145 |
}
|
146 |
},
|
147 |
+
"3. Disparate Performance Evaluation": {
|
148 |
"3.1 Disparate Performance Overview": {
|
149 |
"status": "N/A",
|
150 |
"sources": [],
|