Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,10 +25,6 @@ def load_scorecard_templates(directory):
|
|
| 25 |
))
|
| 26 |
return templates
|
| 27 |
|
| 28 |
-
# Load scorecard templates
|
| 29 |
-
scorecard_template = load_scorecard_templates('scorecard_templates')
|
| 30 |
-
|
| 31 |
-
# Function to read JSON files and populate models dictionary
|
| 32 |
def load_models_from_json(directory):
|
| 33 |
models = {}
|
| 34 |
for filename in os.listdir(directory):
|
|
@@ -38,12 +34,197 @@ def load_models_from_json(directory):
|
|
| 38 |
model_name = model_data['metadata']['Name']
|
| 39 |
models[model_name] = model_data
|
| 40 |
|
| 41 |
-
# Sort the models alphabetically by name
|
| 42 |
return OrderedDict(sorted(models.items(), key=lambda x: x[0].lower()))
|
| 43 |
|
| 44 |
-
# Load
|
|
|
|
| 45 |
models = load_models_from_json('model_data')
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
css = """
|
| 48 |
.container {
|
| 49 |
display: flex;
|
|
@@ -75,38 +256,26 @@ css = """
|
|
| 75 |
border-bottom: 2px solid #e0e0e0;
|
| 76 |
padding-bottom: 10px;
|
| 77 |
}
|
| 78 |
-
.
|
| 79 |
-
margin
|
| 80 |
-
padding: 15px;
|
| 81 |
-
border-radius: 5px;
|
| 82 |
}
|
| 83 |
-
.
|
| 84 |
-
margin
|
| 85 |
-
|
|
|
|
|
|
|
| 86 |
}
|
| 87 |
-
.question-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
background-color: #e6ffe6;
|
| 89 |
}
|
| 90 |
-
.question-
|
| 91 |
background-color: #ffe6e6;
|
| 92 |
}
|
| 93 |
-
.question-na {
|
| 94 |
-
background-color: #fffde6;
|
| 95 |
-
}
|
| 96 |
-
.status {
|
| 97 |
-
font-weight: bold;
|
| 98 |
-
}
|
| 99 |
-
details {
|
| 100 |
-
margin-top: 10px;
|
| 101 |
-
}
|
| 102 |
-
summary {
|
| 103 |
-
cursor: pointer;
|
| 104 |
-
color: #3498db;
|
| 105 |
-
font-weight: bold;
|
| 106 |
-
}
|
| 107 |
-
summary:hover {
|
| 108 |
-
text-decoration: underline;
|
| 109 |
-
}
|
| 110 |
.category-score, .total-score {
|
| 111 |
background-color: #f0f8ff;
|
| 112 |
border: 1px solid #b0d4ff;
|
|
@@ -139,8 +308,11 @@ summary:hover {
|
|
| 139 |
background-color: #f2f2f2;
|
| 140 |
font-weight: bold;
|
| 141 |
}
|
| 142 |
-
.
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
| 144 |
}
|
| 145 |
@media (max-width: 768px) {
|
| 146 |
.card {
|
|
@@ -148,288 +320,72 @@ summary:hover {
|
|
| 148 |
}
|
| 149 |
}
|
| 150 |
.dark {
|
| 151 |
-
/* General styles */
|
| 152 |
background-color: #1a1a1a;
|
| 153 |
color: #e0e0e0;
|
| 154 |
|
| 155 |
-
/* Card styles */
|
| 156 |
.card {
|
| 157 |
background-color: #2a2a2a;
|
| 158 |
border-color: #444;
|
| 159 |
-
box-shadow: 0 4px 6px rgba(0,0,0,0.3);
|
| 160 |
-
}
|
| 161 |
-
.card:hover {
|
| 162 |
-
box-shadow: 0 6px 8px rgba(0,0,0,0.4);
|
| 163 |
}
|
| 164 |
.card-title {
|
| 165 |
color: #fff;
|
| 166 |
border-bottom-color: #444;
|
| 167 |
}
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
.question {
|
| 171 |
-
background-color: #333;
|
| 172 |
-
}
|
| 173 |
-
.question h3 {
|
| 174 |
-
color: #e0e0e0;
|
| 175 |
}
|
| 176 |
-
.question-
|
| 177 |
background-color: #1a3a1a;
|
| 178 |
-
|
| 179 |
-
/* Make accordion match parent background */
|
| 180 |
-
details {
|
| 181 |
-
background-color: #1a3a1a;
|
| 182 |
-
}
|
| 183 |
}
|
| 184 |
-
.question-
|
| 185 |
background-color: #3a1a1a;
|
| 186 |
-
|
| 187 |
-
/* Make accordion match parent background */
|
| 188 |
-
details {
|
| 189 |
-
background-color: #3a1a1a;
|
| 190 |
-
}
|
| 191 |
}
|
| 192 |
-
.
|
| 193 |
-
background-color: #
|
| 194 |
-
|
| 195 |
-
/* Make accordion match parent background */
|
| 196 |
-
details {
|
| 197 |
-
background-color: #3a3a1a;
|
| 198 |
-
}
|
| 199 |
-
}
|
| 200 |
-
|
| 201 |
-
/* Summary and details styles */
|
| 202 |
-
summary {
|
| 203 |
-
color: #3498db;
|
| 204 |
-
}
|
| 205 |
-
summary:hover {
|
| 206 |
-
color: #5dade2;
|
| 207 |
}
|
| 208 |
-
|
| 209 |
-
/* Score styles */
|
| 210 |
.category-score, .total-score {
|
| 211 |
background-color: #2c3e50;
|
| 212 |
border-color: #34495e;
|
| 213 |
}
|
| 214 |
-
.total-score {
|
| 215 |
-
background-color: #34495e;
|
| 216 |
-
border-color: #2c3e50;
|
| 217 |
-
}
|
| 218 |
-
|
| 219 |
-
/* Leaderboard styles */
|
| 220 |
.leaderboard-table th {
|
| 221 |
background-color: #2c3e50;
|
| 222 |
-
color: #fff;
|
| 223 |
-
}
|
| 224 |
-
.leaderboard-table td {
|
| 225 |
-
border-bottom-color: #444;
|
| 226 |
-
}
|
| 227 |
-
|
| 228 |
-
/* Gradio component styles */
|
| 229 |
-
.gradio-container {
|
| 230 |
-
background-color: #1a1a1a;
|
| 231 |
-
}
|
| 232 |
-
.input-group, .output-group {
|
| 233 |
-
background-color: #2a2a2a;
|
| 234 |
-
}
|
| 235 |
-
input, select, textarea {
|
| 236 |
-
background-color: #333;
|
| 237 |
-
color: #e0e0e0;
|
| 238 |
-
border-color: #444;
|
| 239 |
-
}
|
| 240 |
-
button {
|
| 241 |
-
background-color: #3498db;
|
| 242 |
-
color: #fff;
|
| 243 |
-
}
|
| 244 |
-
button:hover {
|
| 245 |
-
background-color: #2980b9;
|
| 246 |
}
|
| 247 |
}
|
| 248 |
-
"""
|
| 249 |
-
|
| 250 |
-
def create_leaderboard():
|
| 251 |
-
scores = []
|
| 252 |
-
for model, data in models.items():
|
| 253 |
-
total_score = 0
|
| 254 |
-
total_questions = 0
|
| 255 |
-
for category in data['scores']:
|
| 256 |
-
for question, details in data['scores'][category].items():
|
| 257 |
-
if details['status'] == 'Yes':
|
| 258 |
-
total_score += 1
|
| 259 |
-
total_questions += 1
|
| 260 |
-
score_percentage = (total_score / total_questions) * 100 if total_questions > 0 else 0
|
| 261 |
-
scores.append((model, score_percentage))
|
| 262 |
-
|
| 263 |
-
df = pd.DataFrame(scores, columns=['Model', 'Score Percentage'])
|
| 264 |
-
df = df.sort_values('Score Percentage', ascending=False).reset_index(drop=True)
|
| 265 |
-
|
| 266 |
-
html = "<div class='card leaderboard-card'>"
|
| 267 |
-
html += "<div class='card-title'>AI Model Social Impact Leaderboard</div>"
|
| 268 |
-
html += "<table class='leaderboard-table'>"
|
| 269 |
-
html += "<tr><th>Rank</th><th>Model</th><th>Score Percentage</th></tr>"
|
| 270 |
-
for i, (_, row) in enumerate(df.iterrows(), 1):
|
| 271 |
-
html += f"<tr><td>{i}</td><td>{row['Model']}</td><td>{row['Score Percentage']:.2f}%</td></tr>"
|
| 272 |
-
html += "</table></div>"
|
| 273 |
-
|
| 274 |
-
return html
|
| 275 |
-
|
| 276 |
-
def create_category_chart(selected_models, selected_categories):
|
| 277 |
-
if not selected_models:
|
| 278 |
-
return px.bar(title='Please select at least one model for comparison')
|
| 279 |
-
|
| 280 |
-
data = []
|
| 281 |
-
for model in selected_models:
|
| 282 |
-
for category in selected_categories:
|
| 283 |
-
if category in models[model]['scores']:
|
| 284 |
-
total_questions = len(models[model]['scores'][category])
|
| 285 |
-
yes_count = sum(1 for q in models[model]['scores'][category].values() if q['status'] == 'Yes')
|
| 286 |
-
score_percentage = (yes_count / total_questions) * 100 if total_questions > 0 else 0
|
| 287 |
-
data.append({'Model': model, 'Category': category, 'Score Percentage': score_percentage})
|
| 288 |
-
|
| 289 |
-
df = pd.DataFrame(data)
|
| 290 |
-
if df.empty:
|
| 291 |
-
return px.bar(title='No data available for the selected models and categories')
|
| 292 |
-
|
| 293 |
-
fig = px.bar(df, x='Model', y='Score Percentage', color='Category',
|
| 294 |
-
title='AI Model Scores by Category',
|
| 295 |
-
labels={'Score Percentage': 'Score Percentage'},
|
| 296 |
-
category_orders={"Category": selected_categories})
|
| 297 |
-
return fig
|
| 298 |
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
gr.update(value="Please select a model to view details.", visible=True),
|
| 303 |
-
gr.update(visible=False),
|
| 304 |
-
gr.update(visible=False)
|
| 305 |
-
]
|
| 306 |
-
|
| 307 |
-
metadata_md = f"## Model Metadata for {model}\n\n"
|
| 308 |
-
for key, value in models[model]['metadata'].items():
|
| 309 |
-
metadata_md += f"**{key}:** {value}\n\n"
|
| 310 |
-
|
| 311 |
-
total_yes = 0
|
| 312 |
-
total_no = 0
|
| 313 |
-
total_na = 0
|
| 314 |
-
|
| 315 |
-
all_cards_content = "<div class='container'>"
|
| 316 |
-
for category in scorecard_template:
|
| 317 |
-
if category.name in selected_categories and category.name in models[model]['scores']:
|
| 318 |
-
category_data = models[model]['scores'][category.name]
|
| 319 |
-
card_content = f"<div class='card'><div class='card-title'>{category.name}</div>"
|
| 320 |
-
|
| 321 |
-
category_yes = 0
|
| 322 |
-
category_no = 0
|
| 323 |
-
category_na = 0
|
| 324 |
-
|
| 325 |
-
for question, details in category_data.items():
|
| 326 |
-
status = details['status']
|
| 327 |
-
source = details.get('source', 'N/A')
|
| 328 |
-
|
| 329 |
-
if status == 'Yes':
|
| 330 |
-
bg_class = 'question-yes'
|
| 331 |
-
category_yes += 1
|
| 332 |
-
total_yes += 1
|
| 333 |
-
elif status == 'No':
|
| 334 |
-
bg_class = 'question-no'
|
| 335 |
-
category_no += 1
|
| 336 |
-
total_no += 1
|
| 337 |
-
else:
|
| 338 |
-
bg_class = 'question-na'
|
| 339 |
-
category_na += 1
|
| 340 |
-
total_na += 1
|
| 341 |
-
|
| 342 |
-
card_content += f"<div class='question {bg_class}'>"
|
| 343 |
-
card_content += f"<h3>{question}</h3>\n\n"
|
| 344 |
-
card_content += f"<p><span class='status'>{status}</span></p>\n\n<p><strong>Source:</strong> {source}</p>\n\n"
|
| 345 |
-
|
| 346 |
-
if details.get('applicable_evaluations'):
|
| 347 |
-
card_content += "<details><summary>View Applicable Evaluations</summary>\n\n"
|
| 348 |
-
card_content += "<ul>"
|
| 349 |
-
for eval in details['applicable_evaluations']:
|
| 350 |
-
card_content += f"<li>{eval}</li>"
|
| 351 |
-
card_content += "</ul>\n"
|
| 352 |
-
card_content += "</details>\n\n"
|
| 353 |
-
else:
|
| 354 |
-
card_content += "<details><summary>View Applicable Evaluations</summary>\n\n"
|
| 355 |
-
card_content += "<p>No applicable evaluations.</p>\n"
|
| 356 |
-
card_content += "</details>\n\n"
|
| 357 |
-
|
| 358 |
-
card_content += "</div>"
|
| 359 |
-
|
| 360 |
-
category_score = category_yes / (category_yes + category_no) * 100 if (category_yes + category_no) > 0 else 0
|
| 361 |
-
card_content += f"<div class='category-score'>Category Score: {category_score:.2f}% (Yes: {category_yes}, No: {category_no}, N/A: {category_na})</div>"
|
| 362 |
-
card_content += "</div>"
|
| 363 |
-
all_cards_content += card_content
|
| 364 |
-
|
| 365 |
-
all_cards_content += "</div>"
|
| 366 |
-
|
| 367 |
-
total_score = total_yes / (total_yes + total_no) * 100 if (total_yes + total_no) > 0 else 0
|
| 368 |
-
total_score_md = f"<div class='total-score'>Total Score: {total_score:.2f}% (Yes: {total_yes}, No: {total_no}, N/A: {total_na})</div>"
|
| 369 |
-
|
| 370 |
-
return [
|
| 371 |
-
gr.update(value=metadata_md, visible=True),
|
| 372 |
-
gr.update(value=all_cards_content, visible=True),
|
| 373 |
-
gr.update(value=total_score_md, visible=True)
|
| 374 |
-
]
|
| 375 |
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
model_chooser_visibility = gr.update(visible=False)
|
| 381 |
-
model_multi_chooser_visibility = gr.update(visible=False)
|
| 382 |
-
category_filter_visibility = gr.update(visible=False)
|
| 383 |
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
gr.update(value=leaderboard_html), gr.update(), gr.update(), gr.update(), gr.update()]
|
| 390 |
-
elif tab == "Category Analysis":
|
| 391 |
-
category_chart_visibility = gr.update(visible=True)
|
| 392 |
-
model_multi_chooser_visibility = gr.update(visible=True)
|
| 393 |
-
category_filter_visibility = gr.update(visible=True)
|
| 394 |
-
category_chart = create_category_chart(selected_models or [], selected_categories)
|
| 395 |
-
return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
|
| 396 |
-
model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
|
| 397 |
-
gr.update(), gr.update(value=category_chart), gr.update(), gr.update(), gr.update()]
|
| 398 |
-
elif tab == "Detailed Scorecard":
|
| 399 |
-
detailed_scorecard_visibility = gr.update(visible=True)
|
| 400 |
-
model_chooser_visibility = gr.update(visible=True)
|
| 401 |
-
category_filter_visibility = gr.update(visible=True)
|
| 402 |
-
if selected_model:
|
| 403 |
-
scorecard_updates = update_detailed_scorecard(selected_model, selected_categories)
|
| 404 |
-
else:
|
| 405 |
-
scorecard_updates = [
|
| 406 |
-
gr.update(value="Please select a model to view details.", visible=True),
|
| 407 |
-
gr.update(visible=False),
|
| 408 |
-
gr.update(visible=False)
|
| 409 |
-
]
|
| 410 |
-
return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
|
| 411 |
-
model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
|
| 412 |
-
gr.update(), gr.update()] + scorecard_updates
|
| 413 |
|
| 414 |
with gr.Blocks(css=css) as demo:
|
| 415 |
gr.Markdown("# AI Model Social Impact Scorecard Dashboard")
|
| 416 |
|
| 417 |
with gr.Row():
|
| 418 |
tab_selection = gr.Radio(["Leaderboard", "Category Analysis", "Detailed Scorecard"],
|
| 419 |
-
|
| 420 |
|
| 421 |
with gr.Row():
|
| 422 |
-
model_chooser = gr.Dropdown(choices=[""] + list(models.keys()),
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
category_filter = gr.CheckboxGroup(choices=[cat.name for cat in scorecard_template],
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
|
| 434 |
with gr.Column(visible=True) as leaderboard_tab:
|
| 435 |
leaderboard_output = gr.HTML()
|
|
@@ -445,29 +401,82 @@ with gr.Blocks(css=css) as demo:
|
|
| 445 |
# Initialize the dashboard with the leaderboard
|
| 446 |
leaderboard_output.value = create_leaderboard()
|
| 447 |
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
|
| 472 |
# Launch the app
|
| 473 |
if __name__ == "__main__":
|
|
|
|
| 25 |
))
|
| 26 |
return templates
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def load_models_from_json(directory):
|
| 29 |
models = {}
|
| 30 |
for filename in os.listdir(directory):
|
|
|
|
| 34 |
model_name = model_data['metadata']['Name']
|
| 35 |
models[model_name] = model_data
|
| 36 |
|
|
|
|
| 37 |
return OrderedDict(sorted(models.items(), key=lambda x: x[0].lower()))
|
| 38 |
|
| 39 |
+
# Load templates and models
|
| 40 |
+
scorecard_template = load_scorecard_templates('scorecard_templates')
|
| 41 |
models = load_models_from_json('model_data')
|
| 42 |
|
| 43 |
+
def create_source_html(sources):
    """Render source records as an HTML ``sources-list`` block.

    Args:
        sources: Iterable of dicts. Each dict may carry ``type`` (rendered
            as a leading icon/label), ``detail`` (a URL or free text) and
            ``name`` (display text; falls back to ``detail`` when missing
            or empty). ``None`` or an empty iterable is accepted.

    Returns:
        The HTML markup as a string, or ``""`` when there are no sources.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from html import escape

    if not sources:
        return ""

    parts = ["<div class='sources-list'>"]
    for source in sources:
        # ``or`` also covers keys that are present but set to None, which
        # would otherwise break ``startswith`` / string concatenation.
        icon = source.get("type") or ""
        detail = str(source.get("detail") or "")
        name = str(source.get("name") or detail)

        parts.append(f"<div class='source-item'>{escape(str(icon))} ")
        if detail.startswith("http"):
            # The href is single-quoted, so quotes must be escaped too
            # (html.escape does this by default).
            parts.append(
                f"<a href='{escape(detail)}' target='_blank'>{escape(name)}</a>"
            )
        else:
            parts.append(escape(name))
        parts.append("</div>")
    parts.append("</div>")
    return "".join(parts)
|
| 61 |
+
|
| 62 |
+
def create_leaderboard():
    """Build the leaderboard card as an HTML string.

    Reads the module-level ``models`` mapping. For each model it walks
    ``data['scores']`` (category -> section), skips sections whose
    ``status`` is ``'N/A'``, and counts truthy entries in each remaining
    section's ``questions`` dict. The score is the percentage of counted
    questions that are truthy (0 when there are no counted questions).

    Returns:
        HTML for a ``leaderboard-card`` div holding a table with one row
        per model, ranked by score from highest to lowest. Models with
        equal scores keep the iteration order of ``models``.
    """
    # Local imports keep the block self-contained.
    from html import escape
    from operator import itemgetter

    scores = []
    for model, data in models.items():
        total_score = 0
        total_questions = 0

        for category in data['scores'].values():
            for section in category.values():
                if section['status'] == 'N/A':
                    continue
                questions = section.get('questions', {})
                total_score += sum(1 for answer in questions.values() if answer)
                total_questions += len(questions)

        score_percentage = (total_score / total_questions * 100) if total_questions > 0 else 0
        scores.append((model, score_percentage))

    # sorted() is stable, so ties are ranked deterministically; a DataFrame
    # is not needed just to order a handful of tuples.
    ranked = sorted(scores, key=itemgetter(1), reverse=True)

    parts = [
        "<div class='card leaderboard-card'>",
        "<div class='card-title'>AI Model Social Impact Leaderboard</div>",
        "<table class='leaderboard-table'>",
        "<tr><th>Rank</th><th>Model</th><th>Score Percentage</th></tr>",
    ]
    for rank, (model, score_percentage) in enumerate(ranked, 1):
        # Model names come from data files; escape them so characters such
        # as '<' or '&' cannot break the table markup.
        parts.append(
            f"<tr><td>{rank}</td><td>{escape(str(model))}</td>"
            f"<td>{score_percentage:.2f}%</td></tr>"
        )
    parts.append("</table></div>")

    return "".join(parts)
|
| 90 |
+
|
| 91 |
+
def create_category_chart(selected_models, selected_categories):
    """Build a bar chart of per-category score percentages.

    For each selected model and each selected category present in that
    model's ``scores``, counts truthy ``questions`` entries across the
    category's sections whose ``status`` is not ``'N/A'`` and converts the
    count to a percentage (0 when no questions were counted).

    Args:
        selected_models: Model names to compare; looked up in the
            module-level ``models`` mapping. Names not found there are
            skipped.
        selected_categories: Category names to include; also used as the
            category ordering passed to the chart. ``None`` is treated as
            an empty selection.

    Returns:
        The figure returned by ``px.bar``. When no model is selected, or
        no data matches the selection, the figure carries only an
        explanatory title.
    """
    if not selected_models:
        return px.bar(title='Please select at least one model for comparison')

    categories = list(selected_categories or [])

    data = []
    for model in selected_models:
        # A stale selection (model no longer loaded) must not crash the
        # callback; it simply contributes no bars.
        model_scores = models.get(model, {}).get('scores', {})
        for category in categories:
            if category not in model_scores:
                continue

            total_score = 0
            total_questions = 0
            for section in model_scores[category].values():
                if section['status'] != 'N/A':
                    questions = section.get('questions', {})
                    total_score += sum(1 for q in questions.values() if q)
                    total_questions += len(questions)

            score_percentage = (total_score / total_questions * 100) if total_questions > 0 else 0
            data.append({
                'Model': model,
                'Category': category,
                'Score Percentage': score_percentage,
            })

    df = pd.DataFrame(data)
    if df.empty:
        return px.bar(title='No data available for the selected models and categories')

    fig = px.bar(df, x='Model', y='Score Percentage', color='Category',
                 title='AI Model Scores by Category',
                 labels={'Score Percentage': 'Score Percentage'},
                 category_orders={"Category": categories})
    return fig
|
| 124 |
+
|
| 125 |
+
def update_detailed_scorecard(model, selected_categories):
    """Build the three component updates for the detailed scorecard view.

    Args:
        model: Name of the model to show; looked up in the module-level
            ``models`` mapping. A falsy or unknown name yields the
            "please select a model" placeholder.
        selected_categories: Category names to render, in display order.
            Categories absent from the model's ``scores`` are skipped;
            ``None`` is treated as an empty selection.

    Returns:
        A list of three ``gr.update(...)`` results, in this order:
        the metadata markdown, the HTML for all category cards, and the
        total-score HTML. In the placeholder case the last two are hidden.
    """
    # Local import keeps the block self-contained.
    from html import escape

    if not model or model not in models:
        return [
            gr.update(value="Please select a model to view details.", visible=True),
            gr.update(visible=False),
            gr.update(visible=False)
        ]

    model_data = models[model]

    metadata_md = f"## Model Metadata for {model}\n\n" + "".join(
        f"**{key}:** {value}\n\n" for key, value in model_data['metadata'].items()
    )

    total_yes = 0
    total_no = 0
    total_na = 0

    parts = ["<div class='container'>"]
    for category_name in selected_categories or []:
        if category_name not in model_data['scores']:
            continue

        category_data = model_data['scores'][category_name]
        # Names and question text come from data files; escape them so
        # characters such as '<' or '&' cannot break the card markup.
        parts.append(
            f"<div class='card'><div class='card-title'>{escape(str(category_name))}</div>"
        )

        category_yes = 0
        category_no = 0
        category_na = 0

        for section, details in category_data.items():
            status = details['status']
            questions = details.get('questions', {})

            # Sections marked N/A are styled differently from active ones.
            section_class = "section-na" if status == "N/A" else "section-active"
            parts.append(f"<div class='section {section_class}'>")
            parts.append(f"<h3>{escape(str(section))}</h3>")

            # Shared helper; returns "" when the section lists no sources.
            parts.append(create_source_html(details.get('sources', [])))

            if questions:
                parts.append("<div class='questions'>")
                for question, is_checked in questions.items():
                    if status == "N/A":
                        # Every question of an N/A section counts as N/A,
                        # regardless of its stored value.
                        style_class = "na"
                        icon = "○"
                        category_na += 1
                    elif is_checked:
                        style_class = "checked"
                        icon = "✓"
                        category_yes += 1
                    else:
                        style_class = "unchecked"
                        icon = "✗"
                        category_no += 1

                    parts.append(
                        f"<div class='question-item {style_class}'>{icon} {escape(str(question))}</div>"
                    )
                parts.append("</div>")

            parts.append("</div>")  # Close section div

        # Category score excludes N/A items from the denominator.
        if category_yes + category_no > 0:
            category_score = category_yes / (category_yes + category_no) * 100
            parts.append(
                f"<div class='category-score'>Category Score: {category_score:.2f}% (Yes: {category_yes}, No: {category_no}, N/A: {category_na})</div>"
            )
        elif category_na > 0:
            parts.append(
                f"<div class='category-score'>Category Score: N/A (All {category_na} items not applicable)</div>"
            )

        parts.append("</div>")  # Close card div

        total_yes += category_yes
        total_no += category_no
        total_na += category_na

    parts.append("</div>")
    all_cards_content = "".join(parts)

    # Total score excludes N/A items from the denominator.
    if total_yes + total_no > 0:
        total_score = total_yes / (total_yes + total_no) * 100
        total_score_md = f"<div class='total-score'>Total Score: {total_score:.2f}% (Yes: {total_yes}, No: {total_no}, N/A: {total_na})</div>"
    else:
        total_score_md = "<div class='total-score'>No applicable scores (all items N/A)</div>"

    return [
        gr.update(value=metadata_md, visible=True),
        gr.update(value=all_cards_content, visible=True),
        gr.update(value=total_score_md, visible=True)
    ]
|
| 227 |
+
|
| 228 |
css = """
|
| 229 |
.container {
|
| 230 |
display: flex;
|
|
|
|
| 256 |
border-bottom: 2px solid #e0e0e0;
|
| 257 |
padding-bottom: 10px;
|
| 258 |
}
|
| 259 |
+
.sources-list {
|
| 260 |
+
margin: 10px 0;
|
|
|
|
|
|
|
| 261 |
}
|
| 262 |
+
.source-item {
|
| 263 |
+
margin: 5px 0;
|
| 264 |
+
padding: 5px;
|
| 265 |
+
background-color: #f8f9fa;
|
| 266 |
+
border-radius: 4px;
|
| 267 |
}
|
| 268 |
+
.question-item {
|
| 269 |
+
margin: 5px 0;
|
| 270 |
+
padding: 8px;
|
| 271 |
+
border-radius: 4px;
|
| 272 |
+
}
|
| 273 |
+
.question-item.checked {
|
| 274 |
background-color: #e6ffe6;
|
| 275 |
}
|
| 276 |
+
.question-item.unchecked {
|
| 277 |
background-color: #ffe6e6;
|
| 278 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
.category-score, .total-score {
|
| 280 |
background-color: #f0f8ff;
|
| 281 |
border: 1px solid #b0d4ff;
|
|
|
|
| 308 |
background-color: #f2f2f2;
|
| 309 |
font-weight: bold;
|
| 310 |
}
|
| 311 |
+
.section {
|
| 312 |
+
margin-bottom: 20px;
|
| 313 |
+
padding: 15px;
|
| 314 |
+
border-radius: 5px;
|
| 315 |
+
background-color: #f8f9fa;
|
| 316 |
}
|
| 317 |
@media (max-width: 768px) {
|
| 318 |
.card {
|
|
|
|
| 320 |
}
|
| 321 |
}
|
| 322 |
.dark {
|
|
|
|
| 323 |
background-color: #1a1a1a;
|
| 324 |
color: #e0e0e0;
|
| 325 |
|
|
|
|
| 326 |
.card {
|
| 327 |
background-color: #2a2a2a;
|
| 328 |
border-color: #444;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
}
|
| 330 |
.card-title {
|
| 331 |
color: #fff;
|
| 332 |
border-bottom-color: #444;
|
| 333 |
}
|
| 334 |
+
.source-item {
|
| 335 |
+
background-color: #2a2a2a;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
}
|
| 337 |
+
.question-item.checked {
|
| 338 |
background-color: #1a3a1a;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
}
|
| 340 |
+
.question-item.unchecked {
|
| 341 |
background-color: #3a1a1a;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
}
|
| 343 |
+
.section {
|
| 344 |
+
background-color: #2a2a2a;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
}
|
|
|
|
|
|
|
| 346 |
.category-score, .total-score {
|
| 347 |
background-color: #2c3e50;
|
| 348 |
border-color: #34495e;
|
| 349 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
.leaderboard-table th {
|
| 351 |
background-color: #2c3e50;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
}
|
| 353 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
|
| 355 |
+
.section-na {
|
| 356 |
+
opacity: 0.6;
|
| 357 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
+
.question-item.na {
|
| 360 |
+
background-color: #f0f0f0;
|
| 361 |
+
color: #666;
|
| 362 |
+
}
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
+
.dark .question-item.na {
|
| 365 |
+
background-color: #2d2d2d;
|
| 366 |
+
color: #999;
|
| 367 |
+
}
|
| 368 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
with gr.Blocks(css=css) as demo:
|
| 371 |
gr.Markdown("# AI Model Social Impact Scorecard Dashboard")
|
| 372 |
|
| 373 |
with gr.Row():
|
| 374 |
tab_selection = gr.Radio(["Leaderboard", "Category Analysis", "Detailed Scorecard"],
|
| 375 |
+
label="Select Tab", value="Leaderboard")
|
| 376 |
|
| 377 |
with gr.Row():
|
| 378 |
+
model_chooser = gr.Dropdown(choices=[""] + list(models.keys()),
|
| 379 |
+
label="Select Model for Details",
|
| 380 |
+
value="",
|
| 381 |
+
interactive=True, visible=False)
|
| 382 |
+
model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
|
| 383 |
+
label="Select Models for Comparison",
|
| 384 |
+
multiselect=True, interactive=True, visible=False)
|
| 385 |
+
category_filter = gr.CheckboxGroup(choices=[cat.name for cat in scorecard_template],
|
| 386 |
+
label="Filter Categories",
|
| 387 |
+
value=[cat.name for cat in scorecard_template],
|
| 388 |
+
visible=False)
|
| 389 |
|
| 390 |
with gr.Column(visible=True) as leaderboard_tab:
|
| 391 |
leaderboard_output = gr.HTML()
|
|
|
|
| 401 |
# Initialize the dashboard with the leaderboard
|
| 402 |
leaderboard_output.value = create_leaderboard()
|
| 403 |
|
| 404 |
+
def update_dashboard(tab, selected_models, selected_model, selected_categories):
    """Compute the component updates for the currently selected tab.

    Args:
        tab: Selected tab label: ``"Leaderboard"``, ``"Category Analysis"``
            or ``"Detailed Scorecard"``.
        selected_models: Models chosen in the multi-select dropdown (used
            by the category chart); ``None`` is treated as no selection.
        selected_model: Model chosen in the single dropdown (used by the
            detailed scorecard).
        selected_categories: Categories ticked in the category filter.

    Returns:
        A list of 11 ``gr.update(...)`` results matching the wired outputs
        in order: visibility for leaderboard_tab, category_analysis_tab,
        detailed_scorecard_tab, model_chooser, model_multi_chooser and
        category_filter, followed by content updates for
        leaderboard_output, category_chart, model_metadata,
        all_category_cards and total_score. For an unrecognised ``tab``
        every entry is a no-op update, leaving the UI unchanged.
    """
    is_leaderboard = tab == "Leaderboard"
    is_category = tab == "Category Analysis"
    is_scorecard = tab == "Detailed Scorecard"

    if not (is_leaderboard or is_category or is_scorecard):
        # Previously this path fell off the end and returned None, which
        # cannot be mapped onto the 11 outputs.
        return [gr.update() for _ in range(11)]

    # A fresh update object per output; they are not shared between slots.
    visibility = [
        gr.update(visible=is_leaderboard),                # leaderboard_tab
        gr.update(visible=is_category),                   # category_analysis_tab
        gr.update(visible=is_scorecard),                  # detailed_scorecard_tab
        gr.update(visible=is_scorecard),                  # model_chooser
        gr.update(visible=is_category),                   # model_multi_chooser
        gr.update(visible=is_category or is_scorecard),   # category_filter
    ]

    if is_leaderboard:
        leaderboard_html = create_leaderboard()
        return visibility + [gr.update(value=leaderboard_html),
                             gr.update(), gr.update(), gr.update(), gr.update()]

    if is_category:
        category_plot = create_category_chart(selected_models or [], selected_categories)
        return visibility + [gr.update(), gr.update(value=category_plot),
                             gr.update(), gr.update(), gr.update()]

    # Detailed Scorecard: update_detailed_scorecard already returns the
    # "please select a model" placeholder when no model is chosen.
    scorecard_updates = update_detailed_scorecard(selected_model, selected_categories)
    return visibility + [gr.update(), gr.update()] + scorecard_updates
|
| 443 |
+
|
| 444 |
+
# Set up event handlers
|
| 445 |
+
# Every control that can change what the dashboard shows re-runs the same
# callback with the same inputs and outputs, so register them in one loop.
dashboard_inputs = [tab_selection, model_multi_chooser, model_chooser, category_filter]
dashboard_outputs = [
    leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
    model_chooser, model_multi_chooser, category_filter,
    leaderboard_output, category_chart, model_metadata,
    all_category_cards, total_score,
]
for trigger in (tab_selection, model_chooser, model_multi_chooser, category_filter):
    trigger.change(
        fn=update_dashboard,
        inputs=dashboard_inputs,
        outputs=dashboard_outputs,
    )
|
| 480 |
|
| 481 |
# Launch the app
|
| 482 |
if __name__ == "__main__":
|