const fs = require('fs')
const path = require('path')

const EVAL_DIR = path.join(__dirname, '..', 'public', 'evaluations')
const BENCH = ['A1', 'A2', 'A3', 'A4', 'A5', 'A6']
const PROC = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6']
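
// Sketch of the evaluation JSON shape this script assumes (inferred from the
// field accesses below; only the fields referenced in the code are required):
// {
//   "modality": "text",
//   "selectedCategories": ["<categoryId>", ...],
//   "categoryEvaluations": {
//     "<categoryId>": {
//       "benchmarkAnswers": { "A1": "N/A", ... },   // only the literal 'N/A' triggers backfill
//       "processAnswers":   { "B1": "N/A", ... },
//       "benchmarkSources": { "A1": [{ "url": "", "description": "", "sourceType": "" }], ... },
//       "processSources":   { "B1": [{ "url": "", "description": "", "documentType": "", "scope": "" }], ... }
//     }
//   }
// }
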
// Build an explanation for why a benchmark question (A1-A6) is marked N/A.
function reasonForBenchmark(evalObj, catId, qid) {
  const modality = (evalObj.modality || '').toLowerCase()
  if (modality.includes('text') && (catId.includes('vision') || catId.includes('physical') || catId.includes('robotic') || catId.includes('perception'))) {
    return `${qid}: Not applicable - this evaluation/sample is for a text-only model; visual or physical benchmarks are not relevant.`
  }
  // Default reason when no modality-specific explanation applies.
  return `${qid}: Not applicable - benchmark data not provided or not run for this evaluation.`
}

// Build an explanation for why a process question (B1-B6) is marked N/A.
function reasonForProcess(evalObj, catId, qid) {
  // Give more specific reasons for questions with a known cause.
  if (qid === 'B2') return `${qid}: Not applicable - replication package or reproducible artifacts were not published for this sample.`
  if (qid === 'B4') return `${qid}: Not applicable - figures/uncertainty plots are not included in this report.`
  if (qid === 'B5') return `${qid}: Not applicable - standards mapping or regulatory alignment not performed for this sample.`
  if (evalObj.modality && evalObj.modality.toLowerCase().includes('text') && (catId.includes('physical') || catId.includes('robotic'))) {
    return `${qid}: Not applicable - process documentation for physical/robotic systems is not relevant to a text-only model.`
  }
  return `${qid}: Not applicable - documentation or process evidence not captured for this evaluation.`
}

// Backfill placeholder source entries for every question answered 'N/A' in one
// evaluation file. Returns true if the file was modified.
function populateFile(filePath) {
  const raw = fs.readFileSync(filePath, 'utf8')
  const obj = JSON.parse(raw)
  let changed = false

  for (const catId of obj.selectedCategories || []) {
    obj.categoryEvaluations = obj.categoryEvaluations || {}
    obj.categoryEvaluations[catId] = obj.categoryEvaluations[catId] || {}
    const ce = obj.categoryEvaluations[catId]
    ce.benchmarkAnswers = ce.benchmarkAnswers || {}
    ce.processAnswers = ce.processAnswers || {}
    ce.benchmarkSources = ce.benchmarkSources || {}
    ce.processSources = ce.processSources || {}
    // Benchmark questions: add a placeholder source wherever the answer is 'N/A'
    // and the existing sources are missing or are themselves placeholders.
    for (const q of BENCH) {
      if (ce.benchmarkAnswers[q] === 'N/A') {
        const sources = ce.benchmarkSources[q] || []
        if (sources.length === 0 || (sources[0] && (sources[0].description === 'N/A' || sources[0].description === 'Not applicable'))) {
          ce.benchmarkSources[q] = [
            {
              url: '',
              description: reasonForBenchmark(obj, catId, q),
              sourceType: 'N/A'
            }
          ]
          changed = true
        }
      }
    }
    // Process questions: same backfill, with document-oriented source fields.
    for (const q of PROC) {
      if (ce.processAnswers[q] === 'N/A') {
        const sources = ce.processSources[q] || []
        if (sources.length === 0 || (sources[0] && (sources[0].description === 'N/A' || sources[0].description === 'Not applicable'))) {
          const reason = reasonForProcess(obj, catId, q)
          ce.processSources[q] = [
            {
              url: '',
              description: reason,
              documentType: 'N/A',
              scope: reason
            }
          ]
          changed = true
        }
      }
    }
  }

  if (changed) {
    fs.writeFileSync(filePath, JSON.stringify(obj, null, 2) + '\n')
  }
  return changed
}
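
// Example of the placeholder entry populateFile writes when a benchmark answer
// is 'N/A' and no real source exists (the description text comes from
// reasonForBenchmark and varies with modality and category):
// "benchmarkSources": {
//   "A2": [
//     {
//       "url": "",
//       "description": "A2: Not applicable - benchmark data not provided or not run for this evaluation.",
//       "sourceType": "N/A"
//     }
//   ]
// }
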
// Run the backfill over every evaluation file and report the outcome per file.
const results = []
fs.readdirSync(EVAL_DIR).forEach((file) => {
  if (!file.endsWith('.json')) return
  const p = path.join(EVAL_DIR, file)
  try {
    const updated = populateFile(p)
    results.push({ file, updated })
  } catch (e) {
    results.push({ file, error: e.message })
  }
})

console.table(results)
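
// Usage sketch (assuming this script lives one directory below the project
// root, next to the public/ folder it reads, as implied by EVAL_DIR above):
//   node <path/to/this-script>.js
// console.table prints one row per evaluation file with either its `updated`
// flag or the error message that prevented processing.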