| [ |
| { |
| "benchmark":"EASI", |
| "model_name":"abrocitinib 200 mg QD", |
| "score":86.6, |
| "metric":"EASI-50", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Comparative efficacy and safety of systemic therapies used in moderate‐to‐severe atopic dermatitis: a systematic literature review and network meta‐analysis", |
| "source_url":"https:\/\/doi.org\/10.1111\/jdv.17351", |
| "source_year":2021.0, |
| "source_type":"citation", |
| "notes":"Combination therapy RCTs", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "EASI-50":86.6 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Upadacitinib", |
| "score":85.6, |
| "metric":"EASI percent reduction", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"The efficacy and safety of upadacitinib treatment for moderate to severe atopic dermatitis in real‐world practice in Japan", |
| "source_url":"https:\/\/doi.org\/10.1111\/1346-8138.16549", |
| "source_year":2022.0, |
| "source_type":"citation", |
| "notes":"Median percent reduction in EASI at week 12 with Upadacitinib 15 mg\/day plus topical corticosteroids.", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "Time":"Week 12", |
| "EASI percent reduction":"85.6%" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"upadacitinib 30 mg once daily (QD)", |
| "score":83.6, |
| "metric":"EASI-50 response", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Comparative efficacy and safety of systemic therapies used in moderate‐to‐severe atopic dermatitis: a systematic literature review and network meta‐analysis", |
| "source_url":"https:\/\/doi.org\/10.1111\/jdv.17351", |
| "source_year":2021.0, |
| "source_type":"citation", |
| "notes":"Monotherapy RCTs", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "EASI-50 response":83.6 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"dupilumab 300 mg Q2W", |
| "score":82.4, |
| "metric":"EASI-50", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Comparative efficacy and safety of systemic therapies used in moderate‐to‐severe atopic dermatitis: a systematic literature review and network meta‐analysis", |
| "source_url":"https:\/\/doi.org\/10.1111\/jdv.17351", |
| "source_year":2021.0, |
| "source_type":"citation", |
| "notes":"Combination therapy RCTs", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "EASI-50":82.4 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"abrocitinib 100 mg QD", |
| "score":79.7, |
| "metric":"EASI-50", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Comparative efficacy and safety of systemic therapies used in moderate‐to‐severe atopic dermatitis: a systematic literature review and network meta‐analysis", |
| "source_url":"https:\/\/doi.org\/10.1111\/jdv.17351", |
| "source_year":2021.0, |
| "source_type":"citation", |
| "notes":"Combination therapy RCTs", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "EASI-50":79.7 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Human", |
| "score":79.2, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"79.2", |
| "SITE [57]":"67.5", |
| "MMSI [68]":"97.2", |
| "OmniSpatial [23]":"92.63", |
| "MindCube ∗ [69]":"94.55", |
| "STARE [32]":"96.50", |
| "CoreCognition [33]":"86.98", |
| "SpatialViz [55]":"82.46" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"abrocitinib 200 mg QD", |
| "score":74.6, |
| "metric":"EASI-50 response", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Comparative efficacy and safety of systemic therapies used in moderate‐to‐severe atopic dermatitis: a systematic literature review and network meta‐analysis", |
| "source_url":"https:\/\/doi.org\/10.1111\/jdv.17351", |
| "source_year":2021.0, |
| "source_type":"citation", |
| "notes":"Monotherapy RCTs", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "EASI-50 response":74.6 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Upadacitinib", |
| "score":73.6, |
| "metric":"EASI percent reduction", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"The efficacy and safety of upadacitinib treatment for moderate to severe atopic dermatitis in real‐world practice in Japan", |
| "source_url":"https:\/\/doi.org\/10.1111\/1346-8138.16549", |
| "source_year":2022.0, |
| "source_type":"citation", |
| "notes":"Median percent reduction in EASI at week 4 with Upadacitinib 15 mg\/day plus topical corticosteroids.", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "Time":"Week 4", |
| "EASI percent reduction":"73.6%" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"upadacitinib 15 mg QD", |
| "score":70.5, |
| "metric":"EASI-50 response", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Comparative efficacy and safety of systemic therapies used in moderate‐to‐severe atopic dermatitis: a systematic literature review and network meta‐analysis", |
| "source_url":"https:\/\/doi.org\/10.1111\/jdv.17351", |
| "source_year":2021.0, |
| "source_type":"citation", |
| "notes":"Monotherapy RCTs", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "EASI-50 response":70.5 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Upadacitinib", |
| "score":67.7, |
| "metric":"EASI 75 achievement rate", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"The efficacy and safety of upadacitinib treatment for moderate to severe atopic dermatitis in real‐world practice in Japan", |
| "source_url":"https:\/\/doi.org\/10.1111\/1346-8138.16549", |
| "source_year":2022.0, |
| "source_type":"citation", |
| "notes":"Achievement rate of EASI 75 at week 12 with Upadacitinib 15 mg\/day plus topical corticosteroids.", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "Time":"Week 12", |
| "EASI 75 achievement rate":"67.7%" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"dupilumab 300 mg every 2 weeks (Q2W)", |
| "score":63.4, |
| "metric":"EASI-50 response", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Comparative efficacy and safety of systemic therapies used in moderate‐to‐severe atopic dermatitis: a systematic literature review and network meta‐analysis", |
| "source_url":"https:\/\/doi.org\/10.1111\/jdv.17351", |
| "source_year":2021.0, |
| "source_type":"citation", |
| "notes":"Monotherapy RCTs", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "EASI-50 response":63.4 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Qwen3-8B-Instruct [65]", |
| "score":57.9, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"† indicates cases where generations were truncated due to overlong chains of thought, yielding no final answer; such instances are counted as incorrect, which depresses the score.", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"57.90", |
| "SITE [57]":"45.83", |
| "MMSI [68]":"31.10", |
| "OmniSpatial [23]":"45.73", |
| "MindCube ∗ [69]":"29.42", |
| "STARE [32]":"39.76", |
| "CoreCognition [33]":"69.67", |
| "SpatialViz [55]":"17.54 †" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"abrocitinib 100 mg QD", |
| "score":56.7, |
| "metric":"EASI-50 response", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Comparative efficacy and safety of systemic therapies used in moderate‐to‐severe atopic dermatitis: a systematic literature review and network meta‐analysis", |
| "source_url":"https:\/\/doi.org\/10.1111\/jdv.17351", |
| "source_year":2021.0, |
| "source_type":"citation", |
| "notes":"Monotherapy RCTs", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "EASI-50 response":56.7 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"InternVL3.5-8B [56]", |
| "score":56.05, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"56.05", |
| "SITE [57]":"43.79", |
| "MMSI [68]":"27.30", |
| "OmniSpatial [23]":"46.71", |
| "MindCube ∗ [69]":"42.50", |
| "STARE [32]":"40.18", |
| "CoreCognition [33]":"66.40", |
| "SpatialViz [55]":"23.98" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"GPT-5-2025-08-07 [45]", |
| "score":55.03, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"55.03", |
| "SITE [57]":"61.88", |
| "MMSI [68]":"41.80", |
| "OmniSpatial [23]":"59.90", |
| "MindCube ∗ [69]":"56.30", |
| "STARE [32]":"54.59", |
| "CoreCognition [33]":"84.37", |
| "SpatialViz [55]":"51.27" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Gemini-2.5-pro-2025-06 [52]", |
| "score":53.57, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"53.57", |
| "SITE [57]":"57.06", |
| "MMSI [68]":"38.00", |
| "OmniSpatial [23]":"55.38", |
| "MindCube ∗ [69]":"57.60", |
| "STARE [32]":"49.14", |
| "CoreCognition [33]":"76.70", |
| "SpatialViz [55]":"42.71" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Upadacitinib", |
| "score":51.6, |
| "metric":"EASI 75 achievement rate", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"The efficacy and safety of upadacitinib treatment for moderate to severe atopic dermatitis in real‐world practice in Japan", |
| "source_url":"https:\/\/doi.org\/10.1111\/1346-8138.16549", |
| "source_year":2022.0, |
| "source_type":"citation", |
| "notes":"Achievement rate of EASI 75 at week 4 with Upadacitinib 15 mg\/day plus topical corticosteroids.", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "Time":"Week 4", |
| "EASI 75 achievement rate":"51.6%" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"high dose hUCB-MSC", |
| "score":50.0, |
| "metric":"EASI score reduction (%)", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Clinical Trial of Human Umbilical Cord Blood-Derived Stem Cells for the Treatment of Moderate-to-Severe Atopic Dermatitis: Phase I\/IIa Studies", |
| "source_url":"https:\/\/doi.org\/10.1002\/stem.2401", |
| "source_year":2016.0, |
| "source_type":"citation", |
| "notes":"Fifty-five percent of patients in high dose hUCB-MSC-treated group showed a 50% reduction in the EASI score.", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "EASI score reduction (%)":50 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"EASI 50", |
| "score":50.0, |
| "metric":"percentage MIC", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"What are the best endpoints for Eczema Area and Severity Index and Scoring Atopic Dermatitis in clinical practice? A prospective observational study*", |
| "source_url":"https:\/\/doi.org\/10.1111\/bjd.19457", |
| "source_year":2020.0, |
| "source_type":"citation", |
| "notes":"Meaningful percentage MIC regardless of baseline AD severity.", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "percentage MIC":50 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Seed-1.6-2025-06-15 [51]", |
| "score":49.91, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"49.91", |
| "SITE [57]":"54.61", |
| "MMSI [68]":"38.30", |
| "OmniSpatial [23]":"49.32", |
| "MindCube ∗ [69]":"48.75", |
| "STARE [32]":"46.06", |
| "CoreCognition [33]":"77.17", |
| "SpatialViz [55]":"34.58" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"GPT-5-mini-2025-08-07 [45]", |
| "score":48.67, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"48.67", |
| "SITE [57]":"52.47", |
| "MMSI [68]":"34.10", |
| "OmniSpatial [23]":"55.52", |
| "MindCube ∗ [69]":"56.69", |
| "STARE [32]":"52.51", |
| "CoreCognition [33]":"77.77", |
| "SpatialViz [55]":"44.66" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Grok-4-2025-07-09 [62]", |
| "score":47.92, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"† indicates cases where generations were truncated due to overlong chains of thought, yielding no final answer; such instances are counted as incorrect, which depresses the score.", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"47.92", |
| "SITE [57]":"47.01", |
| "MMSI [68]":"37.80", |
| "OmniSpatial [23]":"46.84", |
| "MindCube ∗ [69]":"63.56", |
| "STARE [32]":"26.90", |
| "CoreCognition [33]":"79.27", |
| "SpatialViz [55]":"19.40 †" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"InternVL3-78B [79]", |
| "score":47.55, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"47.55", |
| "SITE [57]":"52.72", |
| "MMSI [68]":"30.50", |
| "OmniSpatial [23]":"50.95", |
| "MindCube ∗ [69]":"49.52", |
| "STARE [32]":"42.00", |
| "CoreCognition [33]":"71.16", |
| "SpatialViz [55]":"31.10" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"GPT-5-nano-2025-08-07 [45]", |
| "score":43.22, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"43.22", |
| "SITE [57]":"35.81", |
| "MMSI [68]":"28.90", |
| "OmniSpatial [23]":"47.81", |
| "MindCube ∗ [69]":"41.48", |
| "STARE [32]":"46.05", |
| "CoreCognition [33]":"67.92", |
| "SpatialViz [55]":"35.59" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"InternVL3-8B [79]", |
| "score":42.14, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"42.14", |
| "SITE [57]":"41.15", |
| "MMSI [68]":"28.00", |
| "OmniSpatial [23]":"46.25", |
| "MindCube ∗ [69]":"41.54", |
| "STARE [32]":"41.36", |
| "CoreCognition [33]":"60.92", |
| "SpatialViz [55]":"30.00" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Qwen2.5-VL-72B-Instruct [1]", |
| "score":35.77, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"35.77", |
| "SITE [57]":"47.41", |
| "MMSI [68]":"32.50", |
| "OmniSpatial [23]":"47.81", |
| "MindCube ∗ [69]":"42.40", |
| "STARE [32]":"38.37", |
| "CoreCognition [33]":"69.22", |
| "SpatialViz [55]":"32.54" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Random Choice", |
| "score":34.0, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"VSI random choice here is chance level(Frequency).", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"34.00", |
| "SITE [57]":"0.0", |
| "MMSI [68]":"25.00", |
| "OmniSpatial [23]":"24.98", |
| "MindCube ∗ [69]":"32.35", |
| "STARE [32]":"34.80", |
| "CoreCognition [33]":"33.93", |
| "SpatialViz [55]":"25.08" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Qwen2.5-VL-7B-Instruct [1]", |
| "score":32.3, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"32.30", |
| "SITE [57]":"37.64", |
| "MMSI [68]":"26.80", |
| "OmniSpatial [23]":"39.07", |
| "MindCube ∗ [69]":"36.05", |
| "STARE [32]":"35.03", |
| "CoreCognition [33]":"62.16", |
| "SpatialViz [55]":"26.78" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Easi-CRISPR", |
| "score":30.0, |
| "metric":"efficiency", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Easi-CRISPR for creating knock-in and conditional knockout mouse models using long ssDNA donors", |
| "source_url":"https:\/\/doi.org\/10.1038\/nprot.2017.153", |
| "source_year":2017.0, |
| "source_type":"citation", |
| "notes":"Typically 30-60% efficiency, reaching as high as 100% in some cases.", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "efficiency":"30–60%" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"Qwen2.5-VL-3B-Instruct [1]", |
| "score":27.0, |
| "metric":"MRA, Acc", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"EASI", |
| "source_url":"https:\/\/arxiv.org\/pdf\/2508.13142.pdf", |
| "source_year":null, |
| "source_type":"seed", |
| "notes":"None", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "VSI [66]":"27.00", |
| "SITE [57]":"33.14", |
| "MMSI [68]":"28.60", |
| "OmniSpatial [23]":"42.47", |
| "MindCube ∗ [69]":"37.60", |
| "STARE [32]":"37.83", |
| "CoreCognition [33]":"60.19", |
| "SpatialViz [55]":"21.86" |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"EASI", |
| "score":0.47, |
| "metric":"sensitivity", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Development and Validation of a Tool to Improve Physician Identification of Elder Abuse: The Elder Abuse Suspicion Index (EASI)©", |
| "source_url":"https:\/\/doi.org\/10.1080\/08946560801973168", |
| "source_year":2008.0, |
| "source_type":"citation", |
| "notes":"The EASI had an estimated sensitivity and specificity of 0.47 and 0.75", |
| "extraction_confidence":1.0, |
| "additional_metrics":{ |
| "specificity":0.75 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"placebo", |
| "score":-1.3, |
| "metric":"median EASI", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Phase 2a, randomized, double‐blind, placebo‐controlled, multicenter, parallel‐group study of a H<sub>4<\/sub>R‐antagonist (<scp>JNJ<\/scp>‐39758979) in<scp>J<\/scp>apanese adults with moderate atopic dermatitis", |
| "source_url":"https:\/\/doi.org\/10.1111\/1346-8138.12726", |
| "source_year":2014.0, |
| "source_type":"citation", |
| "notes":"at week 6", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "median EASI":-1.3 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"JNJ-39758979 300 mg", |
| "score":-3.0, |
| "metric":"median EASI", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Phase 2a, randomized, double‐blind, placebo‐controlled, multicenter, parallel‐group study of a H<sub>4<\/sub>R‐antagonist (<scp>JNJ<\/scp>‐39758979) in<scp>J<\/scp>apanese adults with moderate atopic dermatitis", |
| "source_url":"https:\/\/doi.org\/10.1111\/1346-8138.12726", |
| "source_year":2014.0, |
| "source_type":"citation", |
| "notes":"at week 6", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "median EASI":-3.0 |
| } |
| }, |
| { |
| "benchmark":"EASI", |
| "model_name":"JNJ-39758979 100 mg", |
| "score":-3.7, |
| "metric":"median EASI", |
| "rank":null, |
| "task":"overall", |
| "split":"benchmark", |
| "source_title":"Phase 2a, randomized, double‐blind, placebo‐controlled, multicenter, parallel‐group study of a H<sub>4<\/sub>R‐antagonist (<scp>JNJ<\/scp>‐39758979) in<scp>J<\/scp>apanese adults with moderate atopic dermatitis", |
| "source_url":"https:\/\/doi.org\/10.1111\/1346-8138.12726", |
| "source_year":2014.0, |
| "source_type":"citation", |
| "notes":"at week 6", |
| "extraction_confidence":0.9, |
| "additional_metrics":{ |
| "median EASI":-3.7 |
| } |
| } |
| ] |