Spaces:
Running
Running
feat(filters): Enable filtering on model & metric comparison views.
Browse files
src/types.ts
CHANGED
@@ -236,20 +236,20 @@ export interface Data extends TileData {
|
|
236 |
}
|
237 |
|
238 |
// ===================================================================================
|
239 |
-
//
|
240 |
// ===================================================================================
|
241 |
-
export interface
|
242 |
evaluationsPerMetric: { [key: string]: TaskEvaluation[] };
|
243 |
filters: { [key: string]: string[] };
|
244 |
-
expression: object;
|
245 |
models: Model[];
|
246 |
-
|
|
|
247 |
metric?: Metric;
|
248 |
allowedValues?: string[];
|
249 |
annotator?: string;
|
250 |
}
|
251 |
|
252 |
-
export interface
|
253 |
records: {
|
254 |
taskId: string;
|
255 |
modelName: string;
|
|
|
236 |
}
|
237 |
|
238 |
// ===================================================================================
|
239 |
+
// FILTERATION WORKER
|
240 |
// ===================================================================================
|
241 |
+
export interface FilterationRequest {
|
242 |
evaluationsPerMetric: { [key: string]: TaskEvaluation[] };
|
243 |
filters: { [key: string]: string[] };
|
|
|
244 |
models: Model[];
|
245 |
+
expression?: object;
|
246 |
+
agreementLevels?: { [key: string]: number | string }[];
|
247 |
metric?: Metric;
|
248 |
allowedValues?: string[];
|
249 |
annotator?: string;
|
250 |
}
|
251 |
|
252 |
+
export interface FilterationResponse {
|
253 |
records: {
|
254 |
taskId: string;
|
255 |
modelName: string;
|
src/views/example/Example.tsx
CHANGED
@@ -338,7 +338,7 @@ export default memo(function Example({ data }: { data: Data }) {
|
|
338 |
evaluationsPerMetric={evaluationsPerMetric}
|
339 |
models={data.models}
|
340 |
metrics={eligibleMetrics}
|
341 |
-
filters={
|
342 |
onTaskSelection={(taskId) => {
|
343 |
setSelectedTaskId(taskId);
|
344 |
}}
|
@@ -353,7 +353,7 @@ export default memo(function Example({ data }: { data: Data }) {
|
|
353 |
evaluationsPerMetric={evaluationsPerMetric}
|
354 |
models={data.models}
|
355 |
metrics={eligibleMetrics}
|
356 |
-
filters={
|
357 |
onTaskSelection={(taskId) => {
|
358 |
setSelectedTaskId(taskId);
|
359 |
}}
|
|
|
338 |
evaluationsPerMetric={evaluationsPerMetric}
|
339 |
models={data.models}
|
340 |
metrics={eligibleMetrics}
|
341 |
+
filters={filters}
|
342 |
onTaskSelection={(taskId) => {
|
343 |
setSelectedTaskId(taskId);
|
344 |
}}
|
|
|
353 |
evaluationsPerMetric={evaluationsPerMetric}
|
354 |
models={data.models}
|
355 |
metrics={eligibleMetrics}
|
356 |
+
filters={filters}
|
357 |
onTaskSelection={(taskId) => {
|
358 |
setSelectedTaskId(taskId);
|
359 |
}}
|
src/views/metric-behavior/MetricBehavior.module.scss
CHANGED
@@ -76,7 +76,7 @@
|
|
76 |
align-items: center;
|
77 |
}
|
78 |
|
79 |
-
.
|
80 |
display: flex;
|
81 |
column-gap: $spacing-02;
|
82 |
}
|
|
|
76 |
align-items: center;
|
77 |
}
|
78 |
|
79 |
+
.graphTitle {
|
80 |
display: flex;
|
81 |
column-gap: $spacing-02;
|
82 |
}
|
src/views/metric-behavior/MetricBehavior.tsx
CHANGED
@@ -353,7 +353,7 @@ export default memo(function MetricBehavior({
|
|
353 |
|
354 |
// Step 2.b: Filter evaluations based on selected models
|
355 |
const filteredEvaluationsPerMetric = useMemo(() => {
|
356 |
-
|
357 |
for (const [metric, evals] of Object.entries(evaluationsPerMetric)) {
|
358 |
filtered[metric] = evals.filter(
|
359 |
(evaluation) =>
|
@@ -700,18 +700,24 @@ export default memo(function MetricBehavior({
|
|
700 |
</div>
|
701 |
) : (
|
702 |
<div className={classes.row}>
|
|
|
|
|
|
|
|
|
|
|
|
|
703 |
<HeatmapChart
|
704 |
data={metricToMetricCorrelation}
|
705 |
options={{
|
706 |
// @ts-ignore
|
707 |
axes: {
|
708 |
bottom: {
|
709 |
-
title: '
|
710 |
mapsTo: 'metricA',
|
711 |
scaleType: ScaleTypes.LABELS,
|
712 |
},
|
713 |
left: {
|
714 |
-
title: '
|
715 |
mapsTo: 'metricB',
|
716 |
scaleType: ScaleTypes.LABELS,
|
717 |
},
|
@@ -759,10 +765,15 @@ export default memo(function MetricBehavior({
|
|
759 |
</div>
|
760 |
) : (
|
761 |
<div className={classes.row}>
|
762 |
-
<h4>
|
763 |
-
|
764 |
-
|
765 |
-
|
|
|
|
|
|
|
|
|
|
|
766 |
</h4>
|
767 |
<HeatmapChart
|
768 |
ref={chartRef}
|
|
|
353 |
|
354 |
// Step 2.b: Filter evaluations based on selected models
|
355 |
const filteredEvaluationsPerMetric = useMemo(() => {
|
356 |
+
const filtered: { [key: string]: TaskEvaluation[] } = {};
|
357 |
for (const [metric, evals] of Object.entries(evaluationsPerMetric)) {
|
358 |
filtered[metric] = evals.filter(
|
359 |
(evaluation) =>
|
|
|
700 |
</div>
|
701 |
) : (
|
702 |
<div className={classes.row}>
|
703 |
+
<h4 className={classes.graphTitle}>
|
704 |
+
<strong>Spearman correlation</strong>
|
705 |
+
<span>
|
706 |
+
{`(${Object.values(filteredEvaluationsPerMetric)[0].length ? Object.values(filteredEvaluationsPerMetric)[0].length / (selectedModels ? selectedModels.length : 1) : 0}/${Object.values(evaluationsPerMetric)[0].length / models.length})`}
|
707 |
+
</span>
|
708 |
+
</h4>
|
709 |
<HeatmapChart
|
710 |
data={metricToMetricCorrelation}
|
711 |
options={{
|
712 |
// @ts-ignore
|
713 |
axes: {
|
714 |
bottom: {
|
715 |
+
title: 'Metrics',
|
716 |
mapsTo: 'metricA',
|
717 |
scaleType: ScaleTypes.LABELS,
|
718 |
},
|
719 |
left: {
|
720 |
+
title: 'Metrics',
|
721 |
mapsTo: 'metricB',
|
722 |
scaleType: ScaleTypes.LABELS,
|
723 |
},
|
|
|
765 |
</div>
|
766 |
) : (
|
767 |
<div className={classes.row}>
|
768 |
+
<h4 className={classes.graphTitle}>
|
769 |
+
<strong>
|
770 |
+
% instances with same scores (
|
771 |
+
{extractMetricDisplayName(selectedMetricA)} vs.
|
772 |
+
{extractMetricDisplayName(selectedMetricB)})
|
773 |
+
</strong>
|
774 |
+
<span>
|
775 |
+
{`(${Object.values(filteredEvaluationsPerMetric)[0].length ? Object.values(filteredEvaluationsPerMetric)[0].length / (selectedModels ? selectedModels.length : 1) : 0}/${Object.values(evaluationsPerMetric)[0].length / models.length})`}
|
776 |
+
</span>
|
777 |
</h4>
|
778 |
<HeatmapChart
|
779 |
ref={chartRef}
|
src/views/model-behavior/ModelBehavior.tsx
CHANGED
@@ -38,7 +38,12 @@ import { GroupedBarChart } from '@carbon/charts-react';
|
|
38 |
import { ScaleTypes } from '@carbon/charts';
|
39 |
|
40 |
import { useTheme } from '@/src/theme';
|
41 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
42 |
import {
|
43 |
AgreementLevels,
|
44 |
AgreementLevelDefinitions,
|
@@ -221,7 +226,7 @@ export default function ModelBehavior({
|
|
221 |
);
|
222 |
|
223 |
// Step 2.c.ii: Set up event listener for messages from the worker
|
224 |
-
worker.onmessage = function (event: MessageEvent<
|
225 |
// Step 2.c.ii.*: Copy over response data
|
226 |
const { records, evaluations } = event.data;
|
227 |
|
@@ -337,8 +342,8 @@ export default function ModelBehavior({
|
|
337 |
filterationWorker.postMessage({
|
338 |
evaluationsPerMetric: evaluationsPerMetric,
|
339 |
filters: selectedFilters,
|
340 |
-
expression: expression,
|
341 |
models: selectedModels,
|
|
|
342 |
agreementLevels: selectedAgreementLevels,
|
343 |
metric: selectedMetric,
|
344 |
allowedValues: selectedAllowedValues,
|
|
|
38 |
import { ScaleTypes } from '@carbon/charts';
|
39 |
|
40 |
import { useTheme } from '@/src/theme';
|
41 |
+
import {
|
42 |
+
TaskEvaluation,
|
43 |
+
Model,
|
44 |
+
Metric,
|
45 |
+
FilterationResponse,
|
46 |
+
} from '@/src/types';
|
47 |
import {
|
48 |
AgreementLevels,
|
49 |
AgreementLevelDefinitions,
|
|
|
226 |
);
|
227 |
|
228 |
// Step 2.c.ii: Set up event listener for messages from the worker
|
229 |
+
worker.onmessage = function (event: MessageEvent<FilterationResponse>) {
|
230 |
// Step 2.c.ii.*: Copy over response data
|
231 |
const { records, evaluations } = event.data;
|
232 |
|
|
|
342 |
filterationWorker.postMessage({
|
343 |
evaluationsPerMetric: evaluationsPerMetric,
|
344 |
filters: selectedFilters,
|
|
|
345 |
models: selectedModels,
|
346 |
+
expression: expression,
|
347 |
agreementLevels: selectedAgreementLevels,
|
348 |
metric: selectedMetric,
|
349 |
allowedValues: selectedAllowedValues,
|
src/workers/filter.ts
CHANGED
@@ -18,11 +18,11 @@
|
|
18 |
|
19 |
import { isEmpty } from 'lodash';
|
20 |
|
21 |
-
import {
|
22 |
import { areObjectsIntersecting } from '@/src/utilities/objects';
|
23 |
import { evaluate } from '@/src/utilities/expressions';
|
24 |
|
25 |
-
onmessage = function (event: MessageEvent<
|
26 |
// Step 1: Initialize necessary variables
|
27 |
const {
|
28 |
evaluationsPerMetric,
|
@@ -55,7 +55,7 @@ onmessage = function (event: MessageEvent<RequestMessage>) {
|
|
55 |
// Step 3: If a metric is selected
|
56 |
if (metric) {
|
57 |
// Step 3.a: If an expression is specified
|
58 |
-
if (
|
59 |
// Step 3.a.ii: Build an object containing evaluations per model for every task
|
60 |
const evaluationsPerTaskPerModel: {
|
61 |
[key: string]: { [key: string]: TaskEvaluation };
|
@@ -121,9 +121,10 @@ onmessage = function (event: MessageEvent<RequestMessage>) {
|
|
121 |
// Step 3.b.ii: Verify against aggregate value
|
122 |
if (
|
123 |
evaluation.modelId in models &&
|
124 |
-
|
125 |
-
|
126 |
-
|
|
|
127 |
(!allowedValues ||
|
128 |
isEmpty(allowedValues) ||
|
129 |
allowedValues.includes(evaluation[`${metric.name}_agg`].value))
|
@@ -173,9 +174,10 @@ onmessage = function (event: MessageEvent<RequestMessage>) {
|
|
173 |
// Step 3.a: Verify against aggregate value
|
174 |
if (
|
175 |
evaluation.modelId in models &&
|
176 |
-
agreementLevels
|
177 |
-
|
178 |
-
|
|
|
179 |
(!allowedValues ||
|
180 |
isEmpty(allowedValues) ||
|
181 |
allowedValues.includes(evaluation[`${metric}_agg`].value))
|
|
|
18 |
|
19 |
import { isEmpty } from 'lodash';
|
20 |
|
21 |
+
import { FilterationRequest, TaskEvaluation } from '@/src/types';
|
22 |
import { areObjectsIntersecting } from '@/src/utilities/objects';
|
23 |
import { evaluate } from '@/src/utilities/expressions';
|
24 |
|
25 |
+
onmessage = function (event: MessageEvent<FilterationRequest>) {
|
26 |
// Step 1: Initialize necessary variables
|
27 |
const {
|
28 |
evaluationsPerMetric,
|
|
|
55 |
// Step 3: If a metric is selected
|
56 |
if (metric) {
|
57 |
// Step 3.a: If an expression is specified
|
58 |
+
if (expression && !isEmpty(expression)) {
|
59 |
// Step 3.a.ii: Build an object containing evaluations per model for every task
|
60 |
const evaluationsPerTaskPerModel: {
|
61 |
[key: string]: { [key: string]: TaskEvaluation };
|
|
|
121 |
// Step 3.b.ii: Verify against aggregate value
|
122 |
if (
|
123 |
evaluation.modelId in models &&
|
124 |
+
(!agreementLevels ||
|
125 |
+
agreementLevels
|
126 |
+
.map((level) => level.value)
|
127 |
+
.includes(evaluation[`${metric.name}_agg`].level)) &&
|
128 |
(!allowedValues ||
|
129 |
isEmpty(allowedValues) ||
|
130 |
allowedValues.includes(evaluation[`${metric.name}_agg`].value))
|
|
|
174 |
// Step 3.a: Verify against aggregate value
|
175 |
if (
|
176 |
evaluation.modelId in models &&
|
177 |
+
(!agreementLevels ||
|
178 |
+
agreementLevels
|
179 |
+
.map((level) => level.value)
|
180 |
+
.includes(evaluation[`${metric}_agg`].level)) &&
|
181 |
(!allowedValues ||
|
182 |
isEmpty(allowedValues) ||
|
183 |
allowedValues.includes(evaluation[`${metric}_agg`].value))
|