// Builds the "High privacy and accuracy requires more training data" chart
// inside `.accuracy-v-privacy-dataset_size`: for every training-set size it
// draws one accuracy line, one circle per data row and one text label, with
// ε on a log x scale and test accuracy on y.
//
// Depends on the globals `d3` and `util`, neither of which is defined here.
// NOTE(review): .at/.st/.appendMany/.translate/.tspans/.parent, d3.conventions,
// d3.nestBy, d3.wordwrap and d3.drawAxis look like d3-jetpack style helpers —
// confirm against the page's script includes.
!(async function(){
  const allRows = await util.getFile('cns-cache/model_grid_test_accuracy.json');

  // util.epsilonExtent is read as [upper bound, lower bound] by this filter.
  const epsilonUpper = util.epsilonExtent[0];
  const epsilonLower = util.epsilonExtent[1];
  const data = allRows.filter(row => {
    const inEpsilonRange = epsilonLower <= row.epsilon && row.epsilon <= epsilonUpper;
    return inEpsilonRange && row.dataset_size > 1000;
  });

  // Label offset / anchor / text for each dataset size, looked up by group key.
  const labelBySize = {
    60000: {pos: [7, 11], textAnchor: 'middle', text: '60,000'},
    30000: {pos: [7, 11], textAnchor: 'middle', text: '30,000'},
    15000: {pos: [7, -5], textAnchor: 'start', text: '15,000'},
    7500: {pos: [0, 8], textAnchor: 'start', text: '7,500'},
    3750: {pos: [-34, 10], textAnchor: 'start', text: '3,750'},
    2000: {pos: [-50, 10], textAnchor: 'end', text: '2,000 training points'},
  };

  // One group (an array of rows carrying a `.key`) per dataset size.
  const bySize = d3.nestBy(data, row => row.dataset_size);
  bySize.forEach((group, groupIndex) => {
    group.dataset_size = group.key;
    group.color = d3.interpolatePlasma(.84 - groupIndex/6);

    // Loose equality is kept on purpose.
    // NOTE(review): group.key may be a string rather than a number — confirm.
    // Elements with these classes get the matching line colour as background.
    if (group.key == 60000){
      d3.selectAll('.tp60').st({background: group.color, padding: 2});
    }
    if (group.key == 7500){
      d3.selectAll('.tp75').st({background: group.color, color: '#fff', padding: 2});
    }

    group.label = labelBySize[group.key];

    // Back-reference so each row can reach its group's colour later.
    group.forEach(row => {
      row.size = group;
    });
  });

  // Clear the container and describe the chart for assistive technology.
  const sel = d3.select('.accuracy-v-privacy-dataset_size').html('')
    .at({role: 'graphics-document', 'aria-label': `High privacy and accuracy requires more training data. 
Line chart showing too much differential privacy without enough data decreases accuracy.`});

  sel.append('div.chart-title').text('High privacy and accuracy requires more training data');

  const c = d3.conventions({
    sel,
    height: 400,
    margin: {bottom: 125, top: 5},
    layers: 'sd',
  });

  // Swap the default x scale for a log scale over the epsilon extent,
  // reusing the pixel range that d3.conventions computed.
  c.x = d3.scaleLog().domain(util.epsilonExtent).range(c.x.range());

  // Only ticks whose first character other than '0' and '.' is 1 get a label;
  // for every other tick the formatter returns undefined.
  c.xAxis = d3.axisBottom(c.x).tickFormat(tick => {
    const tickText = tick + '';
    const leadingDigit = tickText.split('').filter(ch => ch != 0 && ch != '.')[0];
    if (leadingDigit == 1) return tickText;
  });

  const formatPercent = d3.format('.0%');
  c.yAxis.tickFormat(tick => formatPercent(tick));

  d3.drawAxis(c);
  util.addAxisLabel(c, 'Higher Privacy →', 'Test Accuracy');
  util.ggPlotBg(c, false);

  // Explanatory caption, 240px wide, centred horizontally below the plot.
  c.layers[1].append('div')
    .st({fontSize: 12, color: '#555', width: 120*2, textAlign: 'center', lineHeight: '1.3em'})
    .translate([c.width/2 - 120, c.height + 70])
    .html('in ε, a measure of how much modifying a single training point can change the model (models with a lower ε are more private)');

  // Emphasise the 90% y tick: bolder text plus a dashed line across the plot.
  c.svg.selectAll('.y .tick').filter(tick => tick == .9)
    .select('text').st({fontWeight: 600}).parent()
    .append('path')
    .at({stroke: '#000', strokeDasharray: '2 2', d: 'M 0 0 H ' + c.width});

  const line = d3.line()
    .x(row => c.x(row.epsilon))
    .y(row => c.y(row.accuracy))
    .curve(d3.curveMonotoneX);

  // One path per dataset size.
  const lineSel = c.svg.append('g').appendMany('path.accuracy-line', bySize)
    .at({d: line, fill: 'none'})
    .st({stroke: group => group.color})
    .on('mousemove', setActiveDigit);

  // One circle per data row, coloured through the row's group back-reference.
  const circleSel = c.svg.append('g')
    .appendMany('g.accuracy-circle', data)
    .translate(row => [c.x(row.epsilon), c.y(row.accuracy)])
    .on('mousemove', setActiveDigit);

  circleSel.append('circle')
    .at({r: 4, stroke: '#fff'})
    .st({fill: row => row.size.color});

  // Each label is anchored at the first row of its group.
  const labelSel = c.svg.appendMany('g.accuracy-label', bySize)
    .translate(group => [c.x(group[0].epsilon), c.y(group[0].accuracy)]);

  const labelText = labelSel.append('text')
    .filter(group => group.label)
    .translate(group => group.label.pos)
    .st({fill: group => group.color, fontWeight: 400})
    .at({textAnchor: group => group.label.textAnchor, fontSize: 14, fill: '#000', dy: '.66em'})
    .text(group => group.label.text);

  // The 2,000 label is cleared and re-added as one tspan per word.
  labelText
    .filter(group => group.key == 2000)
    .text('')
    .tspans(group => group.label.text.split(' '));

  c.svg.append('text.annotation')
    .translate([225, 106])
    .tspans(d3.wordwrap('With limited data, adding more differential privacy improves accuracy...', 25), 12);

  c.svg.append('text.annotation')
    .translate([490, 230])
    .tspans(d3.wordwrap(`...until it doesn't`, 20));

  // Marks the line, circles and label whose dataset_size matches the given
  // one with the `active` class, clearing it from every other element. The
  // matching lines and circles are also raised; labels are not.
  // Registered above as the mousemove handler.
  // NOTE(review): the destructured first argument assumes the handler is
  // passed the bound datum first — confirm for the d3 version in use.
  function setActiveDigit({dataset_size}){
    const markActive = selection => selection
      .classed('active', 0)
      .filter(d => d.dataset_size == dataset_size)
      .classed('active', 1);

    markActive(lineSel).raise();
    markActive(circleSel).raise();
    markActive(labelSel);
  }
})()

// Example record (presumably one row of the loaded JSON — the fields read
// above are accuracy, dataset_size and epsilon):
//
// aVal: 0.5
// accuracy: 0.8936
// accuracy_0: 0.9663265306122449
// accuracy_1: 0.9806167400881057
// accuracy_2: 0.9011627906976745
// accuracy_3: 0.8633663366336634
// accuracy_4: 0.8859470468431772
// accuracy_5: 0.8733183856502242
// accuracy_6: 0.9384133611691023
// accuracy_7: 0.8657587548638133
// accuracy_8: 0.8059548254620124
// accuracy_9: 0.8434093161546086
// dataset_size: 60000
// epochs: 4
// epsilon: 0.19034890168775565
// l2_norm_clip: 0.75
// noise_multiplier: 2.6