Spaces:

merve
/

private-and-fair

Running

File size: 5,017 Bytes

30e9731

// Draws the "High privacy and accuracy requires more training data" chart
// inside `.accuracy-v-privacy-dataset_size`: one line per training-set size,
// plotting test accuracy against epsilon on a log-scaled x axis. Hovering a
// line or point marks the matching line, points and label as `active`.
//
// NOTE(review): depends on the globals `d3` and `util`, and on selection
// helpers that are not core d3 (`st`, `at`, `appendMany`, `translate`,
// `tspans`, `parent`, plus `d3.nestBy` / `d3.conventions` / `d3.drawAxis` /
// `d3.wordwrap`) — presumably d3-jetpack; confirm against the page's scripts.
!(async function () {
  const loadedRows = await util.getFile('cns-cache/model_grid_test_accuracy.json');

  // Keep rows whose epsilon lies between epsilonExtent[1] and epsilonExtent[0]
  // (both inclusive) and whose training set has more than 1000 points.
  const isInEpsilonExtent = ({epsilon}) =>
    util.epsilonExtent[1] <= epsilon && epsilon <= util.epsilonExtent[0];
  const rows = loadedRows.filter(row => isInEpsilonExtent(row) && row.dataset_size > 1000);

  // Label offset, anchor and text for each training-set size, looked up by group key.
  const labelBySize = {
    60000: {pos: [7, 11], textAnchor: 'middle', text: '60,000'},
    30000: {pos: [7, 11], textAnchor: 'middle', text: '30,000'},
    15000: {pos: [7, -5], textAnchor: 'start', text: '15,000'},
    7500: {pos: [0, 8], textAnchor: 'start', text: '7,500'},
    3750: {pos: [-34, 10], textAnchor: 'start', text: '3,750'},
    2000: {pos: [-50, 10], textAnchor: 'end', text: '2,000 training points'},
  };

  // One group of rows per dataset_size; each group gets a color and a label.
  const sizeGroups = d3.nestBy(rows, row => row.dataset_size);
  sizeGroups.forEach((group, index) => {
    group.dataset_size = group.key;
    group.color = d3.interpolatePlasma(.84 - index / 6);

    // Tint the `.tp60` / `.tp75` elements elsewhere on the page with the
    // matching line color. Loose `==` is kept deliberately: the group key is
    // presumably a string while the literals are numbers — TODO confirm.
    if (group.key == 60000) {
      d3.selectAll('.tp60').st({background: group.color, padding: 2});
    }
    if (group.key == 7500) {
      d3.selectAll('.tp75').st({background: group.color, color: '#fff', padding: 2});
    }

    group.label = labelBySize[group.key];

    // Back-reference so every row can reach its group (used for circle fill).
    group.forEach(row => {
      row.size = group;
    });
  });

  // Clear the container and describe the chart for assistive technology.
  const chartRoot = d3.select('.accuracy-v-privacy-dataset_size').html('')
    .at({
      role: 'graphics-document',
      'aria-label': 'High privacy and accuracy requires more training data. Line chart showing too much differential privacy without enough data decreases accuracy.',
    });

  chartRoot.append('div.chart-title').text('High privacy and accuracy requires more training data');

  const c = d3.conventions({
    sel: chartRoot,
    height: 400,
    margin: {bottom: 125, top: 5},
    layers: 'sd',
  });

  // Swap in a log x scale over util.epsilonExtent, reusing the existing pixel range.
  c.x = d3.scaleLog().domain(util.epsilonExtent).range(c.x.range());

  // Only ticks whose first character other than '0' and '.' is 1 get a label;
  // every other tick returns undefined and is left without text.
  const formatEpsilonTick = tick => {
    const tickText = tick + '';
    const leadingDigit = tickText.split('').find(ch => ch != 0 && ch != '.');
    if (leadingDigit == 1) return tickText;
  };
  c.xAxis = d3.axisBottom(c.x).tickFormat(formatEpsilonTick);

  const formatPercent = d3.format('.0%');
  c.yAxis.tickFormat(value => formatPercent(value));

  d3.drawAxis(c);
  util.addAxisLabel(c, 'Higher Privacy →', 'Test Accuracy');
  util.ggPlotBg(c, false);

  // Explanatory note, 240px wide and horizontally centered, 70px below the plot area.
  const noteHalfWidth = 120;
  c.layers[1].append('div')
    .st({fontSize: 12, color: '#555', width: noteHalfWidth * 2, textAlign: 'center', lineHeight: '1.3em'})
    .translate([c.width / 2 - noteHalfWidth, c.height + 70])
    .html('in ε, a <a href="https://desfontain.es/privacy/differential-privacy-in-more-detail.html">measure</a> of how much modifying a single training point can change the model (models with a lower ε are more private)');

  // Embolden the 0.9 y tick label and add a dashed rule of the plot's width
  // to that label's parent element.
  const ninetyPercentTickText = c.svg.selectAll('.y .tick')
    .filter(tick => tick == .9)
    .select('text')
    .st({fontWeight: 600});
  ninetyPercentTickText.parent()
    .append('path')
    .at({stroke: '#000', strokeDasharray: '2 2', d: `M 0 0 H ${c.width}`});

  const accuracyLine = d3.line()
    .x(point => c.x(point.epsilon))
    .y(point => c.y(point.accuracy))
    .curve(d3.curveMonotoneX);

  // One path per training-set size.
  const linePaths = c.svg.append('g')
    .appendMany('path.accuracy-line', sizeGroups)
    .at({d: accuracyLine, fill: 'none'})
    .st({stroke: group => group.color})
    .on('mousemove', setActiveDigit);

  // One circle per data row, colored like the line of its group.
  const pointGroups = c.svg.append('g')
    .appendMany('g.accuracy-circle', rows)
    .translate(row => [c.x(row.epsilon), c.y(row.accuracy)])
    .on('mousemove', setActiveDigit);

  pointGroups.append('circle')
    .at({r: 4, stroke: '#fff'})
    .st({fill: row => row.size.color});

  // Labels are anchored at the first row of each group.
  const labelGroups = c.svg.appendMany('g.accuracy-label', sizeGroups)
    .translate(group => [c.x(group[0].epsilon), c.y(group[0].accuracy)]);

  const labelText = labelGroups.append('text').filter(group => group.label);
  labelText
    .translate(group => group.label.pos)
    .st({fill: group => group.color, fontWeight: 400})
    .at({textAnchor: group => group.label.textAnchor, fontSize: 14, fill: '#000', dy: '.66em'})
    .text(group => group.label.text);

  // The 2000 label is re-rendered as one tspan per space-separated word.
  labelText
    .filter(group => group.key == 2000)
    .text('')
    .tspans(group => group.label.text.split(' '));

  c.svg.append('text.annotation')
    .translate([225, 106])
    .tspans(d3.wordwrap('With limited data, adding more differential privacy improves accuracy...', 25), 12);

  c.svg.append('text.annotation')
    .translate([490, 230])
    .tspans(d3.wordwrap("...until it doesn't", 20));

  // Mousemove handler: flags the line, points and label whose dataset_size
  // matches the first argument's `dataset_size` as `active`, and raises the
  // matching line and points. The first argument is presumably the hovered
  // element's datum (pre-v6 d3 listener signature) — TODO confirm.
  function setActiveDigit({dataset_size}) {
    // Loose `==` on purpose: group and row sizes may differ in type.
    const markActive = selection => selection
      .classed('active', false)
      .filter(d => d.dataset_size == dataset_size)
      .classed('active', true);

    markActive(linePaths).raise();
    markActive(pointGroups).raise();
    markActive(labelGroups);
  }
})();




// Example record (appears to be one row of cns-cache/model_grid_test_accuracy.json):
// aVal: 0.5
// accuracy: 0.8936
// accuracy_0: 0.9663265306122449
// accuracy_1: 0.9806167400881057
// accuracy_2: 0.9011627906976745
// accuracy_3: 0.8633663366336634
// accuracy_4: 0.8859470468431772
// accuracy_5: 0.8733183856502242
// accuracy_6: 0.9384133611691023
// accuracy_7: 0.8657587548638133
// accuracy_8: 0.8059548254620124
// accuracy_9: 0.8434093161546086
// dataset_size: 60000
// epochs: 4
// epsilon: 0.19034890168775565
// l2_norm_clip: 0.75
// noise_multiplier: 2.6