File size: 4,435 Bytes
e903a32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env node

// Jitter Trackio CSV data with small, controlled noise.
// - Preserves comments (# ...) and blank lines
// - Leaves 'epoch' values unchanged
// - Adds mild noise to train/val accuracy (clamped to [0,1])
// - Adds mild noise to train/val loss (kept >= 0)
// - Keeps steps untouched
// Usage:
//   node app/scripts/jitter-trackio-data.mjs \
//     --in app/src/content/assets/data/trackio_wandb_demo.csv \
//     --out app/src/content/assets/data/trackio_wandb_demo.jitter.csv \
//     [--seed 42] [--amount 1.0] [--in-place]

import fs from 'node:fs/promises';
import path from 'node:path';

/**
 * Parse the command-line flags of the jitter script.
 *
 * Recognised flags: `--in <path>`, `--out <path>`, `--seed <number>`,
 * `--amount <number>` and `--in-place`. Unknown tokens are ignored.
 *
 * @param {string[]} argv - Full argument vector; parsing starts at index 2.
 * @returns {{in: string, out: string, seed: (number|undefined), amount: number, inPlace: boolean}}
 *   Parsed options. `out` equals `in` when `--in-place` is given; when neither
 *   `--out` nor `--in-place` is given it is `<dir>/<name>.jitter<ext>` next to
 *   the input (with `.csv` used when the input has no extension).
 * @throws {Error} When `--in` is missing.
 */
function parseArgs(argv){
  const args = { in: '', out: '', seed: undefined, amount: 1, inPlace: false };
  for (let i = 2; i < argv.length; i++){
    const a = argv[i];
    if (a === '--in' && argv[i+1]) { args.in = argv[++i]; continue; }
    if (a === '--out' && argv[i+1]) { args.out = argv[++i]; continue; }
    if (a === '--seed' && argv[i+1]) { args.seed = Number(argv[++i]); continue; }
    if (a === '--amount' && argv[i+1]) {
      const parsed = Number(argv[++i]);
      // Keep the default of 1 when the value is not a usable number.
      // An explicit 0 is a valid number and is accepted as-is.
      if (Number.isFinite(parsed)) args.amount = parsed;
      continue;
    }
    if (a === '--in-place') { args.inPlace = true; continue; }
  }
  if (!args.in) throw new Error('--in is required');
  // --in-place wins over any explicit --out.
  if (args.inPlace) args.out = args.in;
  if (!args.out) {
    const { dir, name, ext } = path.parse(args.in);
    args.out = path.join(dir, `${name}.jitter${ext || '.csv'}`);
  }
  return args;
}

/**
 * Create a seeded pseudo-random generator.
 *
 * The internal state is kept as an unsigned 32-bit integer. Without the wrap
 * the state would keep growing as a double and, after a few million draws,
 * exceed 2^53, at which point its low bits are rounded away and the sequence
 * silently changes character. Wrapping does not alter any earlier output,
 * because every use of the state below is a 32-bit bitwise operation.
 *
 * @param {number} seed - Seed; coerced to an unsigned 32-bit integer.
 * @returns {function(): number} Generator returning values in [0, 1).
 */
function mulberry32(seed){
  let t = seed >>> 0;
  return function(){
    // Advance and wrap to 32 bits so the state stays exactly representable.
    t = (t + 0x6D2B79F5) >>> 0;
    let r = Math.imul(t ^ (t >>> 15), 1 | t);
    r ^= r + Math.imul(r ^ (r >>> 7), 61 | r);
    // Reinterpret as unsigned and scale by 2^32 to land in [0, 1).
    return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
  };
}

/**
 * Pick the random source for a run.
 *
 * @param {*} seed - Candidate seed.
 * @returns {function(): number} A seeded mulberry32 generator when `seed` is a
 *   finite number, otherwise `Math.random` itself.
 */
function makeRng(seed){
  const hasUsableSeed = Number.isFinite(seed);
  return hasUsableSeed ? mulberry32(seed) : Math.random;
}

/**
 * Draw one sample using the Box-Muller transform.
 *
 * @param {function(): number} rng - Source of uniform samples; zeros are
 *   rejected and redrawn so the logarithm below never receives 0.
 * @returns {number} sqrt(-2 ln u) * cos(2 pi v) for two non-zero draws u, v.
 */
function randn(rng){
  const drawNonZero = () => {
    let sample;
    do {
      sample = rng();
    } while (sample === 0);
    return sample;
  };
  // u is drawn completely before v, so the order of rng calls is fixed.
  const u = drawNonZero();
  const v = drawNonZero();
  const radius = Math.sqrt(-2.0 * Math.log(u));
  const angle = 2.0 * Math.PI * v;
  return radius * Math.cos(angle);
}

/**
 * Apply metric-dependent noise to a single value.
 *
 * - `epoch` (case-insensitive, exact match): returned unchanged.
 * - metric name containing `accuracy`: noise limited to +/-0.02*amount,
 *   result clamped to [0, 1].
 * - metric name containing `loss`: noise limited to +/-0.03*amount,
 *   result kept >= 0.
 * - anything else: noise limited to +/-0.01*amount, no clamping of the result.
 *
 * @param {string} metric - Metric name.
 * @param {number} value - Original value.
 * @param {number} amount - Multiplier applied to both noise spread and limit.
 * @param {function(): number} rng - Uniform random source handed to `randn`.
 * @returns {number} The jittered value.
 */
function jitterValue(metric, value, amount, rng){
  const key = metric.toLowerCase();
  if (key === 'epoch') return value;

  // One normal draw scaled by `spread`, then limited to +/- `bound`; both scale with `amount`.
  const boundedNoise = (spread, bound) => {
    const lower = -bound * amount;
    const upper = bound * amount;
    const sample = randn(rng) * spread * amount;
    return Math.max(lower, Math.min(upper, sample));
  };

  if (key.includes('accuracy')) {
    const shifted = value + boundedNoise(0.01, 0.02);
    return Math.max(0, Math.min(1, shifted));
  }

  if (key.includes('loss')) {
    const shifted = value + boundedNoise(0.01, 0.03);
    return Math.max(0, shifted);
  }

  return value + boundedNoise(0.005, 0.01);
}

/**
 * Format `value` with the same number of decimal places as `original`.
 *
 * Surrounding whitespace in `original` is ignored when counting decimals, so
 * a cell such as `'0.50 '` counts as two decimals rather than three.
 *
 * @param {*} original - Original cell content; converted with `String()`.
 * @param {number} value - New numeric value to render.
 * @returns {string} `String(original)` unchanged when `value` is not finite;
 *   the rounded integer when `original` has no decimals; otherwise `value`
 *   in fixed notation with the original's decimal count.
 */
function formatNumberLike(original, value){
  const raw = String(original);
  if (!Number.isFinite(value)) return raw;
  const text = raw.trim();
  const dot = text.indexOf('.');
  const decimals = dot >= 0 ? (text.length - dot - 1) : 0;
  if (decimals <= 0) return String(Math.round(value));
  // toFixed throws a RangeError for more than 100 digits.
  return value.toFixed(Math.min(decimals, 100));
}

/**
 * Read the input CSV, jitter the value column of every data row and write
 * the result to the output path.
 *
 * Lines are passed through untouched when they are blank, start with `#`,
 * are the `run,step,metric,value,stderr` header on the first line, have
 * fewer than four columns, or carry an empty or non-numeric value cell.
 * For all other rows only the fourth column is replaced: the column count,
 * the other cells and the whitespace around the value are kept as they were.
 * If the input contains CRLF line endings the output is written with CRLF.
 *
 * @returns {Promise<void>} Resolves once the output file has been written.
 */
async function main(){
  const args = parseArgs(process.argv);
  const rng = makeRng(args.seed);
  const raw = await fs.readFile(args.in, 'utf8');
  // Keep the file's line-ending style so a rewrite (notably --in-place) does not flip CRLF to LF.
  const eol = raw.includes('\r\n') ? '\r\n' : '\n';
  const lines = raw.split(/\r?\n/);
  const headerPattern = /^\s*run\s*,\s*step\s*,\s*metric\s*,\s*value\s*,\s*stderr\s*$/i;

  const out = lines.map((line, i) => {
    // Blank lines and comments are preserved verbatim.
    if (line.trim().length === 0) return line;
    if (/^\s*#/.test(line)) return line;

    // Preserve header line unmodified
    if (i === 0 && headerPattern.test(line)) return line;

    const cols = line.split(',');
    if (cols.length < 4) return line;

    const metric = cols[2].trim();
    const valueStr = cols[3];
    const valueText = valueStr.trim();

    // Number('') is 0, so an empty cell must be skipped explicitly or a
    // missing value would be overwritten with a number.
    if (valueText === '') return line;
    const valueNum = Number(valueText);
    if (!Number.isFinite(valueNum)) return line;

    const jittered = jitterValue(metric, valueNum, args.amount, rng);
    const formatted = formatNumberLike(valueText, jittered);

    // Swap only the numeric text inside the cell, keeping its padding, and
    // rejoin every original column so the column count never changes.
    const start = valueStr.indexOf(valueText);
    cols[3] = valueStr.slice(0, start) + formatted + valueStr.slice(start + valueText.length);
    return cols.join(',');
  });

  const finalText = out.join(eol);
  await fs.writeFile(args.out, finalText, 'utf8');
  const relIn = path.relative(process.cwd(), args.in);
  const relOut = path.relative(process.cwd(), args.out);
  console.log(`Jittered data written: ${relOut} (from ${relIn})`);
}

// Script entry point: run main and, on any rejection, print the stack trace
// (or the stringified error when no stack is available) and exit with code 1.
main().catch((error) => {
  const details = error?.stack || String(error);
  console.error(details);
  process.exit(1);
});