| <div class="d3-tokenization"> |
|   <!-- Static diagram of a tokenization pipeline: input text, tokenizer, tokens with IDs, language model, output. --> |
|   <!-- role="img" with aria-label exposes the drawing to assistive technology as a single labelled image; no ids are used for the label so the fragment can be embedded more than once. --> |
|   <svg viewBox="0 0 800 400" xmlns="http://www.w3.org/2000/svg" role="img" aria-label="Tokenization pipeline: the input text Hello, world! is split by a tokenizer into four tokens (Hello, comma, world, exclamation mark) with IDs 5425, 11, 1917 and 0, which feed a language model that produces an output prediction."> |
|     <defs> |
|       <!-- Arrowhead shared by every connector below through marker-end. --> |
|       <marker id="arrowhead-tok" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto"> |
|         <polygon points="0 0, 10 3, 0 6" fill="currentColor" /> |
|       </marker> |
|     </defs> |
| |
|     <!-- Stage 1: raw input text --> |
|     <rect x="50" y="50" width="200" height="80" rx="5" class="box"/> |
|     <text x="150" y="75" text-anchor="middle" class="text title">Input Text</text> |
|     <text x="150" y="100" text-anchor="middle" class="text label">"Hello, world!"</text> |
| |
|     <!-- Connector: input to tokenizer --> |
|     <path d="M 250 90 L 290 90" class="arrow" marker-end="url(#arrowhead-tok)"/> |
| |
|     <!-- Stage 2: tokenizer. The label size comes from the .label rule; a font-size attribute here would be overridden by that rule. --> |
|     <rect x="290" y="60" width="120" height="60" rx="5" class="process"/> |
|     <text x="350" y="85" text-anchor="middle" class="text title">Tokenizer</text> |
|     <text x="350" y="105" text-anchor="middle" class="text label">Split into tokens</text> |
| |
|     <!-- Connector: tokenizer to tokens --> |
|     <path d="M 410 90 L 450 90" class="arrow" marker-end="url(#arrowhead-tok)"/> |
| |
|     <!-- Stage 3: token panel --> |
|     <rect x="450" y="30" width="280" height="120" rx="5" class="box"/> |
|     <text x="590" y="55" text-anchor="middle" class="text title">Tokens</text> |
| |
|     <!-- One box per token; each token text is centred on its box. --> |
|     <rect x="470" y="70" width="60" height="30" rx="3" class="token-box"/> |
|     <text x="500" y="90" text-anchor="middle" class="text token">Hello</text> |
| |
|     <rect x="540" y="70" width="40" height="30" rx="3" class="token-box"/> |
|     <text x="560" y="90" text-anchor="middle" class="text token">,</text> |
| |
|     <rect x="590" y="70" width="60" height="30" rx="3" class="token-box"/> |
|     <text x="620" y="90" text-anchor="middle" class="text token">world</text> |
| |
|     <rect x="660" y="70" width="40" height="30" rx="3" class="token-box"/> |
|     <text x="680" y="90" text-anchor="middle" class="text token">!</text> |
| |
|     <!-- Token IDs, horizontally aligned with the token boxes above. --> |
|     <text x="500" y="125" text-anchor="middle" class="text token-id">[5425]</text> |
|     <text x="560" y="125" text-anchor="middle" class="text token-id">[11]</text> |
|     <text x="620" y="125" text-anchor="middle" class="text token-id">[1917]</text> |
|     <text x="680" y="125" text-anchor="middle" class="text token-id">[0]</text> |
| |
|     <!-- Connector: tokens to model --> |
|     <path d="M 590 150 L 590 190" class="arrow" marker-end="url(#arrowhead-tok)"/> |
| |
|     <!-- Stage 4: language model --> |
|     <rect x="480" y="190" width="220" height="100" rx="5" class="model"/> |
|     <text x="590" y="215" text-anchor="middle" class="text title">Language Model</text> |
| |
|     <!-- Decorative row of five nodes inside the model box. --> |
|     <g transform="translate(520, 230)"> |
|       <circle cx="20" cy="15" r="8" class="node-circle"/> |
|       <circle cx="50" cy="15" r="8" class="node-circle"/> |
|       <circle cx="80" cy="15" r="8" class="node-circle"/> |
|       <circle cx="110" cy="15" r="8" class="node-circle"/> |
|       <circle cx="140" cy="15" r="8" class="node-circle"/> |
|     </g> |
|     <!-- The ampersand is written as an entity so the markup is also well-formed XML. --> |
|     <text x="590" y="275" text-anchor="middle" class="text label">Process &amp; Generate</text> |
| |
|     <!-- Connector: model to output --> |
|     <path d="M 590 290 L 590 330" class="arrow" marker-end="url(#arrowhead-tok)"/> |
| |
|     <!-- Stage 5: output --> |
|     <rect x="490" y="330" width="200" height="50" rx="5" class="box"/> |
|     <text x="590" y="360" text-anchor="middle" class="text label">Output / Prediction</text> |
|   </svg> |
| </div> |
| <style> |
|   /* Styles for the tokenization diagram. Every rule is scoped under .d3-tokenization. */ |
|   /* Each var() carries a literal fallback that applies when the custom property is not defined. */ |
| |
|   /* Layout: the wrapper spans its parent; the SVG fills the wrapper and scales through its viewBox. */ |
|   .d3-tokenization { |
|     position: relative; |
|     width: 100%; |
|   } |
|   .d3-tokenization svg { |
|     display: block; |
|     width: 100%; |
|     height: auto; |
|   } |
| |
|   /* Panels: generic box, tokenizer (process) and language model. */ |
|   .d3-tokenization .box { |
|     fill: var(--surface-bg, #f0f4ff); |
|     stroke: var(--primary-color, #4169e1); |
|     stroke-width: 2; |
|   } |
|   .d3-tokenization .process { |
|     fill: #fff8e1; |
|     stroke: #ff9800; |
|     stroke-width: 2; |
|   } |
|   .d3-tokenization .model { |
|     fill: #e8f5e9; |
|     stroke: #4caf50; |
|     stroke-width: 2; |
|   } |
| |
|   /* Typography. These rules take precedence over font-size presentation attributes on the text elements. */ |
|   .d3-tokenization .text { |
|     font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; |
|     fill: var(--text-color, #333); |
|   } |
|   .d3-tokenization .title { |
|     font-size: 14px; |
|     font-weight: 600; |
|   } |
|   .d3-tokenization .label { |
|     font-size: 12px; |
|   } |
|   .d3-tokenization .token { |
|     font-size: 11px; |
|     font-family: 'Monaco', 'Courier New', monospace; |
|   } |
|   .d3-tokenization .token-id { |
|     font-size: 9px; |
|     fill: var(--muted-color, #666); |
|   } |
| |
|   /* Connectors. */ |
|   .d3-tokenization .arrow { |
|     fill: none; |
|     stroke: var(--muted-color, #666); |
|     stroke-width: 2; |
|   } |
|   /* The arrowhead polygon is filled with currentColor. Marker content inherits from the marker */ |
|   /* element and its ancestors, not from the path that references it, so the color is set on the */ |
|   /* marker itself to keep the arrowhead the same color as the connector line. */ |
|   .d3-tokenization marker { |
|     color: var(--muted-color, #666); |
|   } |
| |
|   /* Token boxes and the decorative model nodes. */ |
|   .d3-tokenization .token-box { |
|     fill: white; |
|     stroke: var(--primary-color, #4169e1); |
|     stroke-width: 1.5; |
|   } |
|   .d3-tokenization .node-circle { |
|     fill: #81c784; |
|     opacity: 0.7; |
|   } |
| |
|   /* Dark theme: applies when an ancestor carries data-theme="dark"; only the fills change. */ |
|   [data-theme="dark"] .d3-tokenization .box { |
|     fill: rgba(65, 105, 225, 0.1); |
|   } |
|   [data-theme="dark"] .d3-tokenization .token-box { |
|     fill: var(--surface-bg, #1a1a1a); |
|   } |
|   [data-theme="dark"] .d3-tokenization .process { |
|     fill: rgba(255, 152, 0, 0.15); |
|   } |
|   [data-theme="dark"] .d3-tokenization .model { |
|     fill: rgba(76, 175, 80, 0.15); |
|   } |
| </style> |
| <script> |
| (() => { |
| const bootstrap = () => { |
| const scriptEl = document.currentScript; |
| let container = scriptEl ? scriptEl.previousElementSibling : null; |
| if (!(container && container.classList && container.classList.contains('d3-tokenization'))) { |
| const candidates = Array.from(document.querySelectorAll('.d3-tokenization')) |
| .filter((el) => !(el.dataset && el.dataset.mounted === 'true')); |
| container = candidates[candidates.length - 1] || null; |
| } |
| if (!container) return; |
| if (container.dataset) { |
| if (container.dataset.mounted === 'true') return; |
| container.dataset.mounted = 'true'; |
| } |
| }; |
| |
| if (document.readyState === 'loading') { |
| document.addEventListener('DOMContentLoaded', bootstrap, { once: true }); |
| } else { |
| bootstrap(); |
| } |
| })(); |
| </script> |