mbpe-dyn / index.html
jonasknobloch's picture
Upload index.html
31976f9 verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>mbpe-dyn</title>
<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
<script src="wasm_exec.js"></script>
<script>
// Go runtime bridge; the Go class comes from wasm_exec.js, loaded just before this script.
const go = new Go();

// Once the DOM is ready, fetch and start the WebAssembly module, then unlock the UI.
document.addEventListener("DOMContentLoaded", function () {
  // The Tokenize button has no id, so it is located via its inline onclick attribute.
  const tokenizeButton = document.querySelector("button[onclick='tokenizeText()']");
  const loadingMessage = document.getElementById("loadingMessage");

  WebAssembly.instantiateStreaming(fetch("main.wasm"), go.importObject)
    .then(result => {
      // go.run() is intentionally not awaited: the button is enabled right after
      // the module is started, not after the Go program exits.
      go.run(result.instance);
      tokenizeButton.disabled = false;
      loadingMessage.style.display = "none";
    })
    .catch(error => {
      // Without this handler a failed download/compile left the page stuck on
      // "Initializing tokenizers..." with no indication that anything went wrong.
      console.error("Failed to initialize tokenizers:", error);
      loadingMessage.textContent = "Failed to load tokenizers. Please reload the page.";
    });
});
/**
 * Click handler for the Tokenize button.
 *
 * Reads the text, model and vocabulary size from the form, runs the
 * WebAssembly-provided tokenizeWeb() on them and renders one tab per
 * returned entry, selecting the first tab. Shows "No tokens found." when
 * the tokenizer returns an empty (or non-array) result.
 *
 * @returns {Promise<void>} Resolves once the synchronous work is done
 *   (the function is async only to keep its original signature).
 */
async function tokenizeText() {
  const input = document.getElementById("textInput").value.trim();
  const modelChoice = document.getElementById("modelSelect").value;
  // Fall back to -1 when the selected value is not a usable number.
  const vocabSize = parseInt(document.getElementById("vocabSize").value, 10) || -1;

  if (!input) {
    alert("Please enter text to tokenize.");
    return;
  }

  // tokenizeWeb is not defined in this file; it is expected as a global once
  // the WebAssembly module is running.
  if (typeof tokenizeWeb === "undefined") {
    console.error("tokenizeWeb is not available yet.");
    return;
  }

  try {
    // Call the tokenizer inside the try so a failure there is logged as well.
    const result = JSON.parse(tokenizeWeb(input, modelChoice, vocabSize));
    console.log(result);

    if (Array.isArray(result) && result.length > 0) {
      displayTabs(result);
      showResult(0); // Show first tab by default
    } else {
      // The page has no "#code" element, so the message is rendered in the
      // diagram container instead; stale tabs from a previous run are removed.
      document.getElementById("wordTabs").innerHTML = "";
      document.getElementById("mermaidContainer").textContent = "No tokens found.";
    }
  } catch (e) {
    console.error("Error parsing result:", e);
  }
}
/**
 * Rebuilds the tab strip with one button per tokenization result.
 *
 * Each tab is labelled with the concatenated Token values of the entry's
 * last segmentation and shows that entry when clicked.
 *
 * @param {Array<Object>} results Parsed tokenizer output; each entry is
 *   read for a `Segmentations` array of arrays of `{ Token }` objects.
 */
function displayTabs(results) {
  const tabsContainer = document.getElementById("wordTabs");
  tabsContainer.innerHTML = "";

  results.forEach((entry, index) => {
    const segmentations = entry && Array.isArray(entry.Segmentations) ? entry.Segmentations : [];
    const finalSegmentation = segmentations[segmentations.length - 1];
    // An entry without any segmentation used to throw here and abort the whole
    // tab strip; fall back to a positional label so the entry stays selectable.
    const label = Array.isArray(finalSegmentation) && finalSegmentation.length > 0
      ? finalSegmentation.map(t => t.Token).join("")
      : `#${index + 1}`;

    const tabButton = document.createElement("button");
    tabButton.type = "button";
    tabButton.textContent = label;
    tabButton.classList.add("word-tab");
    // showResult() finds the active tab through this data-index attribute.
    tabButton.dataset.index = index;
    tabButton.onclick = () => showResult(index);
    tabsContainer.appendChild(tabButton);
  });
}
/**
 * Renders the Mermaid diagram of one result and marks its tab as active.
 *
 * NOTE(review): the results are recomputed from the current form values on
 * every call, so if the input was edited after the tabs were built, the
 * entry at `index` may differ from the tab label or not exist at all.
 *
 * @param {number} index Position of the result to show.
 */
function showResult(index) {
  let results;
  try {
    results = JSON.parse(tokenizeWeb(
      document.getElementById("textInput").value.trim(),
      document.getElementById("modelSelect").value,
      parseInt(document.getElementById("vocabSize").value, 10) || -1
    ));
  } catch (error) {
    console.error("Error tokenizing input:", error);
    return;
  }

  // The form may have changed since the tabs were created; ignore indexes
  // that no longer map to a result instead of throwing.
  const selectedResult = Array.isArray(results) ? results[index] : undefined;
  if (!selectedResult) {
    console.warn("No result available for tab index", index);
    return;
  }

  const container = document.getElementById("mermaidContainer");
  const diagramCode = typeof selectedResult.Mermaid === "string" ? selectedResult.Mermaid.trim() : "";
  if (diagramCode) {
    mermaid.render("mermaidDiagram", diagramCode).then(({ svg }) => {
      container.innerHTML = svg;
    }).catch(error => {
      console.error("Mermaid rendering error:", error);
    });
  } else {
    // Do not leave the previously selected entry's diagram on screen.
    container.innerHTML = "";
  }

  // Update active tab
  document.querySelectorAll(".word-tab").forEach(tab => tab.classList.remove("active"));
  const activeTab = document.querySelector(`.word-tab[data-index="${index}"]`);
  if (activeTab) {
    activeTab.classList.add("active");
  }
}
</script>
<style>
/* General Styles */
body {
  font-family: Arial, sans-serif;
  background: #f8f9fa;
  color: #333;
  text-align: center;
  padding: 20px;
}
h1 {
  color: #007bff;
}
/* Input and Select: the controls sit side by side in one centered row */
.input-group {
  display: flex;
  justify-content: center;
  gap: 10px;
  margin-bottom: 20px;
}
/* Shared look for every form control, including the generated word tabs */
input, select, button {
  padding: 10px;
  border: 1px solid #ccc;
  border-radius: 5px;
  font-size: 16px;
}
input {
  width: 300px;
}
select {
  width: 150px;
  background: white;
  cursor: pointer;
}
button {
  background: #007bff;
  color: white;
  font-weight: bold;
  cursor: pointer;
  transition: 0.3s;
  border: none;
}
button:hover {
  background: #0056b3;
}
/* Declared after :hover so a disabled button keeps its grey look when hovered */
button:disabled {
  background: #ccc;
  color: #666;
  cursor: not-allowed;
}
/* Per-control widths; these override the generic select width above */
#modelSelect {
  width: 80px;
}
#vocabSize {
  width: 110px;
}
#loadingMessage {
  text-align: center;
  font-weight: bold;
  color: #555;
  margin-top: 10px;
}
/* Word Tabs */
.word-tabs {
  display: flex;
  justify-content: center;
  gap: 10px;
  margin-top: 20px;
  margin-bottom: 20px;
  flex-wrap: wrap;
}
.word-tab {
  padding: 8px 15px;
  background: #e1ecf4;
  /* Tabs are <button>s and would otherwise inherit white text from the
     generic button rule, which is unreadable on this pale background. */
  color: #333;
  border: none;
  border-radius: 20px;
  cursor: pointer;
  font-weight: bold;
  transition: 0.3s;
}
/* The generic button:hover rule still darkens the background; keep the text light on it */
.word-tab:hover {
  color: white;
}
.word-tab.active {
  background: #007bff;
  color: white;
}
</style>
</head>
<body>
  <main>
    <h1>mbpe-dyn</h1>
    <!-- Tokenizer controls: model, vocabulary size, input text and the action button.
         The controls have no visible labels, so each one carries an aria-label. -->
    <div class="input-group">
      <select id="modelSelect" aria-label="Model">
        <option value="m000">m000</option>
        <option value="m010">m010</option>
        <option value="m020">m020</option>
        <option value="m030">m030</option>
        <option value="m040">m040</option>
        <option value="m050">m050</option>
        <option value="m060">m060</option>
        <option value="m070">m070</option>
        <option value="m080">m080</option>
        <option value="m090">m090</option>
        <option value="m100" selected>m100</option>
      </select>
      <select id="vocabSize" aria-label="Vocabulary size">
        <option value="16384">2¹⁴ (16K)</option>
        <option value="32768">2¹⁵ (32K)</option>
        <option value="65536">2¹⁶ (64K)</option>
        <option value="131072" selected>2¹⁷ (128K)</option>
      </select>
      <input type="text" id="textInput" placeholder="Enter text here" value="airsickness" aria-label="Text to tokenize">
      <!-- The startup script locates this button through its onclick attribute,
           so the attribute value must stay exactly "tokenizeText()". It starts
           disabled and is enabled by the script once the tokenizers are loaded. -->
      <button type="button" onclick="tokenizeText()" disabled>Tokenize</button>
    </div>
    <!-- Hidden by the startup script once the WebAssembly module is running -->
    <div id="loadingMessage" role="status">Initializing tokenizers...</div>
    <!-- Filled by displayTabs() with one button per result -->
    <div id="wordTabs" class="word-tabs"></div>
    <!-- Receives the rendered Mermaid SVG for the selected tab -->
    <div id="mermaidContainer"></div>
  </main>
</body>
</html>
</html>