Spaces:
Running
Running
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>mbpe-dyn</title> | |
<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script> | |
<script src="wasm_exec.js"></script> | |
<script> | |
// Bridge object for the Go WebAssembly runtime; the `Go` class comes from wasm_exec.js.
const go = new Go();

// Once the DOM is parsed, download and start main.wasm, then unlock the UI.
document.addEventListener("DOMContentLoaded", function () {
  // The Tokenize button has no id, so it is located through its inline onclick attribute.
  const tokenizeButton = document.querySelector("button[onclick='tokenizeText()']");
  const loadingMessage = document.getElementById("loadingMessage");

  WebAssembly.instantiateStreaming(fetch("main.wasm"), go.importObject)
    .then(result => {
      // go.run() is intentionally not awaited.
      // NOTE(review): presumably the Go program stays alive to serve tokenizeWeb — confirm.
      go.run(result.instance);
      tokenizeButton.disabled = false;
      loadingMessage.style.display = "none";
    })
    .catch(error => {
      // Without this handler a failed download/compile left the page stuck on
      // "Initializing tokenizers..." with the button disabled and no explanation.
      console.error("Failed to load main.wasm:", error);
      loadingMessage.textContent = "Failed to load tokenizers. Please reload the page.";
    });
});
/**
 * Tokenize the text box contents with the selected model and vocabulary size,
 * rebuild the word tabs and show the first word's diagram.
 *
 * Reads #textInput, #modelSelect and #vocabSize; calls the global `tokenizeWeb`
 * (expected to return a JSON string) and hands the parsed array to
 * `displayTabs` / `showResult`.
 *
 * @returns {Promise<void>} Resolves once the UI has been updated; never rejects.
 */
async function tokenizeText() {
  const input = document.getElementById("textInput").value.trim();
  const modelChoice = document.getElementById("modelSelect").value;
  // Falls back to -1 when the selected value does not parse to a non-zero number.
  const vocabSize = parseInt(document.getElementById("vocabSize").value, 10) || -1;

  if (!input) {
    alert("Please enter text to tokenize.");
    return;
  }

  // Drops stale tabs and shows a plain-text status message. The page may not
  // contain a #code element, so the diagram container is used when it is absent
  // (previously the missing element caused a TypeError that hid the message).
  const showStatus = message => {
    document.getElementById("wordTabs").innerHTML = "";
    const codeElement = document.getElementById("code");
    const diagramElement = document.getElementById("mermaidContainer");
    if (codeElement) {
      codeElement.innerText = message;
      diagramElement.innerHTML = "";
    } else {
      diagramElement.textContent = message;
    }
  };

  if (typeof tokenizeWeb === "undefined") {
    showStatus("Tokenizer is not ready yet. Please try again in a moment.");
    return;
  }

  try {
    // The tokenizer call sits inside the try so an exception thrown by it is
    // reported instead of becoming an unhandled promise rejection.
    const result = JSON.parse(tokenizeWeb(input, modelChoice, vocabSize));
    console.log(result);
    if (Array.isArray(result) && result.length > 0) {
      displayTabs(result);
      showResult(0); // Show first tab by default
    } else {
      showStatus("No tokens found.");
    }
  } catch (e) {
    console.error("Error tokenizing input:", e);
    showStatus("Tokenization failed. See the browser console for details.");
  }
}
/**
 * Rebuild the row of word tabs, one button per tokenization result.
 *
 * Each tab is labelled with the tokens of the entry's last segmentation joined
 * together, carries its position in `data-index`, and calls `showResult` with
 * that position when clicked.
 *
 * @param {Array<{Segmentations: Array<Array<{Token: string}>>}>} results
 *        Parsed tokenizer output; only `Segmentations` is read here.
 */
function displayTabs(results) {
  const tabBar = document.getElementById("wordTabs");
  tabBar.innerHTML = ""; // discard tabs left over from a previous run

  // Creates the tab button for a single result entry.
  const makeTab = (entry, position) => {
    const steps = entry.Segmentations;
    const lastStep = steps[steps.length - 1];
    const caption = lastStep.map(({ Token }) => Token).join("");

    const button = document.createElement("button");
    button.textContent = caption;
    button.classList.add("word-tab");
    button.dataset.index = position;
    button.onclick = () => showResult(position);
    return button;
  };

  results.forEach((entry, position) => {
    tabBar.appendChild(makeTab(entry, position));
  });
}
/**
 * Render the Mermaid diagram for one tokenized word and mark its tab active.
 *
 * The tokenizer is re-run on the *current* values of #textInput, #modelSelect
 * and #vocabSize, so `index` may no longer exist if the user edited the input
 * after the tabs were built; that case is logged and ignored instead of
 * throwing.
 *
 * @param {number} index Position of the word in the tokenizer's result array.
 */
function showResult(index) {
  const container = document.getElementById("mermaidContainer");

  let results;
  try {
    results = JSON.parse(tokenizeWeb(
      document.getElementById("textInput").value.trim(),
      document.getElementById("modelSelect").value,
      parseInt(document.getElementById("vocabSize").value, 10) || -1
    ));
  } catch (error) {
    console.error("Error tokenizing input for tab:", error);
    return;
  }

  const selectedResult = Array.isArray(results) ? results[index] : undefined;
  if (!selectedResult) {
    // Input changed since the tabs were created, or the tokenizer returned nothing.
    console.warn("No tokenization result at index", index);
    return;
  }

  const diagramCode = typeof selectedResult.Mermaid === "string"
    ? selectedResult.Mermaid.trim()
    : "";
  if (diagramCode) {
    mermaid.render("mermaidDiagram", diagramCode).then(({ svg }) => {
      container.innerHTML = svg;
    }).catch(error => {
      console.error("Mermaid rendering error:", error);
    });
  } else {
    // Nothing to draw: clear the previous word's diagram rather than leave it on screen.
    container.innerHTML = "";
  }

  // Update active tab
  document.querySelectorAll(".word-tab").forEach(tab => tab.classList.remove("active"));
  const activeTab = document.querySelector(`.word-tab[data-index="${index}"]`);
  if (activeTab) {
    activeTab.classList.add("active");
  }
}
</script> | |
<style> | |
/* General Styles */
body {
  font-family: Arial, sans-serif;
  background: #f8f9fa;
  color: #333;
  text-align: center;
  padding: 20px;
}

h1 {
  color: #007bff;
}

/* Input and Select */
.input-group {
  display: flex;
  justify-content: center;
  gap: 10px;
  margin-bottom: 20px;
}

/* Shared box styling for every form control. */
input, select, button {
  padding: 10px;
  border: 1px solid #ccc;
  border-radius: 5px;
  font-size: 16px;
}

input {
  width: 300px;
}

select {
  width: 150px;
  background: white;
  cursor: pointer;
}

/* Applies to every <button>, including the generated .word-tab buttons. */
button {
  background: #007bff;
  color: white;
  font-weight: bold;
  cursor: pointer;
  transition: 0.3s;
  border: none;
}

button:hover {
  background: #0056b3;
}

/* Declared after button:hover (same specificity) so a disabled button stays grey on hover. */
button:disabled {
  background: #ccc;
  color: #666;
  cursor: not-allowed;
}

/* Per-control widths override the generic select width above. */
#modelSelect {
  width: 80px;
}

#vocabSize {
  width: 110px;
}

#loadingMessage {
  text-align: center;
  font-weight: bold;
  color: #555;
  margin-top: 10px;
}

/* Word Tabs */
.word-tabs {
  display: flex;
  justify-content: center;
  gap: 10px;
  margin-top: 20px;
  margin-bottom: 20px;
  flex-wrap: wrap;
}

.word-tab {
  padding: 8px 15px;
  background: #e1ecf4;
  /* Tabs are <button>s and would otherwise inherit white text from the
     button rule, which is unreadable on this pale background. */
  color: #333;
  border: none;
  border-radius: 20px;
  cursor: pointer;
  font-weight: bold;
  transition: 0.3s;
}

/* button:hover still supplies the dark blue hover background, so switch the
   label back to white while hovering. */
.word-tab:hover {
  color: white;
}

.word-tab.active {
  background: #007bff;
  color: white;
}
</style> | |
</head> | |
<body>
  <main>
    <h1>mbpe-dyn</h1>

    <!-- Controls: model variant, vocabulary size, text to tokenize. The controls
         have no visible labels, so each gets an accessible name via aria-label. -->
    <div class="input-group">
      <select id="modelSelect" aria-label="Model">
        <option value="m000">m000</option>
        <option value="m010">m010</option>
        <option value="m020">m020</option>
        <option value="m030">m030</option>
        <option value="m040">m040</option>
        <option value="m050">m050</option>
        <option value="m060">m060</option>
        <option value="m070">m070</option>
        <option value="m080">m080</option>
        <option value="m090">m090</option>
        <option value="m100" selected>m100</option>
      </select>
      <select id="vocabSize" aria-label="Vocabulary size">
        <option value="16384">2¹⁴ (16K)</option>
        <option value="32768">2¹⁵ (32K)</option>
        <option value="65536">2¹⁶ (64K)</option>
        <option value="131072" selected>2¹⁷ (128K)</option>
      </select>
      <input type="text" id="textInput" placeholder="Enter text here" value="airsickness" aria-label="Text to tokenize">
      <!-- The inline onclick is kept on purpose: the page script finds this
           button through that attribute. It starts disabled and is enabled by
           the script once the WebAssembly module is running. -->
      <button type="button" onclick="tokenizeText()" disabled>Tokenize</button>
    </div>

    <!-- Status text shown while the tokenizer loads; exposed as a status region
         so assistive technology announces changes to it. -->
    <div id="loadingMessage" role="status">Initializing tokenizers...</div>

    <!-- Filled by script: one tab per tokenized word, and the selected word's diagram. -->
    <div id="wordTabs" class="word-tabs"></div>
    <div id="mermaidContainer"></div>
  </main>
</body>
</html> |