Spaces:
Running
Running
ping98k
committed on
Commit
·
07f99cb
1
Parent(s):
293a305
Update README and index.html for embedding playground functionality and usage instructions
Browse files- README.md +25 -3
- index.html +74 -20
README.md
CHANGED
|
@@ -5,8 +5,30 @@ colorFrom: blue
|
|
| 5 |
colorTo: yellow
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
-
models:
|
| 9 |
-
- Xenova/detr-resnet-50
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
colorTo: yellow
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# Embedding WebGPU Playground
|
| 11 |
+
|
| 12 |
+
This is a browser-based playground for exploring text embeddings and group similarity using WebGPU and ONNX models.
|
| 13 |
+
|
| 14 |
+
## How it works
|
| 15 |
+
- **Input text** in the textarea. Use single newlines (`\n`) to separate lines within a group, and three or more consecutive newlines (`\n\n\n`, i.e. at least two blank lines) to separate groups.
|
| 16 |
+
- For each line, the following prompt is used:
|
| 17 |
+
|
| 18 |
+
`Instruct: Given a textual input sentence, retrieve relevant categories that best describe it.\nQuery:{your sentence}`
|
| 19 |
+
- All lines in a group are embedded in a single batch call, with pooling set to `mean` and normalization enabled.
|
| 20 |
+
- Each group is averaged to a single embedding.
|
| 21 |
+
- Cosine similarity is calculated between all group embeddings, resulting in a group-by-group similarity matrix.
|
| 22 |
+
- The similarity matrix is visualized as a heatmap using Plotly (color range locked to 0–1).
|
| 23 |
+
|
| 24 |
+
## Tech stack
|
| 25 |
+
- [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) (ESM, WebGPU)
|
| 26 |
+
- [Qwen3-Embedding-0.6B-ONNX](https://huggingface.co/onnx-community/Qwen3-Embedding-0.6B-ONNX) (ONNX model)
|
| 27 |
+
- [Plotly.js](https://plotly.com/javascript/) (UMD)
|
| 28 |
+
|
| 29 |
+
## Usage
|
| 30 |
+
1. Enter or paste your text in the textarea.
|
| 31 |
+
2. Separate groups with three or more consecutive newlines (at least two blank lines).
|
| 32 |
+
3. Click **Run** to compute and visualize group similarities.
|
| 33 |
+
|
| 34 |
+
---
|
index.html
CHANGED
|
@@ -1,29 +1,83 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html
|
| 3 |
|
| 4 |
<head>
|
| 5 |
-
<meta charset="
|
| 6 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
</head>
|
| 11 |
|
| 12 |
<body>
|
| 13 |
-
<h1>
|
| 14 |
-
<
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
</body>
|
| 28 |
|
| 29 |
</html>
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html>
|
| 3 |
|
| 4 |
<head>
|
| 5 |
+
<meta charset="utf-8">
|
| 6 |
+
<title>WebGPU Embedding Playground</title>
|
| 7 |
+
<style>
|
| 8 |
+
body {
|
| 9 |
+
font-family: sans-serif;
|
| 10 |
+
margin: 0;
|
| 11 |
+
padding: 20px
|
| 12 |
+
}
|
| 13 |
|
| 14 |
+
textarea {
|
| 15 |
+
width: 100%;
|
| 16 |
+
height: 200px
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
button {
|
| 20 |
+
padding: 10px 20px;
|
| 21 |
+
margin-top: 10px
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
#plot {
|
| 25 |
+
width: 100%;
|
| 26 |
+
height: 600px
|
| 27 |
+
}
|
| 28 |
+
</style>
|
| 29 |
</head>
|
| 30 |
|
| 31 |
<body>
|
| 32 |
+
<h1>Embedding Similarity Heatmap</h1>
|
| 33 |
+
<textarea id="input"></textarea>
|
| 34 |
+
<button id="run">Run</button>
|
| 35 |
+
<div id="plot"></div>
|
| 36 |
+
<script src="https://cdn.plot.ly/plotly-2.32.0.min.js"></script>
|
| 37 |
+
<script type="module">
|
| 38 |
+
import { pipeline } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.5.2";
|
| 39 |
+
|
| 40 |
+
// Load the feature-extraction pipeline once, at module start-up. The click
// handler below is only attached after this await resolves, so "Run" does
// nothing until the model is ready.
const embed = await pipeline(
  "feature-extraction",
  "onnx-community/Qwen3-Embedding-0.6B-ONNX",
  { device: "webgpu", dtype: "q4f16" },
);

// Instruction text prepended to every line before it is embedded.
const task = "Given a textual input sentence, retrieve relevant categories that best describe it.";

/**
 * Split the raw textarea content into groups of non-blank lines.
 *
 * Groups are separated by three or more consecutive newlines; lines inside a
 * group are separated by single newlines. Blank lines are dropped, and groups
 * left with no lines are dropped as well so they are never sent to the model.
 *
 * @param {string} text - Raw textarea value.
 * @returns {string[][]} One array of lines per non-empty group.
 */
function parseGroups(text) {
  return text
    .split(/\n{3,}/)
    .map((group) => group.split(/\n/).filter((line) => line.trim() !== ""))
    .filter((lines) => lines.length > 0);
}

/**
 * Element-wise mean of a non-empty list of equal-length vectors.
 *
 * @param {number[][]} vectors - At least one vector.
 * @returns {Float32Array} The averaged vector.
 */
function meanVector(vectors) {
  const dim = vectors[0].length;
  const mean = new Float32Array(dim);
  for (const vector of vectors) {
    for (let i = 0; i < dim; i++) {
      mean[i] += vector[i];
    }
  }
  for (let i = 0; i < dim; i++) {
    mean[i] /= vectors.length;
  }
  return mean;
}

/**
 * Cosine similarity between two vectors of the same length.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either norm is zero.
 */
function cosineSimilarity(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let k = 0; k < a.length; k++) {
    dot += a[k] * b[k];
    normA += a[k] ** 2;
    normB += b[k] ** 2;
  }
  const denominator = Math.sqrt(normA * normB);
  // Guard against 0/0, which would put NaN cells into the heatmap.
  return denominator === 0 ? 0 : dot / denominator;
}

const runButton = document.getElementById("run");

// Embed every group, average each group to one vector, and plot the
// group-by-group cosine similarity matrix as a heatmap.
runButton.onclick = async () => {
  const groups = parseGroups(document.getElementById("input").value);
  // Nothing to embed (empty or whitespace-only input): leave the plot as is.
  if (groups.length === 0) {
    return;
  }

  // Block re-entrant clicks while inference is running.
  runButton.disabled = true;
  try {
    const groupEmbeddings = [];
    // Groups are embedded one after another; each group is a single batch call.
    for (const lines of groups) {
      const prompts = lines.map((s) => `Instruct: ${task}\nQuery:${s}`);
      const out = await embed(prompts, { pooling: "mean", normalize: true });
      // tolist() yields one nested array per prompt; the flat `data` buffer
      // cannot be indexed per prompt, so it is not used as a fallback.
      groupEmbeddings.push(meanVector(out.tolist()));
    }

    const sim = groupEmbeddings.map((a) =>
      groupEmbeddings.map((b) => cosineSimilarity(a, b)),
    );

    const data = [{ z: sim, type: "heatmap", colorscale: "Viridis", zmin: 0, zmax: 1 }];
    Plotly.newPlot("plot", data, { xaxis: { title: "Group" }, yaxis: { title: "Group" } });
  } finally {
    runButton.disabled = false;
  }
};
|
| 80 |
+
</script>
|
| 81 |
</body>
|
| 82 |
|
| 83 |
</html>
|