gregtatum commited on
Commit
f7fef32
·
1 Parent(s): 7aab677

Add the initial models

Browse files
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ models/**/*.npy
2
+ models/**/*.zst
3
+ models/**/*.json
README.md CHANGED
@@ -1,3 +1,67 @@
1
- ---
2
- license: mpl-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - static-embeddings
4
+ ---
5
+ # Static Embeddings
6
+
7
+ This project contains multilingual static embeddings that are appropriate for generating
8
+ quick embeddings on edge devices. They are re-packaged from other projects into
9
+ production-ready assets.
10
+
11
+ ## Models
12
+
13
+ * [minishlab/potion-retrieval-32M/](models/minishlab/potion-retrieval-32M/README.md)
14
+ * [minishlab/potion-multilingual-128M/](models/minishlab/potion-multilingual-128M/README.md)
15
+ * [sentence-transformers/static-retrieval-mrl-en-v1/](models/sentence-transformers/static-retrieval-mrl-en-v1/README.md)
16
+ * [sentence-transformers/static-similarity-mrl-multilingual-v1](models/sentence-transformers/static-similarity-mrl-multilingual-v1/README.md)
17
+
18
+ ## Updating
19
+
20
+ Add models to `scripts/build_models.py`.
21
+
22
+ ```sh
23
+ # Install dependencies and login to huggingface:
24
+ pipx install huggingface_hub
25
+ huggingface-cli login
26
+
27
+ # Re-build the models:
28
+ uv run scripts/build_models.py
29
+
30
+ # Version control:
31
+ git add .
32
+ git commit -m 'Updated the models'
33
+ git push
34
+ git tag v1.0.0 -m 'Model release description'
35
+ git push origin tag v1.0.0
36
+
37
+ # Upload the models
38
+ uv run scripts/upload_models.py --tag v1.0.0
39
+ ```
40
+
41
+ ## Precision
42
+
43
+ For static embeddings and cosine similarity, precision isn't as important. In an
44
+ end-to-end test in Firefox on some vectors, here is the cosine similarity for the same
45
+ mean-pooled result. Note that the vector math happens in f32 space, but storage
46
+ for the embeddings is in a lower precision.
47
+
48
+ > f32 vs f16: cosine similarity = 1.00000000<br/>
49
+ > → They are essentially identical in direction.
50
+ >
51
+ > f32 vs f8: cosine similarity = 0.99956375<br/>
52
+ > → Very close, only tiny quantization effects.
53
+
54
+ Note that this was measured with `torch.float8_e4m3fn`; `torch.float8_e5m2` generally
55
+ has more loss.
56
+
57
+ Precision also affects download size. For instance, with the larger
58
+ [minishlab/potion-multilingual-128M/](models/minishlab/potion-multilingual-128M/README.md)
59
+ model, the `fp32` variant is 228M compressed, while `fp8_e4m3` is only 51M and still has
60
+ competitive quantization values.
61
+
62
+ | precision | dimensions | size |
63
+ | ------------- | ---------- | ------- |
64
+ | fp32 | 128 | 228M |
65
+ | fp16 | 128 | 114M |
66
+ | **fp8_e4m3** | 128 | **51M** |
67
+ | fp8_e5m2 | 128 | 44M |
build_models.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ import shutil
3
+ from textwrap import dedent, indent
4
+ from typing import Any
5
+ import numpy as np
6
+ from zstandard import ZstdCompressor
7
+ from pathlib import Path
8
+ import io
9
+ from sentence_transformers import SentenceTransformer
10
+ from torch.nn import EmbeddingBag
11
+ import torch
12
+ from model2vec import StaticModel
13
+ from tokenizers import Encoding, Tokenizer
14
+
15
+ models_path = Path("models")
16
+
17
+
18
@dataclass
class ModelCard:
    """Metadata describing one upstream model that gets re-packaged."""

    # Account or organization part of the model identifier.
    owner: str
    # Repository part of the model identifier.
    repo: str
    # The dimensions that were applied with Matryoshka loss.
    matroyshka_dims: list[int]
    # Free-form description; may be an indented multi-line string.
    description: str
    # License identifier of the upstream model.
    license: str

    def name(self):
        """Return the model identifier in ``owner/repo`` form."""
        return "{}/{}".format(self.owner, self.repo)

    def path(self):
        """Return the output folder for this model below ``models_path``."""
        return models_path.joinpath(self.owner, self.repo)

    def get_description(self):
        """Return the description with common indentation and outer whitespace removed."""
        unindented = dedent(self.description)
        return unindented.strip()
35
+
36
+
37
def zst_compress_file(input: Path):
    """Write a zstd-compressed copy of ``input`` beside it, named ``<input name>.zst``."""
    compressed_path = input.parent / (input.name + ".zst")
    print(f"Compressing {compressed_path}")

    compressor = ZstdCompressor()
    with input.open("rb") as source, compressed_path.open("wb") as sink:
        # Stream the bytes through the compressor instead of loading the file.
        compressor.copy_stream(source, sink)
43
+
44
+
45
def save_data(path: Path, tensor: torch.Tensor):
    """
    Write the static embeddings to a .npy file and a .npy.zst file.

    Tensors in one of the two float8 dtypes are reinterpreted as ``uint8`` so
    that their raw bytes are stored; every other dtype is converted to NumPy
    as-is.

    Args:
        path: Destination of the ``.npy`` file. The compressed copy is written
            next to it by ``zst_compress_file``.
        tensor: The embeddings to store.
    """
    values = tensor.detach()
    if values.dtype in (torch.float8_e4m3fn, torch.float8_e5m2):
        # Store as the raw bytes.
        values = values.view(torch.uint8)
    array = values.numpy()

    print(f"Saving {path}")
    # Serialize straight into the file rather than staging a second full copy
    # of the array in memory first; the bytes written are the same.
    with open(path, "wb") as outfile:
        np.save(outfile, array)

    zst_compress_file(path)
60
+
61
+
62
def quantization_loss_mse(tensor: torch.Tensor, dtype: torch.dtype):
    """
    Return the mean squared error introduced by casting ``tensor`` to ``dtype``
    and back to its own dtype. Squaring weights big errors more heavily than
    small ones.
    """
    # Quantize, then dequantize back to the input's dtype.
    restored = tensor.detach().to(dtype).to(tensor.dtype)

    error = tensor - restored
    return torch.square(error).mean().item()
73
+
74
+
75
def quantization_loss_mae(tensor: torch.Tensor, dtype: torch.dtype):
    """
    Return the mean absolute error introduced by casting ``tensor`` to ``dtype``
    and back to its own dtype. Compared to MSE this is less sensitive to
    outliers.
    """
    # Quantize, then dequantize back to the input's dtype.
    restored = tensor.detach().to(dtype).to(tensor.dtype)

    error = tensor - restored
    return error.abs().mean().item()
86
+
87
+
88
def quantization_loss_cosine(tensor: torch.Tensor, dtype: torch.dtype):
    """
    Compute reconstruction loss when converting embeddings to a datatype and back using
    cosine similarity. This measures whether the embedding directions are preserved
    after quantization, independent of their magnitudes.

    Args:
        tensor: The original values. A 1D tensor is treated as a single vector;
            otherwise the first axis indexes the vectors and all remaining axes
            are flattened into one.
        dtype: The datatype to round-trip through.

    Returns:
        The cosine similarity between each original vector and its round-tripped
        counterpart, averaged over all vectors, as a Python float.
    """

    # Original → quantize → dequantize
    roundtrip = tensor.detach().to(dtype).to(tensor.dtype)

    # Flatten both to 2D (num_vectors, dimensions) in case tensor is 1D or higher-D
    if tensor.ndim == 1:
        orig = tensor.unsqueeze(0)
        recon = roundtrip.unsqueeze(0)
    else:
        # reshape() rather than view(): view() raises for non-contiguous
        # higher-D inputs (e.g. a transposed tensor), reshape() copies if needed.
        orig = tensor.reshape(tensor.shape[0], -1)
        recon = roundtrip.reshape(roundtrip.shape[0], -1)

    # Cosine similarity per vector, then average
    cos = torch.nn.functional.cosine_similarity(orig, recon, dim=1)
    return cos.mean().item()
109
+
110
+
111
def export_embeddings(model_card: ModelCard, embeddings: torch.Tensor) -> None:
    """
    Save the embeddings table once per configured dimension and per precision.

    For every entry of ``model_card.matroyshka_dims`` the table is truncated to
    its leading ``dim`` columns and written through ``save_data`` as
    ``fp32``, ``fp16``, ``fp8_e5m2`` and ``fp8_e4m3`` files named
    ``<precision>.d<dim>.npy`` inside ``model_card.path()``.
    """
    vocab_size, dimensions = embeddings.shape

    # This logic can always be adjusted for models with different shapes.
    assert (
        embeddings.dtype == torch.float32
    ), f"The embeddings {embeddings.dtype} are assumed to be float32."

    # File-name label paired with the dtype to store, in the order of writing.
    # The float32 entry is a no-op cast because of the assertion above.
    precisions = (
        ("fp32", torch.float32),
        ("fp16", torch.float16),
        ("fp8_e5m2", torch.float8_e5m2),
        ("fp8_e4m3", torch.float8_e4m3fn),
    )

    for dim in model_card.matroyshka_dims:
        assert (
            dim <= dimensions
        ), f"The Matroyshka dimensions {dim} were bigger than the models dimensions of {dimensions}"

        truncated = embeddings[:, :dim]
        assert truncated.shape == torch.Size([vocab_size, dim])

        for label, dtype in precisions:
            save_data(
                model_card.path() / f"{label}.d{dim}.npy",
                truncated.to(dtype=dtype),
            )
140
+
141
+
142
def normalized_mean_pooling(x: torch.Tensor) -> torch.Tensor:
    """Average ``x`` over its first axis and normalize the result along axis 0."""
    return torch.nn.functional.normalize(x.mean(dim=0), dim=0)
146
+
147
+
148
def export_readme(
    model_card: ModelCard,
    embeddings: torch.Tensor,
    tokenizer: Tokenizer,
):
    """
    Write the ``README.md`` for a model into ``model_card.path()``.

    The README contains the model name, license and description, statistics
    about the embeddings table, the cosine similarity between mean-pooled
    phrase vectors before and after a quantization round trip, per-vector
    quantization losses, and the tokens produced for a set of sample texts.

    Args:
        model_card: Supplies the name, license, description and output folder.
        embeddings: The 2D embeddings table; rows are looked up by token id.
        tokenizer: Used to encode the test phrases and the sample texts.
    """
    vocab_size, dimensions = embeddings.shape
    norms = torch.norm(embeddings, dim=1)  # shape: [vocab_size]

    # One phrase per entry. Without the separating commas Python concatenates
    # adjacent string literals, which would leave a single giant phrase.
    phrases = [
        "The committee approved the proposal after hours of heated discussion and several last-minute amendments.",
        "When training large neural networks, careful tuning of hyperparameters can significantly affect performance and stability.",
        "Despite the heavy rain, the concert continued as planned and the crowd stayed enthusiastic until the final encore.",
        "In ancient mythology, heroes often embarked on perilous journeys to discover hidden truths about themselves and their world.",
        "The new smartphone model features an improved camera system, faster processing, and extended battery life compared to its predecessor.",
        "He tried to explain the concept using simple analogies, but the underlying mathematics remained difficult to grasp for most listeners.",
        "After weeks of negotiations, the two countries signed a historic trade agreement aimed at reducing tariffs and boosting cooperation.",
        "She paused for a moment before answering, choosing her words carefully to avoid misunderstanding in such a delicate situation.",
        "The detective pieced together the timeline of events, realizing that the key witness had provided a contradictory statement.",
        "Remote work has changed the way teams collaborate, with online tools replacing traditional office routines and in-person meetings.",
    ]

    cosine_similarity = {
        torch.float16: [],
        torch.float8_e4m3fn: [],
        torch.float8_e5m2: [],
    }

    for phrase in phrases:
        encoding: Encoding = tokenizer.encode(phrase)
        embedded_phrase = embeddings[torch.tensor(encoding.ids, dtype=torch.long)]
        # The unquantized reference is the same for every dtype, so pool it once.
        pooling_unquantized = normalized_mean_pooling(embedded_phrase)

        for dtype, similarities in cosine_similarity.items():
            pooling_roundtrip = normalized_mean_pooling(
                embedded_phrase.to(dtype).to(torch.float32)
            )
            # Both vectors are normalized, so the dot product is the cosine.
            cosine = torch.dot(pooling_unquantized, pooling_roundtrip).item()
            similarities.append(cosine)

    avg_cosine_similarity = {
        dtype: sum(values) / len(values) for dtype, values in cosine_similarity.items()
    }

    tokenizer_examples = []
    for text in [
        "This is an example of encoding",
        "The quick brown fox jumps over the lazy dog.",
        "Curaçao, naïve fiancé, jalapeño, déjà vu.",
        "Привет, как дела?",
        "Бързата кафява лисица прескача мързеливото куче.",
        "Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.",
        "اللغة العربية جميلة وغنية بالتاريخ.",
        "مرحبا بالعالم!",
        "Simplified: 快速的棕色狐狸跳过懒狗。",
        "Traditional: 快速的棕色狐狸跳過懶狗。",
        "素早い茶色の狐が怠け者の犬を飛び越える。",
        "コンピュータープログラミング",
        "빠른 갈색 여우가 게으른 개를 뛰어넘습니다.",
        "तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।",
        "দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।",
        "வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.",
        "สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.",
        "ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።",
        "Hello 世界 مرحبا 🌍",
        "123, αβγ, абв, العربية, 中文, हिन्दी.",
    ]:
        encoding = tokenizer.encode(text)
        tokens = [f"`{token}`" for token in encoding.tokens]

        tokenizer_examples.append(f"**Input:** {text}<br/>")
        tokenizer_examples.append(f"**Tokens**: {' '.join(tokens)}")
        tokenizer_examples.append("")

    tokenizer_output = "\n".join(tokenizer_examples)

    # The README holds text in many scripts, so do not rely on the platform's
    # default encoding when writing it.
    with (model_card.path() / "README.md").open("wt", encoding="utf-8") as file:
        # Must equal the indentation of the template lines below, so that the
        # multi-line insertions line up with the template and dedent() can
        # strip the common margin from everything.
        prefix = " " * 16

        file.write(
            dedent(
                f"""
                # [{model_card.name()}](https://huggingface.co/{model_card.name()})

                License: [{model_card.license}](https://choosealicense.com/licenses/{model_card.license}/)

                {indent(model_card.get_description(), prefix).strip()}

                ## Model Stats

                Stats that describe the embeddings tensor shapes and value distribution.

                | item | metric | value |
                | --------------| ----------------------- | ----- |
                | vocab | size | {vocab_size:,.0f} |
                | embedding | dimensions | {dimensions:,.0f} |
                | vector length | mean | {norms.mean().item():.2f} |
                | vector length | median | {norms.median().item():.2f} |
                | vector length | stddev | {norms.std().item():.2f} |
                | values | mean | {embeddings.mean().item():.2f} |
                | values | median | {embeddings.median().item():.2f} |
                | values | stddev | {embeddings.std().item():.2f} |

                ## Mean Pooled Quantization Loss

                This test roundtrips the vectors through quantization, but performs the
                mean pooling arithmetic in float32 space. The quantized and unquantized
                mean pooled vectors are compared to each other to determine their cosine
                similarity, to show how much the meaning of the vector has changed due
                to quantization.

                | Precision | Cosine Similarity |
                | ------------- | ----------------- |
                | fp16 | {avg_cosine_similarity[torch.float16]:.5f} |
                | fp8 e4m3 | {avg_cosine_similarity[torch.float8_e4m3fn]:.5f} |
                | fp8 e5m2 | {avg_cosine_similarity[torch.float8_e5m2]:.5f} |

                ## Quantization Loss Per Vector

                While ultimately the embedding vectors will be mean pooled together, it's
                still useful to look at the loss per-vector in the embedding table to see
                which quantization strategies retain the most vector meaning.

                - **Cosine Similarity** — measures how well the *direction* of embedding vectors
                is preserved after quantization, independent of scale. This is especially
                relevant when embeddings are used for similarity search or retrieval.
                - **MSE (Mean Squared Error)** — emphasizes large errors by squaring the
                differences. Useful for detecting whether any values are badly distorted.
                - **MAE (Mean Absolute Error)** — the average absolute difference between
                original and quantized values. Easier to interpret, less sensitive to outliers.

                | Precision | Metric | Value |
                | ------------- | ------ | ----- |
                | fp16 | cosine similarity | {quantization_loss_cosine(embeddings, torch.float16):.5f} |
                | fp8 e4m3 | cosine similarity | {quantization_loss_cosine(embeddings, torch.float8_e4m3fn):.5f} |
                | fp8 e5m2 | cosine similarity | {quantization_loss_cosine(embeddings, torch.float8_e5m2):.5f} |
                | fp16 | MSE | {quantization_loss_mse(embeddings, torch.float16):.5f} |
                | fp8 e4m3 | MSE | {quantization_loss_mse(embeddings, torch.float8_e4m3fn):.5f} |
                | fp8 e5m2 | MSE | {quantization_loss_mse(embeddings, torch.float8_e5m2):.5f} |
                | fp16 | MAE | {quantization_loss_mae(embeddings, torch.float16):.5f} |
                | fp8 e4m3 | MAE | {quantization_loss_mae(embeddings, torch.float8_e4m3fn):.5f} |
                | fp8 e5m2 | MAE | {quantization_loss_mae(embeddings, torch.float8_e5m2):.5f} |

                ## Tokenizer Examples

                {indent(tokenizer_output, prefix).strip()}
                """
            ).strip()
        )
296
+
297
+
298
def export_tokenizer(model_card: ModelCard, tokenizer: Tokenizer) -> None:
    """Save the tokenizer as ``tokenizer.json`` in the model folder, plus a compressed copy."""
    destination = model_card.path().joinpath("tokenizer.json")
    print(f"Exporting tokenizer: {destination}")

    # Tokenizer.save is handed a plain string rather than a Path.
    tokenizer.save(str(destination))
    zst_compress_file(destination)
303
+
304
+
305
def export_sentence_transformers(model_card: ModelCard) -> None:
    """Load a SentenceTransformers model on the CPU and export its embeddings, tokenizer and README."""

    model_name = model_card.name()
    print("Processing", model_name)

    model = SentenceTransformer(model_name, device="cpu")
    # The first module of the model holds the embedding table.
    embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore

    output_dir = model_card.path()
    output_dir.mkdir(exist_ok=True, parents=True)

    embeddings = torch.Tensor(embedding_bag.weight)
    tokenizer = model.tokenizer

    export_embeddings(model_card, embeddings)
    export_tokenizer(model_card, tokenizer)
    export_readme(model_card, embeddings, tokenizer)
318
+
319
+
320
def export_model2vec(model_card: ModelCard) -> None:
    """Load a model2vec model and export its embeddings, tokenizer and README."""

    model_name = model_card.name()
    print("Processing", model_name)

    model = StaticModel.from_pretrained(model_name)

    output_dir = model_card.path()
    output_dir.mkdir(exist_ok=True, parents=True)

    # model.embedding is handed to torch.from_numpy, i.e. it is a NumPy array.
    embeddings = torch.from_numpy(model.embedding)
    tokenizer = model.tokenizer

    export_embeddings(model_card, embeddings)
    export_tokenizer(model_card, tokenizer)
    export_readme(model_card, embeddings, tokenizer)
331
+
332
+
333
def main() -> None:
    """
    Rebuild the ``models`` folder from scratch.

    Any existing ``models_path`` folder is deleted first. Then every configured
    SentenceTransformers model and every configured model2vec model is
    exported with its matching export function.
    """
    # Static embedders that use sentence_transformers models.
    sentence_transformers_models = [
        ModelCard(
            owner="sentence-transformers",
            repo="static-similarity-mrl-multilingual-v1",
            description="""
                Multi-lingual similarity embeddings that were trained with Matryoshka loss
                that allows for more effective truncation of the embedding vectors. It
                was trained on a variety of domains of multilingual datasets.

                It's a general purpose model that can be used for semantic textual similarity,
                paraphrase mining, text classification, clustering, and more
            """,
            matroyshka_dims=[32, 64, 128, 256, 512, 1024],
            license="apache-2.0",
        ),
        ModelCard(
            owner="sentence-transformers",
            repo="static-retrieval-mrl-en-v1",
            description="""
                English-only uncased similarity embeddings that were trained with Matryoshka
                loss that allows for more effective truncation of the embedding vectors. It
                was trained on a variety of domains of monolingual datasets. It was designed
                specifically for similarity retrieval.
            """,
            matroyshka_dims=[32, 64, 128, 256, 512, 1024],
            license="apache-2.0",
        ),
    ]
    # Static embedders that use model2vec.
    model2vec_models = [
        ModelCard(
            owner="minishlab",
            repo="potion-multilingual-128M",
            # These are assumed as there is no python reference implementation:
            matroyshka_dims=[32, 64, 128, 256],
            description="""
                A multilingual embedder. The details are a bit scant on how it's trained as
                there is no source code for it. However, it's likely a close architecture
                to the potion-retrieval-32M model, but trained on Common Crawl data.

                The 128M references the number of parameters in the embeddings:

                256 dimensions * 500,353 vocab.
            """,
            license="mit",
        ),
        ModelCard(
            owner="minishlab",
            repo="potion-retrieval-32M",
            matroyshka_dims=[32, 64, 128, 256, 512],
            description="""
                The token embeddings from a monolingual English 32M parameter model that was
                distilled from embeddings that were initialized from the multi-domain
                [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5)

                The 32M references the number of parameters in the embeddings:

                512 dimension * 63,091 vocab.
            """,
            license="mit",
        ),
    ]

    # Start from an empty folder so no stale files from a previous build remain.
    if models_path.exists():
        print(f"Removing the old models folder: {models_path}")
        shutil.rmtree(models_path)
    models_path.mkdir()

    for model_card in sentence_transformers_models:
        export_sentence_transformers(model_card)

    for model_card in model2vec_models:
        export_model2vec(model_card)
408
+
409
+
410
+ if __name__ == "__main__":
411
+ main()
example.mjs ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { pipeline, AutoTokenizer, AutoModel, TokenizerModel, PreTrainedTokenizer } from '@huggingface/transformers';
2
+ import fs from 'node:fs/promises';
3
+ import { constants } from 'node:fs';
4
+ import path from 'path';
5
+ import { fileURLToPath } from 'url';
6
+
7
+ const DIR = path.dirname(fileURLToPath(import.meta.url));
8
+
9
+ await main()
10
+
11
/**
 * Load the tokenizer definition and log the tokens produced for a set of
 * sample texts in many scripts.
 */
async function main() {
  const tokenizerUrl = "https://huggingface.co/sentence-transformers/static-similarity-mrl-multilingual-v1/resolve/main/0_StaticEmbedding/tokenizer.json";

  const tokenizerJson = await ensureTokenizerJson(tokenizerUrl);
  const tokenizer = new PreTrainedTokenizer(tokenizerJson, {});

  const sampleTexts = [
    "This is an example of encoding",
    "The quick brown fox jumps over the lazy dog.",
    "Curaçao, naïve fiancé, jalapeño, déjà vu.",
    "Привет, как дела?",
    "Бързата кафява лисица прескача мързеливото куче.",
    "Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.",
    "اللغة العربية جميلة وغنية بالتاريخ.",
    "مرحبا بالعالم!",
    "Simplified: 快速的棕色狐狸跳过懒狗。",
    "Traditional: 快速的棕色狐狸跳過懶狗。",
    "素早い茶色の狐が怠け者の犬を飛び越える。",
    "コンピュータープログラミング",
    "빠른 갈색 여우가 게으른 개를 뛰어넘습니다.",
    "तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।",
    "দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।",
    "வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.",
    "สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.",
    "ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።",
    // Mixed scripts:
    "Hello 世界 مرحبا 🌍",
    "123, αβγ, абв, العربية, 中文, हिन्दी.",
  ];

  sampleTexts.forEach((text) => {
    console.log(tokenizer.tokenize(text));
  });
}
44
+
45
/**
 * Read a UTF-8 encoded file and parse its contents as JSON.
 *
 * @param {string} path - Location of the JSON file.
 * @returns {Promise<any>} The parsed JSON value.
 */
async function loadJSON(path) {
  const text = await fs.readFile(path, { encoding: 'utf8' });
  return JSON.parse(text);
}
52
+
53
/**
 * Download tokenizer.json if it does not already exist next to this script,
 * then load it.
 *
 * @param {string} url - The URL to download tokenizer.json from
 * @returns {Promise<any>} - The parsed contents of tokenizer.json
 * @throws {Error} When the download responds with a non-success HTTP status.
 */
export async function ensureTokenizerJson(url) {
  const tokenizerPath = path.join(DIR, 'tokenizer.json');

  // Only the existence check is guarded: a cached file that fails to parse
  // still surfaces its error to the caller.
  let cached = true;
  try {
    await fs.access(tokenizerPath, constants.F_OK);
  } catch {
    cached = false;
  }

  if (cached) {
    console.log('Using', tokenizerPath);
    return loadJSON(tokenizerPath);
  }

  console.log("Downloading", url);
  const response = await fetch(url);
  if (!response.ok) {
    // Without this check an error page would be written to disk and then be
    // picked up as the cached tokenizer.json on every later run.
    throw new Error(`Failed to download ${url}: ${response.status} ${response.statusText}`);
  }
  const data = Buffer.from(await response.arrayBuffer());
  await fs.writeFile(tokenizerPath, data);

  return loadJSON(tokenizerPath);
}
experiments/multilingual.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from tokenizers import Encoding, Tokenizer
3
+ from torch.nn import EmbeddingBag
4
+ import torch
5
+
6
+
7
def test_tokenizer():
    """Print each sample text followed by the tokens produced for it and a blank line."""
    tokenizer: Tokenizer = Tokenizer.from_file("js/tokenizer.json")

    sample_texts = (
        "This is an example of encoding",
        "The quick brown fox jumps over the lazy dog.",
        "Curaçao, naïve fiancé, jalapeño, déjà vu.",
        "Привет, как дела?",
        "Бързата кафява лисица прескача мързеливото куче.",
        "Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.",
        "اللغة العربية جميلة وغنية بالتاريخ.",
        "مرحبا بالعالم!",
        "Simplified: 快速的棕色狐狸跳过懒狗。",
        "Traditional: 快速的棕色狐狸跳過懶狗。",
        "素早い茶色の狐が怠け者の犬を飛び越える。",
        "コンピュータープログラミング",
        "빠른 갈색 여우가 게으른 개를 뛰어넘습니다.",
        "तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।",
        "দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।",
        "வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.",
        "สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.",
        "ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።",
        "Hello 世界 مرحبا 🌍",
        "123, αβγ, абв, العربية, 中文, हिन्दी.",
    )

    for text in sample_texts:
        encoded: Encoding = tokenizer.encode(text)
        print(text)
        print(encoded.tokens)
        print()
38
+
39
+
40
# https://huggingface.co/sentence-transformers/static-similarity-mrl-multilingual-v1
model = SentenceTransformer(
    "sentence-transformers/static-similarity-mrl-multilingual-v1", device="cpu"
)

embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
embeddings = torch.Tensor(embedding_bag.weight)

print(embeddings.shape)
assert embeddings.shape == torch.Size([105879, 1024])

# Report the size of the table in MiB for each precision (bytes per value)
# and each truncated dimension.
vocab_size = embeddings.shape[0]
for label, bytes_per_value in (("float32", 4), ("float16", 2)):
    print(label)
    for dim in (1024, 512, 256):
        print(f" {dim} dim - {vocab_size * dim * bytes_per_value / 1024 / 1024:,.1f} MiB")
experiments/potion.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from model2vec import StaticModel
2
+ from tokenizers import Tokenizer
3
+ import torch
4
+
5
# Load the model2vec model and inspect its embedding table and tokenizer.
model = StaticModel.from_pretrained("minishlab/potion-multilingual-128M")
embeddings = torch.from_numpy(model.embedding)

print("Embedding shape:", embeddings.shape)
# Counts 4 bytes per value, i.e. the size assuming float32 storage.
# Named size_in_bytes so the builtin `bytes` type is not shadowed.
size_in_bytes = embeddings.shape[0] * embeddings.shape[1] * 4

print("MiB:", size_in_bytes / 1024 / 1024)

tokenizer: Tokenizer = model.tokenizer
print(tokenizer.to_str())
experiments/tomaarsen.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from torch.nn import EmbeddingBag
3
+ import torch
4
+
5
# Load the model and pull the embedding table out of its first module.
model = SentenceTransformer("tomaarsen/static-retrieval-mrl-en-v1")
embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
embeddings = torch.Tensor(embedding_bag.weight)

assert embeddings.shape == torch.Size([30522, 1024])

# Size of the table in MiB at 4 bytes per value for each truncated dimension.
for dim in (1024, 512, 256):
    print(f"{dim} dim - {embeddings.shape[0] * dim * 4 / 1024 / 1024:,.1f} MiB:")

print("Embeddings[0]", embeddings[0])
js/example.mjs ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { pipeline, AutoTokenizer, AutoModel, TokenizerModel, PreTrainedTokenizer } from '@huggingface/transformers';
2
+ import fs from 'node:fs/promises';
3
+ import { constants } from 'node:fs';
4
+ import path from 'path';
5
+ import { fileURLToPath } from 'url';
6
+
7
+ const DIR = path.dirname(fileURLToPath(import.meta.url));
8
+
9
+ await main()
10
+
11
/**
 * Load the tokenizer definition and log the tokens produced for a set of
 * sample texts in many scripts.
 */
async function main() {
  const tokenizerUrl = "https://huggingface.co/sentence-transformers/static-similarity-mrl-multilingual-v1/resolve/main/0_StaticEmbedding/tokenizer.json";

  const tokenizerJson = await ensureTokenizerJson(tokenizerUrl);
  const tokenizer = new PreTrainedTokenizer(tokenizerJson, {});

  const sampleTexts = [
    "This is an example of encoding",
    "The quick brown fox jumps over the lazy dog.",
    "Curaçao, naïve fiancé, jalapeño, déjà vu.",
    "Привет, как дела?",
    "Бързата кафява лисица прескача мързеливото куче.",
    "Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.",
    "اللغة العربية جميلة وغنية بالتاريخ.",
    "مرحبا بالعالم!",
    "Simplified: 快速的棕色狐狸跳过懒狗。",
    "Traditional: 快速的棕色狐狸跳過懶狗。",
    "素早い茶色の狐が怠け者の犬を飛び越える。",
    "コンピュータープログラミング",
    "빠른 갈색 여우가 게으른 개를 뛰어넘습니다.",
    "तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।",
    "দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।",
    "வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.",
    "สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.",
    "ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።",
    // Mixed scripts:
    "Hello 世界 مرحبا 🌍",
    "123, αβγ, абв, العربية, 中文, हिन्दी.",
  ];

  sampleTexts.forEach((text) => {
    console.log(tokenizer.tokenize(text));
  });
}
44
+
45
/**
 * Read a UTF-8 encoded file and parse its contents as JSON.
 *
 * @param {string} path - Location of the JSON file.
 * @returns {Promise<any>} The parsed JSON value.
 */
async function loadJSON(path) {
  const text = await fs.readFile(path, { encoding: 'utf8' });
  return JSON.parse(text);
}
52
+
53
/**
 * Download tokenizer.json if it does not already exist next to this script,
 * then load it.
 *
 * @param {string} url - The URL to download tokenizer.json from
 * @returns {Promise<any>} - The parsed contents of tokenizer.json
 * @throws {Error} When the download responds with a non-success HTTP status.
 */
export async function ensureTokenizerJson(url) {
  const tokenizerPath = path.join(DIR, 'tokenizer.json');

  // Only the existence check is guarded: a cached file that fails to parse
  // still surfaces its error to the caller.
  let cached = true;
  try {
    await fs.access(tokenizerPath, constants.F_OK);
  } catch {
    cached = false;
  }

  if (cached) {
    console.log('Using', tokenizerPath);
    return loadJSON(tokenizerPath);
  }

  console.log("Downloading", url);
  const response = await fetch(url);
  if (!response.ok) {
    // Without this check an error page would be written to disk and then be
    // picked up as the cached tokenizer.json on every later run.
    throw new Error(`Failed to download ${url}: ${response.status} ${response.statusText}`);
  }
  const data = Buffer.from(await response.arrayBuffer());
  await fs.writeFile(tokenizerPath, data);

  return loadJSON(tokenizerPath);
}
js/package-lock.json ADDED
@@ -0,0 +1,1067 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "js",
3
+ "version": "1.0.0",
4
+ "lockfileVersion": 3,
5
+ "requires": true,
6
+ "packages": {
7
+ "": {
8
+ "name": "js",
9
+ "version": "1.0.0",
10
+ "license": "ISC",
11
+ "dependencies": {
12
+ "@huggingface/transformers": "^3.7.2"
13
+ }
14
+ },
15
+ "node_modules/@emnapi/runtime": {
16
+ "version": "1.5.0",
17
+ "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.5.0.tgz",
18
+ "integrity": "sha512-97/BJ3iXHww3djw6hYIfErCZFee7qCtrneuLa20UXFCOTCfBM2cvQHjWJ2EG0s0MtdNwInarqCTz35i4wWXHsQ==",
19
+ "license": "MIT",
20
+ "optional": true,
21
+ "dependencies": {
22
+ "tslib": "^2.4.0"
23
+ }
24
+ },
25
+ "node_modules/@huggingface/jinja": {
26
+ "version": "0.5.1",
27
+ "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.1.tgz",
28
+ "integrity": "sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==",
29
+ "license": "MIT",
30
+ "engines": {
31
+ "node": ">=18"
32
+ }
33
+ },
34
+ "node_modules/@huggingface/transformers": {
35
+ "version": "3.7.2",
36
+ "resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-3.7.2.tgz",
37
+ "integrity": "sha512-6SOxo6XziupnQ5Vs5vbbs74CNB6ViHLHGQJjY6zj88JeiDtJ2d/ADKxaay688Sf2KcjtdF3dyBL11C5pJS2NxQ==",
38
+ "license": "Apache-2.0",
39
+ "dependencies": {
40
+ "@huggingface/jinja": "^0.5.1",
41
+ "onnxruntime-node": "1.21.0",
42
+ "onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4",
43
+ "sharp": "^0.34.1"
44
+ }
45
+ },
46
+ "node_modules/@img/sharp-darwin-arm64": {
47
+ "version": "0.34.3",
48
+ "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.3.tgz",
49
+ "integrity": "sha512-ryFMfvxxpQRsgZJqBd4wsttYQbCxsJksrv9Lw/v798JcQ8+w84mBWuXwl+TT0WJ/WrYOLaYpwQXi3sA9nTIaIg==",
50
+ "cpu": [
51
+ "arm64"
52
+ ],
53
+ "license": "Apache-2.0",
54
+ "optional": true,
55
+ "os": [
56
+ "darwin"
57
+ ],
58
+ "engines": {
59
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
60
+ },
61
+ "funding": {
62
+ "url": "https://opencollective.com/libvips"
63
+ },
64
+ "optionalDependencies": {
65
+ "@img/sharp-libvips-darwin-arm64": "1.2.0"
66
+ }
67
+ },
68
+ "node_modules/@img/sharp-darwin-x64": {
69
+ "version": "0.34.3",
70
+ "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.3.tgz",
71
+ "integrity": "sha512-yHpJYynROAj12TA6qil58hmPmAwxKKC7reUqtGLzsOHfP7/rniNGTL8tjWX6L3CTV4+5P4ypcS7Pp+7OB+8ihA==",
72
+ "cpu": [
73
+ "x64"
74
+ ],
75
+ "license": "Apache-2.0",
76
+ "optional": true,
77
+ "os": [
78
+ "darwin"
79
+ ],
80
+ "engines": {
81
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
82
+ },
83
+ "funding": {
84
+ "url": "https://opencollective.com/libvips"
85
+ },
86
+ "optionalDependencies": {
87
+ "@img/sharp-libvips-darwin-x64": "1.2.0"
88
+ }
89
+ },
90
+ "node_modules/@img/sharp-libvips-darwin-arm64": {
91
+ "version": "1.2.0",
92
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.0.tgz",
93
+ "integrity": "sha512-sBZmpwmxqwlqG9ueWFXtockhsxefaV6O84BMOrhtg/YqbTaRdqDE7hxraVE3y6gVM4eExmfzW4a8el9ArLeEiQ==",
94
+ "cpu": [
95
+ "arm64"
96
+ ],
97
+ "license": "LGPL-3.0-or-later",
98
+ "optional": true,
99
+ "os": [
100
+ "darwin"
101
+ ],
102
+ "funding": {
103
+ "url": "https://opencollective.com/libvips"
104
+ }
105
+ },
106
+ "node_modules/@img/sharp-libvips-darwin-x64": {
107
+ "version": "1.2.0",
108
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.0.tgz",
109
+ "integrity": "sha512-M64XVuL94OgiNHa5/m2YvEQI5q2cl9d/wk0qFTDVXcYzi43lxuiFTftMR1tOnFQovVXNZJ5TURSDK2pNe9Yzqg==",
110
+ "cpu": [
111
+ "x64"
112
+ ],
113
+ "license": "LGPL-3.0-or-later",
114
+ "optional": true,
115
+ "os": [
116
+ "darwin"
117
+ ],
118
+ "funding": {
119
+ "url": "https://opencollective.com/libvips"
120
+ }
121
+ },
122
+ "node_modules/@img/sharp-libvips-linux-arm": {
123
+ "version": "1.2.0",
124
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.0.tgz",
125
+ "integrity": "sha512-mWd2uWvDtL/nvIzThLq3fr2nnGfyr/XMXlq8ZJ9WMR6PXijHlC3ksp0IpuhK6bougvQrchUAfzRLnbsen0Cqvw==",
126
+ "cpu": [
127
+ "arm"
128
+ ],
129
+ "license": "LGPL-3.0-or-later",
130
+ "optional": true,
131
+ "os": [
132
+ "linux"
133
+ ],
134
+ "funding": {
135
+ "url": "https://opencollective.com/libvips"
136
+ }
137
+ },
138
+ "node_modules/@img/sharp-libvips-linux-arm64": {
139
+ "version": "1.2.0",
140
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.0.tgz",
141
+ "integrity": "sha512-RXwd0CgG+uPRX5YYrkzKyalt2OJYRiJQ8ED/fi1tq9WQW2jsQIn0tqrlR5l5dr/rjqq6AHAxURhj2DVjyQWSOA==",
142
+ "cpu": [
143
+ "arm64"
144
+ ],
145
+ "license": "LGPL-3.0-or-later",
146
+ "optional": true,
147
+ "os": [
148
+ "linux"
149
+ ],
150
+ "funding": {
151
+ "url": "https://opencollective.com/libvips"
152
+ }
153
+ },
154
+ "node_modules/@img/sharp-libvips-linux-ppc64": {
155
+ "version": "1.2.0",
156
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.0.tgz",
157
+ "integrity": "sha512-Xod/7KaDDHkYu2phxxfeEPXfVXFKx70EAFZ0qyUdOjCcxbjqyJOEUpDe6RIyaunGxT34Anf9ue/wuWOqBW2WcQ==",
158
+ "cpu": [
159
+ "ppc64"
160
+ ],
161
+ "license": "LGPL-3.0-or-later",
162
+ "optional": true,
163
+ "os": [
164
+ "linux"
165
+ ],
166
+ "funding": {
167
+ "url": "https://opencollective.com/libvips"
168
+ }
169
+ },
170
+ "node_modules/@img/sharp-libvips-linux-s390x": {
171
+ "version": "1.2.0",
172
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.0.tgz",
173
+ "integrity": "sha512-eMKfzDxLGT8mnmPJTNMcjfO33fLiTDsrMlUVcp6b96ETbnJmd4uvZxVJSKPQfS+odwfVaGifhsB07J1LynFehw==",
174
+ "cpu": [
175
+ "s390x"
176
+ ],
177
+ "license": "LGPL-3.0-or-later",
178
+ "optional": true,
179
+ "os": [
180
+ "linux"
181
+ ],
182
+ "funding": {
183
+ "url": "https://opencollective.com/libvips"
184
+ }
185
+ },
186
+ "node_modules/@img/sharp-libvips-linux-x64": {
187
+ "version": "1.2.0",
188
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.0.tgz",
189
+ "integrity": "sha512-ZW3FPWIc7K1sH9E3nxIGB3y3dZkpJlMnkk7z5tu1nSkBoCgw2nSRTFHI5pB/3CQaJM0pdzMF3paf9ckKMSE9Tg==",
190
+ "cpu": [
191
+ "x64"
192
+ ],
193
+ "license": "LGPL-3.0-or-later",
194
+ "optional": true,
195
+ "os": [
196
+ "linux"
197
+ ],
198
+ "funding": {
199
+ "url": "https://opencollective.com/libvips"
200
+ }
201
+ },
202
+ "node_modules/@img/sharp-libvips-linuxmusl-arm64": {
203
+ "version": "1.2.0",
204
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.0.tgz",
205
+ "integrity": "sha512-UG+LqQJbf5VJ8NWJ5Z3tdIe/HXjuIdo4JeVNADXBFuG7z9zjoegpzzGIyV5zQKi4zaJjnAd2+g2nna8TZvuW9Q==",
206
+ "cpu": [
207
+ "arm64"
208
+ ],
209
+ "license": "LGPL-3.0-or-later",
210
+ "optional": true,
211
+ "os": [
212
+ "linux"
213
+ ],
214
+ "funding": {
215
+ "url": "https://opencollective.com/libvips"
216
+ }
217
+ },
218
+ "node_modules/@img/sharp-libvips-linuxmusl-x64": {
219
+ "version": "1.2.0",
220
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.0.tgz",
221
+ "integrity": "sha512-SRYOLR7CXPgNze8akZwjoGBoN1ThNZoqpOgfnOxmWsklTGVfJiGJoC/Lod7aNMGA1jSsKWM1+HRX43OP6p9+6Q==",
222
+ "cpu": [
223
+ "x64"
224
+ ],
225
+ "license": "LGPL-3.0-or-later",
226
+ "optional": true,
227
+ "os": [
228
+ "linux"
229
+ ],
230
+ "funding": {
231
+ "url": "https://opencollective.com/libvips"
232
+ }
233
+ },
234
+ "node_modules/@img/sharp-linux-arm": {
235
+ "version": "0.34.3",
236
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.3.tgz",
237
+ "integrity": "sha512-oBK9l+h6KBN0i3dC8rYntLiVfW8D8wH+NPNT3O/WBHeW0OQWCjfWksLUaPidsrDKpJgXp3G3/hkmhptAW0I3+A==",
238
+ "cpu": [
239
+ "arm"
240
+ ],
241
+ "license": "Apache-2.0",
242
+ "optional": true,
243
+ "os": [
244
+ "linux"
245
+ ],
246
+ "engines": {
247
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
248
+ },
249
+ "funding": {
250
+ "url": "https://opencollective.com/libvips"
251
+ },
252
+ "optionalDependencies": {
253
+ "@img/sharp-libvips-linux-arm": "1.2.0"
254
+ }
255
+ },
256
+ "node_modules/@img/sharp-linux-arm64": {
257
+ "version": "0.34.3",
258
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.3.tgz",
259
+ "integrity": "sha512-QdrKe3EvQrqwkDrtuTIjI0bu6YEJHTgEeqdzI3uWJOH6G1O8Nl1iEeVYRGdj1h5I21CqxSvQp1Yv7xeU3ZewbA==",
260
+ "cpu": [
261
+ "arm64"
262
+ ],
263
+ "license": "Apache-2.0",
264
+ "optional": true,
265
+ "os": [
266
+ "linux"
267
+ ],
268
+ "engines": {
269
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
270
+ },
271
+ "funding": {
272
+ "url": "https://opencollective.com/libvips"
273
+ },
274
+ "optionalDependencies": {
275
+ "@img/sharp-libvips-linux-arm64": "1.2.0"
276
+ }
277
+ },
278
+ "node_modules/@img/sharp-linux-ppc64": {
279
+ "version": "0.34.3",
280
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.3.tgz",
281
+ "integrity": "sha512-GLtbLQMCNC5nxuImPR2+RgrviwKwVql28FWZIW1zWruy6zLgA5/x2ZXk3mxj58X/tszVF69KK0Is83V8YgWhLA==",
282
+ "cpu": [
283
+ "ppc64"
284
+ ],
285
+ "license": "Apache-2.0",
286
+ "optional": true,
287
+ "os": [
288
+ "linux"
289
+ ],
290
+ "engines": {
291
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
292
+ },
293
+ "funding": {
294
+ "url": "https://opencollective.com/libvips"
295
+ },
296
+ "optionalDependencies": {
297
+ "@img/sharp-libvips-linux-ppc64": "1.2.0"
298
+ }
299
+ },
300
+ "node_modules/@img/sharp-linux-s390x": {
301
+ "version": "0.34.3",
302
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.3.tgz",
303
+ "integrity": "sha512-3gahT+A6c4cdc2edhsLHmIOXMb17ltffJlxR0aC2VPZfwKoTGZec6u5GrFgdR7ciJSsHT27BD3TIuGcuRT0KmQ==",
304
+ "cpu": [
305
+ "s390x"
306
+ ],
307
+ "license": "Apache-2.0",
308
+ "optional": true,
309
+ "os": [
310
+ "linux"
311
+ ],
312
+ "engines": {
313
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
314
+ },
315
+ "funding": {
316
+ "url": "https://opencollective.com/libvips"
317
+ },
318
+ "optionalDependencies": {
319
+ "@img/sharp-libvips-linux-s390x": "1.2.0"
320
+ }
321
+ },
322
+ "node_modules/@img/sharp-linux-x64": {
323
+ "version": "0.34.3",
324
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.3.tgz",
325
+ "integrity": "sha512-8kYso8d806ypnSq3/Ly0QEw90V5ZoHh10yH0HnrzOCr6DKAPI6QVHvwleqMkVQ0m+fc7EH8ah0BB0QPuWY6zJQ==",
326
+ "cpu": [
327
+ "x64"
328
+ ],
329
+ "license": "Apache-2.0",
330
+ "optional": true,
331
+ "os": [
332
+ "linux"
333
+ ],
334
+ "engines": {
335
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
336
+ },
337
+ "funding": {
338
+ "url": "https://opencollective.com/libvips"
339
+ },
340
+ "optionalDependencies": {
341
+ "@img/sharp-libvips-linux-x64": "1.2.0"
342
+ }
343
+ },
344
+ "node_modules/@img/sharp-linuxmusl-arm64": {
345
+ "version": "0.34.3",
346
+ "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.3.tgz",
347
+ "integrity": "sha512-vAjbHDlr4izEiXM1OTggpCcPg9tn4YriK5vAjowJsHwdBIdx0fYRsURkxLG2RLm9gyBq66gwtWI8Gx0/ov+JKQ==",
348
+ "cpu": [
349
+ "arm64"
350
+ ],
351
+ "license": "Apache-2.0",
352
+ "optional": true,
353
+ "os": [
354
+ "linux"
355
+ ],
356
+ "engines": {
357
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
358
+ },
359
+ "funding": {
360
+ "url": "https://opencollective.com/libvips"
361
+ },
362
+ "optionalDependencies": {
363
+ "@img/sharp-libvips-linuxmusl-arm64": "1.2.0"
364
+ }
365
+ },
366
+ "node_modules/@img/sharp-linuxmusl-x64": {
367
+ "version": "0.34.3",
368
+ "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.3.tgz",
369
+ "integrity": "sha512-gCWUn9547K5bwvOn9l5XGAEjVTTRji4aPTqLzGXHvIr6bIDZKNTA34seMPgM0WmSf+RYBH411VavCejp3PkOeQ==",
370
+ "cpu": [
371
+ "x64"
372
+ ],
373
+ "license": "Apache-2.0",
374
+ "optional": true,
375
+ "os": [
376
+ "linux"
377
+ ],
378
+ "engines": {
379
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
380
+ },
381
+ "funding": {
382
+ "url": "https://opencollective.com/libvips"
383
+ },
384
+ "optionalDependencies": {
385
+ "@img/sharp-libvips-linuxmusl-x64": "1.2.0"
386
+ }
387
+ },
388
+ "node_modules/@img/sharp-wasm32": {
389
+ "version": "0.34.3",
390
+ "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.3.tgz",
391
+ "integrity": "sha512-+CyRcpagHMGteySaWos8IbnXcHgfDn7pO2fiC2slJxvNq9gDipYBN42/RagzctVRKgxATmfqOSulgZv5e1RdMg==",
392
+ "cpu": [
393
+ "wasm32"
394
+ ],
395
+ "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
396
+ "optional": true,
397
+ "dependencies": {
398
+ "@emnapi/runtime": "^1.4.4"
399
+ },
400
+ "engines": {
401
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
402
+ },
403
+ "funding": {
404
+ "url": "https://opencollective.com/libvips"
405
+ }
406
+ },
407
+ "node_modules/@img/sharp-win32-arm64": {
408
+ "version": "0.34.3",
409
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.3.tgz",
410
+ "integrity": "sha512-MjnHPnbqMXNC2UgeLJtX4XqoVHHlZNd+nPt1kRPmj63wURegwBhZlApELdtxM2OIZDRv/DFtLcNhVbd1z8GYXQ==",
411
+ "cpu": [
412
+ "arm64"
413
+ ],
414
+ "license": "Apache-2.0 AND LGPL-3.0-or-later",
415
+ "optional": true,
416
+ "os": [
417
+ "win32"
418
+ ],
419
+ "engines": {
420
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
421
+ },
422
+ "funding": {
423
+ "url": "https://opencollective.com/libvips"
424
+ }
425
+ },
426
+ "node_modules/@img/sharp-win32-ia32": {
427
+ "version": "0.34.3",
428
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.3.tgz",
429
+ "integrity": "sha512-xuCdhH44WxuXgOM714hn4amodJMZl3OEvf0GVTm0BEyMeA2to+8HEdRPShH0SLYptJY1uBw+SCFP9WVQi1Q/cw==",
430
+ "cpu": [
431
+ "ia32"
432
+ ],
433
+ "license": "Apache-2.0 AND LGPL-3.0-or-later",
434
+ "optional": true,
435
+ "os": [
436
+ "win32"
437
+ ],
438
+ "engines": {
439
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
440
+ },
441
+ "funding": {
442
+ "url": "https://opencollective.com/libvips"
443
+ }
444
+ },
445
+ "node_modules/@img/sharp-win32-x64": {
446
+ "version": "0.34.3",
447
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.3.tgz",
448
+ "integrity": "sha512-OWwz05d++TxzLEv4VnsTz5CmZ6mI6S05sfQGEMrNrQcOEERbX46332IvE7pO/EUiw7jUrrS40z/M7kPyjfl04g==",
449
+ "cpu": [
450
+ "x64"
451
+ ],
452
+ "license": "Apache-2.0 AND LGPL-3.0-or-later",
453
+ "optional": true,
454
+ "os": [
455
+ "win32"
456
+ ],
457
+ "engines": {
458
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
459
+ },
460
+ "funding": {
461
+ "url": "https://opencollective.com/libvips"
462
+ }
463
+ },
464
+ "node_modules/@isaacs/fs-minipass": {
465
+ "version": "4.0.1",
466
+ "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
467
+ "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==",
468
+ "license": "ISC",
469
+ "dependencies": {
470
+ "minipass": "^7.0.4"
471
+ },
472
+ "engines": {
473
+ "node": ">=18.0.0"
474
+ }
475
+ },
476
+ "node_modules/@protobufjs/aspromise": {
477
+ "version": "1.1.2",
478
+ "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
479
+ "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
480
+ "license": "BSD-3-Clause"
481
+ },
482
+ "node_modules/@protobufjs/base64": {
483
+ "version": "1.1.2",
484
+ "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
485
+ "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
486
+ "license": "BSD-3-Clause"
487
+ },
488
+ "node_modules/@protobufjs/codegen": {
489
+ "version": "2.0.4",
490
+ "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
491
+ "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
492
+ "license": "BSD-3-Clause"
493
+ },
494
+ "node_modules/@protobufjs/eventemitter": {
495
+ "version": "1.1.0",
496
+ "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
497
+ "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
498
+ "license": "BSD-3-Clause"
499
+ },
500
+ "node_modules/@protobufjs/fetch": {
501
+ "version": "1.1.0",
502
+ "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
503
+ "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
504
+ "license": "BSD-3-Clause",
505
+ "dependencies": {
506
+ "@protobufjs/aspromise": "^1.1.1",
507
+ "@protobufjs/inquire": "^1.1.0"
508
+ }
509
+ },
510
+ "node_modules/@protobufjs/float": {
511
+ "version": "1.0.2",
512
+ "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
513
+ "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
514
+ "license": "BSD-3-Clause"
515
+ },
516
+ "node_modules/@protobufjs/inquire": {
517
+ "version": "1.1.0",
518
+ "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
519
+ "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
520
+ "license": "BSD-3-Clause"
521
+ },
522
+ "node_modules/@protobufjs/path": {
523
+ "version": "1.1.2",
524
+ "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
525
+ "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
526
+ "license": "BSD-3-Clause"
527
+ },
528
+ "node_modules/@protobufjs/pool": {
529
+ "version": "1.1.0",
530
+ "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
531
+ "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
532
+ "license": "BSD-3-Clause"
533
+ },
534
+ "node_modules/@protobufjs/utf8": {
535
+ "version": "1.1.0",
536
+ "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
537
+ "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
538
+ "license": "BSD-3-Clause"
539
+ },
540
+ "node_modules/@types/node": {
541
+ "version": "24.3.1",
542
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.1.tgz",
543
+ "integrity": "sha512-3vXmQDXy+woz+gnrTvuvNrPzekOi+Ds0ReMxw0LzBiK3a+1k0kQn9f2NWk+lgD4rJehFUmYy2gMhJ2ZI+7YP9g==",
544
+ "license": "MIT",
545
+ "dependencies": {
546
+ "undici-types": "~7.10.0"
547
+ }
548
+ },
549
+ "node_modules/boolean": {
550
+ "version": "3.2.0",
551
+ "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz",
552
+ "integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==",
553
+ "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info.",
554
+ "license": "MIT"
555
+ },
556
+ "node_modules/chownr": {
557
+ "version": "3.0.0",
558
+ "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
559
+ "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
560
+ "license": "BlueOak-1.0.0",
561
+ "engines": {
562
+ "node": ">=18"
563
+ }
564
+ },
565
+ "node_modules/color": {
566
+ "version": "4.2.3",
567
+ "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
568
+ "integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==",
569
+ "license": "MIT",
570
+ "dependencies": {
571
+ "color-convert": "^2.0.1",
572
+ "color-string": "^1.9.0"
573
+ },
574
+ "engines": {
575
+ "node": ">=12.5.0"
576
+ }
577
+ },
578
+ "node_modules/color-convert": {
579
+ "version": "2.0.1",
580
+ "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
581
+ "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
582
+ "license": "MIT",
583
+ "dependencies": {
584
+ "color-name": "~1.1.4"
585
+ },
586
+ "engines": {
587
+ "node": ">=7.0.0"
588
+ }
589
+ },
590
+ "node_modules/color-name": {
591
+ "version": "1.1.4",
592
+ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
593
+ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
594
+ "license": "MIT"
595
+ },
596
+ "node_modules/color-string": {
597
+ "version": "1.9.1",
598
+ "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz",
599
+ "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==",
600
+ "license": "MIT",
601
+ "dependencies": {
602
+ "color-name": "^1.0.0",
603
+ "simple-swizzle": "^0.2.2"
604
+ }
605
+ },
606
+ "node_modules/define-data-property": {
607
+ "version": "1.1.4",
608
+ "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
609
+ "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==",
610
+ "license": "MIT",
611
+ "dependencies": {
612
+ "es-define-property": "^1.0.0",
613
+ "es-errors": "^1.3.0",
614
+ "gopd": "^1.0.1"
615
+ },
616
+ "engines": {
617
+ "node": ">= 0.4"
618
+ },
619
+ "funding": {
620
+ "url": "https://github.com/sponsors/ljharb"
621
+ }
622
+ },
623
+ "node_modules/define-properties": {
624
+ "version": "1.2.1",
625
+ "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz",
626
+ "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==",
627
+ "license": "MIT",
628
+ "dependencies": {
629
+ "define-data-property": "^1.0.1",
630
+ "has-property-descriptors": "^1.0.0",
631
+ "object-keys": "^1.1.1"
632
+ },
633
+ "engines": {
634
+ "node": ">= 0.4"
635
+ },
636
+ "funding": {
637
+ "url": "https://github.com/sponsors/ljharb"
638
+ }
639
+ },
640
+ "node_modules/detect-libc": {
641
+ "version": "2.0.4",
642
+ "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.4.tgz",
643
+ "integrity": "sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==",
644
+ "license": "Apache-2.0",
645
+ "engines": {
646
+ "node": ">=8"
647
+ }
648
+ },
649
+ "node_modules/detect-node": {
650
+ "version": "2.1.0",
651
+ "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz",
652
+ "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==",
653
+ "license": "MIT"
654
+ },
655
+ "node_modules/es-define-property": {
656
+ "version": "1.0.1",
657
+ "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
658
+ "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
659
+ "license": "MIT",
660
+ "engines": {
661
+ "node": ">= 0.4"
662
+ }
663
+ },
664
+ "node_modules/es-errors": {
665
+ "version": "1.3.0",
666
+ "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
667
+ "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
668
+ "license": "MIT",
669
+ "engines": {
670
+ "node": ">= 0.4"
671
+ }
672
+ },
673
+ "node_modules/es6-error": {
674
+ "version": "4.1.1",
675
+ "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz",
676
+ "integrity": "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==",
677
+ "license": "MIT"
678
+ },
679
+ "node_modules/escape-string-regexp": {
680
+ "version": "4.0.0",
681
+ "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
682
+ "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==",
683
+ "license": "MIT",
684
+ "engines": {
685
+ "node": ">=10"
686
+ },
687
+ "funding": {
688
+ "url": "https://github.com/sponsors/sindresorhus"
689
+ }
690
+ },
691
+ "node_modules/flatbuffers": {
692
+ "version": "25.2.10",
693
+ "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-25.2.10.tgz",
694
+ "integrity": "sha512-7JlN9ZvLDG1McO3kbX0k4v+SUAg48L1rIwEvN6ZQl/eCtgJz9UylTMzE9wrmYrcorgxm3CX/3T/w5VAub99UUw==",
695
+ "license": "Apache-2.0"
696
+ },
697
+ "node_modules/global-agent": {
698
+ "version": "3.0.0",
699
+ "resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz",
700
+ "integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==",
701
+ "license": "BSD-3-Clause",
702
+ "dependencies": {
703
+ "boolean": "^3.0.1",
704
+ "es6-error": "^4.1.1",
705
+ "matcher": "^3.0.0",
706
+ "roarr": "^2.15.3",
707
+ "semver": "^7.3.2",
708
+ "serialize-error": "^7.0.1"
709
+ },
710
+ "engines": {
711
+ "node": ">=10.0"
712
+ }
713
+ },
714
+ "node_modules/globalthis": {
715
+ "version": "1.0.4",
716
+ "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz",
717
+ "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==",
718
+ "license": "MIT",
719
+ "dependencies": {
720
+ "define-properties": "^1.2.1",
721
+ "gopd": "^1.0.1"
722
+ },
723
+ "engines": {
724
+ "node": ">= 0.4"
725
+ },
726
+ "funding": {
727
+ "url": "https://github.com/sponsors/ljharb"
728
+ }
729
+ },
730
+ "node_modules/gopd": {
731
+ "version": "1.2.0",
732
+ "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
733
+ "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
734
+ "license": "MIT",
735
+ "engines": {
736
+ "node": ">= 0.4"
737
+ },
738
+ "funding": {
739
+ "url": "https://github.com/sponsors/ljharb"
740
+ }
741
+ },
742
+ "node_modules/guid-typescript": {
743
+ "version": "1.0.9",
744
+ "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
745
+ "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
746
+ "license": "ISC"
747
+ },
748
+ "node_modules/has-property-descriptors": {
749
+ "version": "1.0.2",
750
+ "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
751
+ "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==",
752
+ "license": "MIT",
753
+ "dependencies": {
754
+ "es-define-property": "^1.0.0"
755
+ },
756
+ "funding": {
757
+ "url": "https://github.com/sponsors/ljharb"
758
+ }
759
+ },
760
+ "node_modules/is-arrayish": {
761
+ "version": "0.3.2",
762
+ "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
763
+ "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==",
764
+ "license": "MIT"
765
+ },
766
+ "node_modules/json-stringify-safe": {
767
+ "version": "5.0.1",
768
+ "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
769
+ "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
770
+ "license": "ISC"
771
+ },
772
+ "node_modules/long": {
773
+ "version": "5.3.2",
774
+ "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
775
+ "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
776
+ "license": "Apache-2.0"
777
+ },
778
+ "node_modules/matcher": {
779
+ "version": "3.0.0",
780
+ "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz",
781
+ "integrity": "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==",
782
+ "license": "MIT",
783
+ "dependencies": {
784
+ "escape-string-regexp": "^4.0.0"
785
+ },
786
+ "engines": {
787
+ "node": ">=10"
788
+ }
789
+ },
790
+ "node_modules/minipass": {
791
+ "version": "7.1.2",
792
+ "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz",
793
+ "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==",
794
+ "license": "ISC",
795
+ "engines": {
796
+ "node": ">=16 || 14 >=14.17"
797
+ }
798
+ },
799
+ "node_modules/minizlib": {
800
+ "version": "3.0.2",
801
+ "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz",
802
+ "integrity": "sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==",
803
+ "license": "MIT",
804
+ "dependencies": {
805
+ "minipass": "^7.1.2"
806
+ },
807
+ "engines": {
808
+ "node": ">= 18"
809
+ }
810
+ },
811
+ "node_modules/mkdirp": {
812
+ "version": "3.0.1",
813
+ "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
814
+ "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
815
+ "license": "MIT",
816
+ "bin": {
817
+ "mkdirp": "dist/cjs/src/bin.js"
818
+ },
819
+ "engines": {
820
+ "node": ">=10"
821
+ },
822
+ "funding": {
823
+ "url": "https://github.com/sponsors/isaacs"
824
+ }
825
+ },
826
+ "node_modules/object-keys": {
827
+ "version": "1.1.1",
828
+ "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
829
+ "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==",
830
+ "license": "MIT",
831
+ "engines": {
832
+ "node": ">= 0.4"
833
+ }
834
+ },
835
+ "node_modules/onnxruntime-common": {
836
+ "version": "1.21.0",
837
+ "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.21.0.tgz",
838
+ "integrity": "sha512-Q632iLLrtCAVOTO65dh2+mNbQir/QNTVBG3h/QdZBpns7mZ0RYbLRBgGABPbpU9351AgYy7SJf1WaeVwMrBFPQ==",
839
+ "license": "MIT"
840
+ },
841
+ "node_modules/onnxruntime-node": {
842
+ "version": "1.21.0",
843
+ "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.21.0.tgz",
844
+ "integrity": "sha512-NeaCX6WW2L8cRCSqy3bInlo5ojjQqu2fD3D+9W5qb5irwxhEyWKXeH2vZ8W9r6VxaMPUan+4/7NDwZMtouZxEw==",
845
+ "hasInstallScript": true,
846
+ "license": "MIT",
847
+ "os": [
848
+ "win32",
849
+ "darwin",
850
+ "linux"
851
+ ],
852
+ "dependencies": {
853
+ "global-agent": "^3.0.0",
854
+ "onnxruntime-common": "1.21.0",
855
+ "tar": "^7.0.1"
856
+ }
857
+ },
858
+ "node_modules/onnxruntime-web": {
859
+ "version": "1.22.0-dev.20250409-89f8206ba4",
860
+ "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.22.0-dev.20250409-89f8206ba4.tgz",
861
+ "integrity": "sha512-0uS76OPgH0hWCPrFKlL8kYVV7ckM7t/36HfbgoFw6Nd0CZVVbQC4PkrR8mBX8LtNUFZO25IQBqV2Hx2ho3FlbQ==",
862
+ "license": "MIT",
863
+ "dependencies": {
864
+ "flatbuffers": "^25.1.24",
865
+ "guid-typescript": "^1.0.9",
866
+ "long": "^5.2.3",
867
+ "onnxruntime-common": "1.22.0-dev.20250409-89f8206ba4",
868
+ "platform": "^1.3.6",
869
+ "protobufjs": "^7.2.4"
870
+ }
871
+ },
872
+ "node_modules/onnxruntime-web/node_modules/onnxruntime-common": {
873
+ "version": "1.22.0-dev.20250409-89f8206ba4",
874
+ "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.22.0-dev.20250409-89f8206ba4.tgz",
875
+ "integrity": "sha512-vDJMkfCfb0b1A836rgHj+ORuZf4B4+cc2bASQtpeoJLueuFc5DuYwjIZUBrSvx/fO5IrLjLz+oTrB3pcGlhovQ==",
876
+ "license": "MIT"
877
+ },
878
+ "node_modules/platform": {
879
+ "version": "1.3.6",
880
+ "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
881
+ "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
882
+ "license": "MIT"
883
+ },
884
+ "node_modules/protobufjs": {
885
+ "version": "7.5.4",
886
+ "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz",
887
+ "integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==",
888
+ "hasInstallScript": true,
889
+ "license": "BSD-3-Clause",
890
+ "dependencies": {
891
+ "@protobufjs/aspromise": "^1.1.2",
892
+ "@protobufjs/base64": "^1.1.2",
893
+ "@protobufjs/codegen": "^2.0.4",
894
+ "@protobufjs/eventemitter": "^1.1.0",
895
+ "@protobufjs/fetch": "^1.1.0",
896
+ "@protobufjs/float": "^1.0.2",
897
+ "@protobufjs/inquire": "^1.1.0",
898
+ "@protobufjs/path": "^1.1.2",
899
+ "@protobufjs/pool": "^1.1.0",
900
+ "@protobufjs/utf8": "^1.1.0",
901
+ "@types/node": ">=13.7.0",
902
+ "long": "^5.0.0"
903
+ },
904
+ "engines": {
905
+ "node": ">=12.0.0"
906
+ }
907
+ },
908
+ "node_modules/roarr": {
909
+ "version": "2.15.4",
910
+ "resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz",
911
+ "integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==",
912
+ "license": "BSD-3-Clause",
913
+ "dependencies": {
914
+ "boolean": "^3.0.1",
915
+ "detect-node": "^2.0.4",
916
+ "globalthis": "^1.0.1",
917
+ "json-stringify-safe": "^5.0.1",
918
+ "semver-compare": "^1.0.0",
919
+ "sprintf-js": "^1.1.2"
920
+ },
921
+ "engines": {
922
+ "node": ">=8.0"
923
+ }
924
+ },
925
+ "node_modules/semver": {
926
+ "version": "7.7.2",
927
+ "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz",
928
+ "integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==",
929
+ "license": "ISC",
930
+ "bin": {
931
+ "semver": "bin/semver.js"
932
+ },
933
+ "engines": {
934
+ "node": ">=10"
935
+ }
936
+ },
937
+ "node_modules/semver-compare": {
938
+ "version": "1.0.0",
939
+ "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz",
940
+ "integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==",
941
+ "license": "MIT"
942
+ },
943
+ "node_modules/serialize-error": {
944
+ "version": "7.0.1",
945
+ "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz",
946
+ "integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==",
947
+ "license": "MIT",
948
+ "dependencies": {
949
+ "type-fest": "^0.13.1"
950
+ },
951
+ "engines": {
952
+ "node": ">=10"
953
+ },
954
+ "funding": {
955
+ "url": "https://github.com/sponsors/sindresorhus"
956
+ }
957
+ },
958
+ "node_modules/sharp": {
959
+ "version": "0.34.3",
960
+ "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.3.tgz",
961
+ "integrity": "sha512-eX2IQ6nFohW4DbvHIOLRB3MHFpYqaqvXd3Tp5e/T/dSH83fxaNJQRvDMhASmkNTsNTVF2/OOopzRCt7xokgPfg==",
962
+ "hasInstallScript": true,
963
+ "license": "Apache-2.0",
964
+ "dependencies": {
965
+ "color": "^4.2.3",
966
+ "detect-libc": "^2.0.4",
967
+ "semver": "^7.7.2"
968
+ },
969
+ "engines": {
970
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
971
+ },
972
+ "funding": {
973
+ "url": "https://opencollective.com/libvips"
974
+ },
975
+ "optionalDependencies": {
976
+ "@img/sharp-darwin-arm64": "0.34.3",
977
+ "@img/sharp-darwin-x64": "0.34.3",
978
+ "@img/sharp-libvips-darwin-arm64": "1.2.0",
979
+ "@img/sharp-libvips-darwin-x64": "1.2.0",
980
+ "@img/sharp-libvips-linux-arm": "1.2.0",
981
+ "@img/sharp-libvips-linux-arm64": "1.2.0",
982
+ "@img/sharp-libvips-linux-ppc64": "1.2.0",
983
+ "@img/sharp-libvips-linux-s390x": "1.2.0",
984
+ "@img/sharp-libvips-linux-x64": "1.2.0",
985
+ "@img/sharp-libvips-linuxmusl-arm64": "1.2.0",
986
+ "@img/sharp-libvips-linuxmusl-x64": "1.2.0",
987
+ "@img/sharp-linux-arm": "0.34.3",
988
+ "@img/sharp-linux-arm64": "0.34.3",
989
+ "@img/sharp-linux-ppc64": "0.34.3",
990
+ "@img/sharp-linux-s390x": "0.34.3",
991
+ "@img/sharp-linux-x64": "0.34.3",
992
+ "@img/sharp-linuxmusl-arm64": "0.34.3",
993
+ "@img/sharp-linuxmusl-x64": "0.34.3",
994
+ "@img/sharp-wasm32": "0.34.3",
995
+ "@img/sharp-win32-arm64": "0.34.3",
996
+ "@img/sharp-win32-ia32": "0.34.3",
997
+ "@img/sharp-win32-x64": "0.34.3"
998
+ }
999
+ },
1000
+ "node_modules/simple-swizzle": {
1001
+ "version": "0.2.2",
1002
+ "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
1003
+ "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
1004
+ "license": "MIT",
1005
+ "dependencies": {
1006
+ "is-arrayish": "^0.3.1"
1007
+ }
1008
+ },
1009
+ "node_modules/sprintf-js": {
1010
+ "version": "1.1.3",
1011
+ "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz",
1012
+ "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==",
1013
+ "license": "BSD-3-Clause"
1014
+ },
1015
+ "node_modules/tar": {
1016
+ "version": "7.4.3",
1017
+ "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
1018
+ "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
1019
+ "license": "ISC",
1020
+ "dependencies": {
1021
+ "@isaacs/fs-minipass": "^4.0.0",
1022
+ "chownr": "^3.0.0",
1023
+ "minipass": "^7.1.2",
1024
+ "minizlib": "^3.0.1",
1025
+ "mkdirp": "^3.0.1",
1026
+ "yallist": "^5.0.0"
1027
+ },
1028
+ "engines": {
1029
+ "node": ">=18"
1030
+ }
1031
+ },
1032
+ "node_modules/tslib": {
1033
+ "version": "2.8.1",
1034
+ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
1035
+ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
1036
+ "license": "0BSD",
1037
+ "optional": true
1038
+ },
1039
+ "node_modules/type-fest": {
1040
+ "version": "0.13.1",
1041
+ "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz",
1042
+ "integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==",
1043
+ "license": "(MIT OR CC0-1.0)",
1044
+ "engines": {
1045
+ "node": ">=10"
1046
+ },
1047
+ "funding": {
1048
+ "url": "https://github.com/sponsors/sindresorhus"
1049
+ }
1050
+ },
1051
+ "node_modules/undici-types": {
1052
+ "version": "7.10.0",
1053
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz",
1054
+ "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==",
1055
+ "license": "MIT"
1056
+ },
1057
+ "node_modules/yallist": {
1058
+ "version": "5.0.0",
1059
+ "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
1060
+ "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
1061
+ "license": "BlueOak-1.0.0",
1062
+ "engines": {
1063
+ "node": ">=18"
1064
+ }
1065
+ }
1066
+ }
1067
+ }
js/package.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "js",
3
+ "version": "1.0.0",
4
+ "description": "",
5
+ "main": "index.js",
6
+ "scripts": {
7
+ "test": "echo \"Error: no test specified\" && exit 1"
8
+ },
9
+ "keywords": [],
10
+ "author": "",
11
+ "license": "ISC",
12
+ "dependencies": {
13
+ "@huggingface/transformers": "^3.7.2"
14
+ }
15
+ }
js/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
js/tsconfig.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "module": "ESNext",
4
+ "moduleResolution": "nodenext",
5
+ // Set the baseUrl to the root of the project.
6
+ "baseUrl": "src",
7
+ // Make the type checking as strict as possible.
8
+ "strict": true,
9
+ // Allow JS files and type check them. With "checkJs" enabled, every included JS file is checked, even without a @ts-check comment.
10
+ "allowJs": true,
11
+ "checkJs": true,
12
+ // Only type check, don't emit files.
13
+ "noEmit": true,
14
+ // Allow esnext syntax. Otherwise the default is ES5 only.
15
+ "target": "esnext",
16
+ "lib": ["esnext", "dom"],
17
+ "esModuleInterop": true
18
+ },
19
+ // Only the files listed here are type checked ("checkJs" applies to every included JS file).
20
+ "include": ["example.mjs"],
21
+ // "files": ["src/@types/globals.d.ts"],
22
+ "exclude": []
23
+ }
models/minishlab/potion-multilingual-128M/README.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # [minishlab/potion-multilingual-128M](https://huggingface.co/minishlab/potion-multilingual-128M)
2
+
3
+ License: [mit](https://choosealicense.com/licenses/mit/)
4
+
5
+ A multilingual embedder. The details are a bit scant on how it's trained as
6
+ there is no source code for it. However, it is likely close in architecture
7
+ to the potion-retrieval-32M model, but trained on Common Crawl data.
8
+
9
+ The 128M references the number of parameters in the embeddings:
10
+
11
+ 256 dimensions * 500,353 vocab.
12
+
13
+ ## Model Stats
14
+
15
+ Stats that describe the embeddings tensor shapes and value distribution.
16
+
17
+ | item | metric | value |
18
+ | --------------| ----------------------- | ----- |
19
+ | vocab | size | 500,353 |
20
+ | embedding | dimensions | 256 |
21
+ | vector length | mean | 12.73 |
22
+ | vector length | median | 11.94 |
23
+ | vector length | stddev | 5.12 |
24
+ | values | mean | -0.00 |
25
+ | values | median | -0.00 |
26
+ | values | stddev | 0.86 |
27
+
28
+ ## Mean Pooled Quantization Loss
29
+
30
+ This test roundtrips the vectors through quantization, but performs the
31
+ mean pooling arithmetic in float32 space. The quantized and unquantized
32
+ mean pooled vectors are compared to each other to determine their cosine
33
+ similarity, to show how much the meaning of the vector has changed due
34
+ to quantization.
35
+
36
+ | Precision | Cosine Similarity |
37
+ | ------------- | ----------------- |
38
+ | fp16 | 1.00000 |
39
+ | fp8 e4m3 | 0.99993 |
40
+ | fp8 e5m2 | 0.99973 |
41
+
42
+ ## Quantization Loss Per Vector
43
+
44
+ While ultimately the embedding vectors will be mean pooled together, it's
45
+ still useful to look at the loss per-vector in the embedding table to see
46
+ which quantization strategies retain the most vector meaning.
47
+
48
+ - **Cosine Similarity** — measures how well the *direction* of embedding vectors
49
+ is preserved after quantization, independent of scale. This is especially
50
+ relevant when embeddings are used for similarity search or retrieval.
51
+ - **MSE (Mean Squared Error)** — emphasizes large errors by squaring the
52
+ differences. Useful for detecting whether any values are badly distorted.
53
+ - **MAE (Mean Absolute Error)** — the average absolute difference between
54
+ original and quantized values. Easier to interpret, less sensitive to outliers.
55
+
56
+ | Precision | Metric | Value |
57
+ | ------------- | ------ | ----- |
58
+ | fp16 | cosine similarity | 1.00000 |
59
+ | fp8 e4m3 | cosine similarity | 0.99965 |
60
+ | fp8 e5m2 | cosine similarity | 0.99863 |
61
+ | fp16 | MSE | 0.00000 |
62
+ | fp8 e4m3 | MSE | 0.00052 |
63
+ | fp8 e5m2 | MSE | 0.00205 |
64
+ | fp16 | MAE | 0.00011 |
65
+ | fp8 e4m3 | MAE | 0.01364 |
66
+ | fp8 e5m2 | MAE | 0.02717 |
67
+
68
+ ## Tokenizer Examples
69
+
70
+ **Input:** This is an example of encoding<br/>
71
+ **Tokens**: `▁This` `▁is` `▁an` `▁example` `▁of` `▁encoding`
72
+
73
+ **Input:** The quick brown fox jumps over the lazy dog.<br/>
74
+ **Tokens**: `▁The` `▁quick` `▁brown` `▁fox` `▁jumps` `▁over` `▁the` `▁lazy` `▁dog` `▁` `.`
75
+
76
+ **Input:** Curaçao, naïve fiancé, jalapeño, déjà vu.<br/>
77
+ **Tokens**: `▁Cura` `ça` `o` `▁` `,` `▁na` `ï` `ve` `▁fiancé` `▁` `,` `▁ja` `lap` `eño` `▁` `,` `▁déjà` `▁vu` `▁` `.`
78
+
79
+ **Input:** Привет, как дела?<br/>
80
+ **Tokens**: `▁При` `вет` `▁` `,` `▁как` `▁дела` `▁?`
81
+
82
+ **Input:** Бързата кафява лисица прескача мързеливото куче.<br/>
83
+ **Tokens**: `▁Бър` `за` `та` `▁кафяв` `а` `▁лис` `ица` `▁пре` `ска` `ча` `▁` `мър` `зе` `ливо` `то` `▁куче` `▁` `.`
84
+
85
+ **Input:** Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.<br/>
86
+ **Tokens**: `▁Γ` `ρή` `γο` `ρη` `▁καφέ` `▁α` `λε` `πού` `▁` `πη` `δά` `ει` `▁πάνω` `▁από` `▁τον` `▁τε` `μπ` `έλη` `▁σκύλο` `▁` `.`
87
+
88
+ **Input:** اللغة العربية جميلة وغنية بالتاريخ.<br/>
89
+ **Tokens**: `▁اللغة` `▁العربية` `▁جميلة` `▁وغ` `نية` `▁بال` `تاريخ` `▁` `.`
90
+
91
+ **Input:** مرحبا بالعالم!<br/>
92
+ **Tokens**: `▁مرحبا` `▁بالعالم` `▁!`
93
+
94
+ **Input:** Simplified: 快速的棕色狐狸跳过懒狗。<br/>
95
+ **Tokens**: `▁Simp` `l` `ified` `▁:` `▁` `快速` `的` `棕` `色` `狐` `狸` `跳` `过` `懒` `狗` `。`
96
+
97
+ **Input:** Traditional: 快速的棕色狐狸跳過懶狗。<br/>
98
+ **Tokens**: `▁Tradition` `al` `▁:` `▁` `快速` `的` `棕` `色` `狐` `狸` `跳` `過` `懶` `狗` `。`
99
+
100
+ **Input:** 素早い茶色の狐が怠け者の犬を飛び越える。<br/>
101
+ **Tokens**: `▁素` `早い` `茶` `色` `の` `狐` `が` `怠` `け` `者の` `犬` `を` `飛び` `越` `える` `。`
102
+
103
+ **Input:** コンピュータープログラミング<br/>
104
+ **Tokens**: `▁` `コンピュータ` `ー` `プロ` `グラ` `ミ` `ング`
105
+
106
+ **Input:** 빠른 갈색 여우가 게으른 개를 뛰어넘습니다.<br/>
107
+ **Tokens**: `▁빠른` `▁갈` `색` `▁여` `우` `가` `▁게` `으` `른` `▁` `개를` `▁뛰어` `넘` `습니다` `▁` `.`
108
+
109
+ **Input:** तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।<br/>
110
+ **Tokens**: `▁तेज़` `▁भू` `री` `▁लो` `म` `ड़ी` `▁आ` `ल` `सी` `▁कुत्ते` `▁के` `▁ऊपर` `▁` `कू` `द` `ती` `▁है` `।`
111
+
112
+ **Input:** দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।<br/>
113
+ **Tokens**: `▁দ্রুত` `▁বাদাম` `ী` `▁শি` `য়াল` `▁অ` `ল` `স` `▁কু` `কুর` `ের` `▁উপর` `▁দিয়ে` `▁লা` `ফ` `▁দেয়` `।`
114
+
115
+ **Input:** வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.<br/>
116
+ **Tokens**: `▁வேக` `மான` `▁பழ` `ு` `ப்பு` `▁ந` `ரி` `▁சோ` `ம்` `பே` `றி` `▁நா` `யின்` `▁மேல்` `▁கு` `தி` `க்கிறது` `▁` `.`
117
+
118
+ **Input:** สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.<br/>
119
+ **Tokens**: `▁` `สุนัข` `จิ` `้ง` `จอ` `ก` `สีน้ําตาล` `กระโดด` `ข้าม` `สุนัข` `ขี้` `เกีย` `จ` `▁` `.`
120
+
121
+ **Input:** ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።<br/>
122
+ **Tokens**: `▁` `ብሩ` `ክ` `▁ቡና` `ማ` `▁` `ቀበ` `ሮ` `▁ሰ` `ነፍ` `▁` `ው` `ሻ` `ን` `▁ተ` `ዘ` `ል` `ሏል` `።`
123
+
124
+ **Input:** Hello 世界 مرحبا 🌍<br/>
125
+ **Tokens**: `▁Hello` `▁世界` `▁مرحبا` `▁🌍`
126
+
127
+ **Input:** 123, αβγ, абв, العربية, 中文, हिन्दी.<br/>
128
+ **Tokens**: `▁123` `▁` `,` `▁α` `β` `γ` `▁` `,` `▁аб` `в` `▁` `,` `▁العربية` `▁` `,` `▁中文` `▁` `,` `▁हिन्दी` `▁` `.`
models/minishlab/potion-retrieval-32M/README.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # [minishlab/potion-retrieval-32M](https://huggingface.co/minishlab/potion-retrieval-32M)
2
+
3
+ License: [mit](https://choosealicense.com/licenses/mit/)
4
+
5
+ The token embeddings from a monolingual English 32M parameter model that was
6
+ distilled from embeddings that were initialized from the multi-domain
7
+ [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) model.
8
+
9
+ The 32M references the number of parameters in the embeddings:
10
+
11
+ 512 dimensions * 63,091 vocab.
12
+
13
+ ## Model Stats
14
+
15
+ Stats that describe the embeddings tensor shapes and value distribution.
16
+
17
+ | item | metric | value |
18
+ | --------------| ----------------------- | ----- |
19
+ | vocab | size | 63,091 |
20
+ | embedding | dimensions | 512 |
21
+ | vector length | mean | 130.27 |
22
+ | vector length | median | 130.39 |
23
+ | vector length | stddev | 30.43 |
24
+ | values | mean | 0.01 |
25
+ | values | median | 0.01 |
26
+ | values | stddev | 5.91 |
27
+
28
+ ## Mean Pooled Quantization Loss
29
+
30
+ This test roundtrips the vectors through quantization, but performs the
31
+ mean pooling arithmetic in float32 space. The quantized and unquantized
32
+ mean pooled vectors are compared to each other to determine their cosine
33
+ similarity, to show how much the meaning of the vector has changed due
34
+ to quantization.
35
+
36
+ | Precision | Cosine Similarity |
37
+ | ------------- | ----------------- |
38
+ | fp16 | 1.00000 |
39
+ | fp8 e4m3 | 0.99970 |
40
+ | fp8 e5m2 | 0.99887 |
41
+
42
+ ## Quantization Loss Per Vector
43
+
44
+ While ultimately the embedding vectors will be mean pooled together, it's
45
+ still useful to look at the loss per-vector in the embedding table to see
46
+ which quantization strategies retain the most vector meaning.
47
+
48
+ - **Cosine Similarity** — measures how well the *direction* of embedding vectors
49
+ is preserved after quantization, independent of scale. This is especially
50
+ relevant when embeddings are used for similarity search or retrieval.
51
+ - **MSE (Mean Squared Error)** — emphasizes large errors by squaring the
52
+ differences. Useful for detecting whether any values are badly distorted.
53
+ - **MAE (Mean Absolute Error)** — the average absolute difference between
54
+ original and quantized values. Easier to interpret, less sensitive to outliers.
55
+
56
+ | Precision | Metric | Value |
57
+ | ------------- | ------ | ----- |
58
+ | fp16 | cosine similarity | 1.00000 |
59
+ | fp8 e4m3 | cosine similarity | 0.99965 |
60
+ | fp8 e5m2 | cosine similarity | 0.99862 |
61
+ | fp16 | MSE | 0.00000 |
62
+ | fp8 e4m3 | MSE | 0.02454 |
63
+ | fp8 e5m2 | MSE | 0.09720 |
64
+ | fp16 | MAE | 0.00076 |
65
+ | fp8 e4m3 | MAE | 0.09763 |
66
+ | fp8 e5m2 | MAE | 0.19461 |
67
+
68
+ ## Tokenizer Examples
69
+
70
+ **Input:** This is an example of encoding<br/>
71
+ **Tokens**: `[CLS]` `this` `is` `an` `example` `of` `encoding` `[SEP]`
72
+
73
+ **Input:** The quick brown fox jumps over the lazy dog.<br/>
74
+ **Tokens**: `[CLS]` `the` `quick` `brown` `fox` `jumps` `over` `the` `lazy` `dog` `.` `[SEP]`
75
+
76
+ **Input:** Curaçao, naïve fiancé, jalapeño, déjà vu.<br/>
77
+ **Tokens**: `[CLS]` `curacao` `,` `naive` `fiance` `,` `jalapeno` `,` `deja` `vu` `.` `[SEP]`
78
+
79
+ **Input:** Привет, как дела?<br/>
80
+ **Tokens**: `[CLS]` `п` `##р` `##и` `##в` `##е` `##т` `,` `как` `д` `##е` `##л` `##а` `?` `[SEP]`
81
+
82
+ **Input:** Бързата кафява лисица прескача мързеливото куче.<br/>
83
+ **Tokens**: `[CLS]` `б` `##ъ` `##р` `##з` `##а` `##т` `##а` `к` `##а` `##ф` `##я` `##в` `##а` `л` `##и` `##с` `##и` `##ц` `##а` `п` `##р` `##е` `##с` `##ка` `##ч` `##а` `м` `##ъ` `##р` `##з` `##е` `##л` `##и` `##в` `##о` `##т` `##о` `к` `##у` `##ч` `##е` `.` `[SEP]`
84
+
85
+ **Input:** Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.<br/>
86
+ **Tokens**: `[CLS]` `γ` `##ρ` `##η` `##γ` `##ο` `##ρ` `##η` `κ` `##α` `##φ` `##ε` `α` `##λ` `##ε` `##π` `##ου` `π` `##η` `##δ` `##α` `##ε` `##ι` `π` `##α` `##ν` `##ω` `α` `##π` `##ο` `τ` `##ο` `##ν` `τ` `##ε` `##μ` `##π` `##ε` `##λ` `##η` `σ` `##κ` `##υ` `##λ` `##ο` `.` `[SEP]`
87
+
88
+ **Input:** اللغة العربية جميلة وغنية بالتاريخ.<br/>
89
+ **Tokens**: `[CLS]` `ا` `##ل` `##ل` `##غ` `##ة` `ا` `##ل` `##ع` `##ر` `##ب` `##ي` `##ة` `ج` `##م` `##ي` `##ل` `##ة` `و` `##غ` `##ن` `##ي` `##ة` `با` `##ل` `##ت` `##ا` `##ر` `##ي` `##خ` `.` `[SEP]`
90
+
91
+ **Input:** مرحبا بالعالم!<br/>
92
+ **Tokens**: `[CLS]` `م` `##ر` `##ح` `##ب` `##ا` `با` `##ل` `##ع` `##ا` `##ل` `##م` `!` `[SEP]`
93
+
94
+ **Input:** Simplified: 快速的棕色狐狸跳过懒狗。<br/>
95
+ **Tokens**: `[CLS]` `simplified` `:` `[UNK]` `[UNK]` `的` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `。` `[SEP]`
96
+
97
+ **Input:** Traditional: 快速的棕色狐狸跳過懶狗。<br/>
98
+ **Tokens**: `[CLS]` `traditional` `:` `[UNK]` `[UNK]` `的` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `。` `[SEP]`
99
+
100
+ **Input:** 素早い茶色の狐が怠け者の犬を飛び越える。<br/>
101
+ **Tokens**: `[CLS]` `[UNK]` `[UNK]` `い` `[UNK]` `[UNK]` `の` `[UNK]` `か` `[UNK]` `け` `[UNK]` `の` `犬` `を` `[UNK]` `ひ` `[UNK]` `え` `##る` `。` `[SEP]`
102
+
103
+ **Input:** コンピュータープログラミング<br/>
104
+ **Tokens**: `[CLS]` `コ` `##ン` `##ヒ` `##ュ` `##ー` `##タ` `##ー` `##フ` `##ロ` `##ク` `##ラ` `##ミ` `##ン` `##ク` `[SEP]`
105
+
106
+ **Input:** 빠른 갈색 여우가 게으른 개를 뛰어넘습니다.<br/>
107
+ **Tokens**: `[CLS]` `[UNK]` `ᄀ` `##ᅡ` `##ᆯ` `##ᄉ` `##ᅢ` `##ᆨ` `ᄋ` `##ᅧ` `##ᄋ` `##ᅮ` `##ᄀ` `##ᅡ` `ᄀ` `##ᅦ` `##ᄋ` `##ᅳ` `##ᄅ` `##ᅳ` `##ᆫ` `ᄀ` `##ᅢ` `##ᄅ` `##ᅳ` `##ᆯ` `[UNK]` `.` `[SEP]`
108
+
109
+ **Input:** तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।<br/>
110
+ **Tokens**: `[CLS]` `त` `##ज` `भ` `##र` `##ी` `ल` `##ो` `##म` `##ड` `##ी` `आ` `##ल` `##स` `##ी` `क` `##त` `##त` `क` `[UNK]` `क` `##द` `##त` `##ी` `ह` `।` `[SEP]`
111
+
112
+ **Input:** দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।<br/>
113
+ **Tokens**: `[CLS]` `দ` `##র` `##ত` `ব` `##া` `##দ` `##া` `##ম` `##ী` `শ` `##ি` `##য` `##া` `##ল` `অ` `##ল` `##স` `ক` `##ক` `##র` `##ে` `##র` `উ` `##প` `##র` `দ` `##ি` `##য` `##ে` `[UNK]` `দ` `##ে` `##য` `।` `[SEP]`
114
+
115
+ **Input:** வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.<br/>
116
+ **Tokens**: `[CLS]` `வ` `##ே` `##க` `##ம` `##ா` `##ன` `[UNK]` `ந` `##ர` `##ி` `[UNK]` `ந` `##ா` `##ய` `##ி` `##ன` `ம` `##ே` `##ல` `[UNK]` `.` `[SEP]`
117
+
118
+ **Input:** สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.<br/>
119
+ **Tokens**: `[CLS]` `[UNK]` `.` `[SEP]`
120
+
121
+ **Input:** ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።<br/>
122
+ **Tokens**: `[CLS]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[SEP]`
123
+
124
+ **Input:** Hello 世界 مرحبا 🌍<br/>
125
+ **Tokens**: `[CLS]` `hello` `世` `[UNK]` `م` `##ر` `##ح` `##ب` `##ا` `[UNK]` `[SEP]`
126
+
127
+ **Input:** 123, αβγ, абв, العربية, 中文, हिन्दी.<br/>
128
+ **Tokens**: `[CLS]` `123` `,` `α` `##β` `##γ` `,` `а` `##б` `##в` `,` `ا` `##ل` `##ع` `##ر` `##ب` `##ي` `##ة` `,` `中` `文` `,` `ह` `##ि` `##न` `##द` `##ी` `.` `[SEP]`
models/sentence-transformers/static-retrieval-mrl-en-v1/README.md ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # [sentence-transformers/static-retrieval-mrl-en-v1](https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1)
2
+
3
+ License: [apache-2.0](https://choosealicense.com/licenses/apache-2.0/)
4
+
5
+ English-only uncased similarity embeddings that were trained with Matryoshka
6
+ loss that allows for more effective truncation of the embedding vectors. It
7
+ was trained on a variety of domains of monolingual datasets. It was designed
8
+ specifically for similarity retrieval.
9
+
10
+ ## Model Stats
11
+
12
+ Stats that describe the embeddings tensor shapes and value distribution.
13
+
14
+ | item | metric | value |
15
+ | --------------| ----------------------- | ----- |
16
+ | vocab | size | 30,522 |
17
+ | embedding | dimensions | 1,024 |
18
+ | vector length | mean | 555.04 |
19
+ | vector length | median | 573.92 |
20
+ | vector length | stddev | 219.06 |
21
+ | values | mean | 0.02 |
22
+ | values | median | 0.01 |
23
+ | values | stddev | 18.65 |
24
+
25
+ ## Mean Pooled Quantization Loss
26
+
27
+ This test roundtrips the vectors through quantization, but performs the
28
+ mean pooling arithmetic in float32 space. The quantized and unquantized
29
+ mean pooled vectors are compared to each other to determine their cosine
30
+ similarity, to show how much the meaning of the vector has changed due
31
+ to quantization.
32
+
33
+ | Precision | Cosine Similarity |
34
+ | ------------- | ----------------- |
35
+ | fp16 | 1.00000 |
36
+ | fp8 e4m3 | 0.99972 |
37
+ | fp8 e5m2 | 0.99887 |
38
+
39
+ ## Quantization Loss Per Vector
40
+
41
+ While ultimately the embedding vectors will be mean pooled together, it's
42
+ still useful to look at the loss per-vector in the embedding table to see
43
+ which quantization strategies retain the most vector meaning.
44
+
45
+ - **Cosine Similarity** — measures how well the *direction* of embedding vectors
46
+ is preserved after quantization, independent of scale. This is especially
47
+ relevant when embeddings are used for similarity search or retrieval.
48
+ - **MSE (Mean Squared Error)** — emphasizes large errors by squaring the
49
+ differences. Useful for detecting whether any values are badly distorted.
50
+ - **MAE (Mean Absolute Error)** — the average absolute difference between
51
+ original and quantized values. Easier to interpret, less sensitive to outliers.
52
+
53
+ | Precision | Metric | Value |
54
+ | ------------- | ------ | ----- |
55
+ | fp16 | cosine similarity | 1.00000 |
56
+ | fp8 e4m3 | cosine similarity | 0.99965 |
57
+ | fp8 e5m2 | cosine similarity | 0.99861 |
58
+ | fp16 | MSE | 0.00001 |
59
+ | fp8 e4m3 | MSE | 0.24369 |
60
+ | fp8 e5m2 | MSE | 0.96497 |
61
+ | fp16 | MAE | 0.00244 |
62
+ | fp8 e4m3 | MAE | 0.31206 |
63
+ | fp8 e5m2 | MAE | 0.62205 |
64
+
65
+ ## Tokenizer Examples
66
+
67
+ **Input:** This is an example of encoding<br/>
68
+ **Tokens**: `[CLS]` `this` `is` `an` `example` `of` `encoding` `[SEP]`
69
+
70
+ **Input:** The quick brown fox jumps over the lazy dog.<br/>
71
+ **Tokens**: `[CLS]` `the` `quick` `brown` `fox` `jumps` `over` `the` `lazy` `dog` `.` `[SEP]`
72
+
73
+ **Input:** Curaçao, naïve fiancé, jalapeño, déjà vu.<br/>
74
+ **Tokens**: `[CLS]` `cu` `##rac` `##ao` `,` `naive` `fiance` `,` `ja` `##la` `##pen` `##o` `,` `de` `##ja` `vu` `.` `[SEP]`
75
+
76
+ **Input:** Привет, как дела?<br/>
77
+ **Tokens**: `[CLS]` `п` `##р` `##и` `##в` `##е` `##т` `,` `к` `##а` `##к` `д` `##е` `##л` `##а` `?` `[SEP]`
78
+
79
+ **Input:** Бързата кафява лисица прескача мързеливото куче.<br/>
80
+ **Tokens**: `[CLS]` `б` `##ъ` `##р` `##з` `##а` `##т` `##а` `к` `##а` `##ф` `##я` `##в` `##а` `л` `##и` `##с` `##и` `##ц` `##а` `п` `##р` `##е` `##с` `##ка` `##ч` `##а` `м` `##ъ` `##р` `##з` `##е` `##л` `##и` `##в` `##о` `##т` `##о` `к` `##у` `##ч` `##е` `.` `[SEP]`
81
+
82
+ **Input:** Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.<br/>
83
+ **Tokens**: `[CLS]` `γ` `##ρ` `##η` `##γ` `##ο` `##ρ` `##η` `κ` `##α` `##φ` `##ε` `α` `##λ` `##ε` `##π` `##ου` `π` `##η` `##δ` `##α` `##ε` `##ι` `π` `##α` `##ν` `##ω` `α` `##π` `##ο` `τ` `##ο` `##ν` `τ` `##ε` `##μ` `##π` `##ε` `##λ` `##η` `σ` `##κ` `##υ` `##λ` `##ο` `.` `[SEP]`
84
+
85
+ **Input:** اللغة العربية جميلة وغنية بالتاريخ.<br/>
86
+ **Tokens**: `[CLS]` `ا` `##ل` `##ل` `##غ` `##ة` `ا` `##ل` `##ع` `##ر` `##ب` `##ي` `##ة` `ج` `##م` `##ي` `##ل` `##ة` `و` `##غ` `##ن` `##ي` `##ة` `ب` `##ا` `##ل` `##ت` `##ا` `##ر` `##ي` `##خ` `.` `[SEP]`
87
+
88
+ **Input:** مرحبا بالعالم!<br/>
89
+ **Tokens**: `[CLS]` `م` `##ر` `##ح` `##ب` `##ا` `ب` `##ا` `##ل` `##ع` `##ا` `##ل` `##م` `!` `[SEP]`
90
+
91
+ **Input:** Simplified: 快速的棕色狐狸跳过懒狗。<br/>
92
+ **Tokens**: `[CLS]` `simplified` `:` `[UNK]` `[UNK]` `的` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `。` `[SEP]`
93
+
94
+ **Input:** Traditional: 快速的棕色狐狸跳過懶狗。<br/>
95
+ **Tokens**: `[CLS]` `traditional` `:` `[UNK]` `[UNK]` `的` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `。` `[SEP]`
96
+
97
+ **Input:** 素早い茶色の狐が怠け者の犬を飛び越える。<br/>
98
+ **Tokens**: `[CLS]` `[UNK]` `[UNK]` `い` `[UNK]` `[UNK]` `の` `[UNK]` `か` `[UNK]` `け` `[UNK]` `の` `犬` `を` `[UNK]` `ひ` `[UNK]` `え` `##る` `。` `[SEP]`
99
+
100
+ **Input:** コンピュータープログラミング<br/>
101
+ **Tokens**: `[CLS]` `コ` `##ン` `##ヒ` `##ュ` `##ー` `##タ` `##ー` `##フ` `##ロ` `##ク` `##ラ` `##ミ` `##ン` `##ク` `[SEP]`
102
+
103
+ **Input:** 빠른 갈색 여우가 게으른 개를 뛰어넘습니다.<br/>
104
+ **Tokens**: `[CLS]` `[UNK]` `ᄀ` `##ᅡ` `##ᆯ` `##ᄉ` `##ᅢ` `##ᆨ` `ᄋ` `##ᅧ` `##ᄋ` `##ᅮ` `##ᄀ` `##ᅡ` `ᄀ` `##ᅦ` `##ᄋ` `##ᅳ` `##ᄅ` `##ᅳ` `##ᆫ` `ᄀ` `##ᅢ` `##ᄅ` `##ᅳ` `##ᆯ` `[UNK]` `.` `[SEP]`
105
+
106
+ **Input:** तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।<br/>
107
+ **Tokens**: `[CLS]` `त` `##ज` `भ` `##र` `##ी` `ल` `##ो` `##म` `##ड` `##ी` `आ` `##ल` `##स` `##ी` `क` `##त` `##त` `क` `[UNK]` `क` `##द` `##त` `##ी` `ह` `।` `[SEP]`
108
+
109
+ **Input:** দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।<br/>
110
+ **Tokens**: `[CLS]` `দ` `##র` `##ত` `ব` `##া` `##দ` `##া` `##ম` `##ী` `শ` `##ি` `##য` `##া` `##ল` `অ` `##ল` `##স` `ক` `##ক` `##র` `##ে` `##র` `উ` `##প` `##র` `দ` `##ি` `##য` `##ে` `[UNK]` `দ` `##ে` `##য` `।` `[SEP]`
111
+
112
+ **Input:** வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.<br/>
113
+ **Tokens**: `[CLS]` `வ` `##ே` `##க` `##ம` `##ா` `##ன` `[UNK]` `ந` `##ர` `##ி` `[UNK]` `ந` `##ா` `##ய` `##ி` `##ன` `ம` `##ே` `##ல` `[UNK]` `.` `[SEP]`
114
+
115
+ **Input:** สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.<br/>
116
+ **Tokens**: `[CLS]` `[UNK]` `.` `[SEP]`
117
+
118
+ **Input:** ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።<br/>
119
+ **Tokens**: `[CLS]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[SEP]`
120
+
121
+ **Input:** Hello 世界 مرحبا 🌍<br/>
122
+ **Tokens**: `[CLS]` `hello` `世` `[UNK]` `م` `##ر` `##ح` `##ب` `##ا` `[UNK]` `[SEP]`
123
+
124
+ **Input:** 123, αβγ, абв, العربية, 中文, हिन्दी.<br/>
125
+ **Tokens**: `[CLS]` `123` `,` `α` `##β` `##γ` `,` `а` `##б` `##в` `,` `ا` `##ل` `##ع` `##ر` `##ب` `##ي` `##ة` `,` `中` `文` `,` `ह` `##ि` `##न` `##द` `##ी` `.` `[SEP]`
models/sentence-transformers/static-similarity-mrl-multilingual-v1/README.md ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # [sentence-transformers/static-similarity-mrl-multilingual-v1](https://huggingface.co/sentence-transformers/static-similarity-mrl-multilingual-v1)
2
+
3
+ License: [apache-2.0](https://choosealicense.com/licenses/apache-2.0/)
4
+
5
+ Multilingual similarity embeddings that were trained with Matryoshka loss
6
+ that allows for more effective truncation of the embedding vectors. It
7
+ was trained on multilingual datasets spanning a variety of domains.
8
+
9
+ It's a general purpose model that can be used for semantic textual similarity,
10
+ paraphrase mining, text classification, clustering, and more.
11
+
12
+ ## Model Stats
13
+
14
+ Stats that describe the embeddings tensor shapes and value distribution.
15
+
16
+ | item | metric | value |
17
+ | --------------| ----------------------- | ----- |
18
+ | vocab | size | 105,879 |
19
+ | embedding | dimensions | 1,024 |
20
+ | vector length | mean | 413.61 |
21
+ | vector length | median | 437.74 |
22
+ | vector length | stddev | 195.51 |
23
+ | values | mean | -0.02 |
24
+ | values | median | -0.01 |
25
+ | values | stddev | 14.30 |
26
+
27
+ ## Mean Pooled Quantization Loss
28
+
29
+ This test roundtrips the vectors through quantization, but performs the
30
+ mean pooling arithmetic in float32 space. The quantized and unquantized
31
+ mean pooled vectors are compared to each other to determine their cosine
32
+ similarity, to show how much the meaning of the vector has changed due
33
+ to quantization.
34
+
35
+ | Precision | Cosine Similarity |
36
+ | ------------- | ----------------- |
37
+ | fp16 | 1.00000 |
38
+ | fp8 e4m3 | 0.99980 |
39
+ | fp8 e5m2 | 0.99921 |
40
+
41
+ ## Quantization Loss Per Vector
42
+
43
+ While ultimately the embedding vectors will be mean pooled together, it's
44
+ still useful to look at the loss per-vector in the embedding table to see
45
+ which quantization strategies retain the most vector meaning.
46
+
47
+ - **Cosine Similarity** — measures how well the *direction* of embedding vectors
48
+ is preserved after quantization, independent of scale. This is especially
49
+ relevant when embeddings are used for similarity search or retrieval.
50
+ - **MSE (Mean Squared Error)** — emphasizes large errors by squaring the
51
+ differences. Useful for detecting whether any values are badly distorted.
52
+ - **MAE (Mean Absolute Error)** — the average absolute difference between
53
+ original and quantized values. Easier to interpret, less sensitive to outliers.
54
+
55
+ | Precision | Metric | Value |
56
+ | ------------- | ------ | ----- |
57
+ | fp16 | cosine similarity | 1.00000 |
58
+ | fp8 e4m3 | cosine similarity | 0.99965 |
59
+ | fp8 e5m2 | cosine similarity | 0.99861 |
60
+ | fp16 | MSE | 0.00001 |
61
+ | fp8 e4m3 | MSE | 0.14369 |
62
+ | fp8 e5m2 | MSE | 0.56917 |
63
+ | fp16 | MAE | 0.00183 |
64
+ | fp8 e4m3 | MAE | 0.23372 |
65
+ | fp8 e5m2 | MAE | 0.46585 |
66
+
67
+ ## Tokenizer Examples
68
+
69
+ **Input:** This is an example of encoding<br/>
70
+ **Tokens**: `[CLS]` `this` `is` `an` `example` `of` `en` `##co` `##ding` `[SEP]`
71
+
72
+ **Input:** The quick brown fox jumps over the lazy dog.<br/>
73
+ **Tokens**: `[CLS]` `the` `quick` `brown` `fox` `jump` `##s` `over` `the` `la` `##zy` `dog` `.` `[SEP]`
74
+
75
+ **Input:** Curaçao, naïve fiancé, jalapeño, déjà vu.<br/>
76
+ **Tokens**: `[CLS]` `curacao` `,` `nai` `##ve` `fia` `##nce` `,` `ja` `##lap` `##eno` `,` `deja` `vu` `.` `[SEP]`
77
+
78
+ **Input:** Привет, как дела?<br/>
79
+ **Tokens**: `[CLS]` `при` `##вет` `,` `как` `дела` `?` `[SEP]`
80
+
81
+ **Input:** Бързата кафява лисица прескача мързеливото куче.<br/>
82
+ **Tokens**: `[CLS]` `б` `##ър` `##за` `##та` `ка` `##ф` `##ява` `ли` `##си` `##ца` `пре` `##ска` `##ча` `м` `##ър` `##зе` `##ливо` `##то` `к` `##уч` `##е` `.` `[SEP]`
83
+
84
+ **Input:** Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.<br/>
85
+ **Tokens**: `[CLS]` `γ` `##ρη` `##γο` `##ρη` `κ` `##α` `##φ` `##ε` `α` `##λε` `##που` `π` `##η` `##δα` `##ει` `πανω` `απο` `τον` `τ` `##ε` `##μ` `##πε` `##λη` `σ` `##κ` `##υλο` `.` `[SEP]`
86
+
87
+ **Input:** اللغة العربية جميلة وغنية بالتاريخ.<br/>
88
+ **Tokens**: `[CLS]` `اللغة` `العربية` `ج` `##ميل` `##ة` `و` `##غنية` `با` `##لت` `##اري` `##خ` `.` `[SEP]`
89
+
90
+ **Input:** مرحبا بالعالم!<br/>
91
+ **Tokens**: `[CLS]` `م` `##رح` `##با` `با` `##ل` `##عا` `##لم` `!` `[SEP]`
92
+
93
+ **Input:** Simplified: 快速的棕色狐狸跳过懒狗。<br/>
94
+ **Tokens**: `[CLS]` `simplified` `:` `快` `速` `的` `棕` `色` `狐` `狸` `跳` `过` `懒` `狗` `。` `[SEP]`
95
+
96
+ **Input:** Traditional: 快速的棕色狐狸跳過懶狗。<br/>
97
+ **Tokens**: `[CLS]` `traditional` `:` `快` `速` `的` `棕` `色` `狐` `狸` `跳` `過` `懶` `狗` `。` `[SEP]`
98
+
99
+ **Input:** 素早い茶色の狐が怠け者の犬を飛び越える。<br/>
100
+ **Tokens**: `[CLS]` `素` `早` `い` `茶` `色` `の` `狐` `か` `怠` `け` `者` `の` `犬` `を` `飛` `ひ` `越` `える` `。` `[SEP]`
101
+
102
+ **Input:** コンピュータープログラミング<br/>
103
+ **Tokens**: `[CLS]` `コ` `##ン` `##ヒ` `##ュー` `##ター` `##フロ` `##ク` `##ラ` `##ミ` `##ンク` `[SEP]`
104
+
105
+ **Input:** 빠른 갈색 여우가 게으른 개를 뛰어넘습니다.<br/>
106
+ **Tokens**: `[CLS]` `ᄈ` `##ᅡ른` `가` `##ᆯ` `##색` `ᄋ` `##ᅧ` `##우` `##가` `ᄀ` `##ᅦ` `##ᄋ` `##ᅳ` `##른` `ᄀ` `##ᅢ를` `ᄄ` `##ᅱ` `##어` `##너` `##ᆷ` `##스` `##ᆸ니다` `.` `[SEP]`
107
+
108
+ **Input:** तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।<br/>
109
+ **Tokens**: `[CLS]` `त` `##ज` `भर` `##ी` `ल` `##ो` `##म` `##डी` `आल` `##सी` `क` `##तत` `क` `ऊपर` `क` `##द` `##ती` `ह` `।` `[SEP]`
110
+
111
+ **Input:** দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।<br/>
112
+ **Tokens**: `[CLS]` `দ` `##রত` `বা` `##দা` `##মী` `শ` `##িযা` `##ল` `অ` `##ল` `##স` `ক` `##কর` `##ের` `উপর` `দিযে` `ল` `##া` `##ফ` `দেয` `।` `[SEP]`
113
+
114
+ **Input:** வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.<br/>
115
+ **Tokens**: `[CLS]` `வ` `##ே` `##கம` `##ான` `ப` `##ழு` `##பபு` `நர` `##ி` `ச` `##ோ` `##ம` `##ப` `##ே` `##றி` `ந` `##ாய` `##ின` `மேல` `க` `##ு` `##தி` `##ககிறது` `.` `[SEP]`
116
+
117
+ **Input:** สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.<br/>
118
+ **Tokens**: `[CLS]` `[UNK]` `.` `[SEP]`
119
+
120
+ **Input:** ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።<br/>
121
+ **Tokens**: `[CLS]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[UNK]` `[SEP]`
122
+
123
+ **Input:** Hello 世界 مرحبا 🌍<br/>
124
+ **Tokens**: `[CLS]` `hello` `世` `界` `م` `##رح` `##با` `[UNK]` `[SEP]`
125
+
126
+ **Input:** 123, αβγ, абв, العربية, 中文, हिन्दी.<br/>
127
+ **Tokens**: `[CLS]` `123` `,` `α` `##β` `##γ` `,` `аб` `##в` `,` `العربية` `,` `中` `文` `,` `हिनदी` `.` `[SEP]`
multilingual.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from tokenizers import Encoding, Tokenizer
3
+ from torch.nn import EmbeddingBag
4
+ import torch
5
+
6
+
7
def test_tokenizer():
    """Print the tokenization of a fixed set of multilingual sample sentences.

    The tokenizer is loaded from the relative path ``js/tokenizer.json``. For
    every sample the function prints the raw text, the list of tokens produced
    by the tokenizer, and then an empty line. Nothing is returned and nothing
    is asserted; the output is meant to be inspected by a human.
    """
    # Samples cover Latin (with diacritics), Cyrillic, Greek, Arabic, CJK,
    # Hangul, several Indic scripts, Thai, Ethiopic, emoji and mixed scripts.
    samples = (
        "This is an example of encoding",
        "The quick brown fox jumps over the lazy dog.",
        "Curaçao, naïve fiancé, jalapeño, déjà vu.",
        "Привет, как дела?",
        "Бързата кафява лисица прескача мързеливото куче.",
        "Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.",
        "اللغة العربية جميلة وغنية بالتاريخ.",
        "مرحبا بالعالم!",
        "Simplified: 快速的棕色狐狸跳过懒狗。",
        "Traditional: 快速的棕色狐狸跳過懶狗。",
        "素早い茶色の狐が怠け者の犬を飛び越える。",
        "コンピュータープログラミング",
        "빠른 갈색 여우가 게으른 개를 뛰어넘습니다.",
        "तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।",
        "দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।",
        "வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.",
        "สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.",
        "ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።",
        "Hello 世界 مرحبا 🌍",
        "123, αβγ, абв, العربية, 中文, हिन्दी.",
    )

    tok: Tokenizer = Tokenizer.from_file("js/tokenizer.json")

    for text in samples:
        encoded: Encoding = tok.encode(text)
        # One call emits the same three lines: the text, its tokens, a blank.
        print(text, encoded.tokens, "", sep="\n")
38
+
39
+
40
# Model card:
# https://huggingface.co/sentence-transformers/static-similarity-mrl-multilingual-v1
model = SentenceTransformer(
    "sentence-transformers/static-similarity-mrl-multilingual-v1",
    device="cpu",
)

# The embedding table is read from the `embedding` attribute of the model's
# first module.
embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
embeddings = torch.Tensor(embedding_bag.weight)

print(embeddings.shape)
assert embeddings.shape == torch.Size([105879, 1024])

# Report how large the table would be, in MiB, for each storage precision at
# the full width and at two truncated widths.
for precision_name, bytes_per_value in (("float32", 4), ("float16", 2)):
    print(precision_name)
    for dim in (1024, 512, 256):
        size_mib = embeddings.shape[0] * dim * bytes_per_value / 1024 / 1024
        print(f" {dim} dim - {size_mib:,.1f} MiB")
package-lock.json ADDED
@@ -0,0 +1,1067 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "js",
3
+ "version": "1.0.0",
4
+ "lockfileVersion": 3,
5
+ "requires": true,
6
+ "packages": {
7
+ "": {
8
+ "name": "js",
9
+ "version": "1.0.0",
10
+ "license": "ISC",
11
+ "dependencies": {
12
+ "@huggingface/transformers": "^3.7.2"
13
+ }
14
+ },
15
+ "node_modules/@emnapi/runtime": {
16
+ "version": "1.5.0",
17
+ "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.5.0.tgz",
18
+ "integrity": "sha512-97/BJ3iXHww3djw6hYIfErCZFee7qCtrneuLa20UXFCOTCfBM2cvQHjWJ2EG0s0MtdNwInarqCTz35i4wWXHsQ==",
19
+ "license": "MIT",
20
+ "optional": true,
21
+ "dependencies": {
22
+ "tslib": "^2.4.0"
23
+ }
24
+ },
25
+ "node_modules/@huggingface/jinja": {
26
+ "version": "0.5.1",
27
+ "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.1.tgz",
28
+ "integrity": "sha512-yUZLld4lrM9iFxHCwFQ7D1HW2MWMwSbeB7WzWqFYDWK+rEb+WldkLdAJxUPOmgICMHZLzZGVcVjFh3w/YGubng==",
29
+ "license": "MIT",
30
+ "engines": {
31
+ "node": ">=18"
32
+ }
33
+ },
34
+ "node_modules/@huggingface/transformers": {
35
+ "version": "3.7.2",
36
+ "resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-3.7.2.tgz",
37
+ "integrity": "sha512-6SOxo6XziupnQ5Vs5vbbs74CNB6ViHLHGQJjY6zj88JeiDtJ2d/ADKxaay688Sf2KcjtdF3dyBL11C5pJS2NxQ==",
38
+ "license": "Apache-2.0",
39
+ "dependencies": {
40
+ "@huggingface/jinja": "^0.5.1",
41
+ "onnxruntime-node": "1.21.0",
42
+ "onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4",
43
+ "sharp": "^0.34.1"
44
+ }
45
+ },
46
+ "node_modules/@img/sharp-darwin-arm64": {
47
+ "version": "0.34.3",
48
+ "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.3.tgz",
49
+ "integrity": "sha512-ryFMfvxxpQRsgZJqBd4wsttYQbCxsJksrv9Lw/v798JcQ8+w84mBWuXwl+TT0WJ/WrYOLaYpwQXi3sA9nTIaIg==",
50
+ "cpu": [
51
+ "arm64"
52
+ ],
53
+ "license": "Apache-2.0",
54
+ "optional": true,
55
+ "os": [
56
+ "darwin"
57
+ ],
58
+ "engines": {
59
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
60
+ },
61
+ "funding": {
62
+ "url": "https://opencollective.com/libvips"
63
+ },
64
+ "optionalDependencies": {
65
+ "@img/sharp-libvips-darwin-arm64": "1.2.0"
66
+ }
67
+ },
68
+ "node_modules/@img/sharp-darwin-x64": {
69
+ "version": "0.34.3",
70
+ "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.3.tgz",
71
+ "integrity": "sha512-yHpJYynROAj12TA6qil58hmPmAwxKKC7reUqtGLzsOHfP7/rniNGTL8tjWX6L3CTV4+5P4ypcS7Pp+7OB+8ihA==",
72
+ "cpu": [
73
+ "x64"
74
+ ],
75
+ "license": "Apache-2.0",
76
+ "optional": true,
77
+ "os": [
78
+ "darwin"
79
+ ],
80
+ "engines": {
81
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
82
+ },
83
+ "funding": {
84
+ "url": "https://opencollective.com/libvips"
85
+ },
86
+ "optionalDependencies": {
87
+ "@img/sharp-libvips-darwin-x64": "1.2.0"
88
+ }
89
+ },
90
+ "node_modules/@img/sharp-libvips-darwin-arm64": {
91
+ "version": "1.2.0",
92
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.0.tgz",
93
+ "integrity": "sha512-sBZmpwmxqwlqG9ueWFXtockhsxefaV6O84BMOrhtg/YqbTaRdqDE7hxraVE3y6gVM4eExmfzW4a8el9ArLeEiQ==",
94
+ "cpu": [
95
+ "arm64"
96
+ ],
97
+ "license": "LGPL-3.0-or-later",
98
+ "optional": true,
99
+ "os": [
100
+ "darwin"
101
+ ],
102
+ "funding": {
103
+ "url": "https://opencollective.com/libvips"
104
+ }
105
+ },
106
+ "node_modules/@img/sharp-libvips-darwin-x64": {
107
+ "version": "1.2.0",
108
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.0.tgz",
109
+ "integrity": "sha512-M64XVuL94OgiNHa5/m2YvEQI5q2cl9d/wk0qFTDVXcYzi43lxuiFTftMR1tOnFQovVXNZJ5TURSDK2pNe9Yzqg==",
110
+ "cpu": [
111
+ "x64"
112
+ ],
113
+ "license": "LGPL-3.0-or-later",
114
+ "optional": true,
115
+ "os": [
116
+ "darwin"
117
+ ],
118
+ "funding": {
119
+ "url": "https://opencollective.com/libvips"
120
+ }
121
+ },
122
+ "node_modules/@img/sharp-libvips-linux-arm": {
123
+ "version": "1.2.0",
124
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.0.tgz",
125
+ "integrity": "sha512-mWd2uWvDtL/nvIzThLq3fr2nnGfyr/XMXlq8ZJ9WMR6PXijHlC3ksp0IpuhK6bougvQrchUAfzRLnbsen0Cqvw==",
126
+ "cpu": [
127
+ "arm"
128
+ ],
129
+ "license": "LGPL-3.0-or-later",
130
+ "optional": true,
131
+ "os": [
132
+ "linux"
133
+ ],
134
+ "funding": {
135
+ "url": "https://opencollective.com/libvips"
136
+ }
137
+ },
138
+ "node_modules/@img/sharp-libvips-linux-arm64": {
139
+ "version": "1.2.0",
140
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.0.tgz",
141
+ "integrity": "sha512-RXwd0CgG+uPRX5YYrkzKyalt2OJYRiJQ8ED/fi1tq9WQW2jsQIn0tqrlR5l5dr/rjqq6AHAxURhj2DVjyQWSOA==",
142
+ "cpu": [
143
+ "arm64"
144
+ ],
145
+ "license": "LGPL-3.0-or-later",
146
+ "optional": true,
147
+ "os": [
148
+ "linux"
149
+ ],
150
+ "funding": {
151
+ "url": "https://opencollective.com/libvips"
152
+ }
153
+ },
154
+ "node_modules/@img/sharp-libvips-linux-ppc64": {
155
+ "version": "1.2.0",
156
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.0.tgz",
157
+ "integrity": "sha512-Xod/7KaDDHkYu2phxxfeEPXfVXFKx70EAFZ0qyUdOjCcxbjqyJOEUpDe6RIyaunGxT34Anf9ue/wuWOqBW2WcQ==",
158
+ "cpu": [
159
+ "ppc64"
160
+ ],
161
+ "license": "LGPL-3.0-or-later",
162
+ "optional": true,
163
+ "os": [
164
+ "linux"
165
+ ],
166
+ "funding": {
167
+ "url": "https://opencollective.com/libvips"
168
+ }
169
+ },
170
+ "node_modules/@img/sharp-libvips-linux-s390x": {
171
+ "version": "1.2.0",
172
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.0.tgz",
173
+ "integrity": "sha512-eMKfzDxLGT8mnmPJTNMcjfO33fLiTDsrMlUVcp6b96ETbnJmd4uvZxVJSKPQfS+odwfVaGifhsB07J1LynFehw==",
174
+ "cpu": [
175
+ "s390x"
176
+ ],
177
+ "license": "LGPL-3.0-or-later",
178
+ "optional": true,
179
+ "os": [
180
+ "linux"
181
+ ],
182
+ "funding": {
183
+ "url": "https://opencollective.com/libvips"
184
+ }
185
+ },
186
+ "node_modules/@img/sharp-libvips-linux-x64": {
187
+ "version": "1.2.0",
188
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.0.tgz",
189
+ "integrity": "sha512-ZW3FPWIc7K1sH9E3nxIGB3y3dZkpJlMnkk7z5tu1nSkBoCgw2nSRTFHI5pB/3CQaJM0pdzMF3paf9ckKMSE9Tg==",
190
+ "cpu": [
191
+ "x64"
192
+ ],
193
+ "license": "LGPL-3.0-or-later",
194
+ "optional": true,
195
+ "os": [
196
+ "linux"
197
+ ],
198
+ "funding": {
199
+ "url": "https://opencollective.com/libvips"
200
+ }
201
+ },
202
+ "node_modules/@img/sharp-libvips-linuxmusl-arm64": {
203
+ "version": "1.2.0",
204
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.0.tgz",
205
+ "integrity": "sha512-UG+LqQJbf5VJ8NWJ5Z3tdIe/HXjuIdo4JeVNADXBFuG7z9zjoegpzzGIyV5zQKi4zaJjnAd2+g2nna8TZvuW9Q==",
206
+ "cpu": [
207
+ "arm64"
208
+ ],
209
+ "license": "LGPL-3.0-or-later",
210
+ "optional": true,
211
+ "os": [
212
+ "linux"
213
+ ],
214
+ "funding": {
215
+ "url": "https://opencollective.com/libvips"
216
+ }
217
+ },
218
+ "node_modules/@img/sharp-libvips-linuxmusl-x64": {
219
+ "version": "1.2.0",
220
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.0.tgz",
221
+ "integrity": "sha512-SRYOLR7CXPgNze8akZwjoGBoN1ThNZoqpOgfnOxmWsklTGVfJiGJoC/Lod7aNMGA1jSsKWM1+HRX43OP6p9+6Q==",
222
+ "cpu": [
223
+ "x64"
224
+ ],
225
+ "license": "LGPL-3.0-or-later",
226
+ "optional": true,
227
+ "os": [
228
+ "linux"
229
+ ],
230
+ "funding": {
231
+ "url": "https://opencollective.com/libvips"
232
+ }
233
+ },
234
+ "node_modules/@img/sharp-linux-arm": {
235
+ "version": "0.34.3",
236
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.3.tgz",
237
+ "integrity": "sha512-oBK9l+h6KBN0i3dC8rYntLiVfW8D8wH+NPNT3O/WBHeW0OQWCjfWksLUaPidsrDKpJgXp3G3/hkmhptAW0I3+A==",
238
+ "cpu": [
239
+ "arm"
240
+ ],
241
+ "license": "Apache-2.0",
242
+ "optional": true,
243
+ "os": [
244
+ "linux"
245
+ ],
246
+ "engines": {
247
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
248
+ },
249
+ "funding": {
250
+ "url": "https://opencollective.com/libvips"
251
+ },
252
+ "optionalDependencies": {
253
+ "@img/sharp-libvips-linux-arm": "1.2.0"
254
+ }
255
+ },
256
+ "node_modules/@img/sharp-linux-arm64": {
257
+ "version": "0.34.3",
258
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.3.tgz",
259
+ "integrity": "sha512-QdrKe3EvQrqwkDrtuTIjI0bu6YEJHTgEeqdzI3uWJOH6G1O8Nl1iEeVYRGdj1h5I21CqxSvQp1Yv7xeU3ZewbA==",
260
+ "cpu": [
261
+ "arm64"
262
+ ],
263
+ "license": "Apache-2.0",
264
+ "optional": true,
265
+ "os": [
266
+ "linux"
267
+ ],
268
+ "engines": {
269
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
270
+ },
271
+ "funding": {
272
+ "url": "https://opencollective.com/libvips"
273
+ },
274
+ "optionalDependencies": {
275
+ "@img/sharp-libvips-linux-arm64": "1.2.0"
276
+ }
277
+ },
278
+ "node_modules/@img/sharp-linux-ppc64": {
279
+ "version": "0.34.3",
280
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.3.tgz",
281
+ "integrity": "sha512-GLtbLQMCNC5nxuImPR2+RgrviwKwVql28FWZIW1zWruy6zLgA5/x2ZXk3mxj58X/tszVF69KK0Is83V8YgWhLA==",
282
+ "cpu": [
283
+ "ppc64"
284
+ ],
285
+ "license": "Apache-2.0",
286
+ "optional": true,
287
+ "os": [
288
+ "linux"
289
+ ],
290
+ "engines": {
291
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
292
+ },
293
+ "funding": {
294
+ "url": "https://opencollective.com/libvips"
295
+ },
296
+ "optionalDependencies": {
297
+ "@img/sharp-libvips-linux-ppc64": "1.2.0"
298
+ }
299
+ },
300
+ "node_modules/@img/sharp-linux-s390x": {
301
+ "version": "0.34.3",
302
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.3.tgz",
303
+ "integrity": "sha512-3gahT+A6c4cdc2edhsLHmIOXMb17ltffJlxR0aC2VPZfwKoTGZec6u5GrFgdR7ciJSsHT27BD3TIuGcuRT0KmQ==",
304
+ "cpu": [
305
+ "s390x"
306
+ ],
307
+ "license": "Apache-2.0",
308
+ "optional": true,
309
+ "os": [
310
+ "linux"
311
+ ],
312
+ "engines": {
313
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
314
+ },
315
+ "funding": {
316
+ "url": "https://opencollective.com/libvips"
317
+ },
318
+ "optionalDependencies": {
319
+ "@img/sharp-libvips-linux-s390x": "1.2.0"
320
+ }
321
+ },
322
+ "node_modules/@img/sharp-linux-x64": {
323
+ "version": "0.34.3",
324
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.3.tgz",
325
+ "integrity": "sha512-8kYso8d806ypnSq3/Ly0QEw90V5ZoHh10yH0HnrzOCr6DKAPI6QVHvwleqMkVQ0m+fc7EH8ah0BB0QPuWY6zJQ==",
326
+ "cpu": [
327
+ "x64"
328
+ ],
329
+ "license": "Apache-2.0",
330
+ "optional": true,
331
+ "os": [
332
+ "linux"
333
+ ],
334
+ "engines": {
335
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
336
+ },
337
+ "funding": {
338
+ "url": "https://opencollective.com/libvips"
339
+ },
340
+ "optionalDependencies": {
341
+ "@img/sharp-libvips-linux-x64": "1.2.0"
342
+ }
343
+ },
344
+ "node_modules/@img/sharp-linuxmusl-arm64": {
345
+ "version": "0.34.3",
346
+ "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.3.tgz",
347
+ "integrity": "sha512-vAjbHDlr4izEiXM1OTggpCcPg9tn4YriK5vAjowJsHwdBIdx0fYRsURkxLG2RLm9gyBq66gwtWI8Gx0/ov+JKQ==",
348
+ "cpu": [
349
+ "arm64"
350
+ ],
351
+ "license": "Apache-2.0",
352
+ "optional": true,
353
+ "os": [
354
+ "linux"
355
+ ],
356
+ "engines": {
357
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
358
+ },
359
+ "funding": {
360
+ "url": "https://opencollective.com/libvips"
361
+ },
362
+ "optionalDependencies": {
363
+ "@img/sharp-libvips-linuxmusl-arm64": "1.2.0"
364
+ }
365
+ },
366
+ "node_modules/@img/sharp-linuxmusl-x64": {
367
+ "version": "0.34.3",
368
+ "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.3.tgz",
369
+ "integrity": "sha512-gCWUn9547K5bwvOn9l5XGAEjVTTRji4aPTqLzGXHvIr6bIDZKNTA34seMPgM0WmSf+RYBH411VavCejp3PkOeQ==",
370
+ "cpu": [
371
+ "x64"
372
+ ],
373
+ "license": "Apache-2.0",
374
+ "optional": true,
375
+ "os": [
376
+ "linux"
377
+ ],
378
+ "engines": {
379
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
380
+ },
381
+ "funding": {
382
+ "url": "https://opencollective.com/libvips"
383
+ },
384
+ "optionalDependencies": {
385
+ "@img/sharp-libvips-linuxmusl-x64": "1.2.0"
386
+ }
387
+ },
388
+ "node_modules/@img/sharp-wasm32": {
389
+ "version": "0.34.3",
390
+ "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.3.tgz",
391
+ "integrity": "sha512-+CyRcpagHMGteySaWos8IbnXcHgfDn7pO2fiC2slJxvNq9gDipYBN42/RagzctVRKgxATmfqOSulgZv5e1RdMg==",
392
+ "cpu": [
393
+ "wasm32"
394
+ ],
395
+ "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
396
+ "optional": true,
397
+ "dependencies": {
398
+ "@emnapi/runtime": "^1.4.4"
399
+ },
400
+ "engines": {
401
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
402
+ },
403
+ "funding": {
404
+ "url": "https://opencollective.com/libvips"
405
+ }
406
+ },
407
+ "node_modules/@img/sharp-win32-arm64": {
408
+ "version": "0.34.3",
409
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.3.tgz",
410
+ "integrity": "sha512-MjnHPnbqMXNC2UgeLJtX4XqoVHHlZNd+nPt1kRPmj63wURegwBhZlApELdtxM2OIZDRv/DFtLcNhVbd1z8GYXQ==",
411
+ "cpu": [
412
+ "arm64"
413
+ ],
414
+ "license": "Apache-2.0 AND LGPL-3.0-or-later",
415
+ "optional": true,
416
+ "os": [
417
+ "win32"
418
+ ],
419
+ "engines": {
420
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
421
+ },
422
+ "funding": {
423
+ "url": "https://opencollective.com/libvips"
424
+ }
425
+ },
426
+ "node_modules/@img/sharp-win32-ia32": {
427
+ "version": "0.34.3",
428
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.3.tgz",
429
+ "integrity": "sha512-xuCdhH44WxuXgOM714hn4amodJMZl3OEvf0GVTm0BEyMeA2to+8HEdRPShH0SLYptJY1uBw+SCFP9WVQi1Q/cw==",
430
+ "cpu": [
431
+ "ia32"
432
+ ],
433
+ "license": "Apache-2.0 AND LGPL-3.0-or-later",
434
+ "optional": true,
435
+ "os": [
436
+ "win32"
437
+ ],
438
+ "engines": {
439
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
440
+ },
441
+ "funding": {
442
+ "url": "https://opencollective.com/libvips"
443
+ }
444
+ },
445
+ "node_modules/@img/sharp-win32-x64": {
446
+ "version": "0.34.3",
447
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.3.tgz",
448
+ "integrity": "sha512-OWwz05d++TxzLEv4VnsTz5CmZ6mI6S05sfQGEMrNrQcOEERbX46332IvE7pO/EUiw7jUrrS40z/M7kPyjfl04g==",
449
+ "cpu": [
450
+ "x64"
451
+ ],
452
+ "license": "Apache-2.0 AND LGPL-3.0-or-later",
453
+ "optional": true,
454
+ "os": [
455
+ "win32"
456
+ ],
457
+ "engines": {
458
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
459
+ },
460
+ "funding": {
461
+ "url": "https://opencollective.com/libvips"
462
+ }
463
+ },
464
+ "node_modules/@isaacs/fs-minipass": {
465
+ "version": "4.0.1",
466
+ "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
467
+ "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==",
468
+ "license": "ISC",
469
+ "dependencies": {
470
+ "minipass": "^7.0.4"
471
+ },
472
+ "engines": {
473
+ "node": ">=18.0.0"
474
+ }
475
+ },
476
+ "node_modules/@protobufjs/aspromise": {
477
+ "version": "1.1.2",
478
+ "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
479
+ "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
480
+ "license": "BSD-3-Clause"
481
+ },
482
+ "node_modules/@protobufjs/base64": {
483
+ "version": "1.1.2",
484
+ "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
485
+ "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
486
+ "license": "BSD-3-Clause"
487
+ },
488
+ "node_modules/@protobufjs/codegen": {
489
+ "version": "2.0.4",
490
+ "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
491
+ "integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
492
+ "license": "BSD-3-Clause"
493
+ },
494
+ "node_modules/@protobufjs/eventemitter": {
495
+ "version": "1.1.0",
496
+ "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
497
+ "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
498
+ "license": "BSD-3-Clause"
499
+ },
500
+ "node_modules/@protobufjs/fetch": {
501
+ "version": "1.1.0",
502
+ "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
503
+ "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
504
+ "license": "BSD-3-Clause",
505
+ "dependencies": {
506
+ "@protobufjs/aspromise": "^1.1.1",
507
+ "@protobufjs/inquire": "^1.1.0"
508
+ }
509
+ },
510
+ "node_modules/@protobufjs/float": {
511
+ "version": "1.0.2",
512
+ "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
513
+ "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
514
+ "license": "BSD-3-Clause"
515
+ },
516
+ "node_modules/@protobufjs/inquire": {
517
+ "version": "1.1.0",
518
+ "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
519
+ "integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
520
+ "license": "BSD-3-Clause"
521
+ },
522
+ "node_modules/@protobufjs/path": {
523
+ "version": "1.1.2",
524
+ "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
525
+ "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
526
+ "license": "BSD-3-Clause"
527
+ },
528
+ "node_modules/@protobufjs/pool": {
529
+ "version": "1.1.0",
530
+ "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
531
+ "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
532
+ "license": "BSD-3-Clause"
533
+ },
534
+ "node_modules/@protobufjs/utf8": {
535
+ "version": "1.1.0",
536
+ "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
537
+ "integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
538
+ "license": "BSD-3-Clause"
539
+ },
540
+ "node_modules/@types/node": {
541
+ "version": "24.3.1",
542
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-24.3.1.tgz",
543
+ "integrity": "sha512-3vXmQDXy+woz+gnrTvuvNrPzekOi+Ds0ReMxw0LzBiK3a+1k0kQn9f2NWk+lgD4rJehFUmYy2gMhJ2ZI+7YP9g==",
544
+ "license": "MIT",
545
+ "dependencies": {
546
+ "undici-types": "~7.10.0"
547
+ }
548
+ },
549
+ "node_modules/boolean": {
550
+ "version": "3.2.0",
551
+ "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz",
552
+ "integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==",
553
+ "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info.",
554
+ "license": "MIT"
555
+ },
556
+ "node_modules/chownr": {
557
+ "version": "3.0.0",
558
+ "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
559
+ "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
560
+ "license": "BlueOak-1.0.0",
561
+ "engines": {
562
+ "node": ">=18"
563
+ }
564
+ },
565
+ "node_modules/color": {
566
+ "version": "4.2.3",
567
+ "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
568
+ "integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==",
569
+ "license": "MIT",
570
+ "dependencies": {
571
+ "color-convert": "^2.0.1",
572
+ "color-string": "^1.9.0"
573
+ },
574
+ "engines": {
575
+ "node": ">=12.5.0"
576
+ }
577
+ },
578
+ "node_modules/color-convert": {
579
+ "version": "2.0.1",
580
+ "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
581
+ "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
582
+ "license": "MIT",
583
+ "dependencies": {
584
+ "color-name": "~1.1.4"
585
+ },
586
+ "engines": {
587
+ "node": ">=7.0.0"
588
+ }
589
+ },
590
+ "node_modules/color-name": {
591
+ "version": "1.1.4",
592
+ "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
593
+ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
594
+ "license": "MIT"
595
+ },
596
+ "node_modules/color-string": {
597
+ "version": "1.9.1",
598
+ "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz",
599
+ "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==",
600
+ "license": "MIT",
601
+ "dependencies": {
602
+ "color-name": "^1.0.0",
603
+ "simple-swizzle": "^0.2.2"
604
+ }
605
+ },
606
+ "node_modules/define-data-property": {
607
+ "version": "1.1.4",
608
+ "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
609
+ "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==",
610
+ "license": "MIT",
611
+ "dependencies": {
612
+ "es-define-property": "^1.0.0",
613
+ "es-errors": "^1.3.0",
614
+ "gopd": "^1.0.1"
615
+ },
616
+ "engines": {
617
+ "node": ">= 0.4"
618
+ },
619
+ "funding": {
620
+ "url": "https://github.com/sponsors/ljharb"
621
+ }
622
+ },
623
+ "node_modules/define-properties": {
624
+ "version": "1.2.1",
625
+ "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz",
626
+ "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==",
627
+ "license": "MIT",
628
+ "dependencies": {
629
+ "define-data-property": "^1.0.1",
630
+ "has-property-descriptors": "^1.0.0",
631
+ "object-keys": "^1.1.1"
632
+ },
633
+ "engines": {
634
+ "node": ">= 0.4"
635
+ },
636
+ "funding": {
637
+ "url": "https://github.com/sponsors/ljharb"
638
+ }
639
+ },
640
+ "node_modules/detect-libc": {
641
+ "version": "2.0.4",
642
+ "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.4.tgz",
643
+ "integrity": "sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==",
644
+ "license": "Apache-2.0",
645
+ "engines": {
646
+ "node": ">=8"
647
+ }
648
+ },
649
+ "node_modules/detect-node": {
650
+ "version": "2.1.0",
651
+ "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz",
652
+ "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==",
653
+ "license": "MIT"
654
+ },
655
+ "node_modules/es-define-property": {
656
+ "version": "1.0.1",
657
+ "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
658
+ "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
659
+ "license": "MIT",
660
+ "engines": {
661
+ "node": ">= 0.4"
662
+ }
663
+ },
664
+ "node_modules/es-errors": {
665
+ "version": "1.3.0",
666
+ "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
667
+ "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
668
+ "license": "MIT",
669
+ "engines": {
670
+ "node": ">= 0.4"
671
+ }
672
+ },
673
+ "node_modules/es6-error": {
674
+ "version": "4.1.1",
675
+ "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz",
676
+ "integrity": "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==",
677
+ "license": "MIT"
678
+ },
679
+ "node_modules/escape-string-regexp": {
680
+ "version": "4.0.0",
681
+ "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
682
+ "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==",
683
+ "license": "MIT",
684
+ "engines": {
685
+ "node": ">=10"
686
+ },
687
+ "funding": {
688
+ "url": "https://github.com/sponsors/sindresorhus"
689
+ }
690
+ },
691
+ "node_modules/flatbuffers": {
692
+ "version": "25.2.10",
693
+ "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-25.2.10.tgz",
694
+ "integrity": "sha512-7JlN9ZvLDG1McO3kbX0k4v+SUAg48L1rIwEvN6ZQl/eCtgJz9UylTMzE9wrmYrcorgxm3CX/3T/w5VAub99UUw==",
695
+ "license": "Apache-2.0"
696
+ },
697
+ "node_modules/global-agent": {
698
+ "version": "3.0.0",
699
+ "resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz",
700
+ "integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==",
701
+ "license": "BSD-3-Clause",
702
+ "dependencies": {
703
+ "boolean": "^3.0.1",
704
+ "es6-error": "^4.1.1",
705
+ "matcher": "^3.0.0",
706
+ "roarr": "^2.15.3",
707
+ "semver": "^7.3.2",
708
+ "serialize-error": "^7.0.1"
709
+ },
710
+ "engines": {
711
+ "node": ">=10.0"
712
+ }
713
+ },
714
+ "node_modules/globalthis": {
715
+ "version": "1.0.4",
716
+ "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz",
717
+ "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==",
718
+ "license": "MIT",
719
+ "dependencies": {
720
+ "define-properties": "^1.2.1",
721
+ "gopd": "^1.0.1"
722
+ },
723
+ "engines": {
724
+ "node": ">= 0.4"
725
+ },
726
+ "funding": {
727
+ "url": "https://github.com/sponsors/ljharb"
728
+ }
729
+ },
730
+ "node_modules/gopd": {
731
+ "version": "1.2.0",
732
+ "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
733
+ "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
734
+ "license": "MIT",
735
+ "engines": {
736
+ "node": ">= 0.4"
737
+ },
738
+ "funding": {
739
+ "url": "https://github.com/sponsors/ljharb"
740
+ }
741
+ },
742
+ "node_modules/guid-typescript": {
743
+ "version": "1.0.9",
744
+ "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
745
+ "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
746
+ "license": "ISC"
747
+ },
748
+ "node_modules/has-property-descriptors": {
749
+ "version": "1.0.2",
750
+ "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
751
+ "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==",
752
+ "license": "MIT",
753
+ "dependencies": {
754
+ "es-define-property": "^1.0.0"
755
+ },
756
+ "funding": {
757
+ "url": "https://github.com/sponsors/ljharb"
758
+ }
759
+ },
760
+ "node_modules/is-arrayish": {
761
+ "version": "0.3.2",
762
+ "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
763
+ "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==",
764
+ "license": "MIT"
765
+ },
766
+ "node_modules/json-stringify-safe": {
767
+ "version": "5.0.1",
768
+ "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
769
+ "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
770
+ "license": "ISC"
771
+ },
772
+ "node_modules/long": {
773
+ "version": "5.3.2",
774
+ "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
775
+ "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
776
+ "license": "Apache-2.0"
777
+ },
778
+ "node_modules/matcher": {
779
+ "version": "3.0.0",
780
+ "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz",
781
+ "integrity": "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==",
782
+ "license": "MIT",
783
+ "dependencies": {
784
+ "escape-string-regexp": "^4.0.0"
785
+ },
786
+ "engines": {
787
+ "node": ">=10"
788
+ }
789
+ },
790
+ "node_modules/minipass": {
791
+ "version": "7.1.2",
792
+ "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz",
793
+ "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==",
794
+ "license": "ISC",
795
+ "engines": {
796
+ "node": ">=16 || 14 >=14.17"
797
+ }
798
+ },
799
+ "node_modules/minizlib": {
800
+ "version": "3.0.2",
801
+ "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.2.tgz",
802
+ "integrity": "sha512-oG62iEk+CYt5Xj2YqI5Xi9xWUeZhDI8jjQmC5oThVH5JGCTgIjr7ciJDzC7MBzYd//WvR1OTmP5Q38Q8ShQtVA==",
803
+ "license": "MIT",
804
+ "dependencies": {
805
+ "minipass": "^7.1.2"
806
+ },
807
+ "engines": {
808
+ "node": ">= 18"
809
+ }
810
+ },
811
+ "node_modules/mkdirp": {
812
+ "version": "3.0.1",
813
+ "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
814
+ "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
815
+ "license": "MIT",
816
+ "bin": {
817
+ "mkdirp": "dist/cjs/src/bin.js"
818
+ },
819
+ "engines": {
820
+ "node": ">=10"
821
+ },
822
+ "funding": {
823
+ "url": "https://github.com/sponsors/isaacs"
824
+ }
825
+ },
826
+ "node_modules/object-keys": {
827
+ "version": "1.1.1",
828
+ "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
829
+ "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==",
830
+ "license": "MIT",
831
+ "engines": {
832
+ "node": ">= 0.4"
833
+ }
834
+ },
835
+ "node_modules/onnxruntime-common": {
836
+ "version": "1.21.0",
837
+ "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.21.0.tgz",
838
+ "integrity": "sha512-Q632iLLrtCAVOTO65dh2+mNbQir/QNTVBG3h/QdZBpns7mZ0RYbLRBgGABPbpU9351AgYy7SJf1WaeVwMrBFPQ==",
839
+ "license": "MIT"
840
+ },
841
+ "node_modules/onnxruntime-node": {
842
+ "version": "1.21.0",
843
+ "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.21.0.tgz",
844
+ "integrity": "sha512-NeaCX6WW2L8cRCSqy3bInlo5ojjQqu2fD3D+9W5qb5irwxhEyWKXeH2vZ8W9r6VxaMPUan+4/7NDwZMtouZxEw==",
845
+ "hasInstallScript": true,
846
+ "license": "MIT",
847
+ "os": [
848
+ "win32",
849
+ "darwin",
850
+ "linux"
851
+ ],
852
+ "dependencies": {
853
+ "global-agent": "^3.0.0",
854
+ "onnxruntime-common": "1.21.0",
855
+ "tar": "^7.0.1"
856
+ }
857
+ },
858
+ "node_modules/onnxruntime-web": {
859
+ "version": "1.22.0-dev.20250409-89f8206ba4",
860
+ "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.22.0-dev.20250409-89f8206ba4.tgz",
861
+ "integrity": "sha512-0uS76OPgH0hWCPrFKlL8kYVV7ckM7t/36HfbgoFw6Nd0CZVVbQC4PkrR8mBX8LtNUFZO25IQBqV2Hx2ho3FlbQ==",
862
+ "license": "MIT",
863
+ "dependencies": {
864
+ "flatbuffers": "^25.1.24",
865
+ "guid-typescript": "^1.0.9",
866
+ "long": "^5.2.3",
867
+ "onnxruntime-common": "1.22.0-dev.20250409-89f8206ba4",
868
+ "platform": "^1.3.6",
869
+ "protobufjs": "^7.2.4"
870
+ }
871
+ },
872
+ "node_modules/onnxruntime-web/node_modules/onnxruntime-common": {
873
+ "version": "1.22.0-dev.20250409-89f8206ba4",
874
+ "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.22.0-dev.20250409-89f8206ba4.tgz",
875
+ "integrity": "sha512-vDJMkfCfb0b1A836rgHj+ORuZf4B4+cc2bASQtpeoJLueuFc5DuYwjIZUBrSvx/fO5IrLjLz+oTrB3pcGlhovQ==",
876
+ "license": "MIT"
877
+ },
878
+ "node_modules/platform": {
879
+ "version": "1.3.6",
880
+ "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
881
+ "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
882
+ "license": "MIT"
883
+ },
884
+ "node_modules/protobufjs": {
885
+ "version": "7.5.4",
886
+ "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz",
887
+ "integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==",
888
+ "hasInstallScript": true,
889
+ "license": "BSD-3-Clause",
890
+ "dependencies": {
891
+ "@protobufjs/aspromise": "^1.1.2",
892
+ "@protobufjs/base64": "^1.1.2",
893
+ "@protobufjs/codegen": "^2.0.4",
894
+ "@protobufjs/eventemitter": "^1.1.0",
895
+ "@protobufjs/fetch": "^1.1.0",
896
+ "@protobufjs/float": "^1.0.2",
897
+ "@protobufjs/inquire": "^1.1.0",
898
+ "@protobufjs/path": "^1.1.2",
899
+ "@protobufjs/pool": "^1.1.0",
900
+ "@protobufjs/utf8": "^1.1.0",
901
+ "@types/node": ">=13.7.0",
902
+ "long": "^5.0.0"
903
+ },
904
+ "engines": {
905
+ "node": ">=12.0.0"
906
+ }
907
+ },
908
+ "node_modules/roarr": {
909
+ "version": "2.15.4",
910
+ "resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz",
911
+ "integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==",
912
+ "license": "BSD-3-Clause",
913
+ "dependencies": {
914
+ "boolean": "^3.0.1",
915
+ "detect-node": "^2.0.4",
916
+ "globalthis": "^1.0.1",
917
+ "json-stringify-safe": "^5.0.1",
918
+ "semver-compare": "^1.0.0",
919
+ "sprintf-js": "^1.1.2"
920
+ },
921
+ "engines": {
922
+ "node": ">=8.0"
923
+ }
924
+ },
925
+ "node_modules/semver": {
926
+ "version": "7.7.2",
927
+ "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz",
928
+ "integrity": "sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==",
929
+ "license": "ISC",
930
+ "bin": {
931
+ "semver": "bin/semver.js"
932
+ },
933
+ "engines": {
934
+ "node": ">=10"
935
+ }
936
+ },
937
+ "node_modules/semver-compare": {
938
+ "version": "1.0.0",
939
+ "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz",
940
+ "integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==",
941
+ "license": "MIT"
942
+ },
943
+ "node_modules/serialize-error": {
944
+ "version": "7.0.1",
945
+ "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz",
946
+ "integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==",
947
+ "license": "MIT",
948
+ "dependencies": {
949
+ "type-fest": "^0.13.1"
950
+ },
951
+ "engines": {
952
+ "node": ">=10"
953
+ },
954
+ "funding": {
955
+ "url": "https://github.com/sponsors/sindresorhus"
956
+ }
957
+ },
958
+ "node_modules/sharp": {
959
+ "version": "0.34.3",
960
+ "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.3.tgz",
961
+ "integrity": "sha512-eX2IQ6nFohW4DbvHIOLRB3MHFpYqaqvXd3Tp5e/T/dSH83fxaNJQRvDMhASmkNTsNTVF2/OOopzRCt7xokgPfg==",
962
+ "hasInstallScript": true,
963
+ "license": "Apache-2.0",
964
+ "dependencies": {
965
+ "color": "^4.2.3",
966
+ "detect-libc": "^2.0.4",
967
+ "semver": "^7.7.2"
968
+ },
969
+ "engines": {
970
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
971
+ },
972
+ "funding": {
973
+ "url": "https://opencollective.com/libvips"
974
+ },
975
+ "optionalDependencies": {
976
+ "@img/sharp-darwin-arm64": "0.34.3",
977
+ "@img/sharp-darwin-x64": "0.34.3",
978
+ "@img/sharp-libvips-darwin-arm64": "1.2.0",
979
+ "@img/sharp-libvips-darwin-x64": "1.2.0",
980
+ "@img/sharp-libvips-linux-arm": "1.2.0",
981
+ "@img/sharp-libvips-linux-arm64": "1.2.0",
982
+ "@img/sharp-libvips-linux-ppc64": "1.2.0",
983
+ "@img/sharp-libvips-linux-s390x": "1.2.0",
984
+ "@img/sharp-libvips-linux-x64": "1.2.0",
985
+ "@img/sharp-libvips-linuxmusl-arm64": "1.2.0",
986
+ "@img/sharp-libvips-linuxmusl-x64": "1.2.0",
987
+ "@img/sharp-linux-arm": "0.34.3",
988
+ "@img/sharp-linux-arm64": "0.34.3",
989
+ "@img/sharp-linux-ppc64": "0.34.3",
990
+ "@img/sharp-linux-s390x": "0.34.3",
991
+ "@img/sharp-linux-x64": "0.34.3",
992
+ "@img/sharp-linuxmusl-arm64": "0.34.3",
993
+ "@img/sharp-linuxmusl-x64": "0.34.3",
994
+ "@img/sharp-wasm32": "0.34.3",
995
+ "@img/sharp-win32-arm64": "0.34.3",
996
+ "@img/sharp-win32-ia32": "0.34.3",
997
+ "@img/sharp-win32-x64": "0.34.3"
998
+ }
999
+ },
1000
+ "node_modules/simple-swizzle": {
1001
+ "version": "0.2.2",
1002
+ "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
1003
+ "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
1004
+ "license": "MIT",
1005
+ "dependencies": {
1006
+ "is-arrayish": "^0.3.1"
1007
+ }
1008
+ },
1009
+ "node_modules/sprintf-js": {
1010
+ "version": "1.1.3",
1011
+ "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz",
1012
+ "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==",
1013
+ "license": "BSD-3-Clause"
1014
+ },
1015
+ "node_modules/tar": {
1016
+ "version": "7.4.3",
1017
+ "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
1018
+ "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
1019
+ "license": "ISC",
1020
+ "dependencies": {
1021
+ "@isaacs/fs-minipass": "^4.0.0",
1022
+ "chownr": "^3.0.0",
1023
+ "minipass": "^7.1.2",
1024
+ "minizlib": "^3.0.1",
1025
+ "mkdirp": "^3.0.1",
1026
+ "yallist": "^5.0.0"
1027
+ },
1028
+ "engines": {
1029
+ "node": ">=18"
1030
+ }
1031
+ },
1032
+ "node_modules/tslib": {
1033
+ "version": "2.8.1",
1034
+ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
1035
+ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
1036
+ "license": "0BSD",
1037
+ "optional": true
1038
+ },
1039
+ "node_modules/type-fest": {
1040
+ "version": "0.13.1",
1041
+ "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz",
1042
+ "integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==",
1043
+ "license": "(MIT OR CC0-1.0)",
1044
+ "engines": {
1045
+ "node": ">=10"
1046
+ },
1047
+ "funding": {
1048
+ "url": "https://github.com/sponsors/sindresorhus"
1049
+ }
1050
+ },
1051
+ "node_modules/undici-types": {
1052
+ "version": "7.10.0",
1053
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz",
1054
+ "integrity": "sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==",
1055
+ "license": "MIT"
1056
+ },
1057
+ "node_modules/yallist": {
1058
+ "version": "5.0.0",
1059
+ "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
1060
+ "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
1061
+ "license": "BlueOak-1.0.0",
1062
+ "engines": {
1063
+ "node": ">=18"
1064
+ }
1065
+ }
1066
+ }
1067
+ }
package.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "js",
3
+ "version": "1.0.0",
4
+ "description": "",
5
+ "main": "index.js",
6
+ "scripts": {
7
+ "test": "echo \"Error: no test specified\" && exit 1"
8
+ },
9
+ "keywords": [],
10
+ "author": "",
11
+ "license": "ISC",
12
+ "dependencies": {
13
+ "@huggingface/transformers": "^3.7.2"
14
+ }
15
+ }
potion.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Print the embedding table size and tokenizer of potion-multilingual-128M."""

from model2vec import StaticModel
from tokenizers import Tokenizer
import torch

model = StaticModel.from_pretrained("minishlab/potion-multilingual-128M")
embeddings = torch.from_numpy(model.embedding)

print("Embedding shape:", embeddings.shape)
# Derive the footprint from the tensor itself instead of assuming 4-byte
# (float32) elements, and avoid shadowing the builtin `bytes`.
size_in_bytes = embeddings.numel() * embeddings.element_size()

print("MiB:", size_in_bytes / 1024 / 1024)

tokenizer: Tokenizer = model.tokenizer
print(tokenizer.to_str())
pyproject.toml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "sentence-embeddings"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "model2vec>=0.6.0",
9
+ "numpy>=2.3.2",
10
+ "sentence-transformers>=5.1.0",
11
+ "zstandard>=0.24.0",
12
+ ]
scripts/build_models.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ import shutil
3
+ from textwrap import dedent, indent
4
+ from typing import Any
5
+ import numpy as np
6
+ from zstandard import ZstdCompressor
7
+ from pathlib import Path
8
+ import io
9
+ from sentence_transformers import SentenceTransformer
10
+ from torch.nn import EmbeddingBag
11
+ import torch
12
+ from model2vec import StaticModel
13
+ from tokenizers import Encoding, Tokenizer
14
+
15
# Root directory under which each model's exported files are written
# (ModelCard.path() resolves to models_path / owner / repo).
models_path = Path("models")
16
+
17
+
18
@dataclass
class ModelCard:
    """Metadata for one model repo and the location of its exported files."""

    owner: str
    repo: str
    # The dimensions that were applied with Matryoshka Loss.
    matroyshka_dims: list[int]
    description: str
    license: str

    def name(self):
        """Return the repo identifier in ``owner/repo`` form."""
        return "{}/{}".format(self.owner, self.repo)

    def path(self):
        """Return the output directory ``models_path / owner / repo``."""
        return models_path.joinpath(self.owner, self.repo)

    def get_description(self):
        """Return the description without common indentation or outer whitespace."""
        unindented = dedent(self.description)
        return unindented.strip()
35
+
36
+
37
def zst_compress_file(input: Path):
    """Compress ``input`` with zstd into a sibling file named ``<name>.zst``."""
    compressor = ZstdCompressor()
    compressed_path = input.parent.joinpath(input.name + ".zst")
    print(f"Compressing {compressed_path}")
    with open(input, "rb") as source, open(compressed_path, "wb") as sink:
        # Stream the bytes through the compressor from source to sink.
        compressor.copy_stream(source, sink)
43
+
44
+
45
def save_data(path: Path, tensor: torch.Tensor):
    """Write ``tensor`` in NumPy ``.npy`` format to ``path``, plus a ``.zst`` copy."""
    fp8_dtypes = (torch.float8_e4m3fn, torch.float8_e5m2)
    detached = tensor.detach()

    # fp8 tensors are stored as their raw bytes by viewing them as uint8.
    array = (
        detached.view(torch.uint8).numpy()
        if tensor.dtype in fp8_dtypes
        else detached.numpy()
    )

    # Serialize fully in memory first, then write the bytes out in one go.
    serialized = io.BytesIO()
    np.save(serialized, array)

    print(f"Saving {path}")
    with open(path, "wb") as outfile:
        outfile.write(serialized.getvalue())

    zst_compress_file(path)
60
+
61
+
62
def quantization_loss_mse(tensor: torch.Tensor, dtype: torch.dtype):
    """
    Return the mean squared error introduced by casting ``tensor`` to ``dtype``
    and back to its original dtype. Squaring makes large errors count more
    than small ones.
    """
    # Quantize, then dequantize back to the source dtype.
    dequantized = tensor.detach().to(dtype).to(tensor.dtype)

    error = tensor - dequantized
    return error.square().mean().item()
73
+
74
+
75
def quantization_loss_mae(tensor: torch.Tensor, dtype: torch.dtype):
    """
    Return the mean absolute error introduced by casting ``tensor`` to ``dtype``
    and back to its original dtype. Less sensitive to outliers than MSE.
    """
    # Quantize, then dequantize back to the source dtype.
    dequantized = tensor.detach().to(dtype).to(tensor.dtype)

    return (tensor - dequantized).abs().mean().item()
86
+
87
+
88
def quantization_loss_cosine(tensor: torch.Tensor, dtype: torch.dtype):
    """
    Measure how well vector directions survive a round trip through ``dtype``.

    The tensor is cast to ``dtype`` and back to its original dtype, then each
    vector is compared with its reconstruction using cosine similarity, which
    is independent of vector magnitude.

    Despite the function name, the returned value is a similarity rather than
    a loss: the mean cosine similarity over all vectors as a Python float.

    Args:
        tensor: A 1D vector, or a tensor whose first dimension indexes vectors.
            Higher-dimensional input is flattened to (num_vectors, -1).
        dtype: The lower-precision dtype to round-trip through.
    """

    # Original → quantize → dequantize
    roundtrip = tensor.detach().to(dtype).to(tensor.dtype)

    # Flatten both to 2D (num_vectors, dimensions) in case tensor is 1D or higher-D
    if tensor.ndim == 1:
        orig = tensor.unsqueeze(0)
        recon = roundtrip.unsqueeze(0)
    else:
        # reshape (unlike view) also works for non-contiguous tensors, such as
        # transposed inputs, copying only when it has to.
        orig = tensor.reshape(tensor.shape[0], -1)
        recon = roundtrip.reshape(roundtrip.shape[0], -1)

    # Cosine similarity per vector, then average
    cos = torch.nn.functional.cosine_similarity(orig, recon, dim=1)
    return cos.mean().item()
109
+
110
+
111
def export_embeddings(model_card: ModelCard, embeddings: torch.Tensor) -> None:
    """
    Save the embedding table truncated to each configured dimension, once per
    precision (fp32, fp16, fp8 e5m2, fp8 e4m3), as ``<precision>.d<dim>.npy``
    files in the model's directory.
    """
    vocab_size, dimensions = embeddings.shape

    # This logic can always be adjusted for models with different shapes.
    assert (
        embeddings.dtype == torch.float32
    ), f"The embeddings {embeddings.dtype} are assumed to be float32."

    # File-name label paired with the dtype the truncated table is cast to.
    # The table is already float32, so the fp32 cast leaves it unchanged.
    precisions = (
        ("fp32", torch.float32),
        ("fp16", torch.float16),
        ("fp8_e5m2", torch.float8_e5m2),
        ("fp8_e4m3", torch.float8_e4m3fn),
    )

    for dim in model_card.matroyshka_dims:
        assert (
            dim <= dimensions
        ), f"The Matroyshka dimensions {dim} were bigger than the models dimensions of {dimensions}"

        # Keep only the leading `dim` components of every vector.
        truncated = embeddings[:, :dim]
        assert truncated.shape == torch.Size([vocab_size, dim])

        for label, dtype in precisions:
            save_data(
                model_card.path() / f"{label}.d{dim}.npy",
                truncated.to(dtype=dtype),
            )
140
+
141
+
142
def normalized_mean_pooling(x: torch.Tensor) -> torch.Tensor:
    """Average ``x`` over its first dimension, then scale the result to unit length."""
    return torch.nn.functional.normalize(torch.mean(x, dim=0), dim=0)
146
+
147
+
148
def export_readme(
    model_card: ModelCard,
    embeddings: torch.Tensor,
    tokenizer: Tokenizer,
) -> None:
    """
    Write a README.md into the model's directory describing the embeddings.

    The README contains the model description, statistics about the embedding
    table, quantization quality metrics (mean pooled over sample phrases and
    per vector), and tokenizer examples for text in a range of scripts.

    Args:
        model_card: Supplies the name, license, description and output path.
        embeddings: The 2D embedding table, indexed by token id.
        tokenizer: The tokenizer used to encode the sample phrases and examples.
    """
    vocab_size, dimensions = embeddings.shape
    norms = torch.norm(embeddings, dim=1)  # shape: [vocab_size]

    # Every entry needs its trailing comma: adjacent string literals without
    # one are silently concatenated into a single phrase.
    phrases = [
        "The committee approved the proposal after hours of heated discussion and several last-minute amendments.",
        "When training large neural networks, careful tuning of hyperparameters can significantly affect performance and stability.",
        "Despite the heavy rain, the concert continued as planned and the crowd stayed enthusiastic until the final encore.",
        "In ancient mythology, heroes often embarked on perilous journeys to discover hidden truths about themselves and their world.",
        "The new smartphone model features an improved camera system, faster processing, and extended battery life compared to its predecessor.",
        "He tried to explain the concept using simple analogies, but the underlying mathematics remained difficult to grasp for most listeners.",
        "After weeks of negotiations, the two countries signed a historic trade agreement aimed at reducing tariffs and boosting cooperation.",
        "She paused for a moment before answering, choosing her words carefully to avoid misunderstanding in such a delicate situation.",
        "The detective pieced together the timeline of events, realizing that the key witness had provided a contradictory statement.",
        "Remote work has changed the way teams collaborate, with online tools replacing traditional office routines and in-person meetings.",
    ]

    cosine_similarity = {
        torch.float16: [],
        torch.float8_e4m3fn: [],
        torch.float8_e5m2: [],
    }

    for phrase in phrases:
        encoding: Encoding = tokenizer.encode(phrase)
        embedded_phrase = embeddings[torch.tensor(encoding.ids, dtype=torch.long)]

        # The unquantized reference does not depend on the dtype, so compute
        # it once per phrase.
        pooling_unquantized = normalized_mean_pooling(embedded_phrase)

        for dtype, similarities in cosine_similarity.items():
            pooling_roundtrip = normalized_mean_pooling(
                embedded_phrase.to(dtype).to(torch.float32)
            )
            # Both vectors are unit length, so the dot product is the cosine.
            cosine = torch.dot(pooling_unquantized, pooling_roundtrip).item()
            similarities.append(cosine)

    avg_cosine_similarity = {
        dtype: sum(values) / len(values) for dtype, values in cosine_similarity.items()
    }

    tokenizer_examples = []
    for text in [
        "This is an example of encoding",
        "The quick brown fox jumps over the lazy dog.",
        "Curaçao, naïve fiancé, jalapeño, déjà vu.",
        "Привет, как дела?",
        "Бързата кафява лисица прескача мързеливото куче.",
        "Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.",
        "اللغة العربية جميلة وغنية بالتاريخ.",
        "مرحبا بالعالم!",
        "Simplified: 快速的棕色狐狸跳过懒狗。",
        "Traditional: 快速的棕色狐狸跳過懶狗。",
        "素早い茶色の狐が怠け者の犬を飛び越える。",
        "コンピュータープログラミング",
        "빠른 갈색 여우가 게으른 개를 뛰어넘습니다.",
        "तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।",
        "দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।",
        "வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.",
        "สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.",
        "ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።",
        "Hello 世界 مرحبا 🌍",
        "123, αβγ, абв, العربية, 中文, हिन्दी.",
    ]:
        encoding = tokenizer.encode(text)
        tokens = [f"`{token}`" for token in encoding.tokens]

        tokenizer_examples.append(f"**Input:** {text}<br/>")
        tokenizer_examples.append(f"**Tokens**: {' '.join(tokens)}")
        tokenizer_examples.append("")

    tokenizer_output = "\n".join(tokenizer_examples)

    # The README contains non-ASCII text, so write it as UTF-8 regardless of
    # the platform's default encoding.
    with (model_card.path() / "README.md").open("wt", encoding="utf-8") as file:
        # Must equal the indentation of the template lines below, so that the
        # interpolated multi-line values are dedented together with them.
        prefix = " " * 16

        file.write(
            dedent(
                f"""
                # [{model_card.name()}](https://huggingface.co/{model_card.name()})

                License: [{model_card.license}](https://choosealicense.com/licenses/{model_card.license}/)

                {indent(model_card.get_description(), prefix).strip()}

                ## Model Stats

                Stats that describe the embeddings tensor shapes and value distribution.

                | item | metric | value |
                | --------------| ----------------------- | ----- |
                | vocab | size | {vocab_size:,.0f} |
                | embedding | dimensions | {dimensions:,.0f} |
                | vector length | mean | {norms.mean().item():.2f} |
                | vector length | median | {norms.median().item():.2f} |
                | vector length | stddev | {norms.std().item():.2f} |
                | values | mean | {embeddings.mean().item():.2f} |
                | values | median | {embeddings.median().item():.2f} |
                | values | stddev | {embeddings.std().item():.2f} |

                ## Mean Pooled Quantization Loss

                This test roundtrips the vectors through quantization, but performs the
                mean pooling arithmetic in float32 space. The quantized and unquantized
                mean pooled vectors are compared to each other to determine their cosine
                similarity, to show how much the meaning of the vector has changed due
                to quantization.

                | Precision | Cosine Similarity |
                | ------------- | ----------------- |
                | fp16 | {avg_cosine_similarity[torch.float16]:.5f} |
                | fp8 e4m3 | {avg_cosine_similarity[torch.float8_e4m3fn]:.5f} |
                | fp8 e5m2 | {avg_cosine_similarity[torch.float8_e5m2]:.5f} |

                ## Quantization Loss Per Vector

                While ultimately the embedding vectors will be mean pooled together, it's
                still useful to look at the loss per-vector in the embedding table to see
                which quantization strategies retain the most vector meaning.

                - **Cosine Similarity** — measures how well the *direction* of embedding vectors
                  is preserved after quantization, independent of scale. This is especially
                  relevant when embeddings are used for similarity search or retrieval.
                - **MSE (Mean Squared Error)** — emphasizes large errors by squaring the
                  differences. Useful for detecting whether any values are badly distorted.
                - **MAE (Mean Absolute Error)** — the average absolute difference between
                  original and quantized values. Easier to interpret, less sensitive to outliers.

                | Precision | Metric | Value |
                | ------------- | ------ | ----- |
                | fp16 | cosine similarity | {quantization_loss_cosine(embeddings, torch.float16):.5f} |
                | fp8 e4m3 | cosine similarity | {quantization_loss_cosine(embeddings, torch.float8_e4m3fn):.5f} |
                | fp8 e5m2 | cosine similarity | {quantization_loss_cosine(embeddings, torch.float8_e5m2):.5f} |
                | fp16 | MSE | {quantization_loss_mse(embeddings, torch.float16):.5f} |
                | fp8 e4m3 | MSE | {quantization_loss_mse(embeddings, torch.float8_e4m3fn):.5f} |
                | fp8 e5m2 | MSE | {quantization_loss_mse(embeddings, torch.float8_e5m2):.5f} |
                | fp16 | MAE | {quantization_loss_mae(embeddings, torch.float16):.5f} |
                | fp8 e4m3 | MAE | {quantization_loss_mae(embeddings, torch.float8_e4m3fn):.5f} |
                | fp8 e5m2 | MAE | {quantization_loss_mae(embeddings, torch.float8_e5m2):.5f} |

                ## Tokenizer Examples

                {indent(tokenizer_output, prefix).strip()}
                """
            ).strip()
        )
296
+
297
+
298
def export_tokenizer(model_card: ModelCard, tokenizer: Tokenizer) -> None:
    """Write the tokenizer to ``tokenizer.json`` in the model's folder.

    The saved file is then handed to ``zst_compress_file``.
    """
    destination = model_card.path() / "tokenizer.json"
    print(f"Exporting tokenizer: {destination}")
    # Tokenizer.save is given a plain string path rather than a Path object.
    tokenizer.save(str(destination))
    zst_compress_file(destination)
303
+
304
+
305
def export_sentence_transformers(model_card: ModelCard) -> None:
    """Export the embeddings, tokenizer, and README of a SentenceTransformers model."""
    print("Processing", model_card.name())

    transformer = SentenceTransformer(model_card.name(), device="cpu")
    # The embedding table is read from the first module of the loaded model.
    bag: EmbeddingBag = transformer[0].embedding  # type: ignore
    model_card.path().mkdir(parents=True, exist_ok=True)
    weights = torch.Tensor(bag.weight)

    export_embeddings(model_card, weights)
    export_tokenizer(model_card, transformer.tokenizer)
    export_readme(model_card, weights, transformer.tokenizer)
318
+
319
+
320
def export_model2vec(model_card: ModelCard) -> None:
    """Export the embeddings, tokenizer, and README of a model2vec model."""
    print("Processing", model_card.name())

    static_model = StaticModel.from_pretrained(model_card.name())
    model_card.path().mkdir(parents=True, exist_ok=True)
    # The model exposes its embedding table as a numpy array; wrap it as a tensor.
    weights = torch.from_numpy(static_model.embedding)

    export_embeddings(model_card, weights)
    export_tokenizer(model_card, static_model.tokenizer)
    export_readme(model_card, weights, static_model.tokenizer)
331
+
332
+
333
def main() -> None:
    """Rebuild the models folder from scratch and export every known model.

    Any existing ``models_path`` folder is deleted first, so a run always
    produces a clean export. The sentence_transformers models are exported
    first, followed by the model2vec models.
    """
    # Static embedders that use sentence_transformers models.
    sentence_transformers_models = [
        ModelCard(
            owner="sentence-transformers",
            repo="static-similarity-mrl-multilingual-v1",
            description="""
                Multi-lingual similarity embeddings that were trained with Matryoshka loss
                that allows for more effective truncation of the embedding vectors. It
                was trained on a variety of domains of multilingual datasets.

                It's a general purpose model that can be used for semantic textual similarity,
                paraphrase mining, text classification, clustering, and more.
            """,
            matroyshka_dims=[32, 64, 128, 256, 512, 1024],
            license="apache-2.0",
        ),
        ModelCard(
            owner="sentence-transformers",
            repo="static-retrieval-mrl-en-v1",
            description="""
                English-only uncased similarity embeddings that were trained with Matryoshka
                loss that allows for more effective truncation of the embedding vectors. It
                was trained on a variety of domains of monolingual datasets. It was designed
                specifically for similarity retrieval.
            """,
            matroyshka_dims=[32, 64, 128, 256, 512, 1024],
            license="apache-2.0",
        ),
    ]
    # Static embedders that use model2vec.
    model2vec_models = [
        ModelCard(
            owner="minishlab",
            repo="potion-multilingual-128M",
            # These are assumed as there is no python reference implementation:
            matroyshka_dims=[32, 64, 128, 256],
            description="""
                A multilingual embedder. The details are a bit scant on how it's trained as
                there is no source code for it. However, it's likely a close architecture
                to the potion-retrieval-32M model, but trained on Common Crawl data.

                The 128M references the number of parameters in the embeddings:

                256 dimensions * 500,353 vocab.
            """,
            license="mit",
        ),
        ModelCard(
            owner="minishlab",
            repo="potion-retrieval-32M",
            matroyshka_dims=[32, 64, 128, 256, 512],
            description="""
                The token embeddings from a monolingual English 32M parameter model that was
                distilled from embeddings that were initialized from the multi-domain
                [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5)

                The 32M references the number of parameters in the embeddings:

                512 dimensions * 63,091 vocab.
            """,
            license="mit",
        ),
    ]

    # Start from an empty folder so stale artifacts from a previous run never linger.
    if models_path.exists():
        print(f"Removing the old models folder: {models_path}")
        shutil.rmtree(models_path)
    models_path.mkdir()

    for model_card in sentence_transformers_models:
        export_sentence_transformers(model_card)

    for model_card in model2vec_models:
        export_model2vec(model_card)
408
+
409
+
410
+ if __name__ == "__main__":
411
+ main()
scripts/experiments/multilingual.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from tokenizers import Encoding, Tokenizer
3
+ from torch.nn import EmbeddingBag
4
+ import torch
5
+
6
+
7
def test_tokenizer():
    """Print the tokens produced for sample sentences in a range of scripts.

    The tokenizer is loaded from ``js/tokenizer.json``. For every sample the
    original text is printed, then its tokens, then a blank line.
    """
    samples = [
        "This is an example of encoding",
        "The quick brown fox jumps over the lazy dog.",
        "Curaçao, naïve fiancé, jalapeño, déjà vu.",
        "Привет, как дела?",
        "Бързата кафява лисица прескача мързеливото куче.",
        "Γρήγορη καφέ αλεπού πηδάει πάνω από τον τεμπέλη σκύλο.",
        "اللغة العربية جميلة وغنية بالتاريخ.",
        "مرحبا بالعالم!",
        "Simplified: 快速的棕色狐狸跳过懒狗。",
        "Traditional: 快速的棕色狐狸跳過懶狗。",
        "素早い茶色の狐が怠け者の犬を飛び越える。",
        "コンピュータープログラミング",
        "빠른 갈색 여우가 게으른 개를 뛰어넘습니다.",
        "तेज़ भूरी लोमड़ी आलसी कुत्ते के ऊपर कूदती है।",
        "দ্রুত বাদামী শিয়াল অলস কুকুরের উপর দিয়ে লাফ দেয়।",
        "வேகமான பழுப்பு நரி சோம்பேறி நாயின் மேல் குதிக்கிறது.",
        "สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ.",
        "ብሩክ ቡናማ ቀበሮ ሰነፍ ውሻን ተዘልሏል።",
        "Hello 世界 مرحبا 🌍",
        "123, αβγ, абв, العربية, 中文, हिन्दी.",
    ]

    tokenizer: Tokenizer = Tokenizer.from_file("js/tokenizer.json")

    for text in samples:
        encoded: Encoding = tokenizer.encode(text)
        # Text, tokens, then an empty line to separate the samples.
        print(text, encoded.tokens, sep="\n", end="\n\n")
38
+
39
+
40
# https://huggingface.co/sentence-transformers/static-similarity-mrl-multilingual-v1
model = SentenceTransformer(
    "sentence-transformers/static-similarity-mrl-multilingual-v1", device="cpu"
)

# The embedding table is read from the first module of the loaded model.
embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
embeddings = torch.Tensor(embedding_bag.weight)

print(embeddings.shape)
assert embeddings.shape == torch.Size([105879, 1024])

# Report the table size at each truncation width, for 4-byte and 2-byte values.
for precision, bytes_per_value in (("float32", 4), ("float16", 2)):
    print(precision)
    for dims in (1024, 512, 256):
        print(
            f" {dims} dim - {embeddings.shape[0] * dims * bytes_per_value / 1024 / 1024:,.1f} MiB"
        )
scripts/experiments/potion.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from model2vec import StaticModel
2
+ from tokenizers import Tokenizer
3
+ import torch
4
+
5
# Load the model2vec model and wrap its numpy embedding table as a tensor.
model = StaticModel.from_pretrained("minishlab/potion-multilingual-128M")
embeddings = torch.from_numpy(model.embedding)

print("Embedding shape:", embeddings.shape)
# Named so that it does not shadow the builtin `bytes`.
# The factor of 4 assumes 4-byte (float32) values -- TODO confirm against the model dtype.
size_in_bytes = embeddings.shape[0] * embeddings.shape[1] * 4

print("MiB:", size_in_bytes / 1024 / 1024)

# Dump the full tokenizer configuration for inspection.
tokenizer: Tokenizer = model.tokenizer
print(tokenizer.to_str())
scripts/experiments/tomaarsen.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from torch.nn import EmbeddingBag
3
+ import torch
4
+
5
# Load the static retrieval model and read the embedding table from its first module.
model = SentenceTransformer("tomaarsen/static-retrieval-mrl-en-v1")
embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
embeddings = torch.Tensor(embedding_bag.weight)

# Guard against the upstream model changing its vocabulary size or width.
assert embeddings.shape == torch.Size([30522, 1024])

# Size of the table at each truncation width, at 4 bytes per value.
# The stray trailing ":" after "MiB" was dropped so the output matches multilingual.py.
vocab_size = embeddings.shape[0]
for dims in (1024, 512, 256):
    print(f"{dims} dim - {vocab_size * dims * 4 / 1024 / 1024:,.1f} MiB")

print("Embeddings[0]", embeddings[0])
scripts/upload_models.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import subprocess
3
+ from pathlib import Path
4
+
5
+
6
def main() -> None:
    """Upload the local ``models`` folder to the model bucket for a release tag.

    The ``--tag`` argument must name an existing git tag; it becomes the last
    path segment of the upload destination.

    Raises:
        SystemExit: If the git tag does not exist.
        subprocess.CalledProcessError: If the ``gsutil`` upload fails.
    """
    # Imported locally so this block does not depend on the file's import list.
    import shlex

    parser = argparse.ArgumentParser(
        description=__doc__,
        # Preserves whitespace in the help text.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "--tag", type=str, required=True, help="The git tag for the release"
    )

    args = parser.parse_args()
    tag: str = args.tag

    try:
        subprocess.run(
            ["git", "rev-parse", "--verify", f"refs/tags/{tag}"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except subprocess.CalledProcessError:
        # The git failure adds nothing to the message, so suppress the chained traceback.
        raise SystemExit(f"Error: Git tag '{tag}' does not exist.") from None

    repo_root = Path(__file__).parent.parent.resolve()

    # Pass an argument list instead of a shell string: the tag and the path are
    # then never interpreted by a shell, so spaces or metacharacters are safe.
    command = [
        "gsutil",
        "cp",
        "-r",
        str(repo_root / "models"),
        f"gs://moz-model-hub/mozilla/static-embeddings/{tag}/",
    ]

    print("Uploading models")
    # Show a copy-pasteable, correctly quoted form of the command.
    print(shlex.join(command))

    subprocess.run(command, check=True)

    print("All models have been uploaded successfully.")
44
+
45
+
46
+ if __name__ == "__main__":
47
+ main()
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
tomaarsen.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from torch.nn import EmbeddingBag
3
+ import torch
4
+
5
# Load the static retrieval model and read the embedding table from its first module.
model = SentenceTransformer("tomaarsen/static-retrieval-mrl-en-v1")
embedding_bag: EmbeddingBag = model[0].embedding  # type: ignore
embeddings = torch.Tensor(embedding_bag.weight)

# Guard against the upstream model changing its vocabulary size or width.
assert embeddings.shape == torch.Size([30522, 1024])

# Size of the table at each truncation width, at 4 bytes per value.
# The stray trailing ":" after "MiB" was dropped from the output.
vocab_size = embeddings.shape[0]
for dims in (1024, 512, 256):
    print(f"{dims} dim - {vocab_size * dims * 4 / 1024 / 1024:,.1f} MiB")

print("Embeddings[0]", embeddings[0])
tsconfig.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "module": "ESNext",
4
+ "moduleResolution": "nodenext",
5
+ // Set the baseUrl to the root of the project.
6
+ "baseUrl": "src",
7
+ // Make the type checking as strict as possible.
8
+ "strict": true,
9
+ // Allow JS files in the project. With checkJs enabled they are type checked even without a @ts-check comment.
10
+ "allowJs": true,
11
+ "checkJs": true,
12
+ // Only type check, don't emit files.
13
+ "noEmit": true,
14
+ // Allow esnext syntax. Otherwise the default is ES5 only.
15
+ "target": "esnext",
16
+ "lib": ["esnext", "dom"],
17
+ "esModuleInterop": true
18
+ },
19
+ // Only the files listed here are part of the type check.
20
+ "include": ["example.mjs"],
21
+ // "files": ["src/@types/globals.d.ts"],
22
+ "exclude": []
23
+ }
uv.lock ADDED
The diff for this file is too large to render. See raw diff