rabaevn committed on
Commit
9b076d1
·
verified ·
1 Parent(s): fe1afb0

Training in progress, step 800, checkpoint

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
last-checkpoint/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
last-checkpoint/2_Dense/config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "in_features": 768,
3
+ "out_features": 3072,
4
+ "bias": false,
5
+ "activation_function": "torch.nn.modules.linear.Identity"
6
+ }
last-checkpoint/2_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f119b436f400958939f9956238cb48249829f236ff2bb77c14813c4df81b2bfe
3
+ size 9437272
last-checkpoint/3_Dense/config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "in_features": 3072,
3
+ "out_features": 768,
4
+ "bias": false,
5
+ "activation_function": "torch.nn.modules.linear.Identity"
6
+ }
last-checkpoint/3_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021f678cea9ef228f88596107b33a391fedc6db720ff066a8708144b0fcc5a92
3
+ size 9437272
last-checkpoint/README.md ADDED
@@ -0,0 +1,764 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - sentence-transformers
7
+ - sentence-similarity
8
+ - feature-extraction
9
+ - dense
10
+ - generated_from_trainer
11
+ - dataset_size:441985
12
+ - loss:CachedMultipleNegativesRankingLoss
13
+ base_model: google/embeddinggemma-300m
14
+ widget:
15
+ - source_sentence: "title: \nSwords of Revealing Light SDPLEN026 Common 1st Edition"
16
+ sentences:
17
+ - 'description
18
+
19
+ Structure Deck Powercode Link is a Structure Deck in the YuGiOh Official Card
20
+ Game and YuGiOh Trading Card Game It is the 35th Deck in the OCGs Structure Deck
21
+ series following Structure Deck Cyberse Link It is the 42nd Deck in the TCGs Structure
22
+ Deck series following Lair of Darkness Structure Deck'
23
+ - 'description
24
+
25
+ Features Glowing in the dark effect These handmade luminous marbles are eyecathing
26
+ and in line with the most party themes such as black light party neon theme party
27
+ growth party Halloween party and so on so you can use them to decorate your party
28
+ to create the party vibe you want Play together These doted style glass marbles
29
+ are funny toys for boys and girls They can play the related game on the garden
30
+ yard lawn playground park and more It is a nice opportunity to develop their social
31
+ skill and get away from electronic products Specifications Material glass Diameter
32
+ approx 063 inch Color as pictures shown Package included 35 x Luminous marbles Warm
33
+ notices Manual measurement please allow slight errors on size The color might
34
+ exist slightly difference due to different displayswhat you will get package comes
35
+ with 35 pieces handmade glass marbles in 7 colors 5 pieces of each color with
36
+ the same size enough quantity to meet your need You can also share them with your
37
+ friends
38
+
39
+ Size and muticolor doted style glass marbles are 063 inch in diameter featuring
40
+ 7 colors such as transparent pinkishorange yellow purple green sea blue and dark
41
+ blue They are glowing at night and look very interesting then you can imagine
42
+ yourself in the universe
43
+
44
+ Quality material these muticolors luminous marbles are made from quality glass
45
+ which are firm and sturdy smooth with a nice finish durable and highstrength designed
46
+ for longtime use giving you nice play feeling
47
+
48
+ How to make marbles glow brighter you can make the glow marbles sunbathe or illuminate
49
+ them with ultraviolet light to make the marbles shine brighter and longer bringing
50
+ you delightful mood Recommended for ages 5 years and up
51
+
52
+ Multiple uses the luminous glass marbles can effectively improve boys and girls
53
+ eye to hand coordination and enhance their fine motor skills They are also nice
54
+ stocking stuffers Easter baskets birthday presents and home decoration For example
55
+ you can use them to add a splash of color in your fish tanks and vases'
56
+ - 'description
57
+
58
+ Legendary Encounters A Predator Deck Building Game is really two games in one
59
+ You can play as humans working together to escape and fight off the Predator Or
60
+ your group can play as Predators competing to hunt humans and earn the most Honor
61
+ You can mix both Legendary Encounters games in order to play as Predators hunting
62
+ the Aliens You would play on the Aliens board pick a location from the Aliens
63
+ game choose Aliens Objectives and more See Rulebook for details If youre feeling
64
+ extra adventurous there are even more ways to mix the games such as playing as
65
+ Predators and hunting a mix of Human Prey and Aliens Or you could mix in Marvel
66
+ Legendary cards for even more craziness700 playable cards all with original art
67
+ Game mat and rulebook included
68
+
69
+ 15 players
70
+
71
+ 3060 minute play time
72
+
73
+ Cards included 35 Experience 25 Brute Strength 15 Role Avatars 10 Human 5 Predator
74
+ 15 Role Character Cards 10 Human 5 Predator 10 Commanders 8 Killer Instincts 224
75
+ Character cards 16 characters with 14 cards each
76
+
77
+ Cards Included 2 Locations 6 Objectives 66 Enemy cards 6 different minidecks 66
78
+ Prey cards 6 different minidecks 24 Young Blood cards 24 Mercenary cards 40 Enemy
79
+ Strikes 60 Prey Strikes 20 Traps 20 Gear 20 Tests 20 Challenges'
80
+ - source_sentence: "title: \nFunko POP Muppets VINYL Snowth"
81
+ sentences:
82
+ - 'description
83
+
84
+ From the Manufacturer
85
+
86
+ This Muppets Snowth Pop Vinyl figure will have you humming Do Dooo Do Doo doo
87
+ too From Jim Hensons Muppets comes Snowth the furry pink creature with horns
88
+ and round yellow lips Snowths were performed simultaneously by Frank Oz with each
89
+ puppet on each arm Snowth is adorable as ever as it stands 4 tall and comes in
90
+ a stylized artful display boxCollect them all
91
+
92
+ Displayable window box
93
+
94
+ Muppets Classics'
95
+ - 'description
96
+
97
+ On September 20 1983 in Burbank CA the USPS released this setenant in an effort
98
+ to break down he barriers and stereotypes which have been created One stamp simply
99
+ shows the American Sign Language sign for I Love You while the other pictures
100
+ a mother signing I Love You to her childADA
101
+
102
+ Deaf
103
+
104
+ Hearing Impaired'
105
+ - 'description
106
+
107
+ From Avatar Ty Lee as a stylized Pop vinyl from Funko Figure stands 3 34 inches
108
+ and comes in a window display box Check out the other Avatar figures from Funko
109
+ Collect them allImported
110
+
111
+ Product TypeToys And Games
112
+
113
+ Item Package Dimension35 L X45 W X625 H
114
+
115
+ Item Package Weight025 Lbs
116
+
117
+ Country Of Origin Viet Nam'
118
+ - source_sentence: "title: \nFuryu Accel World Kuroyukihime Figure"
119
+ sentences:
120
+ - 'description
121
+
122
+ From the Manufacturer
123
+
124
+ A rescue themed vehicle with a Dalmation RollaRound that both magically come to
125
+ life when baby plays Push along the Firetruck to see the siren lights move up
126
+ and down Put the Round into the the truck to see the Dogs head turn back and forth
127
+ as he rollA rescue themed vehicle with a Dalmation RollaRound that both magically
128
+ come to life when baby plays
129
+
130
+ Push along the Firetruck to see the siren lights move up and down
131
+
132
+ Put the Round into the the truck to see the Dogs head turn back and forth as he
133
+ rolls along
134
+
135
+ Part of RollARounds Collection
136
+
137
+ Age Range 6 to 36 Months'
138
+ - 'description
139
+
140
+ Disney Toy Story Buzz Lightyear Little Lights with Hook for Samsung Galaxy S3
141
+ Mini i8190'
142
+ - 'description
143
+
144
+ Accel World FuRyu 5 Kuroyukihime Sitting PVC Statue KuroyukihimeBrand New Official
145
+ Item
146
+
147
+ Great for Collectors'
148
+ - source_sentence: "title: \nJellycat Pretty Patisserie Tarte Au Citron Food Plush"
149
+ sentences:
150
+ - 'description
151
+
152
+ Features Benefits 60pcs New Year latex balloons and foil balloons colored in
153
+ gold and black printed with festive words are great atmosphere creator for your
154
+ home and party Made of durable latex or foil material wonderful for hanging on
155
+ the wall ceiling window and other places you want Specifications Size 12 inches
156
+ 18 inch Color black gold Material latex foil material Package Includes 60 x New
157
+ Year Balloon 4 x Foil BalloonLarge Quantity Are you looking for New Year decorations
158
+ Our cute balloons will be a great choice for you 60pcs latex balloons and 4pcs
159
+ large foil balloons great combination for your home and party decoration
160
+
161
+ Delicate Design Taking black and gold as theme colors printed with festive words
162
+ or designed with confetti the foil balloons are shaped in star or round classic
163
+ and exquisite
164
+
165
+ Good Quality Made of good quality latex or foil material safe and durable the
166
+ foil one is reusable you can fill them with air or helium
167
+
168
+ Size Each latex balloon measures 12 inch and the foil balloons measure 18 inch
169
+ appropriate size for hanging on the wall ceiling window and other places you want
170
+
171
+ Wide Application Perfect to decorate your New Year party decoration Christmas
172
+ school activities and other occasions suitable for both indoor and outdoor bar
173
+ stage props etc'
174
+ - 'description
175
+
176
+ Finally A pet bear that wont gnaw on your head or bite your arms off This adorable
177
+ bouncing head animal doll is ready for you to give him a great big bear hug Soft
178
+ white fur makes this shaking head figure look great on any dashboard bookshelf
179
+ desktop or countertop He loves the dashboard adhesive that travels with him because
180
+ the sticky pad reminds him of honey Youll love it because it will securely attach
181
+ him to your car dashboard but it wont leave any permanent residue With adorable
182
+ beady eyes this bobblehead will make you the envy of all your friends Perfect
183
+ gift for any bobble head collector a trucker with an empty dashboard or children
184
+ Kids love these cute little bobbing head toys One of many ridiculously awesome
185
+ nodding head dash board therapists brought to you as part of Batty Bargains Legendary
186
+ Bobbleheads collection Order yours todayNo live bears were harmed in the making
187
+ of this bobblehead bear Fuzzy black fur made of velvet textured flocking
188
+
189
+ Big strong paws and great listening skills make for a phenomenal dash board therapist
190
+ When your not strong lean on this bobbing head bear Its bouncing head will help
191
+ you though the tough times
192
+
193
+ A beautiful toy dash board doll great for any road trip One of many dashboard
194
+ figures available as part of Batty Bargains Legendary Bobbleheads Collection
195
+
196
+ Adjustable bobble head moves easily and smoothly making it look like it is nodding
197
+ or shaking its head Guaranteed to bob at a wide range of angles Just loosen or
198
+ tighten the weighted bolt on the back of its bobbing head
199
+
200
+ Easily mounts on almost any car or vehicle dashboard in seconds with included
201
+ adhesive pad Easily removed without leaving any permanent residue'
202
+ - 'description
203
+
204
+ The Jellycat brand was established in London in 1999 to create quirky original
205
+ and innovative soft toys for all ages Jellycat offers the best selection of soft
206
+ plush stuffed animals and toys in the cutest and most luxurious of fabrics and
207
+ textures With unmatched quality find the perfect stuffed animal with the coolest
208
+ designs for babies kids and adults alike Plush toys from Jellycat come in a variety
209
+ of sizes to cuddle including mini small medium large huge and really big They
210
+ also feature a variety of collections and themes to adorn any nursery or childrens
211
+ room for both boys and girls making a Jellycat the perfect gift Best of all your
212
+ Jellycat stuffed animal will provide countless hours of soft hugs and memories
213
+ for years to comePlush measures 4 x 4 x 2 inches
214
+
215
+ Suitable for all ages
216
+
217
+ Made of 95 Polyester 5 Spandex
218
+
219
+ Spot clean only
220
+
221
+ Designed by Jellycat in London UK'
222
+ - source_sentence: "title: \nFairytale and Historic Minifigure Set"
223
+ sentences:
224
+ - 'description
225
+
226
+ Features 1 Cartoon Dinosaur Plush Doll is suitable for being Ornament Gift Pillow
227
+ and Rag Toy 2 Made of quality soft plush and being fully filled with pp cotton
228
+ Not afraid of squeezing 3 Not easy to deform with good resilience It has tightly
229
+ threaded side which features not easy to produce cotton 4 Perfect to cuddle with
230
+ in bed or to keep you company when you nap on the couch Great gift for kids girls
231
+ and anyone you love 5 For the cleaning of such short plush doll clean the surface
232
+ with cold water and place it in the sun after washing to ensure that the short
233
+ plush is fluffy Notice Actual color may be slightly different from the image due
234
+ to different display and light effect Please allow 13cm deviation due to manual
235
+ measurement Description Material plush PP cotton Size 3560cm Decoration form hanging
236
+ placing ColorStyle As the picture shown'
237
+ - 'description
238
+
239
+ Toysmiths mission is to supply quality toys and gifts while delivering superior
240
+ customer service to retailers We offer products in many key categories including
241
+ active play science discovery arts crafts impulse novelty toys and nostalgic
242
+ retro classics Since our inception in 1981 our owners Bill and Nancy Smith have
243
+ worked very diligently to grow from a small family business shipping from their
244
+ garage to a large office complex and distribution center that stocks and ships
245
+ over 1600 products to more than 6000 accounts However our philosophy remains the
246
+ same maintaining a strong family work environment while offering the best selection
247
+ and quality of products together with the highest standards of service to all
248
+ our customersEducational toys that help children learn
249
+
250
+ Made using safe and high quality materials
251
+
252
+ Toys for all age groups'
253
+ - 'description
254
+
255
+ The LEGO Education 779349 or 9349 227piece fairytale and historic minifigure set
256
+ includes elements to build 22 multicultural male and female minifigures representing
257
+ characters from fairytales and history Children four years and older can explore
258
+ roleplaying while creating and reenacting fairytales and stories developing imagination
259
+ and imitative skills Themes include pirates a witch and wizard a king and queen
260
+ knights mine workers a mermaid and merman and more and accessories such as wands
261
+ hats flowers spiders and snakes The set is compatible with any LEGO Education
262
+ system supports a group of five students and comes with a decorated box for storage
263
+
264
+ Since 1980 LEGO Education has delivered handson curriculumbased resources for
265
+ teachers and students worldwide LEGO Education believes a handson mindson approach
266
+ helps students actively take ownership of the learning process and develop 21stcentury
267
+ skills such as creative thinking and problem solving through reallife engaging
268
+ experiencesExploring differences to real life makebelieve and historic characters
269
+
270
+ Storytelling through characters and things they do
271
+
272
+ Developing fantasy imagination and imitative skills
273
+
274
+ Featuring a variety of minifigures that enable children to create and act out
275
+ their favourite fairytales and stories
276
+
277
+ Ideal for 4 years old children or above'
278
+ datasets:
279
+ - guyhadad01/Amazon_2023_items_processed_filtered
280
+ pipeline_tag: sentence-similarity
281
+ library_name: sentence-transformers
282
+ metrics:
283
+ - cosine_accuracy@1
284
+ - cosine_accuracy@3
285
+ - cosine_accuracy@5
286
+ - cosine_accuracy@10
287
+ - cosine_precision@1
288
+ - cosine_precision@3
289
+ - cosine_precision@5
290
+ - cosine_precision@10
291
+ - cosine_recall@1
292
+ - cosine_recall@3
293
+ - cosine_recall@5
294
+ - cosine_recall@10
295
+ - cosine_ndcg@10
296
+ - cosine_mrr@10
297
+ - cosine_map@100
298
+ model-index:
299
+ - name: EmbeddingGemma-300m fine-tuned on Amazon Toys & Games
300
+ results:
301
+ - task:
302
+ type: information-retrieval
303
+ name: Information Retrieval
304
+ dataset:
305
+ name: dev eval
306
+ type: dev-eval
307
+ metrics:
308
+ - type: cosine_accuracy@1
309
+ value: 0.308
310
+ name: Cosine Accuracy@1
311
+ - type: cosine_accuracy@3
312
+ value: 0.437
313
+ name: Cosine Accuracy@3
314
+ - type: cosine_accuracy@5
315
+ value: 0.507
316
+ name: Cosine Accuracy@5
317
+ - type: cosine_accuracy@10
318
+ value: 0.585
319
+ name: Cosine Accuracy@10
320
+ - type: cosine_precision@1
321
+ value: 0.308
322
+ name: Cosine Precision@1
323
+ - type: cosine_precision@3
324
+ value: 0.1456666666666667
325
+ name: Cosine Precision@3
326
+ - type: cosine_precision@5
327
+ value: 0.10139999999999998
328
+ name: Cosine Precision@5
329
+ - type: cosine_precision@10
330
+ value: 0.0585
331
+ name: Cosine Precision@10
332
+ - type: cosine_recall@1
333
+ value: 0.308
334
+ name: Cosine Recall@1
335
+ - type: cosine_recall@3
336
+ value: 0.437
337
+ name: Cosine Recall@3
338
+ - type: cosine_recall@5
339
+ value: 0.507
340
+ name: Cosine Recall@5
341
+ - type: cosine_recall@10
342
+ value: 0.585
343
+ name: Cosine Recall@10
344
+ - type: cosine_ndcg@10
345
+ value: 0.43779247661127096
346
+ name: Cosine NDCG@10
347
+ - type: cosine_mrr@10
348
+ value: 0.39167420634920597
349
+ name: Cosine MRR@10
350
+ - type: cosine_map@100
351
+ value: 0.3994706245247452
352
+ name: Cosine MAP@100
353
+ ---
354
+
355
+ # EmbeddingGemma-300m fine-tuned on Amazon Toys & Games
356
+
357
+ This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [google/embeddinggemma-300m](https://huggingface.co/google/embeddinggemma-300m) on the [amazon_2023_items_processed_filtered](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
358
+
359
+ ## Model Details
360
+
361
+ ### Model Description
362
+ - **Model Type:** Sentence Transformer
363
+ - **Base model:** [google/embeddinggemma-300m](https://huggingface.co/google/embeddinggemma-300m) <!-- at revision 57c266a740f537b4dc058e1b0cda161fd15afa75 -->
364
+ - **Maximum Sequence Length:** 2048 tokens
365
+ - **Output Dimensionality:** 768 dimensions
366
+ - **Similarity Function:** Cosine Similarity
367
+ - **Training Dataset:**
368
+ - [amazon_2023_items_processed_filtered](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered)
369
+ - **Language:** en
370
+ - **License:** apache-2.0
371
+
372
+ ### Model Sources
373
+
374
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
375
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
376
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
377
+
378
+ ### Full Model Architecture
379
+
380
+ ```
381
+ SentenceTransformer(
382
+ (0): Transformer({'max_seq_length': 2048, 'do_lower_case': False, 'architecture': 'Gemma3TextModel'})
383
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
384
+ (2): Dense({'in_features': 768, 'out_features': 3072, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})
385
+ (3): Dense({'in_features': 3072, 'out_features': 768, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity'})
386
+ (4): Normalize()
387
+ )
388
+ ```
389
+
390
+ ## Usage
391
+
392
+ ### Direct Usage (Sentence Transformers)
393
+
394
+ First install the Sentence Transformers library:
395
+
396
+ ```bash
397
+ pip install -U sentence-transformers
398
+ ```
399
+
400
+ Then you can load this model and run inference.
401
+ ```python
402
+ from sentence_transformers import SentenceTransformer
403
+
404
+ # Download from the 🤗 Hub
405
+ model = SentenceTransformer("rabaevn/EncodeRec")
406
+ # Run inference
407
+ queries = [
408
+ "title: \nFairytale and Historic Minifigure Set",
409
+ ]
410
+ documents = [
411
+ 'description\nThe LEGO Education 779349 or 9349 227piece fairytale and historic minifigure set includes elements to build 22 multicultural male and female minifigures representing characters from fairytales and history Children four years and older can explore roleplaying while creating and reenacting fairytales and stories developing imagination and imitative skills Themes include pirates a witch and wizard a king and queen knights mine workers a mermaid and merman and more and accessories such as wands hats flowers spiders and snakes The set is compatible with any LEGO Education system supports a group of five students and comes with a decorated box for storage\nSince 1980 LEGO Education has delivered handson curriculumbased resources for teachers and students worldwide LEGO Education believes a handson mindson approach helps students actively take ownership of the learning process and develop 21stcentury skills such as creative thinking and problem solving through reallife engaging experiencesExploring differences to real life makebelieve and historic characters\nStorytelling through characters and things they do\nDeveloping fantasy imagination and imitative skills\nFeaturing a variety of minifigures that enable children to create and act out their favourite fairytales and stories\nIdeal for 4 years old children or above',
412
+ 'description\nToysmiths mission is to supply quality toys and gifts while delivering superior customer service to retailers We offer products in many key categories including active play science discovery arts crafts impulse novelty toys and nostalgic retro classics Since our inception in 1981 our owners Bill and Nancy Smith have worked very diligently to grow from a small family business shipping from their garage to a large office complex and distribution center that stocks and ships over 1600 products to more than 6000 accounts However our philosophy remains the same maintaining a strong family work environment while offering the best selection and quality of products together with the highest standards of service to all our customersEducational toys that help children learn\nMade using safe and high quality materials\nToys for all age groups',
413
+ 'description\nFeatures 1 Cartoon Dinosaur Plush Doll is suitable for being Ornament Gift Pillow and Rag Toy 2 Made of quality soft plush and being fully filled with pp cotton Not afraid of squeezing 3 Not easy to deform with good resilience It has tightly threaded side which features not easy to produce cotton 4 Perfect to cuddle with in bed or to keep you company when you nap on the couch Great gift for kids girls and anyone you love 5 For the cleaning of such short plush doll clean the surface with cold water and place it in the sun after washing to ensure that the short plush is fluffy Notice Actual color may be slightly different from the image due to different display and light effect Please allow 13cm deviation due to manual measurement Description Material plush PP cotton Size 3560cm Decoration form hanging placing ColorStyle As the picture shown',
414
+ ]
415
+ query_embeddings = model.encode_query(queries)
416
+ document_embeddings = model.encode_document(documents)
417
+ print(query_embeddings.shape, document_embeddings.shape)
418
+ # (1, 768) (3, 768)
419
+
420
+ # Get the similarity scores for the embeddings
421
+ similarities = model.similarity(query_embeddings, document_embeddings)
422
+ print(similarities)
423
+ # tensor([[0.4024, 0.3496, 0.2501]])
424
+ ```
425
+
426
+ <!--
427
+ ### Direct Usage (Transformers)
428
+
429
+ <details><summary>Click to see the direct usage in Transformers</summary>
430
+
431
+ </details>
432
+ -->
433
+
434
+ <!--
435
+ ### Downstream Usage (Sentence Transformers)
436
+
437
+ You can fine-tune this model on your own dataset.
438
+
439
+ <details><summary>Click to expand</summary>
440
+
441
+ </details>
442
+ -->
443
+
444
+ <!--
445
+ ### Out-of-Scope Use
446
+
447
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
448
+ -->
449
+
450
+ ## Evaluation
451
+
452
+ ### Metrics
453
+
454
+ #### Information Retrieval
455
+
456
+ * Dataset: `dev-eval`
457
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
458
+
459
+ | Metric | Value |
460
+ |:--------------------|:-----------|
461
+ | cosine_accuracy@1 | 0.308 |
462
+ | cosine_accuracy@3 | 0.437 |
463
+ | cosine_accuracy@5 | 0.507 |
464
+ | cosine_accuracy@10 | 0.585 |
465
+ | cosine_precision@1 | 0.308 |
466
+ | cosine_precision@3 | 0.1457 |
467
+ | cosine_precision@5 | 0.1014 |
468
+ | cosine_precision@10 | 0.0585 |
469
+ | cosine_recall@1 | 0.308 |
470
+ | cosine_recall@3 | 0.437 |
471
+ | cosine_recall@5 | 0.507 |
472
+ | cosine_recall@10 | 0.585 |
473
+ | **cosine_ndcg@10** | **0.4378** |
474
+ | cosine_mrr@10 | 0.3917 |
475
+ | cosine_map@100 | 0.3995 |
476
+
477
+ <!--
478
+ ## Bias, Risks and Limitations
479
+
480
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
481
+ -->
482
+
483
+ <!--
484
+ ### Recommendations
485
+
486
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
487
+ -->
488
+
489
+ ## Training Details
490
+
491
+ ### Training Dataset
492
+
493
+ #### amazon_2023_items_processed_filtered
494
+
495
+ * Dataset: [amazon_2023_items_processed_filtered](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered) at [6b58dd1](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered/tree/6b58dd18854109aac31652e941c667725f6352f0)
496
+ * Size: 441,985 training samples
497
+ * Columns: <code>question</code> and <code>passage_text</code>
498
+ * Approximate statistics based on the first 1000 samples:
499
+ | | question | passage_text |
500
+ |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
501
+ | type | string | string |
502
+ | details | <ul><li>min: 9 tokens</li><li>mean: 25.35 tokens</li><li>max: 78 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 194.73 tokens</li><li>max: 892 tokens</li></ul> |
503
+ * Samples:
504
+ | question | passage_text |
505
+ |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
506
+ | <code>title: <br>My Little Pony Twisty Twirly Hairstyles Applejack</code> | <code>description<br>Style Applejack ponys hair for the best night ever Inspired by the My Little Pony Friendship is Magic television series Applejack is going to the Grand Galloping Gala at Canterlot Castle This Applejack figure is poseable with waxinfused yarn hair that can be styled up in barrettes or fancy braids Theres many style possibilities This pony figure comes with 10 accessories for making Applejack ponys beautiful yellow hair look gorgeous for the gala Also look for Twisty Twirly Hairstyles Rarity and Pinkie Pie figures and mix and match accessories for more hairstyling fun Each sold separately Subject to availability My Little Pony and all related characters are trademarks of HasbroInspired by the My Little Pony Friendship is Magic television series<br>Pretend to do Applejack ponys hair with clips and barrettes<br>Poseable waxinfused yarn hair can be styled up or down<br>Figure scale 3 inches<br>Includes pony figure and 10 accessories</code> |
507
+ | <code>title: <br>TUANTUAN 4 Pcs 16 Scale Dollhouse Miniature Furniture Folding Chair Foldable Chair Model Folding Doll Chairs Decor Black Foldable Chair for Figure Accessories</code> | <code>description<br>MATERIALPlasticAlloyColour BlackSize Folding Size 173 85CmExpanding Size 8895153Cm GOOD QUALITYThese Mini Folding Chairs Are Made Of Plastic And Alloy Materials Which Are Environmentally Friendly And NonToxic Strong And Durable Stand Firm UNIQUE DESIGNMini Folding Chair Design Looks Like A Real Chair Small And Exquisite Realistic Appearance More Interesting To Match With Dolls WIDELY USEDThese Mini Folding Chairs Are 16 Scene Accessories Not For Real People Use And Are Exclusively For 12Inch Dolls Suitable For Doll House Accessories Miniature Furniture House Model Decoration It Can Also Be Used As A Mobile Phone Holder PACKAGE INCLUDE4 Pcs Dolls Folding ChairsMATERIALPlasticAlloyColour BlackSize Folding Size 173 85CmExpanding Size 8895153Cm<br>GOOD QUALITYThese Mini Folding Chairs Are Made Of Plastic And Alloy Materials Which Are Environmentally Friendly And NonToxic Strong And Durable Stand Firm<br>UNIQUE DESIGNMini Folding Chair Design Looks Like A Real Chair Small And Exquis...</code> |
508
+ | <code>title: <br>Transformers Movie RD27 NEST Sky stalker japan import</code> | <code>description<br>Manufactured by Takara Tomy Product name Transformers Revenge of the Fallen Scout class RD27 NEST Sky stalker TRANSFORMERS REVENGE OF THE FALLEN SCUOT CLASS NEST SKYSTALKERb safety standards b ST Mark<br>b target Gender b boy<br>b Age b from 5 years</code> |
509
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
510
+ ```json
511
+ {
512
+ "scale": 20.0,
513
+ "similarity_fct": "cos_sim",
514
+ "mini_batch_size": 32,
515
+ "gather_across_devices": false
516
+ }
517
+ ```
518
+
519
+ ### Evaluation Dataset
520
+
521
+ #### amazon_2023_items_processed_filtered
522
+
523
+ * Dataset: [amazon_2023_items_processed_filtered](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered) at [6b58dd1](https://huggingface.co/datasets/guyhadad01/Amazon_2023_items_processed_filtered/tree/6b58dd18854109aac31652e941c667725f6352f0)
524
+ * Size: 110,497 evaluation samples
525
+ * Columns: <code>question</code> and <code>passage_text</code>
526
+ * Approximate statistics based on the first 1000 samples:
527
+ | | question | passage_text |
528
+ |:--------|:---------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
529
+ | type | string | string |
530
+ | details | <ul><li>min: 7 tokens</li><li>mean: 25.1 tokens</li><li>max: 93 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 189.31 tokens</li><li>max: 948 tokens</li></ul> |
531
+ * Samples:
532
+ | question | passage_text |
533
+ |:----------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
534
+ | <code>title: <br>Reaper Miniatures Tengu Warrior 03774 Dark Heaven Legends RPG DD Mini Figure</code> | <code>description<br>Tengu WarriorBy Artist Derek SchubertDark Heaven Legends25mm Heroic Scale Fantasy MiniaturesIntegral builtin basesUnpainted metal models that may require assembly If assembly is needed glue or putty is required not includedHuge selection of characters and monsters for fantasy roleplayers miniatures painters and wargamersDark Heaven Legends is the premier 25mm Heroic Scale fantasy line for miniature painters roleplayers and wargamers Over the last thirteen years the Dark Heaven line has produced over 1300 fantasy miniatures designed and crafted by the top miniatures sculptors in the worldFound in Reaper Miniatures Category Dark Heaven LegendsFound in Reaper Miniatures Category Dark Heaven Legends<br>Unpainted Metal Miniature<br>Figure designed by artist Derek Schubert</code> |
535
+ | <code>title: <br>Marvel Legends Black Widow Action Figure</code> | <code>description<br>Scale 6 Inch Format Action Figures Packaging Clamshell Manufacturer Toy Biz Natalia Romanova aka Natasha Romanoff is the Black Widow as beautiful as Russian spy as their has ever been and as deadly as her namesake Working with everyone from SHIELD to the Avengers to the Marvel Knights she is widely known as a valuable asset to any crimefighting team In this 6inch Marvel Legends her beauty and power are perfectly captured in the sculpt of her skintight black outfit We receive both US and Canadian Cases You will get either English bilingual or trilingual carded figures based on availability Please understand this before ordering Package condition may vary due to size and weight We do not guarantee mint on cardMarvel Legends 6 Inch Action Figure Man Thing Series Black Widow<br>Marvel<br>Toybiz<br>6 Inch</code> |
536
+ | <code>title: <br>Cabbage Patch Kids Cutie Dash The Deer 9 Collectible Adoptable Baby Doll Toy Officially Licensed Gift for Girls and Boys</code> | <code>description<br>This Holiday complete your collection of Cabbage Patch Cuties by adopting the Dash Reindeer doll Each Cabbage Patch Cutie features a snuggly onesie with adjustable hood and can really suck its thumb Cutie dolls come with the traditional signature baby powder scent that Cabbage Patch Kid fans know and love Take the Oath of Adoption Cabbage Patch Kid Cuties are numbered for collectibility and make a great toy gift for boys and girls who love Cabbage Patch Kids Officially licensed Cabbage Patch Kids merchandise 3 Pack Each Measures approximately 9 tall Comes in sealed polybag packaging with official Cabbage Patch tagThis Holiday complete your collection of Cabbage Patch Cuties by adopting Dash the Reindeer baby doll<br>Its a Cabbage Christmas Each Cabbage Patch Cutie features a snuggly onesie with adjustable hood and can really suck its thumb<br>Cutie baby dolls come with the traditional signature baby powder scent that Cabbage Patch Kid fans know and love<br>Take the Oath of Adoption ...</code> |
537
+ * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
538
+ ```json
539
+ {
540
+ "scale": 20.0,
541
+ "similarity_fct": "cos_sim",
542
+ "mini_batch_size": 32,
543
+ "gather_across_devices": false
544
+ }
545
+ ```
546
+
547
+ ### Training Hyperparameters
548
+ #### Non-Default Hyperparameters
549
+
550
+ - `eval_strategy`: steps
551
+ - `per_device_train_batch_size`: 2
552
+ - `per_device_eval_batch_size`: 1
553
+ - `gradient_accumulation_steps`: 32
554
+ - `torch_empty_cache_steps`: 50
555
+ - `learning_rate`: 2e-05
556
+ - `num_train_epochs`: 1
557
+ - `warmup_ratio`: 0.1
558
+ - `fp16`: True
559
+ - `dataloader_num_workers`: 4
560
+ - `push_to_hub`: True
561
+ - `hub_model_id`: rabaevn/EncodeRec
562
+ - `hub_strategy`: checkpoint
563
+ - `gradient_checkpointing`: True
564
+ - `batch_sampler`: no_duplicates
565
+
566
+ #### All Hyperparameters
567
+ <details><summary>Click to expand</summary>
568
+
569
+ - `overwrite_output_dir`: False
570
+ - `do_predict`: False
571
+ - `eval_strategy`: steps
572
+ - `prediction_loss_only`: True
573
+ - `per_device_train_batch_size`: 2
574
+ - `per_device_eval_batch_size`: 1
575
+ - `per_gpu_train_batch_size`: None
576
+ - `per_gpu_eval_batch_size`: None
577
+ - `gradient_accumulation_steps`: 32
578
+ - `eval_accumulation_steps`: None
579
+ - `torch_empty_cache_steps`: 50
580
+ - `learning_rate`: 2e-05
581
+ - `weight_decay`: 0.0
582
+ - `adam_beta1`: 0.9
583
+ - `adam_beta2`: 0.999
584
+ - `adam_epsilon`: 1e-08
585
+ - `max_grad_norm`: 1.0
586
+ - `num_train_epochs`: 1
587
+ - `max_steps`: -1
588
+ - `lr_scheduler_type`: linear
589
+ - `lr_scheduler_kwargs`: {}
590
+ - `warmup_ratio`: 0.1
591
+ - `warmup_steps`: 0
592
+ - `log_level`: passive
593
+ - `log_level_replica`: warning
594
+ - `log_on_each_node`: True
595
+ - `logging_nan_inf_filter`: True
596
+ - `save_safetensors`: True
597
+ - `save_on_each_node`: False
598
+ - `save_only_model`: False
599
+ - `restore_callback_states_from_checkpoint`: False
600
+ - `no_cuda`: False
601
+ - `use_cpu`: False
602
+ - `use_mps_device`: False
603
+ - `seed`: 42
604
+ - `data_seed`: None
605
+ - `jit_mode_eval`: False
606
+ - `use_ipex`: False
607
+ - `bf16`: False
608
+ - `fp16`: True
609
+ - `fp16_opt_level`: O1
610
+ - `half_precision_backend`: auto
611
+ - `bf16_full_eval`: False
612
+ - `fp16_full_eval`: False
613
+ - `tf32`: None
614
+ - `local_rank`: 0
615
+ - `ddp_backend`: None
616
+ - `tpu_num_cores`: None
617
+ - `tpu_metrics_debug`: False
618
+ - `debug`: []
619
+ - `dataloader_drop_last`: False
620
+ - `dataloader_num_workers`: 4
621
+ - `dataloader_prefetch_factor`: None
622
+ - `past_index`: -1
623
+ - `disable_tqdm`: False
624
+ - `remove_unused_columns`: True
625
+ - `label_names`: None
626
+ - `load_best_model_at_end`: False
627
+ - `ignore_data_skip`: False
628
+ - `fsdp`: []
629
+ - `fsdp_min_num_params`: 0
630
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
631
+ - `fsdp_transformer_layer_cls_to_wrap`: None
632
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
633
+ - `deepspeed`: None
634
+ - `label_smoothing_factor`: 0.0
635
+ - `optim`: adamw_torch_fused
636
+ - `optim_args`: None
637
+ - `adafactor`: False
638
+ - `group_by_length`: False
639
+ - `length_column_name`: length
640
+ - `ddp_find_unused_parameters`: None
641
+ - `ddp_bucket_cap_mb`: None
642
+ - `ddp_broadcast_buffers`: False
643
+ - `dataloader_pin_memory`: True
644
+ - `dataloader_persistent_workers`: False
645
+ - `skip_memory_metrics`: True
646
+ - `use_legacy_prediction_loop`: False
647
+ - `push_to_hub`: True
648
+ - `resume_from_checkpoint`: None
649
+ - `hub_model_id`: rabaevn/EncodeRec
650
+ - `hub_strategy`: checkpoint
651
+ - `hub_private_repo`: None
652
+ - `hub_always_push`: False
653
+ - `hub_revision`: None
654
+ - `gradient_checkpointing`: True
655
+ - `gradient_checkpointing_kwargs`: None
656
+ - `include_inputs_for_metrics`: False
657
+ - `include_for_metrics`: []
658
+ - `eval_do_concat_batches`: True
659
+ - `fp16_backend`: auto
660
+ - `push_to_hub_model_id`: None
661
+ - `push_to_hub_organization`: None
662
+ - `mp_parameters`:
663
+ - `auto_find_batch_size`: False
664
+ - `full_determinism`: False
665
+ - `torchdynamo`: None
666
+ - `ray_scope`: last
667
+ - `ddp_timeout`: 1800
668
+ - `torch_compile`: False
669
+ - `torch_compile_backend`: None
670
+ - `torch_compile_mode`: None
671
+ - `include_tokens_per_second`: False
672
+ - `include_num_input_tokens_seen`: False
673
+ - `neftune_noise_alpha`: None
674
+ - `optim_target_modules`: None
675
+ - `batch_eval_metrics`: False
676
+ - `eval_on_start`: False
677
+ - `use_liger_kernel`: False
678
+ - `liger_kernel_config`: None
679
+ - `eval_use_gather_object`: False
680
+ - `average_tokens_across_devices`: False
681
+ - `prompts`: None
682
+ - `batch_sampler`: no_duplicates
683
+ - `multi_dataset_batch_sampler`: proportional
684
+ - `router_mapping`: {}
685
+ - `learning_rate_mapping`: {}
686
+
687
+ </details>
688
+
689
+ ### Training Logs
690
+ | Epoch | Step | Training Loss | Validation Loss | dev-eval_cosine_ndcg@10 |
691
+ |:------:|:----:|:-------------:|:---------------:|:-----------------------:|
692
+ | 0.0072 | 50 | 0.0428 | - | - |
693
+ | 0.0145 | 100 | 0.0118 | - | - |
694
+ | 0.0217 | 150 | 0.0087 | - | - |
695
+ | 0.0290 | 200 | 0.0064 | - | - |
696
+ | 0.0362 | 250 | 0.0069 | - | - |
697
+ | 0.0434 | 300 | 0.0088 | - | - |
698
+ | 0.0507 | 350 | 0.0055 | - | - |
699
+ | 0.0579 | 400 | 0.0067 | - | - |
700
+ | 0.0652 | 450 | 0.0098 | - | - |
701
+ | 0.0724 | 500 | 0.0096 | - | - |
702
+ | 0.0796 | 550 | 0.0104 | - | - |
703
+ | 0.0869 | 600 | 0.0155 | - | - |
704
+ | 0.0941 | 650 | 0.0109 | - | - |
705
+ | 0.1014 | 700 | 0.0144 | - | - |
706
+ | 0.1086 | 750 | 0.0109 | - | - |
707
+ | 0.1158 | 800 | 0.0107 | 0.0 | 0.4378 |
708
+
709
+
710
+ ### Framework Versions
711
+ - Python: 3.10.18
712
+ - Sentence Transformers: 5.1.0
713
+ - Transformers: 4.55.2
714
+ - PyTorch: 2.8.0+cu126
715
+ - Accelerate: 1.10.0
716
+ - Datasets: 4.1.1
717
+ - Tokenizers: 0.21.4
718
+
719
+ ## Citation
720
+
721
+ ### BibTeX
722
+
723
+ #### Sentence Transformers
724
+ ```bibtex
725
+ @inproceedings{reimers-2019-sentence-bert,
726
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
727
+ author = "Reimers, Nils and Gurevych, Iryna",
728
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
729
+ month = "11",
730
+ year = "2019",
731
+ publisher = "Association for Computational Linguistics",
732
+ url = "https://arxiv.org/abs/1908.10084",
733
+ }
734
+ ```
735
+
736
+ #### CachedMultipleNegativesRankingLoss
737
+ ```bibtex
738
+ @misc{gao2021scaling,
739
+ title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
740
+ author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
741
+ year={2021},
742
+ eprint={2101.06983},
743
+ archivePrefix={arXiv},
744
+ primaryClass={cs.LG}
745
+ }
746
+ ```
747
+
748
+ <!--
749
+ ## Glossary
750
+
751
+ *Clearly define terms in order to be accessible across audiences.*
752
+ -->
753
+
754
+ <!--
755
+ ## Model Card Authors
756
+
757
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
758
+ -->
759
+
760
+ <!--
761
+ ## Model Card Contact
762
+
763
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
764
+ -->
last-checkpoint/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_sliding_window_pattern": 6,
3
+ "architectures": [
4
+ "Gemma3TextModel"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "attn_logit_softcapping": null,
9
+ "bos_token_id": 2,
10
+ "dtype": "float32",
11
+ "eos_token_id": 1,
12
+ "final_logit_softcapping": null,
13
+ "head_dim": 256,
14
+ "hidden_activation": "gelu_pytorch_tanh",
15
+ "hidden_size": 768,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 1152,
18
+ "layer_types": [
19
+ "sliding_attention",
20
+ "sliding_attention",
21
+ "sliding_attention",
22
+ "sliding_attention",
23
+ "sliding_attention",
24
+ "full_attention",
25
+ "sliding_attention",
26
+ "sliding_attention",
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "full_attention",
31
+ "sliding_attention",
32
+ "sliding_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "full_attention",
37
+ "sliding_attention",
38
+ "sliding_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "sliding_attention",
42
+ "full_attention"
43
+ ],
44
+ "max_position_embeddings": 2048,
45
+ "model_type": "gemma3_text",
46
+ "num_attention_heads": 3,
47
+ "num_hidden_layers": 24,
48
+ "num_key_value_heads": 1,
49
+ "pad_token_id": 0,
50
+ "query_pre_attn_scalar": 256,
51
+ "rms_norm_eps": 1e-06,
52
+ "rope_local_base_freq": 10000.0,
53
+ "rope_scaling": null,
54
+ "rope_theta": 1000000.0,
55
+ "sliding_window": 512,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.55.2",
58
+ "use_bidirectional_attention": true,
59
+ "use_cache": true,
60
+ "vocab_size": 262144
61
+ }
last-checkpoint/config_sentence_transformers.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.0",
5
+ "transformers": "4.55.2",
6
+ "pytorch": "2.8.0+cu126"
7
+ },
8
+ "prompts": {
9
+ "query": "task: search result | query: ",
10
+ "document": "title: none | text: ",
11
+ "BitextMining": "task: search result | query: ",
12
+ "Clustering": "task: clustering | query: ",
13
+ "Classification": "task: classification | query: ",
14
+ "InstructionRetrieval": "task: code retrieval | query: ",
15
+ "MultilabelClassification": "task: classification | query: ",
16
+ "PairClassification": "task: sentence similarity | query: ",
17
+ "Reranking": "task: search result | query: ",
18
+ "Retrieval": "task: search result | query: ",
19
+ "Retrieval-query": "task: search result | query: ",
20
+ "Retrieval-document": "title: none | text: ",
21
+ "STS": "task: sentence similarity | query: ",
22
+ "Summarization": "task: summarization | query: "
23
+ },
24
+ "default_prompt_name": null,
25
+ "similarity_fn_name": "cosine"
26
+ }
last-checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d0a8d7503d1af435df29849c4817ef18fb42a003525d9423e93223b5249494c
3
+ size 1211486072
last-checkpoint/modules.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
+ },
20
+ {
21
+ "idx": 3,
22
+ "name": "3",
23
+ "path": "3_Dense",
24
+ "type": "sentence_transformers.models.Dense"
25
+ },
26
+ {
27
+ "idx": 4,
28
+ "name": "4",
29
+ "path": "4_Normalize",
30
+ "type": "sentence_transformers.models.Normalize"
31
+ }
32
+ ]
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05af532ff1f0f7d8764ee43ca0b08010a02c69b43e885b25420ac54739ee75cb
3
+ size 2460923467
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f19a64f9dc63044de5725a3b3c87cc7f257545ada76d14adf27dea1b135b987
3
+ size 14645
last-checkpoint/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210dfdf1da0d53fc65a8c2f0637cf11667fbe7cbcb55638a3de7071dfc9d0989
3
+ size 1383
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed5eff91f51023e0d95c626bbf8be8d78905592f0d49dc7de10a1636eeea602
3
+ size 1465
last-checkpoint/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 2048,
3
+ "do_lower_case": false
4
+ }
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
last-checkpoint/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:216e2a79606fe879c9f17c529c71cd241338407fd5646b595ffd3c4b9ea1d503
3
+ size 33385262
last-checkpoint/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.11584077323716135,
6
+ "eval_steps": 800,
7
+ "global_step": 800,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.007240048327322585,
14
+ "grad_norm": 2.589949131011963,
15
+ "learning_rate": 1.4182344428364688e-06,
16
+ "loss": 0.0428,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.01448009665464517,
21
+ "grad_norm": 10.996792793273926,
22
+ "learning_rate": 2.8654124457308254e-06,
23
+ "loss": 0.0118,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.021720144981967754,
28
+ "grad_norm": 1.4595743417739868,
29
+ "learning_rate": 4.3125904486251816e-06,
30
+ "loss": 0.0087,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.02896019330929034,
35
+ "grad_norm": 1.1282789707183838,
36
+ "learning_rate": 5.759768451519537e-06,
37
+ "loss": 0.0064,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.03620024163661292,
42
+ "grad_norm": 6.456222057342529,
43
+ "learning_rate": 7.206946454413893e-06,
44
+ "loss": 0.0069,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.04344028996393551,
49
+ "grad_norm": 0.03427216783165932,
50
+ "learning_rate": 8.65412445730825e-06,
51
+ "loss": 0.0088,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.05068033829125809,
56
+ "grad_norm": 1.5523200035095215,
57
+ "learning_rate": 1.0101302460202607e-05,
58
+ "loss": 0.0055,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.05792038661858068,
63
+ "grad_norm": 0.37769994139671326,
64
+ "learning_rate": 1.1548480463096963e-05,
65
+ "loss": 0.0067,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.06516043494590326,
70
+ "grad_norm": 0.15266789495944977,
71
+ "learning_rate": 1.2995658465991319e-05,
72
+ "loss": 0.0098,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.07240048327322585,
77
+ "grad_norm": 0.2500436305999756,
78
+ "learning_rate": 1.4442836468885674e-05,
79
+ "loss": 0.0096,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.07964053160054843,
84
+ "grad_norm": 0.6040284037590027,
85
+ "learning_rate": 1.5890014471780032e-05,
86
+ "loss": 0.0104,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 0.08688057992787102,
91
+ "grad_norm": 8.891999244689941,
92
+ "learning_rate": 1.7337192474674386e-05,
93
+ "loss": 0.0155,
94
+ "step": 600
95
+ },
96
+ {
97
+ "epoch": 0.0941206282551936,
98
+ "grad_norm": 0.25315049290657043,
99
+ "learning_rate": 1.8784370477568743e-05,
100
+ "loss": 0.0109,
101
+ "step": 650
102
+ },
103
+ {
104
+ "epoch": 0.10136067658251618,
105
+ "grad_norm": 8.926114082336426,
106
+ "learning_rate": 1.9974259974259977e-05,
107
+ "loss": 0.0144,
108
+ "step": 700
109
+ },
110
+ {
111
+ "epoch": 0.10860072490983877,
112
+ "grad_norm": 4.814042091369629,
113
+ "learning_rate": 1.9813384813384816e-05,
114
+ "loss": 0.0109,
115
+ "step": 750
116
+ },
117
+ {
118
+ "epoch": 0.11584077323716135,
119
+ "grad_norm": 1.084911823272705,
120
+ "learning_rate": 1.9652509652509656e-05,
121
+ "loss": 0.0107,
122
+ "step": 800
123
+ },
124
+ {
125
+ "epoch": 0.11584077323716135,
126
+ "eval_dev-eval_cosine_accuracy@1": 0.308,
127
+ "eval_dev-eval_cosine_accuracy@10": 0.585,
128
+ "eval_dev-eval_cosine_accuracy@3": 0.437,
129
+ "eval_dev-eval_cosine_accuracy@5": 0.507,
130
+ "eval_dev-eval_cosine_map@100": 0.3994706245247452,
131
+ "eval_dev-eval_cosine_mrr@10": 0.39167420634920597,
132
+ "eval_dev-eval_cosine_ndcg@10": 0.43779247661127096,
133
+ "eval_dev-eval_cosine_precision@1": 0.308,
134
+ "eval_dev-eval_cosine_precision@10": 0.0585,
135
+ "eval_dev-eval_cosine_precision@3": 0.1456666666666667,
136
+ "eval_dev-eval_cosine_precision@5": 0.10139999999999998,
137
+ "eval_dev-eval_cosine_recall@1": 0.308,
138
+ "eval_dev-eval_cosine_recall@10": 0.585,
139
+ "eval_dev-eval_cosine_recall@3": 0.437,
140
+ "eval_dev-eval_cosine_recall@5": 0.507,
141
+ "eval_loss": 0.0,
142
+ "eval_runtime": 20003.9509,
143
+ "eval_samples_per_second": 5.524,
144
+ "eval_steps_per_second": 5.524,
145
+ "step": 800
146
+ }
147
+ ],
148
+ "logging_steps": 50,
149
+ "max_steps": 6907,
150
+ "num_input_tokens_seen": 0,
151
+ "num_train_epochs": 1,
152
+ "save_steps": 800,
153
+ "stateful_callbacks": {
154
+ "TrainerControl": {
155
+ "args": {
156
+ "should_epoch_stop": false,
157
+ "should_evaluate": false,
158
+ "should_log": false,
159
+ "should_save": true,
160
+ "should_training_stop": false
161
+ },
162
+ "attributes": {}
163
+ }
164
+ },
165
+ "total_flos": 0.0,
166
+ "train_batch_size": 2,
167
+ "trial_name": null,
168
+ "trial_params": null
169
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78dae8bdc5118e92f8c18802b03dd66fed14d0fbdddccbecbd325018996bc672
3
+ size 6161