Add new SentenceTransformer model.
Files changed:
- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +547 -0
- config.json +67 -0
- config_sentence_transformers.json +16 -0
- custom_st.py +229 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +61 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 1024,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
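For reference, the configuration above selects plain masked mean pooling: token embeddings are averaged wherever the attention mask is 1. A minimal sketch of that operation (illustrative tensors only, not this repo's code):

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # pooling_mode_mean_tokens: average token vectors where the mask is 1.
    mask = attention_mask.unsqueeze(-1).float()        # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)      # (batch, 1024)
    counts = mask.sum(dim=1).clamp(min=1e-9)           # guard against empty masks
    return summed / counts

emb = mean_pool(torch.randn(2, 16, 1024), torch.ones(2, 16))
print(emb.shape)  # torch.Size([2, 1024])
```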
README.md
ADDED
@@ -0,0 +1,547 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:50000
- loss:OnlineContrastiveLoss
base_model: ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_02
widget:
- source_sentence: 'my brain was screaming why do you keep watching. turn it off and
    go to bed. but couch potatoness won out and and i watched until the predictable
    ending. i guess when it bruce campbell i need to give it a chance. i find it hard
    to complain about a low budget movie purely because of the low budget. time and
    time again we see low budget movies proving that a good story and good writing
    and good acting are enough to make a good movie. ted and bruce got their start
    on just such a movie and but they do not seem to learn from sam that it takes
    a bit more than slapping it on film to make a movie. it sad and too and because
    bruce has always been a favorite. after the 70 and 80 and i just can not believe
    movies this bad are still being made. bruce and i am really disappointed. '
  sentences:
  - negative negative negative negative negative
  - negative negative negative negative negative
  - negative negative negative negative negative
- source_sentence: 'despite having a very pretty leading lady (rosita arenas and one
    of my boy crushes) and the acting and the direction are examples of what not to
    do while making a movie. placed in southern mexico and popoca and the aztec mummy
    (real aztecs and by the way and did not made mummies) has been waken up by the
    lead characters and starts making trouble in mexico city suburbia and during the
    first movie (the aztec mummy). in this second part and the leading man and woman
    want to find th mummy and put it in its final resting place (a fireplace would
    have been my first choice. )into this appears the bat and a criminal master mindless
    stereotype of a criminal genius who creates a human robot (some idiot inside a
    robot suit) to control popoca and (get this) take over the world. the final match
    between the robot and the mummy is hilarious and some of the worst choreography
    ever witnessed. the funniest part is that this movie was made and released by
    a serious mexican movie studio. the acting is just as awful hearing the movie
    in spanish as it is in english (they dubbed the over acting. ). you should watch
    this movie through mst represent3000. the comments are even funnier. '
  sentences:
  - positive positive positive positive positive
  - negative negative negative negative negative
  - positive positive positive positive positive
- source_sentence: 'let describe larry as an interviewer represent a complete suckhole
    and in every way possible. he laughs at all his guests jokes and he asks the most
    boring questions and he would never dare contradict them. he hits me as the type
    of person who wants to be liked by. everyone. friendly and boring and olpredictable
    harry. he probably owes his success to being a dream interviewer for celebrities
    because they do not get bombarded with what we and the people and want to know
    and have a right to know. let put it this way represent he interviews as if he
    in a red country. 0 negative and 2 for the guests that come on the show yet it
    all seems pointless when larry starts asking his mind numbing questions such as
    what it like to be a mother. followed by the usual answer along the lines of being
    a mother is the greatest thing that ever happened to me represent it wonderful
    and but tiring. '
  sentences:
  - negative negative negative negative negative
  - positive positive positive positive positive
  - negative negative negative negative negative
- source_sentence: 'i was excited about this movie after reading other reviews. what
    a disappointment. there are so many ways that this movie is bad. the computer
    graphics were lacking to say the least. i found the acting stiff and unbelievable.
    watch the sand as the lost e pods (what an original name. )are found. where did
    all the tracks come from. i immediately recognized portions similar to other movies
    and ie alien and pitch black. come on and one huge ship to transport one prisoner.
    and what is with the prisoner. does he speak and can he speak. i kept waiting
    for something to tie the bits of the story together and but it never came. if
    this movie was made on a low budget and it shows. the only part of the movie i
    liked was when it finally ended. i do not mean that i liked the ending and i do
    not. i just liked the fact that it was over. a trip to the dentist would have
    been more enjoyable. in my opinion. do not waste your time on this one. '
  sentences:
  - positive positive positive positive positive
  - positive positive positive positive positive
  - negative negative negative negative negative
- source_sentence: 'rose does anything actually happen in this episode. it introduces
    our two leads and a slow witted grinning idiot of a doctor and an utterly un interesting
    companion. there no plot to speak of and childish humour and mixed with some extremely
    bad pacing and incidental music. what else is there to say and really. the end
    of the world a marginal improvement and in that we see our first outer space
    scenario. subsequently brought down by poor contemporary humour and paper thin
    logic and very poor pacing and and tired sf clichés. the unquiet dead best episode
    to date showing what can happen when someone knows how to structure an episode
    and write interesting character dialogue and and integrate an intriguing plot.
    let down solely by the doctor and rose. aliens of london or world war three doctor
    who degenerates into farce. what more can be said. penelope wilton brings the
    proceedings a little gravity and trying her best in dire circumstances. some poorly
    written and and out of place soap opera elements come to the fore in these two
    episodes and and a return to poor pacing and bad plotting and cringe worthy humour
    or satire. dalek not great and however still far above the rtd fare to date.
    the pacing and script are all fine (though the doctor and rose still irritate).
    the effects and menace of the dalek are introduced well. the finale and however
    and took an interesting premise that reduced the doctor most notorious foe and
    into a cuddly touchy feely mess and and turning a previously un seen menace and
    to a blue rubber squid that looked like a child toy. the long game the first
    rtd script to show any plot and even if it was in a clichéd 80s style. still and
    it was marred somewhat by his usual over reliance on juvenile jokes and placing
    it too far in the future to make logical sense and and again poor pacing. not
    as bad as his previous efforts and but instantly forgettable. father day the
    initial premise could have been vaguely interesting and but common sense and logic
    abandon this episode from the very beginning. also and we are treated to a whole
    episode of soap opera. before you start thinking this is all about characterization
    and remember and there a big difference between lame soap opera and characterization.
    on the plus side and it does prove rtd isn not the worst script writer so far.
    the empty child or the doctor dances this started off in a mediocre way and with
    some cringe worthy moments and and some illogical mistakes that even a primary
    school pupil wouldn not make (well lit windows in a blackout and anyone. ). after
    this and the first part takes a more interesting and sinister turn. florence hoath
    truly steals these episodes and showing us what an interesting companion could
    have been like. she could also act. instead we get the annoying and politically
    correct captain jack as the new companion. the conclusion was a little hasty and
    but sufficient. the pacing and script improved with a reasonably good storyline
    and making these two episodes quite atmospheric and intriguing. boom town i have
    to be honest and except for a few examples and i had been so disillusioned by
    the current series and that upon seeing the trailer for another slitheen episode
    and i gave up and do not subject myself to the torture. bad wolf reality tv and
    arguably the worst facet of the modern media and is basically used as the premise.
    there no subtlety whatsoever. do we get any interesting social commentary as in
    the likes of the running man or truman show. no and of course not. this in an
    rtd episode and so theyre basically here to cynically try and pull in the audience
    of said shows. once again and logic goes out the window and as were placed 200
    and 000 something years in the future. rtd tries pointlessly to shoe horn in some
    over arcing story here and with no relevance other than it own existence and when
    the villains are revealed at the end. they make empty threats and and the doctor
    grins once more like an idiot for the climax. faster paced for the most part and
    than rtd other efforts and this has one or two interesting moments. otherwise
    and another lacklustre instalment. the parting of the ways the big finale. more
    of a damp squid and literally. all of the dalek menace set up in dalek is brought
    crashing down and as they become rather pathetic. so many plot holes riddle this
    episode and with typically poor contrivances. daleks want to harvest humans as
    daleks and but then vaporize entire continents. dalek can vaporize said continents
    and but not destroy the tardis in space. the tardis is now indestructible and
    can land anywhere and even over people so they can be saved in it. this ability
    can not be used to easily destroy the dalek god. the daleks can vaporize entire
    continents and but do not just nuke satellite 5 to destroy the doctor and and
    instead let him play around. the doctor is a pathetic coward without the conviction
    of his actions and after eradicating his whole species to try and eliminate the
    daleks. these and many other holes aside and we are treated to the lamest dues
    ex machina solution ever conceived and joined with a near pointless story arc.
    so what can we say about the new series and all in all. would this have gained
    a second series if it were anything other than doctor who and with rtd behind
    it. would most of the episodes have been seen as anything other than un original
    and forgettable and if they were anything other than doctor who and and had rtd
    name attached. i think not. some people would have us think we can not say anything
    against rtd and since we owe him for bringing doctor who back to our screens.
    however and this at the expense of good characters and stories. personally and
    i would rather not have a poorly planned and ill conceived product and churned
    out at that price. i would rather wait till someone could come along and make
    a genuine effort. for the most part and this is the kind of puerile rubbish that
    gives sf a bad name and marring what is otherwise the most creative genre. '
  sentences:
  - negative negative negative negative negative
  - positive positive positive positive positive
  - negative negative negative negative negative
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_02

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_02](https://huggingface.co/ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_02). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_02](https://huggingface.co/ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_02) <!-- at revision 3cda0a0ca15fe6b796bec4133ec193c86ed02ca5 -->
- **Maximum Sequence Length:** 8194 tokens
- **Output Dimensionality:** 1024 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (transformer): Transformer(
    (auto_model): XLMRobertaLoRA(
      (roberta): XLMRobertaModel(
        (embeddings): XLMRobertaEmbeddings(
          (word_embeddings): ParametrizedEmbedding(
            250002, 1024, padding_idx=1
            (parametrizations): ModuleDict(
              (weight): ParametrizationList(
                (0): LoRAParametrization()
              )
            )
          )
          (token_type_embeddings): ParametrizedEmbedding(
            1, 1024
            (parametrizations): ModuleDict(
              (weight): ParametrizationList(
                (0): LoRAParametrization()
              )
            )
          )
        )
        (emb_drop): Dropout(p=0.1, inplace=False)
        (emb_ln): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (encoder): XLMRobertaEncoder(
          (layers): ModuleList(
            (0-23): 24 x Block(
              (mixer): MHA(
                (rotary_emb): RotaryEmbedding()
                (Wqkv): ParametrizedLinearResidual(
                  in_features=1024, out_features=3072, bias=True
                  (parametrizations): ModuleDict(
                    (weight): ParametrizationList(
                      (0): LoRAParametrization()
                    )
                  )
                )
                (inner_attn): FlashSelfAttention(
                  (drop): Dropout(p=0.1, inplace=False)
                )
                (inner_cross_attn): FlashCrossAttention(
                  (drop): Dropout(p=0.1, inplace=False)
                )
                (out_proj): ParametrizedLinear(
                  in_features=1024, out_features=1024, bias=True
                  (parametrizations): ModuleDict(
                    (weight): ParametrizationList(
                      (0): LoRAParametrization()
                    )
                  )
                )
              )
              (dropout1): Dropout(p=0.1, inplace=False)
              (drop_path1): StochasticDepth(p=0.0, mode=row)
              (norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
              (mlp): Mlp(
                (fc1): ParametrizedLinear(
                  in_features=1024, out_features=4096, bias=True
                  (parametrizations): ModuleDict(
                    (weight): ParametrizationList(
                      (0): LoRAParametrization()
                    )
                  )
                )
                (fc2): ParametrizedLinear(
                  in_features=4096, out_features=1024, bias=True
                  (parametrizations): ModuleDict(
                    (weight): ParametrizationList(
                      (0): LoRAParametrization()
                    )
                  )
                )
              )
              (dropout2): Dropout(p=0.1, inplace=False)
              (drop_path2): StochasticDepth(p=0.0, mode=row)
              (norm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
            )
          )
        )
        (pooler): XLMRobertaPooler(
          (dense): ParametrizedLinear(
            in_features=1024, out_features=1024, bias=True
            (parametrizations): ModuleDict(
              (weight): ParametrizationList(
                (0): LoRAParametrization()
              )
            )
          )
          (activation): Tanh()
        )
      )
    )
  )
  (pooler): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (normalizer): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub (custom_st.py requires trusting remote code)
model = SentenceTransformer("ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_03", trust_remote_code=True)
# Run inference
sentences = [
'rose does anything actually happen in this episode. it introduces our two leads and a slow witted grinning idiot of a doctor and an utterly un interesting companion. there no plot to speak of and childish humour and mixed with some extremely bad pacing and incidental music. what else is there to say and really. the end of the world a marginal improvement and in that we see our first outer space scenario. subsequently brought down by poor contemporary humour and paper thin logic and very poor pacing and and tired sf clichés. the unquiet dead best episode to date showing what can happen when someone knows how to structure an episode and write interesting character dialogue and and integrate an intriguing plot. let down solely by the doctor and rose. aliens of london or world war three doctor who degenerates into farce. what more can be said. penelope wilton brings the proceedings a little gravity and trying her best in dire circumstances. some poorly written and and out of place soap opera elements come to the fore in these two episodes and and a return to poor pacing and bad plotting and cringe worthy humour or satire. dalek not great and however still far above the rtd fare to date. the pacing and script are all fine (though the doctor and rose still irritate). the effects and menace of the dalek are introduced well. the finale and however and took an interesting premise that reduced the doctor most notorious foe and into a cuddly touchy feely mess and and turning a previously un seen menace and to a blue rubber squid that looked like a child toy. the long game the first rtd script to show any plot and even if it was in a clichéd 80s style. still and it was marred somewhat by his usual over reliance on juvenile jokes and placing it too far in the future to make logical sense and and again poor pacing. not as bad as his previous efforts and but instantly forgettable. father day the initial premise could have been vaguely interesting and but common sense and logic abandon this episode from the very beginning. also and we are treated to a whole episode of soap opera. before you start thinking this is all about characterization and remember and there a big difference between lame soap opera and characterization. on the plus side and it does prove rtd isn not the worst script writer so far. the empty child or the doctor dances this started off in a mediocre way and with some cringe worthy moments and and some illogical mistakes that even a primary school pupil wouldn not make (well lit windows in a blackout and anyone. ). after this and the first part takes a more interesting and sinister turn. florence hoath truly steals these episodes and showing us what an interesting companion could have been like. she could also act. instead we get the annoying and politically correct captain jack as the new companion. the conclusion was a little hasty and but sufficient. the pacing and script improved with a reasonably good storyline and making these two episodes quite atmospheric and intriguing. boom town i have to be honest and except for a few examples and i had been so disillusioned by the current series and that upon seeing the trailer for another slitheen episode and i gave up and do not subject myself to the torture. bad wolf reality tv and arguably the worst facet of the modern media and is basically used as the premise. there no subtlety whatsoever. do we get any interesting social commentary as in the likes of the running man or truman show. no and of course not. 
this in an rtd episode and so theyre basically here to cynically try and pull in the audience of said shows. once again and logic goes out the window and as were placed 200 and 000 something years in the future. rtd tries pointlessly to shoe horn in some over arcing story here and with no relevance other than it own existence and when the villains are revealed at the end. they make empty threats and and the doctor grins once more like an idiot for the climax. faster paced for the most part and than rtd other efforts and this has one or two interesting moments. otherwise and another lacklustre instalment. the parting of the ways the big finale. more of a damp squid and literally. all of the dalek menace set up in dalek is brought crashing down and as they become rather pathetic. so many plot holes riddle this episode and with typically poor contrivances. daleks want to harvest humans as daleks and but then vaporize entire continents. dalek can vaporize said continents and but not destroy the tardis in space. the tardis is now indestructible and can land anywhere and even over people so they can be saved in it. this ability can not be used to easily destroy the dalek god. the daleks can vaporize entire continents and but do not just nuke satellite 5 to destroy the doctor and and instead let him play around. the doctor is a pathetic coward without the conviction of his actions and after eradicating his whole species to try and eliminate the daleks. these and many other holes aside and we are treated to the lamest dues ex machina solution ever conceived and joined with a near pointless story arc. so what can we say about the new series and all in all. would this have gained a second series if it were anything other than doctor who and with rtd behind it. would most of the episodes have been seen as anything other than un original and forgettable and if they were anything other than doctor who and and had rtd name attached. i think not. some people would have us think we can not say anything against rtd and since we owe him for bringing doctor who back to our screens. however and this at the expense of good characters and stories. personally and i would rather not have a poorly planned and ill conceived product and churned out at that price. i would rather wait till someone could come along and make a genuine effort. for the most part and this is the kind of puerile rubbish that gives sf a bad name and marring what is otherwise the most creative genre. ',
'positive positive positive positive positive',
'negative negative negative negative negative',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
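The base model family also defines task-specific prompts (see `config_sentence_transformers.json` below). A hedged sketch of asymmetric retrieval using those prompt names; the prompt strings come from this repo's config, while the example texts are made up:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_03",
    trust_remote_code=True,
)

# prompt_name selects an instruction string from config_sentence_transformers.json,
# which is prepended to the text before encoding.
query = model.encode(
    ["why do low budget movies get such harsh reviews"],
    prompt_name="retrieval.query",
)
docs = model.encode(
    ["time and time again we see low budget movies proving that a good story is enough."],
    prompt_name="retrieval.passage",
)
print(model.similarity(query, docs))
```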

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 50,000 training samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:

|         | sentence1 | sentence2 | label |
|:--------|:----------|:----------|:------|
| type    | string    | string    | float |
| details | <ul><li>min: 15 tokens</li><li>mean: 309.26 tokens</li><li>max: 1377 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 7.0 tokens</li><li>max: 7 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.5</li><li>max: 1.0</li></ul> |

* Samples:

| sentence1 | sentence2 | label |
|:----------|:----------|:------|
| <code>i love sci fi and am willing to put up with a lot. sci fi movies or tv are usually underfunded and under appreciated and misunderstood. i tried to like this and i really did and but it is to good tv sci fi as babylon 5 is to star trek (the original). silly prosthetics and cheap cardboard sets and stilted dialogues and cg that doesn not match the background and and painfully one dimensional characters cannot be overcome with a ci fi setting. (i am sure there are those of you out there who think babylon 5 is good sci fi tv. it not. it clichéd and uninspiring. ) while us viewers might like emotion and character development and sci fi is a genre that does not take itself seriously (cf. star trek). it may treat important issues and yet not as a serious philosophy. it really difficult to care about the characters here as they are not simply foolish and just missing a spark of life. their actions and reactions are wooden and predictable and often painful to watch. the makers of earth know it rubbish as they have to always say gene roddenberry earth. otherwise people would not continue watching. roddenberry ashes must be turning in their orbit as this dull and cheap and poorly edited (watching it without advert breaks really brings this home) trudging trabant of a show lumbers into space. spoiler. so and kill off a main character. and then bring him back as another actor. jeeez. dallas all over again. </code> | <code>negative negative negative negative negative</code> | <code>1.0</code> |
| <code>i love sci fi and am willing to put up with a lot. sci fi movies or tv are usually underfunded and under appreciated and misunderstood. i tried to like this and i really did and but it is to good tv sci fi as babylon 5 is to star trek (the original). silly prosthetics and cheap cardboard sets and stilted dialogues and cg that doesn not match the background and and painfully one dimensional characters cannot be overcome with a ci fi setting. (i am sure there are those of you out there who think babylon 5 is good sci fi tv. it not. it clichéd and uninspiring. ) while us viewers might like emotion and character development and sci fi is a genre that does not take itself seriously (cf. star trek). it may treat important issues and yet not as a serious philosophy. it really difficult to care about the characters here as they are not simply foolish and just missing a spark of life. their actions and reactions are wooden and predictable and often painful to watch. the makers of earth know it rubbish as they have to always say gene roddenberry earth. otherwise people would not continue watching. roddenberry ashes must be turning in their orbit as this dull and cheap and poorly edited (watching it without advert breaks really brings this home) trudging trabant of a show lumbers into space. spoiler. so and kill off a main character. and then bring him back as another actor. jeeez. dallas all over again. </code> | <code>positive positive positive positive positive</code> | <code>0.0</code> |
| <code>worth the entertainment value of a rental and especially if you like action movies. this one features the usual car chases and fights with the great van damme kick style and shooting battles with the 40 shell load shotgun and and even terrorist style bombs. all of this is entertaining and competently handled but there is nothing that really blows you away if you have seen your share before. the plot is made interesting by the inclusion of a rabbit and which is clever but hardly profound. many of the characters are heavily stereotyped the angry veterans and the terrified illegal aliens and the crooked cops and the indifferent feds and the bitchy tough lady station head and the crooked politician and the fat federale who looks like he was typecast as the mexican in a hollywood movie from the 1940s. all passably acted but again nothing special. i thought the main villains were pretty well done and fairly well acted. by the end of the movie you certainly knew who the good guys were and weren not. there was an emotional lift as the really bad ones got their just deserts. very simplistic and but then you weren not expecting hamlet and right. the only thing i found really annoying was the constant cuts to vds daughter during the last fight scene. not bad. not good. passable 4. </code> | <code>negative negative negative negative negative</code> | <code>1.0</code> |

* Loss: [<code>OnlineContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#onlinecontrastiveloss)

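The exact training script is not part of this commit; the following is a minimal sketch of an equivalent setup with the Sentence Transformers 3.x trainer, using the column names and the non-default batch size listed on this card (the toy rows are placeholders):

```python
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import OnlineContrastiveLoss

model = SentenceTransformer(
    "ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_02",
    trust_remote_code=True,
)

# Columns mirror the card: sentence1, sentence2, label (1.0 = matching pair).
train_dataset = Dataset.from_dict({
    "sentence1": ["a long movie review ...", "another movie review ..."],
    "sentence2": ["negative negative negative negative negative",
                  "positive positive positive positive positive"],
    "label": [1.0, 0.0],
})

args = SentenceTransformerTrainingArguments(
    output_dir="output",
    num_train_epochs=3,
    per_device_train_batch_size=64,  # non-default value from this card
    per_device_eval_batch_size=64,
)
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=OnlineContrastiveLoss(model),
)
trainer.train()
```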
### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 64
- `per_device_eval_batch_size`: 64

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 64
- `per_device_eval_batch_size`: 64
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 3.0
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs

| Epoch  | Step | Training Loss |
|:------:|:----:|:-------------:|
| 0.6394 | 500  | 0.5881        |
| 1.2788 | 1000 | 0.5958        |
| 1.9182 | 1500 | 0.5797        |
| 2.5575 | 2000 | 0.5847        |

### Framework Versions
- Python: 3.10.12
- Sentence Transformers: 3.1.1
- Transformers: 4.45.2
- PyTorch: 2.5.1+cu121
- Accelerate: 1.1.1
- Datasets: 3.1.0
- Tokenizers: 0.20.3

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,67 @@
{
  "_name_or_path": "ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_02",
  "architectures": [
    "XLMRobertaLoRA"
  ],
  "attention_probs_dropout_prob": 0.1,
  "auto_map": {
    "AutoConfig": "jinaai/xlm-roberta-flash-implementation--configuration_xlm_roberta.XLMRobertaFlashConfig",
    "AutoModel": "jinaai/xlm-roberta-flash-implementation--modeling_lora.XLMRobertaLoRA",
    "AutoModelForMaskedLM": "jinaai/xlm-roberta-flash-implementation--modeling_xlm_roberta.XLMRobertaForMaskedLM",
    "AutoModelForPreTraining": "jinaai/xlm-roberta-flash-implementation--modeling_xlm_roberta.XLMRobertaForPreTraining"
  },
  "bos_token_id": 0,
  "classifier_dropout": null,
  "emb_pooler": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "load_trained_adapters": true,
  "lora_adaptations": [
    "retrieval.query",
    "retrieval.passage",
    "separation",
    "classification",
    "text-matching"
  ],
  "lora_alpha": 1,
  "lora_dropout_p": 0.0,
  "lora_main_params_trainable": false,
  "lora_rank": 4,
  "matryoshka_dimensions": [
    32,
    64,
    128,
    256,
    512,
    768,
    1024
  ],
  "max_position_embeddings": 8194,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "rotary",
  "rotary_emb_base": 20000.0,
  "task_instructions": {
    "classification": "",
    "retrieval.passage": "Represent the document for retrieval: ",
    "retrieval.query": "Represent the query for retrieving evidence documents: ",
    "separation": "",
    "text-matching": ""
  },
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.2",
  "truncate_dim": null,
  "type_vocab_size": 1,
  "use_cache": true,
  "use_flash_attn": true,
  "use_reentrant": false,
  "vocab_size": 250002
}
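Because `auto_map` points at Jina's remote-code implementation, loading this checkpoint through plain `transformers` requires `trust_remote_code=True`. A hedged sketch; the `encode(..., task=...)` helper is provided by the jina-embeddings-v3 remote code, and its availability on this finetune is an assumption:

```python
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_03",
    trust_remote_code=True,  # resolves to jinaai/xlm-roberta-flash-implementation
)

# "lora_adaptations" above lists the selectable task adapters.
embeddings = model.encode(
    ["my brain was screaming why do you keep watching."],
    task="text-matching",  # assumption: same helper as jinaai/jina-embeddings-v3
)
print(embeddings.shape)
```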
config_sentence_transformers.json
ADDED
@@ -0,0 +1,16 @@
{
  "__version__": {
    "sentence_transformers": "3.1.1",
    "transformers": "4.45.2",
    "pytorch": "2.5.1+cu121"
  },
  "prompts": {
    "retrieval.query": "Represent the query for retrieving evidence documents: ",
    "retrieval.passage": "Represent the document for retrieval: ",
    "separation": "",
    "classification": "",
    "text-matching": ""
  },
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
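For intuition, `"similarity_fn_name": "cosine"` combined with the `Normalize` module means `model.similarity` reduces to a dot product of unit vectors. A hand-rolled equivalent, for illustration only:

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # After the Normalize module, ||a|| == ||b|| == 1, so this equals np.dot(a, b).
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

a, b = np.random.randn(1024), np.random.randn(1024)
print(cosine_similarity(a, b))
```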
custom_st.py
ADDED
@@ -0,0 +1,229 @@
import json
import logging
import os
from typing import Any, Dict, List, Optional, Tuple, Union

import torch
from torch import nn
from transformers import AutoConfig, AutoModel, AutoTokenizer

logger = logging.getLogger(__name__)


class Transformer(nn.Module):
    """Huggingface AutoModel to generate token embeddings.
    Loads the correct class, e.g. BERT / RoBERTa etc.

    Args:
        model_name_or_path: Huggingface models name
            (https://huggingface.co/models)
        max_seq_length: Truncate any inputs longer than max_seq_length
        model_args: Keyword arguments passed to the Huggingface
            Transformers model
        tokenizer_args: Keyword arguments passed to the Huggingface
            Transformers tokenizer
        config_args: Keyword arguments passed to the Huggingface
            Transformers config
        cache_dir: Cache dir for Huggingface Transformers to store/load
            models
        do_lower_case: If true, lowercases the input (independent if the
            model is cased or not)
        tokenizer_name_or_path: Name or path of the tokenizer. When
            None, then model_name_or_path is used
    """

    save_in_root: bool = True

    def __init__(
        self,
        model_name_or_path: str,
        max_seq_length: int = None,
        model_args: Dict[str, Any] = None,
        tokenizer_args: Dict[str, Any] = None,
        config_args: Dict[str, Any] = None,
        cache_dir: str = None,
        do_lower_case: bool = False,
        tokenizer_name_or_path: str = None,
        **kwargs,
    ) -> None:
        super().__init__()
        self.config_keys = ["max_seq_length", "do_lower_case"]
        self.do_lower_case = do_lower_case
        if model_args is None:
            model_args = {}
        if tokenizer_args is None:
            tokenizer_args = {}
        if config_args is None:
            config_args = {}

        if kwargs.get("backend", "torch") != "torch":
            logger.warning(
                f'"jinaai/jina-embeddings-v3" is currently not compatible with the {kwargs["backend"]} backend. '
                'Continuing with the "torch" backend.'
            )

        self.config = AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir)

        self._lora_adaptations = self.config.lora_adaptations
        if (
            not isinstance(self._lora_adaptations, list)
            or len(self._lora_adaptations) < 1
        ):
            raise ValueError(
                "`lora_adaptations` must be a list and contain at least one element"
            )
        self._adaptation_map = {
            name: idx for idx, name in enumerate(self._lora_adaptations)
        }

        # Which LoRA adapter to apply when forward() is called without a task.
        self.default_task = model_args.pop('default_task', None)

        self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=self.config, cache_dir=cache_dir, **model_args)

        if max_seq_length is not None and "model_max_length" not in tokenizer_args:
            tokenizer_args["model_max_length"] = max_seq_length
        self.tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name_or_path if tokenizer_name_or_path is not None else model_name_or_path,
            cache_dir=cache_dir,
            **tokenizer_args,
        )

        # No max_seq_length set. Try to infer from model
        if max_seq_length is None:
            if (
                hasattr(self.auto_model, "config")
                and hasattr(self.auto_model.config, "max_position_embeddings")
                and hasattr(self.tokenizer, "model_max_length")
            ):
                max_seq_length = min(self.auto_model.config.max_position_embeddings, self.tokenizer.model_max_length)

        self.max_seq_length = max_seq_length

        if tokenizer_name_or_path is not None:
            self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__

    @property
    def default_task(self):
        return self._default_task

    @default_task.setter
    def default_task(self, task: Union[None, str]):
        self._validate_task(task)
        self._default_task = task

    def _validate_task(self, task: str):
        if task and task not in self._lora_adaptations:
            raise ValueError(
                f"Unsupported task '{task}'. "
                f"Supported tasks are: {', '.join(self.config.lora_adaptations)}. "
                f"Alternatively, don't pass the `task` argument to disable LoRA."
            )

    def forward(
        self, features: Dict[str, torch.Tensor], task: Optional[str] = None
    ) -> Dict[str, torch.Tensor]:
        """Returns token_embeddings, cls_token"""
        self._validate_task(task)
        task = task or self.default_task
        adapter_mask = None
        if task:
            task_id = self._adaptation_map[task]
            num_examples = features['input_ids'].size(0)
            adapter_mask = torch.full(
                (num_examples,), task_id, dtype=torch.int32, device=features['input_ids'].device
            )

        lora_arguments = (
            {"adapter_mask": adapter_mask} if adapter_mask is not None else {}
        )
        features.pop('prompt_length', None)
        output_states = self.auto_model.forward(**features, **lora_arguments, return_dict=False)
        output_tokens = output_states[0]
        features.update({"token_embeddings": output_tokens, "attention_mask": features["attention_mask"]})
        return features

    def get_word_embedding_dimension(self) -> int:
        return self.auto_model.config.hidden_size

    def tokenize(
        self,
        texts: Union[List[str], List[dict], List[Tuple[str, str]]],
        padding: Union[str, bool] = True
    ) -> Dict[str, torch.Tensor]:
        """Tokenizes a text and maps tokens to token-ids"""
        output = {}
        if isinstance(texts[0], str):
            to_tokenize = [texts]
        elif isinstance(texts[0], dict):
            to_tokenize = []
            output["text_keys"] = []
            for lookup in texts:
                text_key, text = next(iter(lookup.items()))
                to_tokenize.append(text)
                output["text_keys"].append(text_key)
            to_tokenize = [to_tokenize]
        else:
            batch1, batch2 = [], []
            for text_tuple in texts:
                batch1.append(text_tuple[0])
                batch2.append(text_tuple[1])
            to_tokenize = [batch1, batch2]

        # Strip leading/trailing whitespace
        to_tokenize = [[str(s).strip() for s in col] for col in to_tokenize]

        # Lowercase
        if self.do_lower_case:
            to_tokenize = [[s.lower() for s in col] for col in to_tokenize]

        output.update(
            self.tokenizer(
                *to_tokenize,
                padding=padding,
                truncation="longest_first",
                return_tensors="pt",
                max_length=self.max_seq_length,
            )
        )
        return output

    def get_config_dict(self) -> Dict[str, Any]:
        return {key: self.__dict__[key] for key in self.config_keys}

    def save(self, output_path: str, safe_serialization: bool = True) -> None:
        self.auto_model.save_pretrained(output_path, safe_serialization=safe_serialization)
        self.tokenizer.save_pretrained(output_path)

        with open(os.path.join(output_path, "sentence_bert_config.json"), "w") as fOut:
            json.dump(self.get_config_dict(), fOut, indent=2)

    @classmethod
    def load(cls, input_path: str) -> "Transformer":
        # Old classes used other config names than 'sentence_bert_config.json'
        for config_name in [
            "sentence_bert_config.json",
            "sentence_roberta_config.json",
            "sentence_distilbert_config.json",
            "sentence_camembert_config.json",
            "sentence_albert_config.json",
            "sentence_xlm-roberta_config.json",
            "sentence_xlnet_config.json",
        ]:
            sbert_config_path = os.path.join(input_path, config_name)
            if os.path.exists(sbert_config_path):
                break

        with open(sbert_config_path) as fIn:
            config = json.load(fIn)
        # Don't allow configs to set trust_remote_code
        if "model_args" in config and "trust_remote_code" in config["model_args"]:
            config["model_args"].pop("trust_remote_code")
        if "tokenizer_args" in config and "trust_remote_code" in config["tokenizer_args"]:
            config["tokenizer_args"].pop("trust_remote_code")
        if "config_args" in config and "trust_remote_code" in config["config_args"]:
            config["config_args"].pop("trust_remote_code")
        return cls(model_name_or_path=input_path, **config)
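Normally `SentenceTransformer` instantiates this class through `modules.json`; the following is a hedged sketch of building it directly, mainly to show the `default_task` knob this module adds (argument values are illustrative):

```python
from custom_st import Transformer

transformer = Transformer(
    "ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_03",
    model_args={"default_task": "text-matching", "trust_remote_code": True},
    config_args={"trust_remote_code": True},
)
features = transformer.tokenize(["a short movie review"])
out = transformer(features)  # forward() applies the default task's LoRA adapter
print(out["token_embeddings"].shape)
```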
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1159a035b92d9a239ff49e9f244059d5dbafccd826769bcff554c01922634736
size 1144685320
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "transformer",
    "path": "",
    "type": "custom_st.Transformer"
  },
  {
    "idx": 1,
    "name": "pooler",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "normalizer",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
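Roughly, the loader reads this file and chains the modules in `idx` order (transformer → pooler → normalizer); a simplified illustration, not the library's actual loading code:

```python
import json

with open("modules.json") as f:
    modules = json.load(f)

for module in sorted(modules, key=lambda m: m["idx"]):
    print(f'{module["idx"]}: {module["name"]} -> {module["type"]}')
```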
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 8194,
  "do_lower_case": false
}
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3e19cd8c08f528b481e909f73dbd1fd62b1e8b1117579ba205e477801237f9e0
size 17082988
tokenizer_config.json
ADDED
@@ -0,0 +1,61 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "250001": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "mask_token": "<mask>",
  "max_length": 8194,
  "model_max_length": 8194,
  "pad_to_multiple_of": null,
  "pad_token": "<pad>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "</s>",
  "stride": 0,
  "tokenizer_class": "XLMRobertaTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}
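A quick check of the settings above (XLM-RoBERTa special tokens, truncation to 8194 tokens), assuming the tokenizer files are loaded from this repo:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ELVISIO/jina_embeddings_v3_finetuned_online_contrastive_03")
enc = tok("my brain was screaming why do you keep watching.", truncation=True, max_length=8194)
print(tok.convert_ids_to_tokens(enc["input_ids"])[:3])  # begins with <s> per the cls/bos settings
```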