wu981526092 committed
Commit 2a69b31
1 Parent(s): fea35f8

Upload DistilBertForSequenceClassification

Files changed (2):
  1. config.json  +16 -1040
  2. model.safetensors  +2 -2
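For context, an upload commit like this is typically produced with the `transformers` push-to-Hub API rather than by editing files by hand. A minimal sketch, assuming a placeholder repository id and the checkpoint path named in the new config (the actual repo id is not shown in this commit):

```python
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast

# Hypothetical values for illustration; only the checkpoint path appears in the new config.
local_checkpoint = "model/distilbert-base-uncased/checkpoint-378"
repo_id = "wu981526092/example-repo"  # placeholder repo id

model = DistilBertForSequenceClassification.from_pretrained(local_checkpoint)
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

# push_to_hub writes config.json and model.safetensors and creates a commit
# whose default title is derived from the model class, e.g.
# "Upload DistilBertForSequenceClassification".
model.push_to_hub(repo_id)
tokenizer.push_to_hub(repo_id)
```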
config.json CHANGED
@@ -1,519 +1,13 @@
  {
- "_name_or_path": "model/sileod/deberta-v3-base-tasksource-nli/checkpoint-378",
  "architectures": [
- "DebertaV2ForSequenceClassification"
  ],
- "attention_probs_dropout_prob": 0.1,
- "classifiers_size": [
- 3,
9
- 2,
10
- 2,
11
- 2,
12
- 2,
13
- 2,
14
- 1,
15
- 2,
16
- 3,
17
- 2,
18
- 2,
19
- 2,
20
- 3,
21
- 3,
22
- 3,
23
- 3,
24
- 1,
25
- 3,
26
- 3,
27
- 2,
28
- 2,
29
- 3,
30
- 2,
31
- 6,
32
- 2,
33
- 2,
34
- 2,
35
- 2,
36
- 2,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 3,
42
- 3,
43
- 3,
44
- 3,
45
- 3,
46
- 3,
47
- 3,
48
- 2,
49
- 2,
50
- 2,
51
- 2,
52
- 5,
53
- 3,
54
- 3,
55
- 3,
56
- 3,
57
- 3,
58
- 3,
59
- 3,
60
- 3,
61
- 2,
62
- 2,
63
- 2,
64
- 3,
65
- 3,
66
- 3,
67
- 3,
68
- 3,
69
- 3,
70
- 3,
71
- 3,
72
- 2,
73
- 2,
74
- 2,
75
- 2,
76
- 47,
77
- 23,
78
- 9,
79
- 1,
80
- 1,
81
- 1,
82
- 1,
83
- 1,
84
- 1,
85
- 1,
86
- 1,
87
- 1,
88
- 1,
89
- 1,
90
- 1,
91
- 1,
92
- 1,
93
- 1,
94
- 1,
95
- 1,
96
- 1,
97
- 1,
98
- 1,
99
- 1,
100
- 1,
101
- 1,
102
- 1,
103
- 1,
104
- 1,
105
- 1,
106
- 1,
107
- 1,
108
- 1,
109
- 1,
110
- 1,
111
- 1,
112
- 1,
113
- 1,
114
- 1,
115
- 1,
116
- 1,
117
- 1,
118
- 1,
119
- 1,
120
- 1,
121
- 1,
122
- 1,
123
- 1,
124
- 1,
125
- 1,
126
- 1,
127
- 1,
128
- 1,
129
- 1,
130
- 1,
131
- 1,
132
- 1,
133
- 1,
134
- 1,
135
- 1,
136
- 1,
137
- 1,
138
- 1,
139
- 1,
140
- 1,
141
- 1,
142
- 1,
143
- 1,
144
- 1,
145
- 1,
146
- 1,
147
- 1,
148
- 1,
149
- 1,
150
- 1,
151
- 1,
152
- 1,
153
- 1,
154
- 1,
155
- 1,
156
- 1,
157
- 1,
158
- 1,
159
- 1,
160
- 1,
161
- 1,
162
- 1,
163
- 1,
164
- 1,
165
- 1,
166
- 1,
167
- 1,
168
- 1,
169
- 1,
170
- 1,
171
- 1,
172
- 1,
173
- 1,
174
- 1,
175
- 1,
176
- 1,
177
- 1,
178
- 1,
179
- 1,
180
- 1,
181
- 1,
182
- 1,
183
- 1,
184
- 1,
185
- 1,
186
- 1,
187
- 1,
188
- 1,
189
- 1,
190
- 1,
191
- 1,
192
- 1,
193
- 1,
194
- 1,
195
- 1,
196
- 1,
197
- 1,
198
- 1,
199
- 1,
200
- 1,
201
- 1,
202
- 1,
203
- 1,
204
- 1,
205
- 1,
206
- 1,
207
- 1,
208
- 2,
209
- 2,
210
- 2,
211
- 2,
212
- 2,
213
- 2,
214
- 20,
215
- 50,
216
- 3,
217
- 3,
218
- 4,
219
- 2,
220
- 8,
221
- 3,
222
- 2,
223
- 2,
224
- 2,
225
- 4,
226
- 20,
227
- 3,
228
- 3,
229
- 3,
230
- 3,
231
- 3,
232
- 174,
233
- 2,
234
- 2,
235
- 41,
236
- 2,
237
- 2,
238
- 51,
239
- 2,
240
- 3,
241
- 2,
242
- 2,
243
- 2,
244
- 3,
245
- 16,
246
- 2,
247
- 18,
248
- 8,
249
- 2,
250
- 17,
251
- 3,
252
- 2,
253
- 4,
254
- 7,
255
- 12,
256
- 7,
257
- 3,
258
- 3,
259
- 42,
260
- 11,
261
- 100,
262
- 13,
263
- 100,
264
- 8,
265
- 1,
266
- 20,
267
- 2,
268
- 2,
269
- 4,
270
- 5,
271
- 3,
272
- 4,
273
- 14,
274
- 2,
275
- 6,
276
- 4,
277
- 2,
278
- 1,
279
- 3,
280
- 10,
281
- 3,
282
- 10,
283
- 4,
284
- 2,
285
- 7,
286
- 6,
287
- 28,
288
- 3,
289
- 6,
290
- 3,
291
- 6,
292
- 5,
293
- 7,
294
- 4,
295
- 2,
296
- 2,
297
- 2,
298
- 6,
299
- 2,
300
- 2,
301
- 7,
302
- 20,
303
- 2,
304
- 9,
305
- 2,
306
- 3,
307
- 13,
308
- 2,
309
- 3,
310
- 2,
311
- 4,
312
- 4,
313
- 2,
314
- 2,
315
- 2,
316
- 2,
317
- 4,
318
- 1,
319
- 2,
320
- 1,
321
- 13,
322
- 3,
323
- 5,
324
- 11,
325
- 37,
326
- 2,
327
- 49,
328
- 40,
329
- 10,
330
- 4,
331
- 1,
332
- 2,
333
- 2,
334
- 1,
335
- 5,
336
- 2,
337
- 3,
338
- 2,
339
- 2,
340
- 12,
341
- 3,
342
- 3,
343
- 2,
344
- 19,
345
- 3,
346
- 1,
347
- 2,
348
- 2,
349
- 2,
350
- 2,
351
- 2,
352
- 1,
353
- 2,
354
- 2,
355
- 1,
356
- 1,
357
- 2,
358
- 3,
359
- 2,
360
- 1,
361
- 4,
362
- 3,
363
- 1,
364
- 1,
365
- 1,
366
- 2,
367
- 3,
368
- 2,
369
- 3,
370
- 1,
371
- 1,
372
- 2,
373
- 1,
374
- 3,
375
- 2,
376
- 2,
377
- 2,
378
- 2,
379
- 2,
380
- 3,
381
- 2,
382
- 2,
383
- 2,
384
- 1,
385
- 3,
386
- 2,
387
- 2,
388
- 1,
389
- 1,
390
- 1,
391
- 1,
392
- 2,
393
- 1,
394
- 1,
395
- 1,
396
- 1,
397
- 4,
398
- 1,
399
- 1,
400
- 1,
401
- 1,
402
- 3,
403
- 1,
404
- 3,
405
- 1,
406
- 2,
407
- 2,
408
- 1,
409
- 2,
410
- 3,
411
- 3,
412
- 2,
413
- 1,
414
- 3,
415
- 1,
416
- 1,
417
- 3,
418
- 1,
419
- 3,
420
- 2,
421
- 1,
422
- 1,
423
- 1,
424
- 2,
425
- 2,
426
- 50,
427
- 50,
428
- 50,
429
- 50,
430
- 2,
431
- 1,
432
- 1,
433
- 1,
434
- 1,
435
- 1,
436
- 1,
437
- 1,
438
- 1,
439
- 1,
440
- 1,
441
- 1,
442
- 2,
443
- 2,
444
- 2,
445
- 2,
446
- 77,
447
- 2,
448
- 1,
449
- 3,
450
- 2,
451
- 2,
452
- 1,
453
- 1,
454
- 2,
455
- 2,
456
- 2,
457
- 2,
458
- 2,
459
- 2,
460
- 2,
461
- 2,
462
- 2,
463
- 3,
464
- 18,
465
- 13,
466
- 2,
467
- 2,
468
- 2,
469
- 2,
470
- 2,
471
- 2,
472
- 4,
473
- 2,
474
- 24,
475
- 23,
476
- 67,
477
- 279,
478
- 3,
479
- 2,
480
- 2,
481
- 1,
482
- 2,
483
- 2,
484
- 3,
485
- 1,
486
- 2,
487
- 3,
488
- 2,
489
- 3,
490
- 3,
491
- 2,
492
- 2,
493
- 4,
494
- 1,
495
- 17,
496
- 3,
497
- 2,
498
- 3,
499
- 2,
500
- 3,
501
- 3,
502
- 2,
503
- 1,
504
- 1,
505
- 3,
506
- 2,
507
- 2,
508
- 3,
509
- 3,
510
- 3,
511
- 1,
512
- 1
- ],
- "hidden_act": "gelu",
- "hidden_dropout_prob": 0.1,
- "hidden_size": 768,
  "id2label": {
  "0": "incident",
  "1": "legislation",
@@ -521,541 +15,23 @@
  "3": "unrelated"
  },
  "initializer_range": 0.02,
- "intermediate_size": 3072,
  "label2id": {
  "incident": 0,
  "legislation": 1,
  "penalty": 2,
  "unrelated": 3
  },
- "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
- "max_relative_positions": -1,
- "model_type": "deberta-v2",
- "norm_rel_ebd": "layer_norm",
- "num_attention_heads": 12,
- "num_hidden_layers": 12,
  "pad_token_id": 0,
- "pooler_dropout": 0,
- "pooler_hidden_act": "gelu",
- "pooler_hidden_size": 768,
- "pos_att_type": [
- "p2c",
- "c2p"
- ],
- "position_biased_input": false,
- "position_buckets": 256,
- "relative_attention": true,
- "share_att_key": true,
- "tasks": [
- "glue/mnli",
552
- "glue/qnli",
553
- "glue/rte",
554
- "glue/wnli",
555
- "glue/mrpc",
556
- "glue/qqp",
557
- "glue/stsb",
558
- "super_glue/boolq",
559
- "super_glue/cb",
560
- "super_glue/multirc",
561
- "super_glue/wic",
562
- "super_glue/axg",
563
- "anli/a1",
564
- "anli/a2",
565
- "anli/a3",
566
- "sick/label",
567
- "sick/relatedness",
568
- "sick/entailment_AB",
569
- "snli",
570
- "scitail/snli_format",
571
- "hans",
572
- "WANLI",
573
- "recast/recast_verbnet",
574
- "recast/recast_kg_relations",
575
- "recast/recast_ner",
576
- "recast/recast_factuality",
577
- "recast/recast_puns",
578
- "recast/recast_megaveridicality",
579
- "recast/recast_sentiment",
580
- "recast/recast_verbcorner",
581
- "probability_words_nli/usnli",
582
- "probability_words_nli/reasoning_1hop",
583
- "probability_words_nli/reasoning_2hop",
584
- "nan-nli/joey234--nan-nli",
585
- "nli_fever",
586
- "breaking_nli",
587
- "conj_nli",
588
- "fracas",
589
- "dialogue_nli",
590
- "mpe",
591
- "dnc",
592
- "recast_white/fnplus",
593
- "recast_white/sprl",
594
- "recast_white/dpr",
595
- "joci",
596
- "robust_nli/IS_CS",
597
- "robust_nli/LI_LI",
598
- "robust_nli/ST_WO",
599
- "robust_nli/PI_SP",
600
- "robust_nli/PI_CD",
601
- "robust_nli/ST_SE",
602
- "robust_nli/ST_NE",
603
- "robust_nli/ST_LM",
604
- "robust_nli_is_sd",
605
- "robust_nli_li_ts",
606
- "add_one_rte",
607
- "imppres/implicature_numerals_10_100/log",
608
- "imppres/implicature_connectives/log",
609
- "imppres/implicature_modals/log",
610
- "imppres/implicature_gradable_verb/log",
611
- "imppres/implicature_gradable_adjective/log",
612
- "imppres/implicature_numerals_2_3/log",
613
- "imppres/implicature_quantifiers/log",
614
- "glue_diagnostics/diagnostics",
615
- "hlgd",
616
- "paws/labeled_final",
617
- "paws/labeled_swap",
618
- "medical_questions_pairs",
619
- "conll2003/pos_tags",
620
- "conll2003/chunk_tags",
621
- "conll2003/ner_tags",
622
- "hh-rlhf",
623
- "model-written-evals",
624
- "truthful_qa/multiple_choice",
625
- "fig-qa",
626
- "bigbench/physical_intuition",
627
- "bigbench/authorship_verification",
628
- "bigbench/implicit_relations",
629
- "bigbench/dyck_languages",
630
- "bigbench/novel_concepts",
631
- "bigbench/moral_permissibility",
632
- "bigbench/metaphor_understanding",
633
- "bigbench/temporal_sequences",
634
- "bigbench/sports_understanding",
635
- "bigbench/analytic_entailment",
636
- "bigbench/social_support",
637
- "bigbench/emoji_movie",
638
- "bigbench/dark_humor_detection",
639
- "bigbench/suicide_risk",
640
- "bigbench/fact_checker",
641
- "bigbench/hhh_alignment",
642
- "bigbench/formal_fallacies_syllogisms_negation",
643
- "bigbench/bbq_lite_json",
644
- "bigbench/cause_and_effect",
645
- "bigbench/logic_grid_puzzle",
646
- "bigbench/empirical_judgments",
647
- "bigbench/human_organs_senses",
648
- "bigbench/misconceptions",
649
- "bigbench/strange_stories",
650
- "bigbench/logical_args",
651
- "bigbench/known_unknowns",
652
- "bigbench/cs_algorithms",
653
- "bigbench/emojis_emotion_prediction",
654
- "bigbench/cifar10_classification",
655
- "bigbench/penguins_in_a_table",
656
- "bigbench/odd_one_out",
657
- "bigbench/intent_recognition",
658
- "bigbench/physics",
659
- "bigbench/conceptual_combinations",
660
- "bigbench/logical_deduction",
661
- "bigbench/causal_judgment",
662
- "bigbench/winowhy",
663
- "bigbench/arithmetic",
664
- "bigbench/undo_permutation",
665
- "bigbench/analogical_similarity",
666
- "bigbench/social_iqa",
667
- "bigbench/key_value_maps",
668
- "bigbench/implicatures",
669
- "bigbench/real_or_fake_text",
670
- "bigbench/disambiguation_qa",
671
- "bigbench/similarities_abstraction",
672
- "bigbench/movie_dialog_same_or_different",
673
- "bigbench/english_proverbs",
674
- "bigbench/presuppositions_as_nli",
675
- "bigbench/entailed_polarity",
676
- "bigbench/snarks",
677
- "bigbench/goal_step_wikihow",
678
- "bigbench/crass_ai",
679
- "bigbench/play_dialog_same_or_different",
680
- "bigbench/hindu_knowledge",
681
- "bigbench/international_phonetic_alphabet_nli",
682
- "bigbench/understanding_fables",
683
- "bigbench/geometric_shapes",
684
- "bigbench/code_line_description",
685
- "bigbench/riddle_sense",
686
- "bigbench/symbol_interpretation",
687
- "bigbench/irony_identification",
688
- "bigbench/anachronisms",
689
- "bigbench/navigate",
690
- "bigbench/crash_blossom",
691
- "bigbench/identify_odd_metaphor",
692
- "bigbench/simple_ethical_questions",
693
- "bigbench/contextual_parametric_knowledge_conflicts",
694
- "bigbench/date_understanding",
695
- "bigbench/figure_of_speech_detection",
696
- "bigbench/question_selection",
697
- "bigbench/elementary_math_qa",
698
- "bigbench/nonsense_words_grammar",
699
- "bigbench/salient_translation_error_detection",
700
- "bigbench/epistemic_reasoning",
701
- "bigbench/movie_recommendation",
702
- "bigbench/strategyqa",
703
- "bigbench/tracking_shuffled_objects",
704
- "bigbench/unit_interpretation",
705
- "bigbench/reasoning_about_colored_objects",
706
- "bigbench/discourse_marker_prediction",
707
- "bigbench/logical_fallacy_detection",
708
- "bigbench/general_knowledge",
709
- "bigbench/abstract_narrative_understanding",
710
- "bigbench/color",
711
- "bigbench/hyperbaton",
712
- "bigbench/logical_sequence",
713
- "bigbench/mnist_ascii",
714
- "bigbench/fantasy_reasoning",
715
- "bigbench/mathematical_induction",
716
- "bigbench/timedial",
717
- "bigbench/identify_math_theorems",
718
- "bigbench/checkmate_in_one",
719
- "bigbench/phrase_relatedness",
720
- "bigbench/ruin_names",
721
- "bigbench/gre_reading_comprehension",
722
- "bigbench/metaphor_boolean",
723
- "bigbench/sentence_ambiguity",
724
- "bigbench/vitaminc_fact_verification",
725
- "bigbench/evaluating_information_essentiality",
726
- "cos_e/v1.0",
727
- "cosmos_qa",
728
- "dream",
729
- "openbookqa",
730
- "qasc",
731
- "quartz",
732
- "quail",
733
- "head_qa/en",
734
- "sciq",
735
- "social_i_qa",
736
- "wiki_hop/original",
737
- "wiqa",
738
- "piqa",
739
- "hellaswag",
740
- "super_glue/copa",
741
- "balanced-copa",
742
- "e-CARE",
743
- "art",
744
- "winogrande/winogrande_xl",
745
- "codah/codah",
746
- "ai2_arc/ARC-Challenge/challenge",
747
- "ai2_arc/ARC-Easy/challenge",
748
- "definite_pronoun_resolution",
749
- "swag/regular",
750
- "math_qa",
751
- "glue/cola",
752
- "glue/sst2",
753
- "utilitarianism",
754
- "amazon_counterfactual/en",
755
- "insincere-questions",
756
- "toxic_conversations",
757
- "TuringBench",
758
- "trec",
759
- "vitaminc/tals--vitaminc",
760
- "hope_edi/english",
761
- "rumoureval_2019/RumourEval2019",
762
- "ethos/binary",
763
- "ethos/multilabel",
764
- "tweet_eval/sentiment",
765
- "tweet_eval/irony",
766
- "tweet_eval/offensive",
767
- "tweet_eval/hate",
768
- "tweet_eval/emotion",
769
- "tweet_eval/emoji",
770
- "tweet_eval/stance_abortion",
771
- "tweet_eval/stance_atheism",
772
- "tweet_eval/stance_climate",
773
- "tweet_eval/stance_feminist",
774
- "tweet_eval/stance_hillary",
775
- "discovery/discovery",
776
- "pragmeval/squinky-informativeness",
777
- "pragmeval/emobank-arousal",
778
- "pragmeval/switchboard",
779
- "pragmeval/squinky-implicature",
780
- "pragmeval/emobank-valence",
781
- "pragmeval/mrda",
782
- "pragmeval/squinky-formality",
783
- "pragmeval/verifiability",
784
- "pragmeval/emobank-dominance",
785
- "pragmeval/persuasiveness-specificity",
786
- "pragmeval/persuasiveness-strength",
787
- "pragmeval/persuasiveness-claimtype",
788
- "pragmeval/pdtb",
789
- "pragmeval/sarcasm",
790
- "pragmeval/stac",
791
- "pragmeval/persuasiveness-premisetype",
792
- "pragmeval/persuasiveness-eloquence",
793
- "pragmeval/gum",
794
- "pragmeval/emergent",
795
- "pragmeval/persuasiveness-relevance",
796
- "silicone/dyda_da",
797
- "silicone/dyda_e",
798
- "silicone/maptask",
799
- "silicone/meld_e",
800
- "silicone/meld_s",
801
- "silicone/sem",
802
- "silicone/oasis",
803
- "silicone/iemocap",
804
- "lex_glue/eurlex",
805
- "lex_glue/scotus",
806
- "lex_glue/ledgar",
807
- "lex_glue/unfair_tos",
808
- "lex_glue/case_hold",
809
- "language-identification",
810
- "imdb",
811
- "rotten_tomatoes",
812
- "ag_news",
813
- "yelp_review_full/yelp_review_full",
814
- "financial_phrasebank/sentences_allagree",
815
- "poem_sentiment",
816
- "dbpedia_14/dbpedia_14",
817
- "amazon_polarity/amazon_polarity",
818
- "app_reviews",
819
- "hate_speech18",
820
- "sms_spam",
821
- "humicroedit/subtask-1",
822
- "humicroedit/subtask-2",
823
- "snips_built_in_intents",
824
- "hate_speech_offensive",
825
- "yahoo_answers_topics",
826
- "stackoverflow-questions",
827
- "hyperpartisan_news",
828
- "sciie",
829
- "citation_intent",
830
- "go_emotions/simplified",
831
- "scicite",
832
- "liar",
833
- "lexical_relation_classification/ROOT09",
834
- "lexical_relation_classification/BLESS",
835
- "lexical_relation_classification/CogALexV",
836
- "lexical_relation_classification/EVALution",
837
- "lexical_relation_classification/K&H+N",
838
- "linguisticprobing/coordination_inversion",
839
- "linguisticprobing/obj_number",
840
- "linguisticprobing/past_present",
841
- "linguisticprobing/sentence_length",
842
- "linguisticprobing/subj_number",
843
- "linguisticprobing/odd_man_out",
844
- "linguisticprobing/tree_depth",
845
- "linguisticprobing/top_constituents",
846
- "linguisticprobing/bigram_shift",
847
- "crowdflower/political-media-message",
848
- "crowdflower/political-media-audience",
849
- "crowdflower/economic-news",
850
- "crowdflower/text_emotion",
851
- "crowdflower/political-media-bias",
852
- "crowdflower/airline-sentiment",
853
- "crowdflower/tweet_global_warming",
854
- "crowdflower/corporate-messaging",
855
- "crowdflower/sentiment_nuclear_power",
856
- "ethics/commonsense",
857
- "ethics/deontology",
858
- "ethics/justice",
859
- "ethics/virtue",
860
- "emo/emo2019",
861
- "google_wellformed_query",
862
- "tweets_hate_speech_detection",
863
- "has_part",
864
- "wnut_17/wnut_17",
865
- "ncbi_disease/ncbi_disease",
866
- "acronym_identification",
867
- "jnlpba/jnlpba",
868
- "ontonotes_english/SpeedOfMagic--ontonotes_english",
869
- "blog_authorship_corpus/gender",
870
- "blog_authorship_corpus/age",
871
- "blog_authorship_corpus/job",
872
- "open_question_type",
873
- "health_fact",
874
- "commonsense_qa",
875
- "mc_taco",
876
- "ade_corpus_v2/Ade_corpus_v2_classification",
877
- "discosense",
878
- "circa",
879
- "phrase_similarity",
880
- "scientific-exaggeration-detection",
881
- "quarel",
882
- "fever-evidence-related/mwong--fever-related",
883
- "numer_sense",
884
- "dynasent/dynabench.dynasent.r1.all/r1",
885
- "dynasent/dynabench.dynasent.r2.all/r2",
886
- "Sarcasm_News_Headline",
887
- "sem_eval_2010_task_8",
888
- "auditor_review/demo-org--auditor_review",
889
- "medmcqa",
890
- "Dynasent_Disagreement",
891
- "Politeness_Disagreement",
892
- "SBIC_Disagreement",
893
- "SChem_Disagreement",
894
- "Dilemmas_Disagreement",
895
- "logiqa",
896
- "wiki_qa",
897
- "cycic_classification",
898
- "cycic_multiplechoice",
899
- "sts-companion",
900
- "commonsense_qa_2.0",
901
- "lingnli",
902
- "monotonicity-entailment",
903
- "arct",
904
- "scinli",
905
- "naturallogic",
906
- "onestop_qa",
907
- "moral_stories/full",
908
- "prost",
909
- "dynahate",
910
- "syntactic-augmentation-nli",
911
- "autotnli",
912
- "CONDAQA",
913
- "webgpt_comparisons",
914
- "synthetic-instruct-gptj-pairwise",
915
- "scruples",
916
- "wouldyourather",
917
- "attempto-nli",
918
- "defeasible-nli/snli",
919
- "defeasible-nli/atomic",
920
- "help-nli",
921
- "nli-veridicality-transitivity",
922
- "natural-language-satisfiability",
923
- "lonli",
924
- "dadc-limit-nli",
925
- "FLUTE",
926
- "strategy-qa",
927
- "summarize_from_feedback/comparisons",
928
- "folio",
929
- "tomi-nli",
930
- "avicenna",
931
- "SHP",
932
- "MedQA-USMLE-4-options-hf",
933
- "wikimedqa/medwiki",
934
- "cicero",
935
- "CREAK",
936
- "mutual",
937
- "NeQA",
938
- "quote-repetition",
939
- "redefine-math",
940
- "puzzte",
941
- "implicatures",
942
- "race/high",
943
- "race/middle",
944
- "race-c",
945
- "spartqa-yn",
946
- "spartqa-mchoice",
947
- "temporal-nli",
948
- "riddle_sense",
949
- "clcd-english",
950
- "twentyquestions",
951
- "reclor",
952
- "counterfactually-augmented-imdb",
953
- "counterfactually-augmented-snli",
954
- "cnli",
955
- "boolq-natural-perturbations",
956
- "acceptability-prediction",
957
- "equate",
958
- "ScienceQA_text_only",
959
- "ekar_english",
960
- "implicit-hate-stg1",
961
- "chaos-mnli-ambiguity",
962
- "headline_cause/en_simple",
963
- "logiqa-2.0-nli",
964
- "oasst1_dense_flat/quality",
965
- "oasst1_dense_flat/toxicity",
966
- "oasst1_dense_flat/helpfulness",
967
- "PARARULE-Plus",
968
- "mindgames",
969
- "universal_dependencies/en_lines/deprel",
970
- "universal_dependencies/en_partut/deprel",
971
- "universal_dependencies/en_ewt/deprel",
972
- "universal_dependencies/en_gum/deprel",
973
- "ambient",
974
- "path-naturalness-prediction",
975
- "civil_comments/toxicity",
976
- "civil_comments/severe_toxicity",
977
- "civil_comments/obscene",
978
- "civil_comments/threat",
979
- "civil_comments/insult",
980
- "civil_comments/identity_attack",
981
- "civil_comments/sexual_explicit",
982
- "cloth",
983
- "dgen",
984
- "oasst1_pairwise_rlhf_reward",
985
- "I2D2",
986
- "args_me",
987
- "Touche23-ValueEval",
988
- "starcon",
989
- "banking77",
990
- "ruletaker",
991
- "lsat_qa/all",
992
- "ConTRoL-nli",
993
- "tracie",
994
- "sherliic",
995
- "sen-making/1",
996
- "sen-making/2",
997
- "winowhy",
998
- "mbib-base/cognitive-bias",
999
- "mbib-base/fake-news",
1000
- "mbib-base/gender-bias",
1001
- "mbib-base/hate-speech",
1002
- "mbib-base/linguistic-bias",
1003
- "mbib-base/political-bias",
1004
- "mbib-base/racial-bias",
1005
- "mbib-base/text-level-bias",
1006
- "robustLR",
1007
- "v1/gen_train234_test2to10",
1008
- "logical-fallacy",
1009
- "parade",
1010
- "cladder",
1011
- "subjectivity",
1012
- "MOH",
1013
- "VUAC",
1014
- "TroFi",
1015
- "sharc_modified/mod",
1016
- "conceptrules_v2",
1017
- "disrpt/eng.dep.scidtb.rels",
1018
- "conll2000",
1019
- "few-nerd/supervised",
1020
- "finer-139",
1021
- "zero-shot-label-nli",
1022
- "com2sense",
1023
- "scone",
1024
- "winodict",
1025
- "fool-me-twice",
1026
- "monli",
1027
- "corr2cause",
1028
- "lsat_qa/all",
1029
- "apt",
1030
- "twitter-financial-news-sentiment",
1031
- "icl-symbol-tuning-instruct",
1032
- "SpaceNLI",
1033
- "propsegment/nli",
1034
- "HatemojiBuild",
1035
- "regset",
1036
- "esci",
1037
- "chatbot_arena_conversations",
1038
- "dnd_style_intents",
1039
- "FLD.v2",
1040
- "SDOH-NLI",
1041
- "scifact_entailment",
1042
- "feasibilityQA",
1043
- "simple_pair",
1044
- "AdjectiveScaleProbe-nli",
1045
- "resnli",
1046
- "SpaRTUN",
1047
- "ReSQ",
1048
- "semantic_fragments_nli",
1049
- "dataset_train_nli",
1050
- "babi_nli",
1051
- "gen_debiased_nli",
1052
- "imppres/presupposition",
1053
- "/prag",
1054
- "blimp-2",
1055
- "mmlu-4"
- ],
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
- "type_vocab_size": 0,
- "vocab_size": 128100
  }
 
  {
+ "_name_or_path": "model/distilbert-base-uncased/checkpoint-378",
+ "activation": "gelu",
  "architectures": [
+ "DistilBertForSequenceClassification"
  ],
+ "attention_dropout": 0.1,
+ "dim": 768,
+ "dropout": 0.1,
+ "hidden_dim": 3072,
  "id2label": {
  "0": "incident",
  "1": "legislation",
  "3": "unrelated"
  },
  "initializer_range": 0.02,
  "label2id": {
  "incident": 0,
  "legislation": 1,
  "penalty": 2,
  "unrelated": 3
  },
  "max_position_embeddings": 512,
+ "model_type": "distilbert",
+ "n_heads": 12,
+ "n_layers": 6,
  "pad_token_id": 0,
+ "problem_type": "single_label_classification",
+ "qa_dropout": 0.1,
+ "seq_classif_dropout": 0.2,
+ "sinusoidal_pos_embds": false,
+ "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
+ "vocab_size": 30522
  }
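Taken together, the new config describes a standard 6-layer, 12-head, 768-dim DistilBERT classifier over the four labels incident, legislation, penalty, and unrelated. A minimal sketch of loading it and reading back these fields, assuming a placeholder repository id (the repo name is not part of this diff):

```python
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

repo_id = "wu981526092/example-repo"  # placeholder; the actual repo id is not shown here

config = AutoConfig.from_pretrained(repo_id)
print(config.model_type)                            # "distilbert"
print(config.n_layers, config.n_heads, config.dim)  # 6 12 768
print(config.id2label)  # {0: 'incident', 1: 'legislation', 2: 'penalty', 3: 'unrelated'}

model = AutoModelForSequenceClassification.from_pretrained(repo_id)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Classify a single sentence and map the predicted index back through id2label.
inputs = tokenizer("A new data-protection bill was passed today.", return_tensors="pt")
pred = model(**inputs).logits.argmax(dim=-1).item()
print(config.id2label[pred])
```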
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:02ab2651b65ba4efc770cf148c9e9dc3782c2735d086a415e1058ff4754dc928
- size 737725432
+ oid sha256:2bf062adde79591ca66a348eed1464fe5a3e814fd6e215a5cef2fe9e470ea2e6
+ size 267838720
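Both pointer files follow the git-lfs spec above: the weights live in LFS storage and are addressed by the sha256 `oid` and byte `size`. At float32 (4 bytes per parameter), the old 737,725,432-byte DeBERTa-v3-base checkpoint corresponds to roughly 184M parameters, while the new 267,838,720-byte DistilBERT checkpoint is roughly 67M. A small standard-library sketch for checking a downloaded `model.safetensors` against the new pointer:

```python
import hashlib

# Values copied from the new LFS pointer in this commit.
EXPECTED_SHA256 = "2bf062adde79591ca66a348eed1464fe5a3e814fd6e215a5cef2fe9e470ea2e6"
EXPECTED_SIZE = 267838720  # bytes; at 4 bytes per float32 weight, roughly 67M parameters

def matches_pointer(path: str) -> bool:
    """Return True if the file's size and sha256 match the LFS pointer."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return size == EXPECTED_SIZE and digest.hexdigest() == EXPECTED_SHA256

print(matches_pointer("model.safetensors"))
```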