jeffyelson committed on
Commit
f127158
1 Parent(s): 63c586a

Upload 7 files

Browse files
config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sjrhuschlee/flan-t5-base-mnli",
3
+ "architectures": [
4
+ "T5ForSequenceClassification"
5
+ ],
6
+ "auto_map": {
7
+ "AutoModelForSequenceClassification": "modeling_t5seq.T5ForSequenceClassification"
8
+ },
9
+ "classifier_dropout": 0.0,
10
+ "d_ff": 2048,
11
+ "d_kv": 64,
12
+ "d_model": 768,
13
+ "decoder_start_token_id": 0,
14
+ "dense_act_fn": "gelu_new",
15
+ "dropout_rate": 0.1,
16
+ "eos_token_id": 1,
17
+ "feed_forward_proj": "gated-gelu",
18
+ "finetuning_task": "mnli",
19
+ "id2label": {
20
+ "0": "entailment",
21
+ "1": "neutral",
22
+ "2": "contradiction"
23
+ },
24
+ "initializer_factor": 1.0,
25
+ "is_encoder_decoder": true,
26
+ "is_gated_act": true,
27
+ "label2id": {
28
+ "contradiction": 2,
29
+ "entailment": 0,
30
+ "neutral": 1
31
+ },
32
+ "layer_norm_epsilon": 1e-06,
33
+ "model_type": "t5",
34
+ "n_positions": 512,
35
+ "num_decoder_layers": 12,
36
+ "num_heads": 12,
37
+ "num_layers": 12,
38
+ "output_past": true,
39
+ "pad_token_id": 0,
40
+ "problem_type": "single_label_classification",
41
+ "relative_attention_max_distance": 128,
42
+ "relative_attention_num_buckets": 32,
43
+ "task_specific_params": {
44
+ "summarization": {
45
+ "early_stopping": true,
46
+ "length_penalty": 2.0,
47
+ "max_length": 200,
48
+ "min_length": 30,
49
+ "no_repeat_ngram_size": 3,
50
+ "num_beams": 4,
51
+ "prefix": "summarize: "
52
+ },
53
+ "translation_en_to_de": {
54
+ "early_stopping": true,
55
+ "max_length": 300,
56
+ "num_beams": 4,
57
+ "prefix": "translate English to German: "
58
+ },
59
+ "translation_en_to_fr": {
60
+ "early_stopping": true,
61
+ "max_length": 300,
62
+ "num_beams": 4,
63
+ "prefix": "translate English to French: "
64
+ },
65
+ "translation_en_to_ro": {
66
+ "early_stopping": true,
67
+ "max_length": 300,
68
+ "num_beams": 4,
69
+ "prefix": "translate English to Romanian: "
70
+ }
71
+ },
72
+ "tie_word_embeddings": false,
73
+ "torch_dtype": "float32",
74
+ "transformers_version": "4.18.0",
75
+ "use_cache": true,
76
+ "vocab_size": 32128
77
+ }
modeling_t5seq.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import warnings
3
+ from typing import List, Optional, Tuple, Union
4
+
5
+ import torch
6
+ from torch import nn
7
+ from torch.nn import CrossEntropyLoss, MSELoss, BCEWithLogitsLoss
8
+
9
+ from transformers import AutoModelForSequenceClassification
10
+ from transformers.modeling_outputs import Seq2SeqSequenceClassifierOutput
11
+ from transformers.models.t5.configuration_t5 import T5Config
12
+ from transformers.models.t5.modeling_t5 import T5PreTrainedModel, T5Model
13
+
14
+
15
class T5ClassificationHead(nn.Module):
    """Classification head applied to a pooled sentence representation.

    The head applies, in order: dropout, a square linear projection
    (`d_model` -> `d_model`), `tanh`, dropout again, and a final linear
    projection to `config.num_labels` outputs.

    Args:
        config: Model configuration. Only `d_model`, `classifier_dropout` and
            `num_labels` are read here.
    """

    def __init__(self, config: T5Config):
        super().__init__()
        width = config.d_model
        # Submodules are created in the same order as before so parameter
        # initialisation and state-dict key order are unaffected.
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(p=config.classifier_dropout)
        self.out_proj = nn.Linear(width, config.num_labels)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Map features whose last dimension is `d_model` to `num_labels` logits."""
        projected = self.dense(self.dropout(hidden_states))
        activated = self.dropout(torch.tanh(projected))
        return self.out_proj(activated)
31
+
32
+
33
class T5ForSequenceClassification(T5PreTrainedModel):
    """T5 encoder-decoder model with a sequence classification head.

    The full `T5Model` is run and the hidden state found at the position of the
    last `<eos>` token of each example (positions are looked up in `input_ids`)
    is passed through `T5ClassificationHead` to produce the logits.
    """

    _keys_to_ignore_on_load_unexpected = ["decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight"]
    _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight"]

    def __init__(self, config: T5Config):
        super().__init__(config)
        self.transformer = T5Model(config)
        self.classification_head = T5ClassificationHead(config)

        # Initialize weights and apply final processing
        self.post_init()

        self.model_parallel = False

    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        decoder_input_ids: Optional[torch.LongTensor] = None,
        decoder_attention_mask: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        decoder_head_mask: Optional[torch.Tensor] = None,
        cross_attn_head_mask: Optional[torch.Tensor] = None,
        encoder_outputs: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, Seq2SeqSequenceClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Returns:
            A `Seq2SeqSequenceClassifierOutput` when `return_dict` resolves to true, otherwise a tuple
            `(loss?, logits, *transformer_outputs[1:])` where `loss` is only present if `labels` was given.

        Raises:
            NotImplementedError: if `inputs_embeds` is passed instead of `input_ids`.
            ValueError: if `input_ids` is missing, if the examples in the batch do not all contain the same
                number of `<eos>` tokens, or if no `<eos>` token is present at all.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        if labels is not None:
            use_cache = False

        if input_ids is None and inputs_embeds is not None:
            raise NotImplementedError(
                f"Passing input embeddings is currently not supported for {self.__class__.__name__}"
            )

        # Copied from models.bart.modeling_bart.BartModel.forward different to other models, T5 automatically creates
        # decoder_input_ids from input_ids if no decoder_input_ids are provided
        if decoder_input_ids is None and decoder_inputs_embeds is None:
            if input_ids is None:
                raise ValueError(
                    "If no `decoder_input_ids` or `decoder_inputs_embeds` are "
                    "passed, `input_ids` cannot be `None`. Please pass either "
                    "`input_ids` or `decoder_input_ids` or `decoder_inputs_embeds`."
                )
            decoder_input_ids = self._shift_right(input_ids)

        # The <eos> positions used for pooling are read from `input_ids` below, so it is
        # mandatory even when decoder inputs / encoder outputs were supplied. Fail early
        # with a clear message instead of an AttributeError on `None` after the forward pass.
        if input_ids is None:
            raise ValueError(
                "`input_ids` must be provided: the position of the last <eos> token is read from "
                "`input_ids` to build the sentence representation."
            )

        outputs = self.transformer(
            input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            decoder_attention_mask=decoder_attention_mask,
            head_mask=head_mask,
            decoder_head_mask=decoder_head_mask,
            cross_attn_head_mask=cross_attn_head_mask,
            encoder_outputs=encoder_outputs,
            inputs_embeds=inputs_embeds,
            decoder_inputs_embeds=decoder_inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        # First element of the T5Model output (presumably the decoder's last hidden
        # state, shaped (batch, seq, hidden) -- the unpacking below relies on rank 3).
        sequence_output = outputs[0]

        eos_mask = input_ids.eq(self.config.eos_token_id).to(sequence_output.device)

        if len(torch.unique_consecutive(eos_mask.sum(1))) > 1:
            raise ValueError("All examples must have the same number of <eos> tokens.")
        # With zero <eos> tokens everywhere the check above passes, but the reshape below
        # would fail with an ambiguous-shape RuntimeError; report the real cause instead.
        if not eos_mask.any():
            raise ValueError("Every example must contain at least one <eos> token.")

        batch_size, _, hidden_size = sequence_output.shape
        # Gather the hidden states at every <eos> position, regroup them per example and
        # keep the one belonging to the last <eos> of each example.
        sentence_representation = sequence_output[eos_mask, :].view(batch_size, -1, hidden_size)[:, -1, :]
        logits = self.classification_head(sentence_representation)

        loss = None
        if labels is not None:
            labels = labels.to(logits.device)
            if self.config.problem_type is None:
                # Infer the problem type once and store it on the config so later
                # calls take the same branch.
                if self.config.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.config.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.config.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return Seq2SeqSequenceClassifierOutput(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            decoder_hidden_states=outputs.decoder_hidden_states,
            decoder_attentions=outputs.decoder_attentions,
            cross_attentions=outputs.cross_attentions,
            encoder_last_hidden_state=outputs.encoder_last_hidden_state,
            encoder_hidden_states=outputs.encoder_hidden_states,
            encoder_attentions=outputs.encoder_attentions,
        )
155
+
156
# Best-effort registration of this class with the Auto API for `T5Config`.
# `register` may raise ValueError (presumably when a model class is already
# registered for this config -- confirm against the installed transformers
# version); that case is deliberately ignored so importing this module never fails.
try:
    AutoModelForSequenceClassification.register(
        T5Config,
        T5ForSequenceClassification,
    )
except ValueError:
    pass
160
+
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b6f4d3eab06e05a7db1c3c57ac083b43dbddeef668eb3c0b46bb288117840d9
3
+ size 894085319
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "clean_up_tokenization_spaces": true, "model_max_length": 512, "sp_model_kwargs": {}, "special_tokens_map_file": 
"/home/elson/.cache/huggingface/transformers/0bad91bda9de89f6f0d16584aa5fe3fa990c3157bbd1c63454ad327761ce678b.a6ade5be9ee4d179c3ae03f26ae924a8473ffd7fc4b15c73138dcc1527b00e62", "name_or_path": "sjrhuschlee/flan-t5-base-mnli", "tokenizer_class": "T5Tokenizer"}