yangwang825 committed on
Commit
21708b7
1 Parent(s): 64a1592

Upload BertForSequenceClassification

Browse files
Files changed (3) hide show
  1. config.json +6 -1
  2. modeling_bert.py +79 -1
  3. pytorch_model.bin +1 -1
config.json CHANGED
@@ -1,8 +1,12 @@
1
  {
2
  "affine": true,
 
 
 
3
  "attention_probs_dropout_prob": 0.1,
4
  "auto_map": {
5
- "AutoConfig": "configuration_bert.BertConfig"
 
6
  },
7
  "classifier_dropout": null,
8
  "hidden_act": "gelu",
@@ -17,6 +21,7 @@
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 0,
19
  "position_embedding_type": "absolute",
 
20
  "transformers_version": "4.33.3",
21
  "type_vocab_size": 2,
22
  "use_cache": true,
 
1
  {
2
  "affine": true,
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "auto_map": {
8
+ "AutoConfig": "configuration_bert.BertConfig",
9
+ "AutoModelForSequenceClassification": "modeling_bert.BertForSequenceClassification"
10
  },
11
  "classifier_dropout": null,
12
  "hidden_act": "gelu",
 
21
  "num_hidden_layers": 12,
22
  "pad_token_id": 0,
23
  "position_embedding_type": "absolute",
24
+ "torch_dtype": "float32",
25
  "transformers_version": "4.33.3",
26
  "type_vocab_size": 2,
27
  "use_cache": true,
modeling_bert.py CHANGED
@@ -16,7 +16,8 @@ from transformers.models.bert.modeling_bert import (
16
  )
17
  from transformers.modeling_outputs import (
18
  BaseModelOutputWithPoolingAndCrossAttentions,
19
- SequenceClassifierOutput
 
20
  )
21
 
22
  from .configuration_bert import BertConfig
@@ -289,3 +290,80 @@ class BertForSequenceClassification(BertPreTrainedModel):
289
  hidden_states=outputs.hidden_states,
290
  attentions=outputs.attentions,
291
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  )
17
  from transformers.modeling_outputs import (
18
  BaseModelOutputWithPoolingAndCrossAttentions,
19
+ SequenceClassifierOutput,
20
+ MultipleChoiceModelOutput
21
  )
22
 
23
  from .configuration_bert import BertConfig
 
290
  hidden_states=outputs.hidden_states,
291
  attentions=outputs.attentions,
292
  )
293
+
294
+
295
class BertForMultipleChoice(BertPreTrainedModel):
    """BERT with a multiple-choice classification head.

    Each example carries several candidate sequences; every candidate is
    encoded independently and scored with a single linear logit, and the
    logits are reshaped to (batch, num_choices) for cross-entropy over the
    choice dimension.
    """

    def __init__(self, config):
        super().__init__(config)

        self.bert = BertModel(config)
        # Fall back to the generic hidden dropout when no classifier-specific
        # rate is configured.
        if config.classifier_dropout is not None:
            dropout_rate = config.classifier_dropout
        else:
            dropout_rate = config.hidden_dropout_prob
        self.dropout = nn.Dropout(dropout_rate)
        # One logit per (example, choice) pair; choices compete after reshape.
        self.classifier = nn.Linear(config.hidden_size, 1)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], MultipleChoiceModelOutput]:
        """Score every choice and optionally compute the classification loss.

        Inputs are assumed shaped (batch, num_choices, seq_len) — TODO confirm
        against callers — and are collapsed to (batch * num_choices, seq_len)
        before the encoder. Returns a ``MultipleChoiceModelOutput`` (or a
        plain tuple when ``return_dict`` is falsy) with logits of shape
        (batch, num_choices).
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # The number of answer options comes from the second input dimension.
        if input_ids is not None:
            num_choices = input_ids.shape[1]
        else:
            num_choices = inputs_embeds.shape[1]

        def flat(t):
            # Collapse (batch, choices, seq) -> (batch * choices, seq);
            # pass absent inputs through untouched.
            return None if t is None else t.view(-1, t.size(-1))

        # Embeddings keep their trailing hidden dimension when flattened.
        flat_embeds = (
            None
            if inputs_embeds is None
            else inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1))
        )

        outputs = self.bert(
            flat(input_ids),
            attention_mask=flat(attention_mask),
            token_type_ids=flat(token_type_ids),
            position_ids=flat(position_ids),
            head_mask=head_mask,
            inputs_embeds=flat_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Pooled [CLS] representation -> one logit per flattened row.
        pooled = self.dropout(outputs[1])
        logits = self.classifier(pooled)
        reshaped_logits = logits.view(-1, num_choices)

        loss = None
        if labels is not None:
            # Cross-entropy over the choice dimension.
            loss = nn.CrossEntropyLoss()(reshaped_logits, labels)

        if not return_dict:
            output = (reshaped_logits,) + outputs[2:]
            return output if loss is None else ((loss,) + output)

        return MultipleChoiceModelOutput(
            loss=loss,
            logits=reshaped_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae9bcc7a4ae5b93f43cf78aa7dea754315ca54e073d4a6b4c780bc4be2dd2406
3
  size 438000689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad9ea182e61d0ef5b4b79090c0cd8d5dbef61c6247800102dd6a2df37bf0bc8
3
  size 438000689