ekolasky committed on
Commit
b0ebb4e
1 Parent(s): c6f6d7d

Upload model

Browse files
Files changed (3) hide show
  1. config.json +4 -1
  2. model.safetensors +1 -1
  3. modeling_CustomLEDForQA.py +13 -2
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "ekolasky/CustomLEDForQAonNQ",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
@@ -14,6 +14,9 @@
14
  1024,
15
  1024
16
  ],
 
 
 
17
  "bos_token_id": 0,
18
  "classif_dropout": 0.0,
19
  "classifier_dropout": 0.0,
 
1
  {
2
+ "_name_or_path": "allenai/led-base-16384",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
 
14
  1024,
15
  1024
16
  ],
17
+ "auto_map": {
18
+ "AutoModel": "modeling_CustomLEDForQA.CustomLEDForQAModel"
19
+ },
20
  "bos_token_id": 0,
21
  "classif_dropout": 0.0,
22
  "classifier_dropout": 0.0,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29b6ebfc6c38c8764ab7440f03873a73b5f3eb75667bcd2e4a50df3d2b6c4e33
3
  size 417405656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7de0c365541e0d36d3fe80cb356e4da80915c6c57c12e9caff204628517f478a
3
  size 417405656
modeling_CustomLEDForQA.py CHANGED
@@ -29,12 +29,23 @@ class CustomLEDForQAModel(LEDPreTrainedModel):
29
  end_logits = end_logits.squeeze(-1).contiguous()
30
 
31
  total_loss = None
 
32
  if start_positions is not None and end_positions is not None:
33
 
34
  loss_fct = nn.CrossEntropyLoss()
35
- start_loss = loss_fct(start_logits, start_positions[0])
36
- end_loss = loss_fct(end_logits, end_positions[0])
 
 
 
 
 
 
 
 
37
  total_loss = (start_loss + end_loss) / 2
 
 
38
 
39
 
40
  return {
 
29
  end_logits = end_logits.squeeze(-1).contiguous()
30
 
31
  total_loss = None
32
+
33
  if start_positions is not None and end_positions is not None:
34
 
35
  loss_fct = nn.CrossEntropyLoss()
36
+
37
+ if len(start_positions.size()) > 1:
38
+ start_positions = start_positions.squeeze(-1)
39
+ if len(end_positions.size()) > 1:
40
+ end_positions = end_positions.squeeze(-1)
41
+
42
+ start_loss = loss_fct(start_logits, start_positions)
43
+ end_loss = loss_fct(end_logits, end_positions)
44
+ # start_loss = loss_fct(start_logits[index], start_positions[index][0])
45
+ # end_loss = loss_fct(end_logits[index], end_positions[index][0])
46
  total_loss = (start_loss + end_loss) / 2
47
+
48
+
49
 
50
 
51
  return {