Spaces: Build error

PeteBleackley committed · b2593fa
Parent(s): 7cc6121

Making sure RoBERTa layers have all required arguments
qarac/corpora/CombinedCorpus.py CHANGED

@@ -185,7 +185,11 @@ class CombinedCorpus(keras.utils.Sequence):
         maxlen = max((len(sample) for sample in batch))
         for sample in batch:
             sample.pad(maxlen,pad_id=self.pad_token)
-
-
+        input_ids = tensorflow.constant([sample.ids
+                                         for sample in batch])
+        attention_mask = tensorflow.constant(input_ids.numpy().apply(lambda x: 0.0 if x==self.pad_token
+                                                                     else 1.0))
+        return {'input_ids':input_ids,
+                'attention_mask':attention_mask}
 
 
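One thing worth flagging in the added batching code: NumPy arrays have no .apply method, so input_ids.numpy().apply(...) will raise an AttributeError at runtime. A minimal sketch of the apparent intent, building the mask with an elementwise comparison instead; the Sample interface and self.pad_token come from the diff, while the method name _to_batch is hypothetical:

```python
import tensorflow

def _to_batch(self, batch):
    # Pad every sample in the batch to a common length.
    maxlen = max(len(sample) for sample in batch)
    for sample in batch:
        sample.pad(maxlen, pad_id=self.pad_token)
    input_ids = tensorflow.constant([sample.ids for sample in batch])
    # 1.0 for real tokens, 0.0 for padding; elementwise, no .apply needed.
    attention_mask = tensorflow.cast(input_ids != self.pad_token,
                                     tensorflow.float32)
    return {'input_ids': input_ids,
            'attention_mask': attention_mask}
```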
qarac/models/QaracDecoderModel.py CHANGED

@@ -49,7 +49,13 @@ class QaracDecoderHead(keras.layers.Layer):
         """
         self.built = True
 
-
+
+
+
+    def call(self,
+             vector,
+             hidden_states,
+             attention_mask=None,training=False):
         """
         Predicts text fron vector and hidden states of base model
 
@@ -64,9 +70,20 @@ class QaracDecoderHead(keras.layers.Layer):
         Predicted text
 
         """
-        vectors = self.concat(
-
-
+        vectors = self.concat(vector, hidden_states)
+        attentions = attention_mask if attention_mask is None else self.concat(tensorflow.ones((hidden_states.shape(0),
+                                                                                                1)),
+                                                                               attention_mask)
+        l0 = self.layer_0(vectors,
+                          attentions,
+                          None,
+                          False,
+                          training)
+        return self.head(self.layer1(l0.last_hidden_state[:,1:],
+                                     attention_mask,
+                                     None,
+                                     False,
+                                     training))
 
 class QaracDecoderModel(transformers.TFPreTrainedModel,transformers.generation_tf_utils.TFGenerationMixin):
 
@@ -114,7 +131,8 @@ class QaracDecoderModel(transformers.TFPreTrainedModel,transformers.generation_t
         (v,s) = (kwargs['vector'],inputs) if 'vector' in kwargs else inputs
 
         return self.decoder_head((tensorflow.expand_dims(v,1),
-                                  self.base_model(s)
+                                  self.base_model(s)),
+                                 training = kwargs.get('training',False))
 
 
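A few apparent issues remain in the new QaracDecoderHead.call: hidden_states.shape(0) calls the TensorShape object as a function instead of indexing it, self.layer1 and self.layer_0 use inconsistent names, a keras.layers.Concatenate takes a single list of tensors rather than two positional arguments, and a transformers TFRobertaLayer returns a tuple rather than an object with .last_hidden_state. A minimal sketch with those patched, under the stated assumptions (self.concat as Concatenate(axis=1), self.layer_0/self.layer_1 as TFRobertaLayer instances; the argument-order comments reflect one plausible layer signature, not a repository fact):

```python
def call(self, vector, hidden_states, attention_mask=None, training=False):
    # Prepend the encoded vector to the base model's hidden states.
    vectors = self.concat([vector, hidden_states])
    attentions = None
    if attention_mask is not None:
        # Extend the mask with a leading 1 covering the prepended vector.
        ones = tensorflow.ones((tensorflow.shape(hidden_states)[0], 1))
        attentions = self.concat([ones, attention_mask])
    l0 = self.layer_0(vectors,
                      attentions,
                      None,        # head_mask
                      False,       # output_attentions
                      training=training)
    l1 = self.layer_1(l0[0][:, 1:],   # drop the prepended vector position
                      attention_mask,
                      None,
                      False,
                      training=training)
    return self.head(l1[0])
```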
qarac/models/QaracEncoderModel.py CHANGED

@@ -45,7 +45,9 @@ class QaracEncoderModel(transformers.TFPreTrainedModel):
         """
         self.built=True
 
-    def call(self,
+    def call(self,input_ids,
+             attention_mask=None,
+             training=False):
         """
         Vectorizes a tokenised text
 
@@ -61,7 +63,11 @@ class QaracEncoderModel(transformers.TFPreTrainedModel):
 
         """
 
-        return self.head(self.base_model(
+        return self.head(self.base_model(input_ids,
+                                         attention_mask,
+                                         training=training).last_hidden_state,
+                         attention_mask,
+                         training)
 
 
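With this signature, the tensors produced by CombinedCorpus can be passed straight through to the encoder. A hypothetical usage sketch; the tokenizer, checkpoint, and encoder construction are illustrative, not taken from the repository:

```python
import tensorflow
import transformers

# Illustrative setup; `encoder` stands in for a constructed QaracEncoderModel.
tokenizer = transformers.AutoTokenizer.from_pretrained('roberta-base')
batch = tokenizer(['An example sentence.', 'Another one.'],
                  return_tensors='tf', padding=True)
vectors = encoder(batch['input_ids'],
                  attention_mask=tensorflow.cast(batch['attention_mask'],
                                                 tensorflow.float32),
                  training=False)  # one vector per input text
```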
qarac/models/layers/GlobalAttentionPoolingHead.py CHANGED

@@ -57,7 +57,7 @@ class GlobalAttentionPoolingHead(keras.layers.Layer):
                                               self.local_projection,
                                               axes=1)
 
-    def call(self,X,training=None):
+    def call(self,X,attention_mask=None,training=None):
         """
 
 
@@ -65,6 +65,8 @@ class GlobalAttentionPoolingHead(keras.layers.Layer):
         ----------
         X : tensorflow.Tensor
             Base model vectors to apply pooling to.
+        attention_mask: tensorflow.Tensor, optional
+            mask for pad values
         training : bool, optional
             Not used. The default is None.
 
@@ -83,5 +85,7 @@ class GlobalAttentionPoolingHead(keras.layers.Layer):
                                                         X),
                                           axis=2)
         attention = tensorflow.vectorized_map(dot_prod,(lp,gp))
+        if attention_mask is None:
+            attention_mask = tensorflow.ones_like(attention)
         return tensorflow.vectorized_map(dot_prod,
-                                         (attention,X))
+                                         (attention * attention_mask,X))
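A note on the masking step: the elementwise product requires attention_mask to match attention in dtype and shape, which the 0.0/1.0 float mask built in CombinedCorpus satisfies provided attention has shape (batch, seq_len). A toy illustration of the effect; shapes are assumed, not taken from the repository:

```python
import tensorflow

attention = tensorflow.constant([[0.9, 0.4, 0.7]])       # (batch=1, seq=3)
attention_mask = tensorflow.constant([[1.0, 1.0, 0.0]])  # last token is padding
masked = attention * attention_mask  # padded position contributes zero weight
```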