jeanpoll committed on
Commit
1ec8eb5
•
1 Parent(s): 61e5adb

new version of signature model v35

Browse files
email_parser/_models_signatures.py CHANGED
@@ -157,10 +157,13 @@ def generate_x_y(df, minmax_scaler=None, standard_scaler=None, n_last_lines_to_k
157
  df, minmax_scaler, standard_scaler = f_scale_parameters(df, minmax_scaler, standard_scaler)
158
  x = df[list_columns].to_numpy()[-n_last_lines_to_keep:, :]
159
  x = np.expand_dims(x, axis=0)
 
 
160
  y = df["is_signature"].to_numpy()[-n_last_lines_to_keep:]
161
  y = np.expand_dims(y, axis=0)
162
- return x, y, minmax_scaler, standard_scaler
163
-
 
164
 
165
  def f_scale_parameters(df_tagged_data, minmax_scaler=None, standard_scaler=None):
166
  # df_tagged_data = df_tagged_data.copy(deep=True)
 
157
  df, minmax_scaler, standard_scaler = f_scale_parameters(df, minmax_scaler, standard_scaler)
158
  x = df[list_columns].to_numpy()[-n_last_lines_to_keep:, :]
159
  x = np.expand_dims(x, axis=0)
160
+ x = pad_sequences(x, dtype='float64', value=0, maxlen=n_last_lines_to_keep)
161
+
162
  y = df["is_signature"].to_numpy()[-n_last_lines_to_keep:]
163
  y = np.expand_dims(y, axis=0)
164
+ y_out = pad_sequences(y, value=0, maxlen=n_last_lines_to_keep)
165
+ y_mask = pad_sequences(y, value=-1, maxlen=n_last_lines_to_keep)
166
+ return x, y_out, y_mask, minmax_scaler, standard_scaler
167
 
168
  def f_scale_parameters(df_tagged_data, minmax_scaler=None, standard_scaler=None):
169
  # df_tagged_data = df_tagged_data.copy(deep=True)
email_parser/config.ini CHANGED
@@ -3,5 +3,5 @@ ner_model_fr = Jean-Baptiste/camembert-ner-with-dates
3
  ner_model_en = Jean-Baptiste/roberta-large-ner-english
4
  device = -1
5
  default_lang = en
6
- name_model_signature = model_signature_lstm_v16
7
  path_models = models
 
3
  ner_model_en = Jean-Baptiste/roberta-large-ner-english
4
  device = -1
5
  default_lang = en
6
+ name_model_signature = model_signature_lstm_v35
7
  path_models = models
email_parser/models/model_signature_lstm_v16/minmax_scaler.p DELETED
Binary file (1.16 kB)
 
email_parser/models/model_signature_lstm_v16/variables/variables.data-00000-of-00001 DELETED
Binary file (117 kB)
 
email_parser/models/model_signature_lstm_v16/variables/variables.index DELETED
Binary file (3.51 kB)
 
email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/keras_metadata.pb RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73a9ab3d0262141747b5aafa348c57e1a5813e03cf1e5c291162b37cdf846143
3
- size 22142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:542eb34419276ac2cf6b8daa50f9d5f8ba25610844db4c38a2ce3986eb393819
3
+ size 22118
email_parser/models/model_signature_lstm_v35/minmax_scaler.p ADDED
Binary file (1.16 kB). View file
 
email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/saved_model.pb RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70ff3069a27b192313bbdd9685f106fc46ef2082eab6479697f575bad555eb4f
3
- size 5296390
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c200a10c531b9a7062058f2eb8027d22b9260c652f6d1382322c3d7a73e6248
3
+ size 5294382
email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/standard_scaler.p RENAMED
Binary files a/email_parser/models/model_signature_lstm_v16/standard_scaler.p and b/email_parser/models/model_signature_lstm_v35/standard_scaler.p differ
 
email_parser/models/model_signature_lstm_v35/variables/variables.data-00000-of-00001 ADDED
Binary file (86 kB). View file
 
email_parser/models/model_signature_lstm_v35/variables/variables.index ADDED
Binary file (3.49 kB). View file
 
email_parser/nlp.py CHANGED
@@ -285,14 +285,14 @@ def f_detect_email_signature(text, df_ner=None, cut_off_score=0.6, lang=default_
285
  # We add dummy value for signature in order to use same function than for training of the model
286
  df_features["is_signature"] = -2
287
 
288
- x, y_out, _, _ = _models_signatures.generate_x_y(df_features, _models_signatures.minmax_scaler,
289
  _models_signatures.standard_scaler)
290
 
291
  y_predict = _models_signatures.model.predict(x)
292
- y_predict_value = (y_predict> cut_off_score).reshape([-1])
293
  y_predict_value = np.pad(y_predict_value, (len(df_features) - len(y_predict_value), 0), constant_values=0)[
294
  -len(df_features):]
295
- y_predict_score = y_predict.reshape([-1])
296
  y_predict_score = np.pad(y_predict_score, (len(df_features) - len(y_predict_score), 0), constant_values=1)[
297
  -len(df_features):]
298
 
 
285
  # We add dummy value for signature in order to use same function than for training of the model
286
  df_features["is_signature"] = -2
287
 
288
+ x, y_out, y_mask, _, _ = _models_signatures.generate_x_y(df_features, _models_signatures.minmax_scaler,
289
  _models_signatures.standard_scaler)
290
 
291
  y_predict = _models_signatures.model.predict(x)
292
+ y_predict_value = (y_predict[y_mask != -1]> cut_off_score).reshape([-1])
293
  y_predict_value = np.pad(y_predict_value, (len(df_features) - len(y_predict_value), 0), constant_values=0)[
294
  -len(df_features):]
295
+ y_predict_score = y_predict[y_mask != -1].reshape([-1])
296
  y_predict_score = np.pad(y_predict_score, (len(df_features) - len(y_predict_score), 0), constant_values=1)[
297
  -len(df_features):]
298