Spaces:
Running
Running
jeanpoll
committed on
Commit
•
1ec8eb5
1
Parent(s):
61e5adb
new version of signature model v35
Browse files- email_parser/_models_signatures.py +5 -2
- email_parser/config.ini +1 -1
- email_parser/models/model_signature_lstm_v16/minmax_scaler.p +0 -0
- email_parser/models/model_signature_lstm_v16/variables/variables.data-00000-of-00001 +0 -0
- email_parser/models/model_signature_lstm_v16/variables/variables.index +0 -0
- email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/keras_metadata.pb +2 -2
- email_parser/models/model_signature_lstm_v35/minmax_scaler.p +0 -0
- email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/saved_model.pb +2 -2
- email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/standard_scaler.p +0 -0
- email_parser/models/model_signature_lstm_v35/variables/variables.data-00000-of-00001 +0 -0
- email_parser/models/model_signature_lstm_v35/variables/variables.index +0 -0
- email_parser/nlp.py +3 -3
email_parser/_models_signatures.py
CHANGED
@@ -157,10 +157,13 @@ def generate_x_y(df, minmax_scaler=None, standard_scaler=None, n_last_lines_to_k
|
|
157 |
df, minmax_scaler, standard_scaler = f_scale_parameters(df, minmax_scaler, standard_scaler)
|
158 |
x = df[list_columns].to_numpy()[-n_last_lines_to_keep:, :]
|
159 |
x = np.expand_dims(x, axis=0)
|
|
|
|
|
160 |
y = df["is_signature"].to_numpy()[-n_last_lines_to_keep:]
|
161 |
y = np.expand_dims(y, axis=0)
|
162 |
-
|
163 |
-
|
|
|
164 |
|
165 |
def f_scale_parameters(df_tagged_data, minmax_scaler=None, standard_scaler=None):
|
166 |
# df_tagged_data = df_tagged_data.copy(deep=True)
|
|
|
157 |
df, minmax_scaler, standard_scaler = f_scale_parameters(df, minmax_scaler, standard_scaler)
|
158 |
x = df[list_columns].to_numpy()[-n_last_lines_to_keep:, :]
|
159 |
x = np.expand_dims(x, axis=0)
|
160 |
+
x = pad_sequences(x, dtype='float64', value=0, maxlen=n_last_lines_to_keep)
|
161 |
+
|
162 |
y = df["is_signature"].to_numpy()[-n_last_lines_to_keep:]
|
163 |
y = np.expand_dims(y, axis=0)
|
164 |
+
y_out = pad_sequences(y, value=0, maxlen=n_last_lines_to_keep)
|
165 |
+
y_mask = pad_sequences(y, value=-1, maxlen=n_last_lines_to_keep)
|
166 |
+
return x, y_out, y_mask, minmax_scaler, standard_scaler
|
167 |
|
168 |
def f_scale_parameters(df_tagged_data, minmax_scaler=None, standard_scaler=None):
|
169 |
# df_tagged_data = df_tagged_data.copy(deep=True)
|
email_parser/config.ini
CHANGED
@@ -3,5 +3,5 @@ ner_model_fr = Jean-Baptiste/camembert-ner-with-dates
|
|
3 |
ner_model_en = Jean-Baptiste/roberta-large-ner-english
|
4 |
device = -1
|
5 |
default_lang = en
|
6 |
-
name_model_signature =
|
7 |
path_models = models
|
|
|
3 |
ner_model_en = Jean-Baptiste/roberta-large-ner-english
|
4 |
device = -1
|
5 |
default_lang = en
|
6 |
+
name_model_signature = model_signature_lstm_v35
|
7 |
path_models = models
|
email_parser/models/model_signature_lstm_v16/minmax_scaler.p
DELETED
Binary file (1.16 kB)
|
|
email_parser/models/model_signature_lstm_v16/variables/variables.data-00000-of-00001
DELETED
Binary file (117 kB)
|
|
email_parser/models/model_signature_lstm_v16/variables/variables.index
DELETED
Binary file (3.51 kB)
|
|
email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/keras_metadata.pb
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:542eb34419276ac2cf6b8daa50f9d5f8ba25610844db4c38a2ce3986eb393819
|
3 |
+
size 22118
|
email_parser/models/model_signature_lstm_v35/minmax_scaler.p
ADDED
Binary file (1.16 kB). View file
|
|
email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/saved_model.pb
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c200a10c531b9a7062058f2eb8027d22b9260c652f6d1382322c3d7a73e6248
|
3 |
+
size 5294382
|
email_parser/models/{model_signature_lstm_v16 → model_signature_lstm_v35}/standard_scaler.p
RENAMED
Binary files a/email_parser/models/model_signature_lstm_v16/standard_scaler.p and b/email_parser/models/model_signature_lstm_v35/standard_scaler.p differ
|
|
email_parser/models/model_signature_lstm_v35/variables/variables.data-00000-of-00001
ADDED
Binary file (86 kB). View file
|
|
email_parser/models/model_signature_lstm_v35/variables/variables.index
ADDED
Binary file (3.49 kB). View file
|
|
email_parser/nlp.py
CHANGED
@@ -285,14 +285,14 @@ def f_detect_email_signature(text, df_ner=None, cut_off_score=0.6, lang=default_
|
|
285 |
# We add dummy value for signature in order to use same function than for training of the model
|
286 |
df_features["is_signature"] = -2
|
287 |
|
288 |
-
x, y_out, _, _ = _models_signatures.generate_x_y(df_features, _models_signatures.minmax_scaler,
|
289 |
_models_signatures.standard_scaler)
|
290 |
|
291 |
y_predict = _models_signatures.model.predict(x)
|
292 |
-
y_predict_value = (y_predict> cut_off_score).reshape([-1])
|
293 |
y_predict_value = np.pad(y_predict_value, (len(df_features) - len(y_predict_value), 0), constant_values=0)[
|
294 |
-len(df_features):]
|
295 |
-
y_predict_score = y_predict.reshape([-1])
|
296 |
y_predict_score = np.pad(y_predict_score, (len(df_features) - len(y_predict_score), 0), constant_values=1)[
|
297 |
-len(df_features):]
|
298 |
|
|
|
285 |
# We add dummy value for signature in order to use same function than for training of the model
|
286 |
df_features["is_signature"] = -2
|
287 |
|
288 |
+
x, y_out, y_mask, _, _ = _models_signatures.generate_x_y(df_features, _models_signatures.minmax_scaler,
|
289 |
_models_signatures.standard_scaler)
|
290 |
|
291 |
y_predict = _models_signatures.model.predict(x)
|
292 |
+
y_predict_value = (y_predict[y_mask != -1]> cut_off_score).reshape([-1])
|
293 |
y_predict_value = np.pad(y_predict_value, (len(df_features) - len(y_predict_value), 0), constant_values=0)[
|
294 |
-len(df_features):]
|
295 |
+
y_predict_score = y_predict[y_mask != -1].reshape([-1])
|
296 |
y_predict_score = np.pad(y_predict_score, (len(df_features) - len(y_predict_score), 0), constant_values=1)[
|
297 |
-len(df_features):]
|
298 |
|