Model V8 Release
#11
by
Tihsrah-CD
- opened
- README.md +34 -30
- config.json +43 -47
- label encoder.joblib → label_encoder.joblib +2 -2
- pytorch_model.bin +2 -2
README.md
CHANGED
@@ -66,7 +66,7 @@ predicted_label = torch.argmax(probabilities, dim=-1)
|
|
66 |
REPO_NAME = "daxa-ai/pebblo-classifier"
|
67 |
|
68 |
# Path to the label encoder file in the repository
|
69 |
-
LABEL_ENCODER_FILE = "
|
70 |
|
71 |
# Construct the URL to the label encoder file
|
72 |
url = hf_hub_url(REPO_NAME, filename=LABEL_ENCODER_FILE)
|
@@ -96,9 +96,9 @@ Here are the labels along with their respective counts in the dataset:
|
|
96 |
| BOARD_MEETING_AGREEMENT | 4,225 |
|
97 |
| CONSULTING_AGREEMENT | 2,965 |
|
98 |
| CUSTOMER_LIST_AGREEMENT | 9,000 |
|
99 |
-
| DISTRIBUTION_PARTNER_AGREEMENT |
|
100 |
| EMPLOYEE_AGREEMENT | 3,921 |
|
101 |
-
| ENTERPRISE_AGREEMENT |
|
102 |
| ENTERPRISE_LICENSE_AGREEMENT | 9,000 |
|
103 |
| EXECUTIVE_SEVERANCE_AGREEMENT | 9,000 |
|
104 |
| FINANCIAL_REPORT_AGREEMENT | 8,381 |
|
@@ -107,11 +107,11 @@ Here are the labels along with their respective counts in the dataset:
|
|
107 |
| LOAN_AND_SECURITY_AGREEMENT | 9,000 |
|
108 |
| MEDICAL_ADVICE | 2,359 |
|
109 |
| MERGER_AGREEMENT | 7,706 |
|
110 |
-
| NDA_AGREEMENT |
|
111 |
-
| NORMAL_TEXT |
|
112 |
| PATENT_APPLICATION_FILLINGS_AGREEMENT | 9,000 |
|
113 |
| PRICE_LIST_AGREEMENT | 9,000 |
|
114 |
-
| SETTLEMENT_AGREEMENT |
|
115 |
| SEXUAL_HARRASSMENT | 8,321 |
|
116 |
|
117 |
|
@@ -141,7 +141,7 @@ Here are the labels along with their respective counts in the dataset:
|
|
141 |
| MEDICAL_ADVICE | 289 |
|
142 |
| MERGER_AGREEMENT | 7,079 |
|
143 |
| NDA_AGREEMENT | 1,452 |
|
144 |
-
| NORMAL_TEXT |
|
145 |
| PATENT_APPLICATION_FILLINGS_AGREEMENT | 6,177 |
|
146 |
| PRICE_LIST_AGREEMENT | 5,453 |
|
147 |
| SETTLEMENT_AGREEMENT | 5,806 |
|
@@ -153,33 +153,37 @@ Here are the labels along with their respective counts in the dataset:
|
|
153 |
|
154 |
| Agreement Type | precision | recall | f1-score | support |
|
155 |
| ------------------------------------------- | --------- | ------ | -------- | ------- |
|
156 |
-
| BOARD_MEETING_AGREEMENT | 0.
|
157 |
-
| CONSULTING_AGREEMENT | 0.
|
158 |
-
| CUSTOMER_LIST_AGREEMENT | 0.
|
159 |
-
| DISTRIBUTION_PARTNER_AGREEMENT | 0.
|
160 |
-
| EMPLOYEE_AGREEMENT | 0.78 | 0.
|
161 |
-
| ENTERPRISE_AGREEMENT | 0.
|
162 |
-
| ENTERPRISE_LICENSE_AGREEMENT | 0.
|
163 |
-
|
|
164 |
-
| FINANCIAL_REPORT_AGREEMENT | 0.
|
165 |
-
| HARMFUL_ADVICE | 0.
|
166 |
-
| INTERNAL_PRODUCT_ROADMAP_AGREEMENT | 0.
|
167 |
-
| LOAN_AND_SECURITY_AGREEMENT | 0.
|
168 |
-
| MEDICAL_ADVICE | 0.
|
169 |
-
| MERGER_AGREEMENT | 0.
|
170 |
-
| NDA_AGREEMENT | 0.
|
171 |
-
| NORMAL_TEXT | 0.
|
172 |
| PATENT_APPLICATION_FILLINGS_AGREEMENT | 0.95 | 0.99 | 0.97 | 6177 |
|
173 |
-
| PRICE_LIST_AGREEMENT | 0.
|
174 |
-
| SETTLEMENT_AGREEMENT | 0.
|
175 |
-
|
|
176 |
| | | | | |
|
177 |
-
| accuracy | | | 0.
|
178 |
-
| macro avg | 0.
|
179 |
-
| weighted avg | 0.
|
180 |
|
181 |
|
182 |
#### Results
|
183 |
|
184 |
-
The model
|
|
|
|
|
|
|
|
|
185 |
|
|
|
66 |
REPO_NAME = "daxa-ai/pebblo-classifier"
|
67 |
|
68 |
# Path to the label encoder file in the repository
|
69 |
+
LABEL_ENCODER_FILE = "label_encoder.joblib"
|
70 |
|
71 |
# Construct the URL to the label encoder file
|
72 |
url = hf_hub_url(REPO_NAME, filename=LABEL_ENCODER_FILE)
|
|
|
96 |
| BOARD_MEETING_AGREEMENT | 4,225 |
|
97 |
| CONSULTING_AGREEMENT | 2,965 |
|
98 |
| CUSTOMER_LIST_AGREEMENT | 9,000 |
|
99 |
+
| DISTRIBUTION_PARTNER_AGREEMENT | 5,162 |
|
100 |
| EMPLOYEE_AGREEMENT | 3,921 |
|
101 |
+
| ENTERPRISE_AGREEMENT | 4,217 |
|
102 |
| ENTERPRISE_LICENSE_AGREEMENT | 9,000 |
|
103 |
| EXECUTIVE_SEVERANCE_AGREEMENT | 9,000 |
|
104 |
| FINANCIAL_REPORT_AGREEMENT | 8,381 |
|
|
|
107 |
| LOAN_AND_SECURITY_AGREEMENT | 9,000 |
|
108 |
| MEDICAL_ADVICE | 2,359 |
|
109 |
| MERGER_AGREEMENT | 7,706 |
|
110 |
+
| NDA_AGREEMENT | 5,229 |
|
111 |
+
| NORMAL_TEXT | 9,000 |
|
112 |
| PATENT_APPLICATION_FILLINGS_AGREEMENT | 9,000 |
|
113 |
| PRICE_LIST_AGREEMENT | 9,000 |
|
114 |
+
| SETTLEMENT_AGREEMENT | 3,754 |
|
115 |
| SEXUAL_HARRASSMENT | 8,321 |
|
116 |
|
117 |
|
|
|
141 |
| MEDICAL_ADVICE | 289 |
|
142 |
| MERGER_AGREEMENT | 7,079 |
|
143 |
| NDA_AGREEMENT | 1,452 |
|
144 |
+
| NORMAL_TEXT | 8,335 |
|
145 |
| PATENT_APPLICATION_FILLINGS_AGREEMENT | 6,177 |
|
146 |
| PRICE_LIST_AGREEMENT | 5,453 |
|
147 |
| SETTLEMENT_AGREEMENT | 5,806 |
|
|
|
153 |
|
154 |
| Agreement Type | precision | recall | f1-score | support |
|
155 |
| ------------------------------------------- | --------- | ------ | -------- | ------- |
|
156 |
+
| BOARD_MEETING_AGREEMENT | 0.96 | 0.94 | 0.95 | 4335 |
|
157 |
+
| CONSULTING_AGREEMENT | 0.77 | 0.89 | 0.83 | 1533 |
|
158 |
+
| CUSTOMER_LIST_AGREEMENT | 0.84 | 0.87 | 0.85 | 4995 |
|
159 |
+
| DISTRIBUTION_PARTNER_AGREEMENT | 0.71 | 0.64 | 0.67 | 7231 |
|
160 |
+
| EMPLOYEE_AGREEMENT | 0.78 | 0.90 | 0.83 | 1433 |
|
161 |
+
| ENTERPRISE_AGREEMENT | 0.19 | 0.72 | 0.30 | 1616 |
|
162 |
+
| ENTERPRISE_LICENSE_AGREEMENT | 0.92 | 0.78 | 0.84 | 8574 |
|
163 |
+
| EXECUTIVE_SEVERANCE_AGREEMENT | 0.96 | 0.85 | 0.90 | 5177 |
|
164 |
+
| FINANCIAL_REPORT_AGREEMENT | 0.92 | 0.98 | 0.95 | 4264 |
|
165 |
+
| HARMFUL_ADVICE | 0.82 | 0.92 | 0.87 | 474 |
|
166 |
+
| INTERNAL_PRODUCT_ROADMAP_AGREEMENT | 0.94 | 0.97 | 0.96 | 4116 |
|
167 |
+
| LOAN_AND_SECURITY_AGREEMENT | 0.92 | 0.96 | 0.94 | 6354 |
|
168 |
+
| MEDICAL_ADVICE | 0.76 | 1.00 | 0.86 | 289 |
|
169 |
+
| MERGER_AGREEMENT | 0.90 | 0.55 | 0.68 | 7079 |
|
170 |
+
| NDA_AGREEMENT | 0.62 | 0.89 | 0.74 | 1452 |
|
171 |
+
| NORMAL_TEXT | 0.99 | 0.99 | 0.99 | 6049 |
|
172 |
| PATENT_APPLICATION_FILLINGS_AGREEMENT | 0.95 | 0.99 | 0.97 | 6177 |
|
173 |
+
| PRICE_LIST_AGREEMENT | 0.81 | 0.75 | 0.78 | 5453 |
|
174 |
+
| SETTLEMENT_AGREEMENT | 0.83 | 0.73 | 0.78 | 5806 |
|
175 |
+
| SEXUAL_HARRASSMENT | 0.98 | 0.93 | 0.96 | 4750 |
|
176 |
| | | | | |
|
177 |
+
| accuracy | | | 0.84 | 87157 |
|
178 |
+
| macro avg | 0.83 | 0.86 | 0.83 | 87157 |
|
179 |
+
| weighted avg | 0.87 | 0.84 | 0.85 | 87157 |
|
180 |
|
181 |
|
182 |
#### Results
|
183 |
|
184 |
+
The model’s performance is summarized by the precision, recall, and F1-score metrics, which are reported for each of the 20 labels in the table above. On the test set, the model achieved an accuracy of 0.8376, a weighted-average precision of 0.8744, and a weighted-average recall of 0.8376. The weighted-average F1-score, i.e. the support-weighted mean of the per-label F1-scores (each of which is the harmonic mean of that label’s precision and recall), stands at 0.8478.
|
185 |
+
|
186 |
+
The evaluation loss, which measures the discrepancy between the model’s predictions and the actual values, is 0.5616. Lower loss values indicate better model performance.
|
187 |
+
|
188 |
+
The model processed approximately 101.886 samples per second during evaluation, which took a total of 855.4327 seconds for the 87,157 test samples. This corresponds to approximately 0.796 evaluation steps per second.
|
189 |
|
config.json
CHANGED
@@ -9,54 +9,51 @@
|
|
9 |
"dropout": 0.1,
|
10 |
"hidden_dim": 3072,
|
11 |
"id2label": {
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
},
|
35 |
"initializer_range": 0.02,
|
36 |
"label2id": {
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
"LOAN_AND_SECURITY_AGREEMENT": 9
|
59 |
-
},
|
60 |
"max_position_embeddings": 512,
|
61 |
"model_type": "distilbert",
|
62 |
"n_heads": 12,
|
@@ -70,4 +67,3 @@
|
|
70 |
"transformers_version": "4.36.2",
|
71 |
"vocab_size": 30522
|
72 |
}
|
73 |
-
|
|
|
9 |
"dropout": 0.1,
|
10 |
"hidden_dim": 3072,
|
11 |
"id2label": {
|
12 |
+
|
13 |
+
"0": "BOARD_MEETING_AGREEMENT",
|
14 |
+
"1": "CONSULTING_AGREEMENT",
|
15 |
+
"2": "CUSTOMER_LIST_AGREEMENT",
|
16 |
+
"3": "DISTRIBUTION_PARTNER_AGREEMENT",
|
17 |
+
"4": "EMPLOYEE_AGREEMENT",
|
18 |
+
"5": "ENTERPRISE_AGREEMENT",
|
19 |
+
"6": "ENTERPRISE_LICENSE_AGREEMENT",
|
20 |
+
"7": "EXECUTIVE_SEVERANCE_AGREEMENT",
|
21 |
+
"8": "FINANCIAL_REPORT_AGREEMENT",
|
22 |
+
"9": "HARMFUL_ADVICE",
|
23 |
+
"10": "INTERNAL_PRODUCT_ROADMAP_AGREEMENT",
|
24 |
+
"11": "LOAN_AND_SECURITY_AGREEMENT",
|
25 |
+
"12": "MEDICAL_ADVICE",
|
26 |
+
"13": "MERGER_AGREEMENT",
|
27 |
+
"14": "NDA_AGREEMENT",
|
28 |
+
"15": "NORMAL_TEXT",
|
29 |
+
"16": "PATENT_APPLICATION_FILLINGS_AGREEMENT",
|
30 |
+
"17": "PRICE_LIST_AGREEMENT",
|
31 |
+
"18": "SETTLEMENT_AGREEMENT",
|
32 |
+
"19": "SEXUAL_HARRASSMENT"
|
33 |
+
},
|
|
|
34 |
"initializer_range": 0.02,
|
35 |
"label2id": {
|
36 |
+
"BOARD_MEETING_AGREEMENT": 0,
|
37 |
+
"CONSULTING_AGREEMENT": 1,
|
38 |
+
"INTERNAL_PRODUCT_ROADMAP_AGREEMENT": 10,
|
39 |
+
"LOAN_AND_SECURITY_AGREEMENT": 11,
|
40 |
+
"MEDICAL_ADVICE": 12,
|
41 |
+
"MERGER_AGREEMENT": 13,
|
42 |
+
"NDA_AGREEMENT": 14,
|
43 |
+
"NORMAL_TEXT": 15,
|
44 |
+
"PATENT_APPLICATION_FILLINGS_AGREEMENT": 16,
|
45 |
+
"PRICE_LIST_AGREEMENT": 17,
|
46 |
+
"SETTLEMENT_AGREEMENT": 18,
|
47 |
+
"SEXUAL_HARRASSMENT": 19,
|
48 |
+
"CUSTOMER_LIST_AGREEMENT": 2,
|
49 |
+
"DISTRIBUTION_PARTNER_AGREEMENT": 3,
|
50 |
+
"EMPLOYEE_AGREEMENT": 4,
|
51 |
+
"ENTERPRISE_AGREEMENT": 5,
|
52 |
+
"ENTERPRISE_LICENSE_AGREEMENT": 6,
|
53 |
+
"EXECUTIVE_SEVERANCE_AGREEMENT": 7,
|
54 |
+
"FINANCIAL_REPORT_AGREEMENT": 8,
|
55 |
+
"HARMFUL_ADVICE": 9
|
56 |
+
},
|
|
|
|
|
57 |
"max_position_embeddings": 512,
|
58 |
"model_type": "distilbert",
|
59 |
"n_heads": 12,
|
|
|
67 |
"transformers_version": "4.36.2",
|
68 |
"vocab_size": 30522
|
69 |
}
|
|
label encoder.joblib → label_encoder.joblib
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:679d6eddc6f1fc6f4f4f58df6d284bf455024e8273567b22557de19dfc8753bb
|
3 |
+
size 1099
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c110ff2add5c7adf6aadaa01e0e14ce8e140ede610307633fb7172e066fa42fc
|
3 |
+
size 268209725
|