Upload folder using huggingface_hub
Browse files- .DS_Store +0 -0
- config.json +1 -1
- fi_HI-NA-nb-re/metadata.pkl +1 -1
- fi_HI-NA-nb-re/training_details.txt +7 -3
- fi_HI-NA-nb/metadata.pkl +3 -0
- fi_HI-NA-nb/model.pkl +3 -0
- fi_HI-NA-nb/scaler.pkl +3 -0
- fi_HI-NA-nb/training_details.txt +43 -0
- fi_ID-NA/metadata.pkl +3 -0
- fi_ID-NA/model.pkl +3 -0
- fi_ID-NA/scaler.pkl +3 -0
- fi_ID-NA/training_details.txt +43 -0
- fi_ID/metadata.pkl +3 -0
- fi_ID/model.pkl +3 -0
- fi_ID/scaler.pkl +3 -0
- fi_ID/training_details.txt +43 -0
- fi_NA-nb/metadata.pkl +1 -1
- fi_NA-nb/training_details.txt +7 -3
.DS_Store
CHANGED
|
Binary files a/.DS_Store and b/.DS_Store differ
|
|
|
config.json
CHANGED
|
@@ -4,6 +4,6 @@
|
|
| 4 |
"AutoConfig": "configuration_sm_subgroup_classifier.SmSubgroupClassifierConfig",
|
| 5 |
"AutoModel": "modeling_sm_subgroup_classifier.SmSubgroupClassifier"
|
| 6 |
},
|
| 7 |
-
"available_models": ["
|
| 8 |
"model_type": "sm_subgroup_classifier"
|
| 9 |
}
|
|
|
|
| 4 |
"AutoConfig": "configuration_sm_subgroup_classifier.SmSubgroupClassifierConfig",
|
| 5 |
"AutoModel": "modeling_sm_subgroup_classifier.SmSubgroupClassifier"
|
| 6 |
},
|
| 7 |
+
"available_models": ["fi_HI-NA-nb", "fi_HI-NA-nb-re", "fi_ID", "fi_ID-NA", "fi_NA-nb", "fi_NA-nb-OP", "fi_NA-nb-OP-rv"],
|
| 8 |
"model_type": "sm_subgroup_classifier"
|
| 9 |
}
|
fi_HI-NA-nb-re/metadata.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 76
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c92e39bf82f91b113e0ac07d03c2fb9d98bb25382748bae620549beb7ee9cd9
|
| 3 |
size 76
|
fi_HI-NA-nb-re/training_details.txt
CHANGED
|
@@ -3,7 +3,7 @@ Training Details for fi_HI-NA-nb-re
|
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Model Name: HI-NA-nb-re
|
| 6 |
-
Training Date: 2025-09-
|
| 7 |
|
| 8 |
Data Summary:
|
| 9 |
- Total samples: 4597
|
|
@@ -13,8 +13,12 @@ Data Summary:
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
- Number of classes: 2
|
| 16 |
-
- Class names:
|
| 17 |
-
- Class distribution: {'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
Cross-Validation Results:
|
| 20 |
- CV folds: 5
|
|
|
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Model Name: HI-NA-nb-re
|
| 6 |
+
Training Date: 2025-09-19 09:24:43
|
| 7 |
|
| 8 |
Data Summary:
|
| 9 |
- Total samples: 4597
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
- Number of classes: 2
|
| 16 |
+
- Class names: blog_comments, general
|
| 17 |
+
- Class distribution: {'blog_comments': 111, 'general': 4486}
|
| 18 |
+
|
| 19 |
+
Cluster Merging:
|
| 20 |
+
- 'blog_comments': kept cluster 1 (no merge)
|
| 21 |
+
- 'general': kept cluster 2 (no merge)
|
| 22 |
|
| 23 |
Cross-Validation Results:
|
| 24 |
- CV folds: 5
|
fi_HI-NA-nb/metadata.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c569d94b6800991debcf3e841283806f74a4e5b1cda1f3eae6f37545aeb5b520
|
| 3 |
+
size 71
|
fi_HI-NA-nb/model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68a56971a10c0a58917cb4fdc08ad63020f3d3272561152f82163450a4a8d1aa
|
| 3 |
+
size 9055
|
fi_HI-NA-nb/scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2df08b921372adc99c83b8c2e724f492621a53fb05d0b75915a36f1e366c2c0e
|
| 3 |
+
size 25191
|
fi_HI-NA-nb/training_details.txt
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Training Details for fi_HI-NA-nb
|
| 2 |
+
========================================
|
| 3 |
+
|
| 4 |
+
Language: fi
|
| 5 |
+
Model Name: HI-NA-nb
|
| 6 |
+
Training Date: 2025-09-19 09:29:41
|
| 7 |
+
|
| 8 |
+
Data Summary:
|
| 9 |
+
- Total samples: 934
|
| 10 |
+
- Training samples: 747
|
| 11 |
+
- Test samples: 187
|
| 12 |
+
- Embedding dimension: 1024
|
| 13 |
+
|
| 14 |
+
Classes:
|
| 15 |
+
- Number of classes: 2
|
| 16 |
+
- Class names: crafting, cooking
|
| 17 |
+
- Class distribution: {'crafting': 521, 'cooking': 413}
|
| 18 |
+
|
| 19 |
+
Cluster Merging:
|
| 20 |
+
- 'crafting': kept cluster 1 (no merge)
|
| 21 |
+
- 'cooking': kept cluster 2 (no merge)
|
| 22 |
+
|
| 23 |
+
Cross-Validation Results:
|
| 24 |
+
- CV folds: 5
|
| 25 |
+
- CV scores: [0.9466666666666667, 0.9266666666666666, 0.9261744966442953, 0.9463087248322147, 0.9328859060402684]
|
| 26 |
+
- CV mean: 0.9357
|
| 27 |
+
- CV std: 0.0091
|
| 28 |
+
- CV confidence interval: 0.9357 ± 0.0182
|
| 29 |
+
|
| 30 |
+
Final Performance:
|
| 31 |
+
- Test accuracy: 0.9519
|
| 32 |
+
|
| 33 |
+
Model Configuration:
|
| 34 |
+
- Algorithm: Logistic Regression
|
| 35 |
+
- Regularization (C): 1.0
|
| 36 |
+
- Feature scaling: StandardScaler
|
| 37 |
+
- Random state: 42
|
| 38 |
+
|
| 39 |
+
Files:
|
| 40 |
+
- Classifier: model.pkl
|
| 41 |
+
- Scaler: scaler.pkl
|
| 42 |
+
- Metadata: metadata.pkl
|
| 43 |
+
- This file: training_details.txt
|
fi_ID-NA/metadata.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c92e39bf82f91b113e0ac07d03c2fb9d98bb25382748bae620549beb7ee9cd9
|
| 3 |
+
size 76
|
fi_ID-NA/model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fded060e6effa581ae765df79a6c32469fa6c8905f0ceee20781889a929d8d3f
|
| 3 |
+
size 9055
|
fi_ID-NA/scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e65976e997250b10340a77d706fde49c9d2572b62cc6ae0d2fc3101dbf449a70
|
| 3 |
+
size 25191
|
fi_ID-NA/training_details.txt
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Training Details for fi_ID-NA
|
| 2 |
+
========================================
|
| 3 |
+
|
| 4 |
+
Language: fi
|
| 5 |
+
Model Name: ID-NA
|
| 6 |
+
Training Date: 2025-09-19 09:31:09
|
| 7 |
+
|
| 8 |
+
Data Summary:
|
| 9 |
+
- Total samples: 1663
|
| 10 |
+
- Training samples: 1330
|
| 11 |
+
- Test samples: 333
|
| 12 |
+
- Embedding dimension: 1024
|
| 13 |
+
|
| 14 |
+
Classes:
|
| 15 |
+
- Number of classes: 2
|
| 16 |
+
- Class names: blog_comments, general
|
| 17 |
+
- Class distribution: {'blog_comments': 1317, 'general': 346}
|
| 18 |
+
|
| 19 |
+
Cluster Merging:
|
| 20 |
+
- 'blog_comments': kept cluster 1 (no merge)
|
| 21 |
+
- 'general': kept cluster 2 (no merge)
|
| 22 |
+
|
| 23 |
+
Cross-Validation Results:
|
| 24 |
+
- CV folds: 5
|
| 25 |
+
- CV scores: [0.9849624060150376, 0.9887218045112782, 0.9849624060150376, 0.9924812030075187, 0.981203007518797]
|
| 26 |
+
- CV mean: 0.9865
|
| 27 |
+
- CV std: 0.0038
|
| 28 |
+
- CV confidence interval: 0.9865 ± 0.0077
|
| 29 |
+
|
| 30 |
+
Final Performance:
|
| 31 |
+
- Test accuracy: 0.9940
|
| 32 |
+
|
| 33 |
+
Model Configuration:
|
| 34 |
+
- Algorithm: Logistic Regression
|
| 35 |
+
- Regularization (C): 1.0
|
| 36 |
+
- Feature scaling: StandardScaler
|
| 37 |
+
- Random state: 42
|
| 38 |
+
|
| 39 |
+
Files:
|
| 40 |
+
- Classifier: model.pkl
|
| 41 |
+
- Scaler: scaler.pkl
|
| 42 |
+
- Metadata: metadata.pkl
|
| 43 |
+
- This file: training_details.txt
|
fi_ID/metadata.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25f5f93a5f73e8dcdfa726082f2f9cbd8797e638be4d4696e7d74bfa9840f7c4
|
| 3 |
+
size 76
|
fi_ID/model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90508b6c172f9b2ca5359037f9ce6b2ebab5c4eb62ac743449221d969984dabf
|
| 3 |
+
size 9055
|
fi_ID/scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51544ccb9a8845358700c1923159d45551d48cf3c2ecb0146be6c42ec6042425
|
| 3 |
+
size 25191
|
fi_ID/training_details.txt
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Training Details for fi_ID
|
| 2 |
+
========================================
|
| 3 |
+
|
| 4 |
+
Language: fi
|
| 5 |
+
Model Name: ID
|
| 6 |
+
Training Date: 2025-09-19 09:20:03
|
| 7 |
+
|
| 8 |
+
Data Summary:
|
| 9 |
+
- Total samples: 34973
|
| 10 |
+
- Training samples: 27978
|
| 11 |
+
- Test samples: 6995
|
| 12 |
+
- Embedding dimension: 1024
|
| 13 |
+
|
| 14 |
+
Classes:
|
| 15 |
+
- Number of classes: 2
|
| 16 |
+
- Class names: general, blog_comments
|
| 17 |
+
- Class distribution: {'general': 32370, 'blog_comments': 2603}
|
| 18 |
+
|
| 19 |
+
Cluster Merging:
|
| 20 |
+
- 'general': merged clusters [np.int64(0), np.int64(2), np.int64(3)]
|
| 21 |
+
- 'blog_comments': kept cluster 1 (no merge)
|
| 22 |
+
|
| 23 |
+
Cross-Validation Results:
|
| 24 |
+
- CV folds: 5
|
| 25 |
+
- CV scores: [0.9960686204431737, 0.9958899213724088, 0.9974982130092923, 0.9935656836461126, 0.9951742627345844]
|
| 26 |
+
- CV mean: 0.9956
|
| 27 |
+
- CV std: 0.0013
|
| 28 |
+
- CV confidence interval: 0.9956 ± 0.0026
|
| 29 |
+
|
| 30 |
+
Final Performance:
|
| 31 |
+
- Test accuracy: 0.9963
|
| 32 |
+
|
| 33 |
+
Model Configuration:
|
| 34 |
+
- Algorithm: Logistic Regression
|
| 35 |
+
- Regularization (C): 1.0
|
| 36 |
+
- Feature scaling: StandardScaler
|
| 37 |
+
- Random state: 42
|
| 38 |
+
|
| 39 |
+
Files:
|
| 40 |
+
- Classifier: model.pkl
|
| 41 |
+
- Scaler: scaler.pkl
|
| 42 |
+
- Metadata: metadata.pkl
|
| 43 |
+
- This file: training_details.txt
|
fi_NA-nb/metadata.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 76
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c92e39bf82f91b113e0ac07d03c2fb9d98bb25382748bae620549beb7ee9cd9
|
| 3 |
size 76
|
fi_NA-nb/training_details.txt
CHANGED
|
@@ -3,7 +3,7 @@ Training Details for fi_NA-nb
|
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Model Name: NA-nb
|
| 6 |
-
Training Date: 2025-09-
|
| 7 |
|
| 8 |
Data Summary:
|
| 9 |
- Total samples: 218088
|
|
@@ -13,8 +13,12 @@ Data Summary:
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
- Number of classes: 2
|
| 16 |
-
- Class names:
|
| 17 |
-
- Class distribution: {'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
Cross-Validation Results:
|
| 20 |
- CV folds: 5
|
|
|
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Model Name: NA-nb
|
| 6 |
+
Training Date: 2025-09-19 09:35:23
|
| 7 |
|
| 8 |
Data Summary:
|
| 9 |
- Total samples: 218088
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
- Number of classes: 2
|
| 16 |
+
- Class names: blog_comments, general
|
| 17 |
+
- Class distribution: {'blog_comments': 24862, 'general': 193226}
|
| 18 |
+
|
| 19 |
+
Cluster Merging:
|
| 20 |
+
- 'blog_comments': kept cluster 1 (no merge)
|
| 21 |
+
- 'general': kept cluster 2 (no merge)
|
| 22 |
|
| 23 |
Cross-Validation Results:
|
| 24 |
- CV folds: 5
|