erikhenriksson committed on
Commit
c4a681d
·
verified ·
1 Parent(s): 20bf393

Upload folder using huggingface_hub

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
config.json CHANGED
@@ -4,6 +4,6 @@
4
  "AutoConfig": "configuration_sm_subgroup_classifier.SmSubgroupClassifierConfig",
5
  "AutoModel": "modeling_sm_subgroup_classifier.SmSubgroupClassifier"
6
  },
7
- "available_models": ["fi_NA-nb", "fi_HI-NA-nb-re", "fi_NA-nb-OP", "NA-nb-OP-rv"],
8
  "model_type": "sm_subgroup_classifier"
9
  }
 
4
  "AutoConfig": "configuration_sm_subgroup_classifier.SmSubgroupClassifierConfig",
5
  "AutoModel": "modeling_sm_subgroup_classifier.SmSubgroupClassifier"
6
  },
7
+ "available_models": ["fi_HI-NA-nb", "fi_HI-NA-nb-re", "fi_ID", "fi_ID-NA", "fi_NA-nb", "fi_NA-nb-OP", "fi_NA-nb-OP-rv"],
8
  "model_type": "sm_subgroup_classifier"
9
  }
fi_HI-NA-nb-re/metadata.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcebec77f65e7a4edab907a0680e37b2ee48f4c384a7ff2dee7d00dc88f60749
3
  size 76
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c92e39bf82f91b113e0ac07d03c2fb9d98bb25382748bae620549beb7ee9cd9
3
  size 76
fi_HI-NA-nb-re/training_details.txt CHANGED
@@ -3,7 +3,7 @@ Training Details for fi_HI-NA-nb-re
3
 
4
  Language: fi
5
  Model Name: HI-NA-nb-re
6
- Training Date: 2025-09-18 16:15:20
7
 
8
  Data Summary:
9
  - Total samples: 4597
@@ -13,8 +13,12 @@ Data Summary:
13
 
14
  Classes:
15
  - Number of classes: 2
16
- - Class names: with_comments, general
17
- - Class distribution: {'with_comments': 111, 'general': 4486}
 
 
 
 
18
 
19
  Cross-Validation Results:
20
  - CV folds: 5
 
3
 
4
  Language: fi
5
  Model Name: HI-NA-nb-re
6
+ Training Date: 2025-09-19 09:24:43
7
 
8
  Data Summary:
9
  - Total samples: 4597
 
13
 
14
  Classes:
15
  - Number of classes: 2
16
+ - Class names: blog_comments, general
17
+ - Class distribution: {'blog_comments': 111, 'general': 4486}
18
+
19
+ Cluster Merging:
20
+ - 'blog_comments': kept cluster 1 (no merge)
21
+ - 'general': kept cluster 2 (no merge)
22
 
23
  Cross-Validation Results:
24
  - CV folds: 5
fi_HI-NA-nb/metadata.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c569d94b6800991debcf3e841283806f74a4e5b1cda1f3eae6f37545aeb5b520
3
+ size 71
fi_HI-NA-nb/model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a56971a10c0a58917cb4fdc08ad63020f3d3272561152f82163450a4a8d1aa
3
+ size 9055
fi_HI-NA-nb/scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2df08b921372adc99c83b8c2e724f492621a53fb05d0b75915a36f1e366c2c0e
3
+ size 25191
fi_HI-NA-nb/training_details.txt ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training Details for fi_HI-NA-nb
2
+ ========================================
3
+
4
+ Language: fi
5
+ Model Name: HI-NA-nb
6
+ Training Date: 2025-09-19 09:29:41
7
+
8
+ Data Summary:
9
+ - Total samples: 934
10
+ - Training samples: 747
11
+ - Test samples: 187
12
+ - Embedding dimension: 1024
13
+
14
+ Classes:
15
+ - Number of classes: 2
16
+ - Class names: crafting, cooking
17
+ - Class distribution: {'crafting': 521, 'cooking': 413}
18
+
19
+ Cluster Merging:
20
+ - 'crafting': kept cluster 1 (no merge)
21
+ - 'cooking': kept cluster 2 (no merge)
22
+
23
+ Cross-Validation Results:
24
+ - CV folds: 5
25
+ - CV scores: [0.9466666666666667, 0.9266666666666666, 0.9261744966442953, 0.9463087248322147, 0.9328859060402684]
26
+ - CV mean: 0.9357
27
+ - CV std: 0.0091
28
+ - CV confidence interval: 0.9357 ± 0.0182
29
+
30
+ Final Performance:
31
+ - Test accuracy: 0.9519
32
+
33
+ Model Configuration:
34
+ - Algorithm: Logistic Regression
35
+ - Regularization (C): 1.0
36
+ - Feature scaling: StandardScaler
37
+ - Random state: 42
38
+
39
+ Files:
40
+ - Classifier: model.pkl
41
+ - Scaler: scaler.pkl
42
+ - Metadata: metadata.pkl
43
+ - This file: training_details.txt
fi_ID-NA/metadata.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c92e39bf82f91b113e0ac07d03c2fb9d98bb25382748bae620549beb7ee9cd9
3
+ size 76
fi_ID-NA/model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fded060e6effa581ae765df79a6c32469fa6c8905f0ceee20781889a929d8d3f
3
+ size 9055
fi_ID-NA/scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e65976e997250b10340a77d706fde49c9d2572b62cc6ae0d2fc3101dbf449a70
3
+ size 25191
fi_ID-NA/training_details.txt ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training Details for fi_ID-NA
2
+ ========================================
3
+
4
+ Language: fi
5
+ Model Name: ID-NA
6
+ Training Date: 2025-09-19 09:31:09
7
+
8
+ Data Summary:
9
+ - Total samples: 1663
10
+ - Training samples: 1330
11
+ - Test samples: 333
12
+ - Embedding dimension: 1024
13
+
14
+ Classes:
15
+ - Number of classes: 2
16
+ - Class names: blog_comments, general
17
+ - Class distribution: {'blog_comments': 1317, 'general': 346}
18
+
19
+ Cluster Merging:
20
+ - 'blog_comments': kept cluster 1 (no merge)
21
+ - 'general': kept cluster 2 (no merge)
22
+
23
+ Cross-Validation Results:
24
+ - CV folds: 5
25
+ - CV scores: [0.9849624060150376, 0.9887218045112782, 0.9849624060150376, 0.9924812030075187, 0.981203007518797]
26
+ - CV mean: 0.9865
27
+ - CV std: 0.0038
28
+ - CV confidence interval: 0.9865 ± 0.0077
29
+
30
+ Final Performance:
31
+ - Test accuracy: 0.9940
32
+
33
+ Model Configuration:
34
+ - Algorithm: Logistic Regression
35
+ - Regularization (C): 1.0
36
+ - Feature scaling: StandardScaler
37
+ - Random state: 42
38
+
39
+ Files:
40
+ - Classifier: model.pkl
41
+ - Scaler: scaler.pkl
42
+ - Metadata: metadata.pkl
43
+ - This file: training_details.txt
fi_ID/metadata.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f5f93a5f73e8dcdfa726082f2f9cbd8797e638be4d4696e7d74bfa9840f7c4
3
+ size 76
fi_ID/model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90508b6c172f9b2ca5359037f9ce6b2ebab5c4eb62ac743449221d969984dabf
3
+ size 9055
fi_ID/scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51544ccb9a8845358700c1923159d45551d48cf3c2ecb0146be6c42ec6042425
3
+ size 25191
fi_ID/training_details.txt ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Training Details for fi_ID
2
+ ========================================
3
+
4
+ Language: fi
5
+ Model Name: ID
6
+ Training Date: 2025-09-19 09:20:03
7
+
8
+ Data Summary:
9
+ - Total samples: 34973
10
+ - Training samples: 27978
11
+ - Test samples: 6995
12
+ - Embedding dimension: 1024
13
+
14
+ Classes:
15
+ - Number of classes: 2
16
+ - Class names: general, blog_comments
17
+ - Class distribution: {'general': 32370, 'blog_comments': 2603}
18
+
19
+ Cluster Merging:
20
+ - 'general': merged clusters [np.int64(0), np.int64(2), np.int64(3)]
21
+ - 'blog_comments': kept cluster 1 (no merge)
22
+
23
+ Cross-Validation Results:
24
+ - CV folds: 5
25
+ - CV scores: [0.9960686204431737, 0.9958899213724088, 0.9974982130092923, 0.9935656836461126, 0.9951742627345844]
26
+ - CV mean: 0.9956
27
+ - CV std: 0.0013
28
+ - CV confidence interval: 0.9956 ± 0.0026
29
+
30
+ Final Performance:
31
+ - Test accuracy: 0.9963
32
+
33
+ Model Configuration:
34
+ - Algorithm: Logistic Regression
35
+ - Regularization (C): 1.0
36
+ - Feature scaling: StandardScaler
37
+ - Random state: 42
38
+
39
+ Files:
40
+ - Classifier: model.pkl
41
+ - Scaler: scaler.pkl
42
+ - Metadata: metadata.pkl
43
+ - This file: training_details.txt
fi_NA-nb/metadata.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcebec77f65e7a4edab907a0680e37b2ee48f4c384a7ff2dee7d00dc88f60749
3
  size 76
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c92e39bf82f91b113e0ac07d03c2fb9d98bb25382748bae620549beb7ee9cd9
3
  size 76
fi_NA-nb/training_details.txt CHANGED
@@ -3,7 +3,7 @@ Training Details for fi_NA-nb
3
 
4
  Language: fi
5
  Model Name: NA-nb
6
- Training Date: 2025-09-18 16:12:26
7
 
8
  Data Summary:
9
  - Total samples: 218088
@@ -13,8 +13,12 @@ Data Summary:
13
 
14
  Classes:
15
  - Number of classes: 2
16
- - Class names: with_comments, general
17
- - Class distribution: {'with_comments': 24862, 'general': 193226}
 
 
 
 
18
 
19
  Cross-Validation Results:
20
  - CV folds: 5
 
3
 
4
  Language: fi
5
  Model Name: NA-nb
6
+ Training Date: 2025-09-19 09:35:23
7
 
8
  Data Summary:
9
  - Total samples: 218088
 
13
 
14
  Classes:
15
  - Number of classes: 2
16
+ - Class names: blog_comments, general
17
+ - Class distribution: {'blog_comments': 24862, 'general': 193226}
18
+
19
+ Cluster Merging:
20
+ - 'blog_comments': kept cluster 1 (no merge)
21
+ - 'general': kept cluster 2 (no merge)
22
 
23
  Cross-Validation Results:
24
  - CV folds: 5