add folders
Browse files- assets/classification_report_bilstm_longformer.csv +17 -0
- assets/classification_report_bilstm_phobertbase.csv +17 -0
- assets/classification_report_longformer.csv +17 -0
- assets/classification_report_phobertbase.csv +17 -0
- assets/model_results.csv +4 -0
- assets/study_bilstm_256_trials.csv +26 -0
- assets/study_bilstm_512_trials.csv +16 -0
- assets/summary_data.csv +5 -0
- hyperparameters/BiLSTM_phobertbase.json +1 -0
- hyperparameters/phobertbase.json +12 -0
- images/article_by_categories_test_data.html +0 -0
- images/article_by_categories_train_data.html +0 -0
- images/article_by_categories_val_data.html +0 -0
- images/bilstm_phobertbase_summary.png +0 -0
- images/combined_confusion_matrix.png +0 -0
- images/confusion_matrix_bilstm_phobertbase.png +0 -0
- images/confusion_matrix_longformer.png +0 -0
- images/confusion_matrix_phobertbase.png +0 -0
- images/logo.png +0 -0
- images/sample_data.png +0 -0
- images/study_bilstm_phobertbase_optimize_history.html +0 -0
- images/token_length_distribution.png +0 -0
assets/classification_report_bilstm_longformer.csv
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,precision,recall,f1-score,support
|
2 |
+
Cong nghe,0.9030760301799188,0.9120750293083235,0.9075532225138525,1706.0
|
3 |
+
Doi song,0.8157736303431667,0.8108916816277678,0.8133253301320529,1671.0
|
4 |
+
Giai tri,0.8793349168646081,0.9095823095823096,0.8942028985507245,2035.0
|
5 |
+
Giao duc,0.8739711934156379,0.9105037513397642,0.8918635170603675,1866.0
|
6 |
+
Khoa hoc,0.8440285204991087,0.8867041198501873,0.8648401826484019,2136.0
|
7 |
+
Kinh te,0.8616517622304051,0.8064992614475628,0.8331637843336723,2031.0
|
8 |
+
Nha dat,0.8186528497409327,0.8956069910250354,0.8554026618542747,2117.0
|
9 |
+
Phap luat,0.846389228886169,0.8276481149012568,0.8369137670196671,1671.0
|
10 |
+
The gioi,0.9020516214427532,0.8996699669966997,0.9008592200925314,1515.0
|
11 |
+
The thao,0.9502673796791444,0.9689203925845147,0.9595032397408207,1834.0
|
12 |
+
Van hoa,0.7736784140969163,0.7758144671452236,0.7747449682933553,1811.0
|
13 |
+
Xa hoi,0.8380889183808892,0.6823338735818476,0.7522334723049435,1851.0
|
14 |
+
Xe co,0.9363768819815445,0.9418661455788959,0.939113492450073,2047.0
|
15 |
+
accuracy,0.8648470626981186,0.8648470626981186,0.8648470626981186,0.8648470626981186
|
16 |
+
macro avg,0.8648724113647075,0.863701238843799,0.8633630582303644,24291.0
|
17 |
+
weighted avg,0.8646388635237704,0.8648470626981186,0.8637948431758499,24291.0
|
assets/classification_report_bilstm_phobertbase.csv
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,precision,recall,f1-score,support
|
2 |
+
Cong nghe,0.9163763066202091,0.9249706916764361,0.9206534422403735,1706.0
|
3 |
+
Doi song,0.8093645484949833,0.8689407540394973,0.8380952380952381,1671.0
|
4 |
+
Giai tri,0.9050632911392406,0.9135135135135135,0.9092687698703841,2035.0
|
5 |
+
Giao duc,0.9159528907922913,0.9169346195069668,0.9164434922335297,1866.0
|
6 |
+
Khoa hoc,0.8943798449612403,0.8642322097378277,0.879047619047619,2136.0
|
7 |
+
Kinh te,0.8685279187817259,0.8424421467257509,0.8552861784553861,2031.0
|
8 |
+
Nha dat,0.8609211126310989,0.891828058573453,0.8761020881670534,2117.0
|
9 |
+
Phap luat,0.8789769182782283,0.8432076600837821,0.8607208307880269,1671.0
|
10 |
+
The gioi,0.9101198402130493,0.9023102310231023,0.9061982101425257,1515.0
|
11 |
+
The thao,0.9642470205850487,0.9705561613958561,0.9673913043478262,1834.0
|
12 |
+
Van hoa,0.8184912081678957,0.7967973495306461,0.8074986010072748,1811.0
|
13 |
+
Xa hoi,0.8089275993467611,0.8028092922744462,0.8058568329718006,1851.0
|
14 |
+
Xe co,0.9436144578313252,0.9565217391304348,0.9500242600679282,2047.0
|
15 |
+
accuracy,0.884607467786423,0.884607467786423,0.884607467786423,0.884607467786423
|
16 |
+
macro avg,0.8842279198340844,0.8842357251701318,0.8840451436488436,24291.0
|
17 |
+
weighted avg,0.8847279223712978,0.884607467786423,0.8844859641551247,24291.0
|
assets/classification_report_longformer.csv
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,precision,recall,f1-score,support
|
2 |
+
Cong nghe,0.9432234432234432,0.9426479560707749,0.9429356118400978,1639.0
|
3 |
+
Doi song,0.8915956151035322,0.8824593128390597,0.8870039382005452,1659.0
|
4 |
+
Giai tri,0.932446264073695,0.9314928425357873,0.9319693094629157,1956.0
|
5 |
+
Giao duc,0.9270563890325918,0.9451476793248945,0.9360146252285192,1896.0
|
6 |
+
Khoa hoc,0.9003306565895135,0.9054631828978622,0.9028896257697774,2105.0
|
7 |
+
Kinh te,0.9139344262295082,0.8762278978388998,0.8946840521564694,2036.0
|
8 |
+
Nha dat,0.9100138440239963,0.9219261337073399,0.9159312587087786,2139.0
|
9 |
+
Phap luat,0.9034883720930232,0.8946459412780656,0.8990454150997975,1737.0
|
10 |
+
The gioi,0.932762030323006,0.9358465608465608,0.9343017497523936,1512.0
|
11 |
+
The thao,0.9658314350797267,0.9826187717265353,0.974152785755313,1726.0
|
12 |
+
Van hoa,0.8727372462973121,0.8632664134563213,0.8679759956355702,1843.0
|
13 |
+
Xa hoi,0.8457655636567583,0.8543909348441926,0.8500563697857947,1765.0
|
14 |
+
Xe co,0.9698681732580038,0.9749171793658306,0.9723861222563134,2113.0
|
15 |
+
accuracy,0.9163972477824753,0.9163972477824753,0.9163972477824753,0.9163972477824753
|
16 |
+
macro avg,0.9160810353064701,0.9162346774409325,0.9161036045886374,24126.0
|
17 |
+
weighted avg,0.9162908586305778,0.9163972477824753,0.9162869113678563,24126.0
|
assets/classification_report_phobertbase.csv
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,precision,recall,f1-score,support
|
2 |
+
Cong nghe,0.9375382731169627,0.9341061622940817,0.9358190709046456,1639.0
|
3 |
+
Doi song,0.8717639975918121,0.8728149487643159,0.8722891566265061,1659.0
|
4 |
+
Giai tri,0.9223350253807107,0.9289366053169734,0.9256240448293429,1956.0
|
5 |
+
Giao duc,0.917312661498708,0.9361814345991561,0.9266510049595406,1896.0
|
6 |
+
Khoa hoc,0.9024390243902439,0.896437054631829,0.8994280266920878,2105.0
|
7 |
+
Kinh te,0.9054054054054054,0.8555992141453831,0.8797979797979797,2036.0
|
8 |
+
Nha dat,0.8869209809264306,0.9130434782608695,0.8997926744989634,2139.0
|
9 |
+
Phap luat,0.8925425719318849,0.8750719631548647,0.8837209302325582,1737.0
|
10 |
+
The gioi,0.9215686274509803,0.9325396825396826,0.9270216962524654,1512.0
|
11 |
+
The thao,0.9613196814562003,0.9791425260718424,0.9701492537313433,1726.0
|
12 |
+
Van hoa,0.8515667949422759,0.8404774823657081,0.8459858001092299,1843.0
|
13 |
+
Xa hoi,0.8269662921348314,0.8339943342776204,0.8304654442877291,1765.0
|
14 |
+
Xe co,0.9636792452830188,0.966871746332229,0.9652728561304039,2113.0
|
15 |
+
accuracy,0.9051231037055459,0.9051231037055459,0.9051231037055459,0.9051231037055459
|
16 |
+
macro avg,0.9047198908853434,0.9050166640580426,0.9047706106963689,24126.0
|
17 |
+
weighted avg,0.9050253122673806,0.9051231037055459,0.9049701515052252,24126.0
|
assets/model_results.csv
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model,Precision,Recall,F1-Score,Accuracy,Training Time
|
2 |
+
BiLSTM+PhoBert,88.47%,88.46%,88.44%,88.46%,192.62 seconds + 8057.01 seconds (feature extraction)
|
3 |
+
Longformer-PhoBert,91.63%,91.64%,91.63%,91.64%,275155.18 seconds
|
4 |
+
PhoBert-base,90.50%,90.51%,90.50%,90.51%,130717.66 seconds
|
assets/study_bilstm_256_trials.csv
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
number,value,batch_size,dense_units,dropout_rate,epochs,learning_rate,lstm_units_1,lstm_units_2,state
|
2 |
+
0,0.8729982376098633,64,480,0.4,30,1.7091578189458266e-05,480,336,COMPLETE
|
3 |
+
1,0.878061830997467,96,224,0.30000000000000004,10,0.0005791380373255617,192,192,COMPLETE
|
4 |
+
2,0.880573034286499,64,512,0.5,20,0.0009283647052351477,160,112,COMPLETE
|
5 |
+
3,0.8770326375961304,224,320,0.2,30,8.731374870892545e-05,256,128,COMPLETE
|
6 |
+
4,0.8801202178001404,224,128,0.30000000000000004,20,0.0003112282856420484,320,160,COMPLETE
|
7 |
+
5,0.8666584491729736,128,384,0.4,10,3.7150636635286374e-05,320,192,COMPLETE
|
8 |
+
6,0.8568193912506104,160,224,0.30000000000000004,10,0.005812452943256737,96,48,COMPLETE
|
9 |
+
7,0.8629533648490906,160,256,0.30000000000000004,10,0.0033917930773413887,96,48,COMPLETE
|
10 |
+
12,0.8790087103843689,192,64,0.2,20,0.0016446580041057784,384,320,COMPLETE
|
11 |
+
13,0.8739039301872253,256,160,0.4,20,0.00014455421088706313,192,128,COMPLETE
|
12 |
+
16,0.8764151334762573,192,512,0.2,20,0.0023256035535991326,288,176,COMPLETE
|
13 |
+
17,0.8597010970115662,224,384,0.5,10,0.00010906114410156313,64,32,COMPLETE
|
14 |
+
18,0.8617183566093445,128,128,0.4,30,0.00714913476549205,160,112,COMPLETE
|
15 |
+
25,0.8676876425743103,160,96,0.30000000000000004,20,0.004438362069621676,384,320,COMPLETE
|
16 |
+
26,0.886130690574646,192,160,0.4,30,0.0006793443367131475,224,144,COMPLETE
|
17 |
+
36,0.8722572326660156,256,224,0.30000000000000004,10,0.0016868211535279705,128,96,COMPLETE
|
18 |
+
40,0.857519268989563,224,256,0.5,20,0.009310051845152361,192,128,COMPLETE
|
19 |
+
43,0.8744802474975586,96,192,0.30000000000000004,10,0.00020312787086169953,192,160,COMPLETE
|
20 |
+
55,0.8710633516311646,224,352,0.30000000000000004,30,2.1170451701105263e-05,352,240,COMPLETE
|
21 |
+
65,0.8641883730888367,192,96,0.30000000000000004,10,0.004046109560643685,160,112,COMPLETE
|
22 |
+
68,0.8569017052650452,256,512,0.30000000000000004,20,1.039561090533759e-05,224,144,COMPLETE
|
23 |
+
83,0.874068558216095,224,160,0.4,20,0.0001388571482883374,192,128,COMPLETE
|
24 |
+
86,0.8699518442153931,192,64,0.5,10,0.00028277011562710577,192,128,COMPLETE
|
25 |
+
91,0.8678111433982849,256,160,0.4,20,5.729950522594697e-05,192,128,COMPLETE
|
26 |
+
94,0.8728747367858887,160,192,0.4,20,9.20683037940821e-05,224,144,COMPLETE
|
assets/study_bilstm_512_trials.csv
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
number,value,batch_size,dense_units,dropout_rate,epochs,learning_rate,lstm_units_1,lstm_units_2,state
|
2 |
+
0,0.865341067314148,96,256,0.5,30,0.000578497514716094,128,64,COMPLETE
|
3 |
+
1,0.8649293780326843,192,160,0.4,20,0.00011456781515208632,512,512,COMPLETE
|
4 |
+
2,0.8559548854827881,224,512,0.4,30,2.3318625484930674e-05,480,336,COMPLETE
|
5 |
+
4,0.8526203036308289,64,128,0.30000000000000004,30,1.5448155714819594e-05,256,160,COMPLETE
|
6 |
+
5,0.8334774374961853,128,320,0.5,10,0.009291957842882373,128,96,COMPLETE
|
7 |
+
8,0.8470627069473267,192,320,0.30000000000000004,10,5.3102245204562256e-05,128,128,COMPLETE
|
8 |
+
10,0.8587542772293091,128,448,0.2,30,0.0007533973844416604,64,32,COMPLETE
|
9 |
+
26,0.859742283821106,128,320,0.30000000000000004,30,0.0008787167048768327,64,32,COMPLETE
|
10 |
+
38,0.8452513217926025,192,224,0.4,20,2.335009856945039e-05,224,176,COMPLETE
|
11 |
+
40,0.8428224325180054,96,160,0.5,10,0.0037689674320787194,160,112,COMPLETE
|
12 |
+
61,0.8478037118911743,192,320,0.30000000000000004,10,5.440861671988972e-05,160,112,COMPLETE
|
13 |
+
64,0.8585072755813599,160,128,0.2,10,0.0002218345356715427,96,48,COMPLETE
|
14 |
+
66,0.8611419796943665,128,160,0.2,10,0.0007396031093644764,96,48,COMPLETE
|
15 |
+
71,0.8615948557853699,160,96,0.2,10,0.0007019623653114116,64,32,COMPLETE
|
16 |
+
88,0.8324070572853088,160,128,0.4,30,1.1489089899423732e-05,96,48,COMPLETE
|
assets/summary_data.csv
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Dataset,Number of News
|
2 |
+
Train,113932
|
3 |
+
Test,24291
|
4 |
+
Validation,24126
|
5 |
+
Total,162349
|
hyperparameters/BiLSTM_phobertbase.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lstm_units_1": 224, "lstm_units_2": 144, "dense_units": 160, "dropout_rate": 0.4, "learning_rate": 0.0006793443367131475, "epochs": 30, "batch_size": 192}
|
hyperparameters/phobertbase.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"k_fold": 5,
|
3 |
+
"dropout_probability": 0.3,
|
4 |
+
"hidden_size": 768,
|
5 |
+
"learning_rate": 2e-05,
|
6 |
+
"batch_size": 16,
|
7 |
+
"num_epochs": 5,
|
8 |
+
"gradient_clipping": 1.0,
|
9 |
+
"type_learning_rate_scheduler": "linear_schedule_with_warmup",
|
10 |
+
"num_warmup_steps": 0,
|
11 |
+
"loss_function": "CrossEntropyLoss"
|
12 |
+
}
|
images/article_by_categories_test_data.html
ADDED
The diff for this file is too large to render.
See raw diff
|
|
images/article_by_categories_train_data.html
ADDED
The diff for this file is too large to render.
See raw diff
|
|
images/article_by_categories_val_data.html
ADDED
The diff for this file is too large to render.
See raw diff
|
|
images/bilstm_phobertbase_summary.png
ADDED
images/combined_confusion_matrix.png
ADDED
images/confusion_matrix_bilstm_phobertbase.png
ADDED
images/confusion_matrix_longformer.png
ADDED
images/confusion_matrix_phobertbase.png
ADDED
images/logo.png
ADDED
images/sample_data.png
ADDED
images/study_bilstm_phobertbase_optimize_history.html
ADDED
The diff for this file is too large to render.
See raw diff
|
|
images/token_length_distribution.png
ADDED