hienntd commited on
Commit
1f62012
1 Parent(s): e6dacdb

add folders

Browse files
assets/classification_report_bilstm_longformer.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,precision,recall,f1-score,support
2
+ Cong nghe,0.9030760301799188,0.9120750293083235,0.9075532225138525,1706.0
3
+ Doi song,0.8157736303431667,0.8108916816277678,0.8133253301320529,1671.0
4
+ Giai tri,0.8793349168646081,0.9095823095823096,0.8942028985507245,2035.0
5
+ Giao duc,0.8739711934156379,0.9105037513397642,0.8918635170603675,1866.0
6
+ Khoa hoc,0.8440285204991087,0.8867041198501873,0.8648401826484019,2136.0
7
+ Kinh te,0.8616517622304051,0.8064992614475628,0.8331637843336723,2031.0
8
+ Nha dat,0.8186528497409327,0.8956069910250354,0.8554026618542747,2117.0
9
+ Phap luat,0.846389228886169,0.8276481149012568,0.8369137670196671,1671.0
10
+ The gioi,0.9020516214427532,0.8996699669966997,0.9008592200925314,1515.0
11
+ The thao,0.9502673796791444,0.9689203925845147,0.9595032397408207,1834.0
12
+ Van hoa,0.7736784140969163,0.7758144671452236,0.7747449682933553,1811.0
13
+ Xa hoi,0.8380889183808892,0.6823338735818476,0.7522334723049435,1851.0
14
+ Xe co,0.9363768819815445,0.9418661455788959,0.939113492450073,2047.0
15
+ accuracy,0.8648470626981186,0.8648470626981186,0.8648470626981186,0.8648470626981186
16
+ macro avg,0.8648724113647075,0.863701238843799,0.8633630582303644,24291.0
17
+ weighted avg,0.8646388635237704,0.8648470626981186,0.8637948431758499,24291.0
assets/classification_report_bilstm_phobertbase.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,precision,recall,f1-score,support
2
+ Cong nghe,0.9163763066202091,0.9249706916764361,0.9206534422403735,1706.0
3
+ Doi song,0.8093645484949833,0.8689407540394973,0.8380952380952381,1671.0
4
+ Giai tri,0.9050632911392406,0.9135135135135135,0.9092687698703841,2035.0
5
+ Giao duc,0.9159528907922913,0.9169346195069668,0.9164434922335297,1866.0
6
+ Khoa hoc,0.8943798449612403,0.8642322097378277,0.879047619047619,2136.0
7
+ Kinh te,0.8685279187817259,0.8424421467257509,0.8552861784553861,2031.0
8
+ Nha dat,0.8609211126310989,0.891828058573453,0.8761020881670534,2117.0
9
+ Phap luat,0.8789769182782283,0.8432076600837821,0.8607208307880269,1671.0
10
+ The gioi,0.9101198402130493,0.9023102310231023,0.9061982101425257,1515.0
11
+ The thao,0.9642470205850487,0.9705561613958561,0.9673913043478262,1834.0
12
+ Van hoa,0.8184912081678957,0.7967973495306461,0.8074986010072748,1811.0
13
+ Xa hoi,0.8089275993467611,0.8028092922744462,0.8058568329718006,1851.0
14
+ Xe co,0.9436144578313252,0.9565217391304348,0.9500242600679282,2047.0
15
+ accuracy,0.884607467786423,0.884607467786423,0.884607467786423,0.884607467786423
16
+ macro avg,0.8842279198340844,0.8842357251701318,0.8840451436488436,24291.0
17
+ weighted avg,0.8847279223712978,0.884607467786423,0.8844859641551247,24291.0
assets/classification_report_longformer.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,precision,recall,f1-score,support
2
+ Cong nghe,0.9432234432234432,0.9426479560707749,0.9429356118400978,1639.0
3
+ Doi song,0.8915956151035322,0.8824593128390597,0.8870039382005452,1659.0
4
+ Giai tri,0.932446264073695,0.9314928425357873,0.9319693094629157,1956.0
5
+ Giao duc,0.9270563890325918,0.9451476793248945,0.9360146252285192,1896.0
6
+ Khoa hoc,0.9003306565895135,0.9054631828978622,0.9028896257697774,2105.0
7
+ Kinh te,0.9139344262295082,0.8762278978388998,0.8946840521564694,2036.0
8
+ Nha dat,0.9100138440239963,0.9219261337073399,0.9159312587087786,2139.0
9
+ Phap luat,0.9034883720930232,0.8946459412780656,0.8990454150997975,1737.0
10
+ The gioi,0.932762030323006,0.9358465608465608,0.9343017497523936,1512.0
11
+ The thao,0.9658314350797267,0.9826187717265353,0.974152785755313,1726.0
12
+ Van hoa,0.8727372462973121,0.8632664134563213,0.8679759956355702,1843.0
13
+ Xa hoi,0.8457655636567583,0.8543909348441926,0.8500563697857947,1765.0
14
+ Xe co,0.9698681732580038,0.9749171793658306,0.9723861222563134,2113.0
15
+ accuracy,0.9163972477824753,0.9163972477824753,0.9163972477824753,0.9163972477824753
16
+ macro avg,0.9160810353064701,0.9162346774409325,0.9161036045886374,24126.0
17
+ weighted avg,0.9162908586305778,0.9163972477824753,0.9162869113678563,24126.0
assets/classification_report_phobertbase.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,precision,recall,f1-score,support
2
+ Cong nghe,0.9375382731169627,0.9341061622940817,0.9358190709046456,1639.0
3
+ Doi song,0.8717639975918121,0.8728149487643159,0.8722891566265061,1659.0
4
+ Giai tri,0.9223350253807107,0.9289366053169734,0.9256240448293429,1956.0
5
+ Giao duc,0.917312661498708,0.9361814345991561,0.9266510049595406,1896.0
6
+ Khoa hoc,0.9024390243902439,0.896437054631829,0.8994280266920878,2105.0
7
+ Kinh te,0.9054054054054054,0.8555992141453831,0.8797979797979797,2036.0
8
+ Nha dat,0.8869209809264306,0.9130434782608695,0.8997926744989634,2139.0
9
+ Phap luat,0.8925425719318849,0.8750719631548647,0.8837209302325582,1737.0
10
+ The gioi,0.9215686274509803,0.9325396825396826,0.9270216962524654,1512.0
11
+ The thao,0.9613196814562003,0.9791425260718424,0.9701492537313433,1726.0
12
+ Van hoa,0.8515667949422759,0.8404774823657081,0.8459858001092299,1843.0
13
+ Xa hoi,0.8269662921348314,0.8339943342776204,0.8304654442877291,1765.0
14
+ Xe co,0.9636792452830188,0.966871746332229,0.9652728561304039,2113.0
15
+ accuracy,0.9051231037055459,0.9051231037055459,0.9051231037055459,0.9051231037055459
16
+ macro avg,0.9047198908853434,0.9050166640580426,0.9047706106963689,24126.0
17
+ weighted avg,0.9050253122673806,0.9051231037055459,0.9049701515052252,24126.0
assets/model_results.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Model,Precision,Recall,F1-Score,Accuracy,Training Time
2
+ BiLSTM+PhoBert,88.47%,88.46%,88.44%,88.46%,192.62 seconds + 8057.01 seconds (feature extraction)
3
+ Longformer-PhoBert,91.63%,91.64%,91.63%,91.64%,275155.18 seconds
4
+ PhoBert-base,90.50%,90.51%,90.50%,90.51%,130717.66 seconds
assets/study_bilstm_256_trials.csv ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ number,value,batch_size,dense_units,dropout_rate,epochs,learning_rate,lstm_units_1,lstm_units_2,state
2
+ 0,0.8729982376098633,64,480,0.4,30,1.7091578189458266e-05,480,336,COMPLETE
3
+ 1,0.878061830997467,96,224,0.30000000000000004,10,0.0005791380373255617,192,192,COMPLETE
4
+ 2,0.880573034286499,64,512,0.5,20,0.0009283647052351477,160,112,COMPLETE
5
+ 3,0.8770326375961304,224,320,0.2,30,8.731374870892545e-05,256,128,COMPLETE
6
+ 4,0.8801202178001404,224,128,0.30000000000000004,20,0.0003112282856420484,320,160,COMPLETE
7
+ 5,0.8666584491729736,128,384,0.4,10,3.7150636635286374e-05,320,192,COMPLETE
8
+ 6,0.8568193912506104,160,224,0.30000000000000004,10,0.005812452943256737,96,48,COMPLETE
9
+ 7,0.8629533648490906,160,256,0.30000000000000004,10,0.0033917930773413887,96,48,COMPLETE
10
+ 12,0.8790087103843689,192,64,0.2,20,0.0016446580041057784,384,320,COMPLETE
11
+ 13,0.8739039301872253,256,160,0.4,20,0.00014455421088706313,192,128,COMPLETE
12
+ 16,0.8764151334762573,192,512,0.2,20,0.0023256035535991326,288,176,COMPLETE
13
+ 17,0.8597010970115662,224,384,0.5,10,0.00010906114410156313,64,32,COMPLETE
14
+ 18,0.8617183566093445,128,128,0.4,30,0.00714913476549205,160,112,COMPLETE
15
+ 25,0.8676876425743103,160,96,0.30000000000000004,20,0.004438362069621676,384,320,COMPLETE
16
+ 26,0.886130690574646,192,160,0.4,30,0.0006793443367131475,224,144,COMPLETE
17
+ 36,0.8722572326660156,256,224,0.30000000000000004,10,0.0016868211535279705,128,96,COMPLETE
18
+ 40,0.857519268989563,224,256,0.5,20,0.009310051845152361,192,128,COMPLETE
19
+ 43,0.8744802474975586,96,192,0.30000000000000004,10,0.00020312787086169953,192,160,COMPLETE
20
+ 55,0.8710633516311646,224,352,0.30000000000000004,30,2.1170451701105263e-05,352,240,COMPLETE
21
+ 65,0.8641883730888367,192,96,0.30000000000000004,10,0.004046109560643685,160,112,COMPLETE
22
+ 68,0.8569017052650452,256,512,0.30000000000000004,20,1.039561090533759e-05,224,144,COMPLETE
23
+ 83,0.874068558216095,224,160,0.4,20,0.0001388571482883374,192,128,COMPLETE
24
+ 86,0.8699518442153931,192,64,0.5,10,0.00028277011562710577,192,128,COMPLETE
25
+ 91,0.8678111433982849,256,160,0.4,20,5.729950522594697e-05,192,128,COMPLETE
26
+ 94,0.8728747367858887,160,192,0.4,20,9.20683037940821e-05,224,144,COMPLETE
assets/study_bilstm_512_trials.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ number,value,batch_size,dense_units,dropout_rate,epochs,learning_rate,lstm_units_1,lstm_units_2,state
2
+ 0,0.865341067314148,96,256,0.5,30,0.000578497514716094,128,64,COMPLETE
3
+ 1,0.8649293780326843,192,160,0.4,20,0.00011456781515208632,512,512,COMPLETE
4
+ 2,0.8559548854827881,224,512,0.4,30,2.3318625484930674e-05,480,336,COMPLETE
5
+ 4,0.8526203036308289,64,128,0.30000000000000004,30,1.5448155714819594e-05,256,160,COMPLETE
6
+ 5,0.8334774374961853,128,320,0.5,10,0.009291957842882373,128,96,COMPLETE
7
+ 8,0.8470627069473267,192,320,0.30000000000000004,10,5.3102245204562256e-05,128,128,COMPLETE
8
+ 10,0.8587542772293091,128,448,0.2,30,0.0007533973844416604,64,32,COMPLETE
9
+ 26,0.859742283821106,128,320,0.30000000000000004,30,0.0008787167048768327,64,32,COMPLETE
10
+ 38,0.8452513217926025,192,224,0.4,20,2.335009856945039e-05,224,176,COMPLETE
11
+ 40,0.8428224325180054,96,160,0.5,10,0.0037689674320787194,160,112,COMPLETE
12
+ 61,0.8478037118911743,192,320,0.30000000000000004,10,5.440861671988972e-05,160,112,COMPLETE
13
+ 64,0.8585072755813599,160,128,0.2,10,0.0002218345356715427,96,48,COMPLETE
14
+ 66,0.8611419796943665,128,160,0.2,10,0.0007396031093644764,96,48,COMPLETE
15
+ 71,0.8615948557853699,160,96,0.2,10,0.0007019623653114116,64,32,COMPLETE
16
+ 88,0.8324070572853088,160,128,0.4,30,1.1489089899423732e-05,96,48,COMPLETE
assets/summary_data.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Dataset,Number of News
2
+ Train,113932
3
+ Test,24291
4
+ Validation,24126
5
+ Total,162349
hyperparameters/BiLSTM_phobertbase.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"lstm_units_1": 224, "lstm_units_2": 144, "dense_units": 160, "dropout_rate": 0.4, "learning_rate": 0.0006793443367131475, "epochs": 30, "batch_size": 192}
hyperparameters/phobertbase.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "k_fold": 5,
3
+ "dropout_probability": 0.3,
4
+ "hidden_size": 768,
5
+ "learning_rate": 2e-05,
6
+ "batch_size": 16,
7
+ "num_epochs": 5,
8
+ "gradient_clipping": 1.0,
9
+ "type_learning_rate_scheduler": "linear_schedule_with_warmup",
10
+ "num_warmup_steps": 0,
11
+ "loss_function": "CrossEntropyLoss"
12
+ }
images/article_by_categories_test_data.html ADDED
The diff for this file is too large to render. See raw diff
 
images/article_by_categories_train_data.html ADDED
The diff for this file is too large to render. See raw diff
 
images/article_by_categories_val_data.html ADDED
The diff for this file is too large to render. See raw diff
 
images/bilstm_phobertbase_summary.png ADDED
images/combined_confusion_matrix.png ADDED
images/confusion_matrix_bilstm_phobertbase.png ADDED
images/confusion_matrix_longformer.png ADDED
images/confusion_matrix_phobertbase.png ADDED
images/logo.png ADDED
images/sample_data.png ADDED
images/study_bilstm_phobertbase_optimize_history.html ADDED
The diff for this file is too large to render. See raw diff
 
images/token_length_distribution.png ADDED