thonyyy committed on
Commit 1eb98d3
1 Parent(s): 2c3d6a5

Update README.md

Files changed (1)
  1. README.md +100 -42
README.md CHANGED
@@ -1,46 +1,104 @@
  ---
  license: apache-2.0
  ---
- |Train Loss|Train Accuracy|Validation Loss|Validation Accuracy|Epoch|
- |:--------:|:------------:|:-------------:|:-----------------:|:---:|
- |4.1939034461975|0.145276814699172|3.39564657211303|0.186678826808929|1|
- |3.13256049156188|0.208270609378814|2.82256889343261|0.233325317502021|2|
- |2.84938621520996|0.229006066918373|2.72168040275573|0.23955675959587|3|
- |2.76001143455505|0.234559893608093|2.65143990516662|0.243813350796699|4|
- |2.70404982566833|0.238061532378196|2.6107530593872|0.246574580669403|5|
- |2.6638650894165|0.240613579750061|2.57847166061401|0.248678594827651|6|
- |2.63293719291687|0.242613524198532|2.55772447586059|0.250325441360473|7|
- |2.60750746726989|0.244251564145088|2.53469848632812|0.251805543899536|8|
- |2.58670353889465|0.245637223124504|2.51883554458618|0.253003656864166|9|
- |2.56865572929382|0.24682830274105|2.49989652633666|0.254459708929061|10|
- |2.55285787582397|0.247884958982467|2.50092124938964|0.254229605197906|11|
- |2.53919672966003|0.248811900615692|2.47859454154968|0.255691051483154|12|
- |2.52694725990295|0.249630719423294|2.46921157836914|0.25649145245552|13|
- |2.51587128639221|0.250377029180526|2.46414017677307|0.257025629281997|14|
- |2.50599193572998|0.251064419746398|2.4557819366455|0.257613778114318|15|
- |2.49690246582031|0.251682370901107|2.44843244552612|0.258032590150833|16|
- |2.48859119415283|0.252267301082611|2.43858122825622|0.258764535188674|17|
- |2.48097324371337|0.252792716026306|2.43251323699951|0.259270757436752|18|
- |2.47009921073913|0.253554105758667|2.43577146530151|0.258938610553741|19|
- |2.45849394798278|0.254375785589218|2.42337107658386|0.260090589523315|20|
- |2.44776940345764|0.255127549171447|2.41147446632385|0.260682851076126|21|
- |2.43759155273437|0.255834341049194|2.41405510902404|0.260819226503372|22|
- |2.42819571495056|0.256486028432846|2.40314364433288|0.26152354478836|23|
- |2.41974592208862|0.257094115018844|2.39181518554687|0.262460082769393|24|
- |2.41181802749633|0.257666647434234|2.3825569152832|0.263035386800766|25|
- |2.4044873714447|0.258173674345016|2.37829279899597|0.263585090637207|26|
- |2.39774870872497|0.258645176887512|2.37718510627746|0.263547003269195|27|
- |2.39184403419494|0.259076595306396|2.37379837036132|0.264020860195159|28|
- |2.38593125343322|0.259495466947555|2.37083029747009|0.264293819665908|29|
- |2.38093471527099|0.259853214025497|2.36486291885375|0.264451295137405|30|
- |2.37621307373046|0.260185241699218|2.36547923088073|0.264706671237945|31|
- |2.37177920341491|0.260504961013793|2.3609721660614|0.264981210231781|32|
- |2.3679461479187|0.260774314403533|2.36445379257202|0.264800041913986|33|
- |2.3643410205841|0.261037856340408|2.3573100566864|0.265379041433334|34|
- |2.36092805862426|0.261268675327301|2.36105728149414|0.264868646860122|35|
- |2.35798692703247|0.261485010385513|2.35409832000732|0.265503793954849|36|
- |2.35523629188537|0.26168617606163|2.35252356529235|0.265713244676589|37|
- |2.35284709930419|0.261859744787216|2.35101222991943|0.265856444835662|38|
- |2.35047316551208|0.262033462524414|2.34698224067687|0.266099989414215|39|
- |2.34832262992858|0.262173235416412|2.34894156455993|0.266122311353683|40|
+ # pegasus-indonesian-base_pretrained
+
+ Github : [PegasusAnthony](https://github.com/nicholaswilven/PEGASUSAnthony/tree/master)
+
+ This model is a PEGASUS base model pretrained on [kaggle id news 2017](https://www.kaggle.com/datasets/aashari/indonesian-news-articles-published-at-2017), [CC_News_id](https://github.com/Wikidepia/indonesian_datasets/tree/master/dump/cc-news), and [OSCAR_2201](https://huggingface.co/datasets/oscar-corpus/OSCAR-2201/viewer/id/train); it serves as the base for the finetuned checkpoint [pegasus-indonesian-base_finetune](https://huggingface.co/thonyyy/pegasus-indonesian-base_finetune).
+
+ It achieves the following results at the final training epoch:
+ - Train Loss: 2.34832262992858
+ - Train Accuracy: 0.262173235416412
+ - Validation Loss: 2.34894156455993
+ - Validation Accuracy: 0.266122311353683
+ - Train Lr: 0.000136618677061051
+ - Epoch: 40
+
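+ A minimal loading sketch, assuming the repository id `thonyyy/pegasus-indonesian-base_pretrained` and TensorFlow weights (consistent with the framework versions listed below). Because this checkpoint is only pretrained, not summarization-finetuned, generations reflect the gap-sentence pretraining objective rather than polished summaries.
+
+ ```python
+ from transformers import AutoTokenizer, TFPegasusForConditionalGeneration
+
+ # Assumed repository id for this checkpoint.
+ model_id = "thonyyy/pegasus-indonesian-base_pretrained"
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = TFPegasusForConditionalGeneration.from_pretrained(model_id)
+
+ # The model is uncased, so keep the Indonesian input text lowercased.
+ text = "contoh artikel berita berbahasa indonesia untuk dicoba."
+ inputs = tokenizer(text, return_tensors="tf", truncation=True, max_length=512)
+ output_ids = model.generate(**inputs, max_new_tokens=64)
+ print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
+ ```
+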
+ ## Intended uses & limitations
+
+ This model is uncased, cannot handle special characters other than "," and ".", has difficulty with numbers, and its performance has only been tested on news article text.
+
+ ## Training and evaluation data
+ Pretraining datasets (a loading sketch for OSCAR_2201 follows the list):
+ 1. [kaggle id news 2017](https://www.kaggle.com/datasets/aashari/indonesian-news-articles-published-at-2017)
+ 2. [CC_News_id](https://github.com/Wikidepia/indonesian_datasets/tree/master/dump/cc-news)
+ 3. [OSCAR_2201](https://huggingface.co/datasets/oscar-corpus/OSCAR-2201/viewer/id/train)
+
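+ Of these, OSCAR_2201 can also be streamed with the `datasets` library; the corpus is gated on the Hugging Face Hub, so prior access approval and a logged-in token may be required, and the `id` subset and `train` split names below follow the dataset link above.
+
+ ```python
+ from datasets import load_dataset
+
+ # Stream the Indonesian subset of OSCAR-2201 without downloading it in full.
+ oscar_id = load_dataset(
+     "oscar-corpus/OSCAR-2201",
+     "id",
+     split="train",
+     streaming=True,
+ )
+
+ # Peek at the first document's text.
+ print(next(iter(oscar_id))["text"][:200])
+ ```
+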
+ ## Training procedure
+ For replication, see the [PegasusAnthony GitHub repository](https://github.com/nicholaswilven/PEGASUSAnthony/tree/master).
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - optimizer: {'name': 'Adafactor', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': False, 'is_legacy_optimizer': False, 'learning_rate': 0.005, 'beta_2_decay': -0.8, 'epsilon_1': 1e-30, 'epsilon_2': 0.001, 'clip_threshold': 1.0, 'relative_step': True}
+ - training_precision: float32
+
+ ```python
+ from transformers import PegasusConfig
+
+ # Architecture used for pretraining; assumes the standard Transformers PegasusConfig.
+ configuration = PegasusConfig()
+ configuration.vocab_size = 32103
+ configuration.d_model = 512
+ configuration.dropout = 0.15
+ configuration.decoder_attention_heads = 8
+ configuration.decoder_layers = 12
+ configuration.decoder_ffn_dim = 3072
+ configuration.encoder_attention_heads = 8
+ configuration.encoder_layers = 12
+ configuration.encoder_ffn_dim = 3072
+ ```
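+
+ A rough sketch of how the configuration above and the listed Adafactor settings might be tied together in TensorFlow; the exact training loop lives in the GitHub repository, and the class and optimizer names below are assumptions based on the framework versions listed further down.
+
+ ```python
+ import tensorflow as tf
+ from transformers import TFPegasusForConditionalGeneration
+
+ # Randomly initialised PEGASUS model built from `configuration` above.
+ model = TFPegasusForConditionalGeneration(configuration)
+
+ # Adafactor optimizer mirroring the serialized optimizer config shown above.
+ optimizer = tf.keras.optimizers.Adafactor(
+     learning_rate=0.005,
+     beta_2_decay=-0.8,
+     epsilon_1=1e-30,
+     epsilon_2=0.001,
+     clip_threshold=1.0,
+     relative_step=True,
+ )
+
+ # Transformers TF models compute their loss internally when labels are passed.
+ model.compile(optimizer=optimizer)
+ ```
+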
+ ### Training results
+
+ |Train Loss|Train Accuracy|Validation Loss|Validation Accuracy|Train Lr|Epoch|
+ |:--------:|:------------:|:-------------:|:-----------------:|:------:|:---:|
+ |4.1939034461975|0.145276814699172|3.39564657211303|0.186678826808929|0.00499999988824129|1|
+ |3.13256049156188|0.208270609378814|2.82256889343261|0.233325317502021|0.00499999988824129|2|
+ |2.84938621520996|0.229006066918373|2.72168040275573|0.23955675959587|0.00499999988824129|3|
+ |2.76001143455505|0.234559893608093|2.65143990516662|0.243813350796699|0.00499999988824129|4|
+ |2.70404982566833|0.238061532378196|2.6107530593872|0.246574580669403|0.00452418718487024|5|
+ |2.6638650894165|0.240613579750061|2.57847166061401|0.248678594827651|0.00409365398809313|6|
+ |2.63293719291687|0.242613524198532|2.55772447586059|0.250325441360473|0.00370409130118787|7|
+ |2.60750746726989|0.244251564145088|2.53469848632812|0.251805543899536|0.00335160037502646|8|
+ |2.58670353889465|0.245637223124504|2.51883554458618|0.253003656864166|0.00303265335969626|9|
+ |2.56865572929382|0.24682830274105|2.49989652633666|0.254459708929061|0.00274405837990343|10|
+ |2.55285787582397|0.247884958982467|2.50092124938964|0.254229605197906|0.00248292670585215|11|
+ |2.53919672966003|0.248811900615692|2.47859454154968|0.255691051483154|0.00224664504639804|12|
+ |2.52694725990295|0.249630719423294|2.46921157836914|0.25649145245552|0.00203284854069352|13|
+ |2.51587128639221|0.250377029180526|2.46414017677307|0.257025629281997|0.0018393974751234|14|
+ |2.50599193572998|0.251064419746398|2.4557819366455|0.257613778114318|0.00166435563005507|15|
+ |2.49690246582031|0.251682370901107|2.44843244552612|0.258032590150833|0.00150597130414098|16|
+ |2.48859119415283|0.252267301082611|2.43858122825622|0.258764535188674|0.00136265915352851|17|
+ |2.48097324371337|0.252792716026306|2.43251323699951|0.259270757436752|0.00123298505786806|18|
+ |2.47009921073913|0.253554105758667|2.43577146530151|0.258938610553741|0.00111565098632127|19|
+ |2.45849394798278|0.254375785589218|2.42337107658386|0.260090589523315|0.00100948277395218|20|
+ |2.44776940345764|0.255127549171447|2.41147446632385|0.260682851076126|0.000913417781703174|21|
+ |2.43759155273437|0.255834341049194|2.41405510902404|0.260819226503372|0.000826494593638926|22|
+ |2.42819571495056|0.256486028432846|2.40314364433288|0.26152354478836|0.000747843238059431|23|
+ |2.41974592208862|0.257094115018844|2.39181518554687|0.262460082769393|0.000676676572766155|24|
+ |2.41181802749633|0.257666647434234|2.3825569152832|0.263035386800766|0.000612282310612499|25|
+ |2.4044873714447|0.258173674345016|2.37829279899597|0.263585090637207|0.000554015976376831|26|
+ |2.39774870872497|0.258645176887512|2.37718510627746|0.263547003269195|0.000501294387504458|27|
+ |2.39184403419494|0.259076595306396|2.37379837036132|0.264020860195159|0.00045358992065303|28|
+ |2.38593125343322|0.259495466947555|2.37083029747009|0.264293819665908|0.000410425127483904|29|
+ |2.38093471527099|0.259853214025497|2.36486291885375|0.264451295137405|0.000371368019841611|30|
+ |2.37621307373046|0.260185241699218|2.36547923088073|0.264706671237945|0.000336027675075456|31|
+ |2.37177920341491|0.260504961013793|2.3609721660614|0.264981210231781|0.000304050423437729|32|
+ |2.3679461479187|0.260774314403533|2.36445379257202|0.264800041913986|0.000275116210104897|33|
+ |2.3643410205841|0.261037856340408|2.3573100566864|0.265379041433334|0.000248935451963916|34|
+ |2.36092805862426|0.261268675327301|2.36105728149414|0.264868646860122|0.000225246112677268|35|
+ |2.35798692703247|0.261485010385513|2.35409832000732|0.265503793954849|0.000203811112442053|36|
+ |2.35523629188537|0.26168617606163|2.35252356529235|0.265713244676589|0.000184415926923975|37|
+ |2.35284709930419|0.261859744787216|2.35101222991943|0.265856444835662|0.000166866433573886|38|
+ |2.35047316551208|0.262033462524414|2.34698224067687|0.266099989414215|0.000150986990774981|39|
+ |2.34832262992858|0.262173235416412|2.34894156455993|0.266122311353683|0.000136618677061051|40|
+
+ ### Framework versions
+
+ - Transformers 4.30.2
+ - TensorFlow 2.12.0
+ - Datasets 2.13.1
+ - Tokenizers 0.13.3
+
+ ### Special Thanks
+ Research supported with Cloud TPUs from Google’s TPU Research Cloud (TRC)