Wikidepia committed on
Commit
db03705
1 Parent(s): bf9037c

Initial models

Browse files
Files changed (4) hide show
  1. README.md +18 -0
  2. config.json +27 -0
  3. pytorch_model.bin +3 -0
  4. tf_model.h5 +3 -0
README.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ inference: false
3
+ language: id
4
+ ---
5
+
6
+ # IndoConvBERT Base Model
7
+
8
+ IndoConvBERT is a ConvBERT model pretrained on Indo4B.
9
+
10
+ ## Pretraining details
11
+
12
+ We follow a different training procedure: instead of using a two-phase approach that pre-trains the model for 90% of training with a sequence length of 128 and the remaining 10% with a sequence length of 512, we pre-train the model with a sequence length of 512 for 1M steps on a v3-8 TPU.
13
+
14
+ The current version of the model is trained on Indo4B and a small Twitter dump.
15
+
16
+ ## Acknowledgement
17
+
18
+ Big thanks to TFRC (TensorFlow Research Cloud) for providing free TPU access.
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "IndoConvBERT-base/",
3
+ "architectures": [
4
+ "ConvBertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "conv_kernel_size": 9,
9
+ "embedding_size": 768,
10
+ "eos_token_id": 2,
11
+ "head_ratio": 2,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "layer_norm_eps": 1e-12,
18
+ "max_position_embeddings": 512,
19
+ "model_type": "convbert",
20
+ "num_attention_heads": 12,
21
+ "num_groups": 1,
22
+ "num_hidden_layers": 12,
23
+ "pad_token_id": 0,
24
+ "transformers_version": "4.4.0.dev0",
25
+ "type_vocab_size": 2,
26
+ "vocab_size": 30522
27
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c626393b3a07718ab46ef8f426c46e7d6bb529fcdfc5a5195b040066985c2d33
3
+ size 422837461
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:742b00c619f5bacde6cfa70815035644c54f0c15e8f93a52c929ac13574d8425
3
+ size 423072408