pooya-mohammadi committed on
Commit
42b22ab
1 Parent(s): 9489708

Hezar: Upload training files

Browse files
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ace0f4102cd2c0b04cd376b0f4506d83d9822ce08669e074917854bbb580a46
3
+ size 473270933
model_config.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: bert_text_classification
2
+ config_type: model
3
+ task: TEXT_CLASSIFICATION
4
+ num_labels: 3
5
+ id2label:
6
+ 0: negative
7
+ 1: positive
8
+ 2: neutral
9
+ vocab_size: 42000
10
+ hidden_size: 768
11
+ num_hidden_layers: 12
12
+ num_attention_heads: 12
13
+ intermediate_size: 3072
14
+ hidden_act: gelu
15
+ hidden_dropout_prob: 0.1
16
+ attention_probs_dropout_prob: 0.1
17
+ max_position_embeddings: 512
18
+ type_vocab_size: 2
19
+ initializer_range: 0.02
20
+ layer_norm_eps: 1.0e-12
21
+ pad_token_id: 0
22
+ position_embedding_type: absolute
23
+ use_cache: true
preprocessor/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor/tokenizer_config.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: wordpiece_tokenizer
2
+ config_type: preprocessor
3
+ pretrained_path: hezarai/bert-base-fa
4
+ max_length: 512
5
+ truncation_strategy: longest_first
6
+ truncation_direction: right
7
+ stride: 0
8
+ padding_strategy: longest
9
+ padding_direction: right
10
+ pad_to_multiple_of: 0
11
+ pad_token_id: 0
12
+ pad_token: '[PAD]'
13
+ pad_token_type_id: 0
14
+ unk_token: '[UNK]'
15
+ special_tokens:
16
+ - '[UNK]'
17
+ - '[SEP]'
18
+ - '[CLS]'
19
+ - '[PAD]'
20
+ - '[MASK]'
21
+ wordpieces_prefix: '##'
22
+ train_config:
23
+ name: wordpiece_tokenizer
24
+ config_type: preprocessor
25
+ vocab_size: 30000
26
+ min_frequency: 2
27
+ limit_alphabet: 1000
28
+ initial_alphabet: []
29
+ show_progress: true
train/dataset_config.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ name: text_classification
2
+ config_type: dataset
3
+ task: text_classification
4
+ path: hezarai/sentiment_digikala_snappfood
5
+ tokenizer_path: hezarai/bert-base-fa
6
+ label_field: label
7
+ text_field: text
train/train_config.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: text_classification
2
+ config_type: train
3
+ device: cuda
4
+ init_weights_from: hezarai/bert-base-fa
5
+ num_dataloader_workers: 0
6
+ seed: 42
7
+ optimizer:
8
+ lr: 2.0e-05
9
+ batch_size: 8
10
+ use_amp: false
11
+ metrics:
12
+ f1:
13
+ task: multiclass
14
+ num_epochs: 5
15
+ save_freq: 1
16
+ checkpoints_dir: checkpoints/