sonoisa committed on
Commit
6e08a41
•
1 Parent(s): 6e39e37

Update text encoder model

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "sonoisa/clip-vit-b-32-japanese",
3
  "architectures": [
4
  "BertModel"
5
  ],
1
  {
2
+ "_name_or_path": "sonoisa/clip-vit-b-32-japanese-v1",
3
  "architectures": [
4
  "BertModel"
5
  ],
output_linear.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d49a6e24d051ee5dc3490ed4927513abd4c46e32bc479188b00648e1ef311a4d
3
- size 4721639
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f134e1c5be9cac7be1bbd3d750774d3f5ab7166fb79f7a17565734a99fee1ad
3
+ size 9440231
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:762c0f6542b607e5f606ce41f21ca995cc456a0ab1c9bbf507f75d022add985a
3
  size 442547953
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69eb2a48e8e40f927229f04fa5feb871f2aa148c1705f7011ee4c1defa435d87
3
  size 442547953
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "do_lower_case": true, "do_word_tokenize": true, "do_subword_tokenize": true, "word_tokenizer_type": "mecab", "subword_tokenizer_type": "wordpiece", "never_split": null, "mecab_kwargs": null, "is_fast": true, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "sonoisa/clip-vit-b-32-japanese", "tokenizer_class": "BertJapaneseTokenizer"}
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "do_lower_case": true, "do_word_tokenize": true, "do_subword_tokenize": true, "word_tokenizer_type": "mecab", "subword_tokenizer_type": "wordpiece", "never_split": null, "mecab_kwargs": null, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "sonoisa/clip-vit-b-32-japanese-v1", "is_fast": true, "tokenizer_class": "BertJapaneseTokenizer"}
training_args_1.json → training_args-1.json RENAMED
@@ -2,19 +2,25 @@
2
  "adam_epsilon": 1e-08,
3
  "data_dir": "/content/data",
4
  "early_stop_callback": false,
 
5
  "eval_batch_size": 32,
6
  "fp_16": false,
7
- "gradient_accumulation_steps": 4,
 
 
8
  "learning_rate": 0.001,
 
 
 
 
9
  "max_grad_norm": 1.0,
10
  "max_input_length": 64,
11
- "model_name_or_path": "cl-tohoku/bert-base-japanese-whole-word-masking",
12
  "n_gpu": 1,
13
- "num_train_epochs": 1,
14
  "seed": 42,
15
  "shuffle_buffer_size": 65536,
16
- "tokenizer_name_or_path": "cl-tohoku/bert-base-japanese-whole-word-masking",
17
- "train_batch_size": 256,
18
- "warmup_ratio": 0.05,
19
  "weight_decay": 0.0
20
  }
2
  "adam_epsilon": 1e-08,
3
  "data_dir": "/content/data",
4
  "early_stop_callback": false,
5
+ "en_ja_ratio": 4,
6
  "eval_batch_size": 32,
7
  "fp_16": false,
8
+ "freeze_bert": true,
9
+ "from_pretrained_bert": true,
10
+ "gradient_accumulation_steps": 1,
11
  "learning_rate": 0.001,
12
+ "lr_initial_epoch": 0,
13
+ "lr_total_epochs": 4,
14
+ "lr_warmup_epochs": 0.05,
15
+ "max_cls_depth": 6,
16
  "max_grad_norm": 1.0,
17
  "max_input_length": 64,
18
+ "model_name_or_path": "sonoisa/sentence-bert-base-ja-mean-tokens-v2",
19
  "n_gpu": 1,
20
+ "num_train_epochs": 4,
21
  "seed": 42,
22
  "shuffle_buffer_size": 65536,
23
+ "tokenizer_name_or_path": "sonoisa/sentence-bert-base-ja-mean-tokens-v2",
24
+ "train_batch_size": 2560,
 
25
  "weight_decay": 0.0
26
  }
training_args_2.json → training_args-2.json RENAMED
@@ -2,19 +2,25 @@
2
  "adam_epsilon": 1e-08,
3
  "data_dir": "/content/data",
4
  "early_stop_callback": false,
 
5
  "eval_batch_size": 32,
6
  "fp_16": false,
 
 
7
  "gradient_accumulation_steps": 4,
8
- "learning_rate": 0.0002,
 
 
 
 
9
  "max_grad_norm": 1.0,
10
  "max_input_length": 64,
11
  "model_name_or_path": "/content/pretrain_model",
12
  "n_gpu": 1,
13
- "num_train_epochs": 1,
14
- "seed": 12345,
15
  "shuffle_buffer_size": 65536,
16
  "tokenizer_name_or_path": "/content/pretrain_model",
17
  "train_batch_size": 256,
18
- "warmup_ratio": 0.05,
19
  "weight_decay": 0.0
20
  }
2
  "adam_epsilon": 1e-08,
3
  "data_dir": "/content/data",
4
  "early_stop_callback": false,
5
+ "en_ja_ratio": 4,
6
  "eval_batch_size": 32,
7
  "fp_16": false,
8
+ "freeze_bert": false,
9
+ "from_pretrained_bert": false,
10
  "gradient_accumulation_steps": 4,
11
+ "learning_rate": 0.0004,
12
+ "lr_initial_epoch": 0,
13
+ "lr_total_epochs": 4,
14
+ "lr_warmup_epochs": 0.05,
15
+ "max_cls_depth": 6,
16
  "max_grad_norm": 1.0,
17
  "max_input_length": 64,
18
  "model_name_or_path": "/content/pretrain_model",
19
  "n_gpu": 1,
20
+ "num_train_epochs": 4,
21
+ "seed": 1234,
22
  "shuffle_buffer_size": 65536,
23
  "tokenizer_name_or_path": "/content/pretrain_model",
24
  "train_batch_size": 256,
 
25
  "weight_decay": 0.0
26
  }