slseanwu committed on
Commit
a424d61
1 Parent(s): ca4f21e

add model weights

Browse files
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 0.4267794423338829,
4
+ "eval_loss": 3.970010280609131,
5
+ "eval_runtime": 56.8854,
6
+ "eval_samples": 1045,
7
+ "eval_samples_per_second": 18.37,
8
+ "eval_steps_per_second": 0.299,
9
+ "test_samples": 1045,
10
+ "train_loss": 6.490346755981445,
11
+ "train_runtime": 7961.5563,
12
+ "train_samples": 3839,
13
+ "train_samples_per_second": 19.288,
14
+ "train_steps_per_second": 0.151
15
+ }
config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "exp_audiocaps_cfmer_2l/beats_ft_frozen_bart_8l_baseline_mincap6_ep10_convds3_embnce_t.5_mixup",
3
+ "activation_dropout": 0.1,
4
+ "activation_function": "gelu",
5
+ "architectures": [
6
+ "BartConformerBeatsSeq2SeqForCaptioning"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 0,
10
+ "classifier_dropout": 0.0,
11
+ "contrastive_temperature": 0.5,
12
+ "d_model": 768,
13
+ "decoder_attention_heads": 16,
14
+ "decoder_ffn_dim": 3072,
15
+ "decoder_layerdrop": 0.0,
16
+ "decoder_layers": 6,
17
+ "decoder_start_token_id": 2,
18
+ "dropout": 0.1,
19
+ "embed_predictor_ffn_dim": 3072,
20
+ "embed_predictor_out_dim": 768,
21
+ "encoder_attention_heads": 16,
22
+ "encoder_downsample_rate": 3,
23
+ "encoder_ffn_dim": 4096,
24
+ "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 12,
26
+ "encoder_repr_layer_idx": 10,
27
+ "eos_token_id": 2,
28
+ "forced_eos_token_id": 2,
29
+ "freeze_encoder": true,
30
+ "id2label": {
31
+ "0": "LABEL_0",
32
+ "1": "LABEL_1",
33
+ "2": "LABEL_2"
34
+ },
35
+ "init_std": 0.02,
36
+ "is_encoder_decoder": true,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1,
40
+ "LABEL_2": 2
41
+ },
42
+ "lsm_weight": 0.1,
43
+ "max_cross_position_embeddings": 512,
44
+ "max_position_embeddings": 128,
45
+ "min_caption_len": 6,
46
+ "model_type": "bart",
47
+ "num_hidden_layers": 12,
48
+ "pad_token_id": 1,
49
+ "pretrained_beats_path": "/scratch/bbjs/slseanwu/dcase23_aac/beats_baseline/pretrained_weights/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt1.pt",
50
+ "scale_embedding": false,
51
+ "spec_aug": {
52
+ "freq_mask_width_range": [
53
+ 0,
54
+ 64
55
+ ],
56
+ "num_freq_mask": 2,
57
+ "num_time_mask": 5,
58
+ "time_mask_width_ratio_range": [
59
+ 0,
60
+ 0.12
61
+ ]
62
+ },
63
+ "tokenizer_dir": "facebook/bart-base",
64
+ "torch_dtype": "float32",
65
+ "transformers_version": "4.22.2",
66
+ "use_cache": true,
67
+ "use_chatgpt_mixup": true,
68
+ "use_contrastive_embed_loss": true,
69
+ "use_conv_downsample": true,
70
+ "use_encoder_embed_mlp": true,
71
+ "use_weighted_encoder_repr": false,
72
+ "vocab_size": 50265
73
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 0.42745985711254153,
4
+ "eval_loss": 3.971204996109009,
5
+ "eval_runtime": 57.8802,
6
+ "eval_samples": 1045,
7
+ "eval_samples_per_second": 18.055,
8
+ "eval_steps_per_second": 0.294
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30777c73f63e6e556f0501ba7dcc838dd391311ced276f3fd95a5d8ffa4b6f20
3
+ size 869569707
test_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 0.4267794423338829,
4
+ "eval_loss": 3.970010280609131,
5
+ "eval_runtime": 56.8854,
6
+ "eval_samples_per_second": 18.37,
7
+ "eval_steps_per_second": 0.299,
8
+ "test_samples": 1045
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "train_loss": 6.490346755981445,
4
+ "train_runtime": 7961.5563,
5
+ "train_samples": 3839,
6
+ "train_samples_per_second": 19.288,
7
+ "train_steps_per_second": 0.151
8
+ }