mohdelgaar committed
Commit 1248d55 · 1 Parent(s): c4d783e

restore args json
Files changed (2):
  1. app.py +1 -9
  2. ckpt/model.json +82 -0
app.py CHANGED
@@ -25,7 +25,7 @@ def process_examples(samples):
         processed.append(example)
     return processed
 
-args, args_list, lng_names = parse_args(ckpt='./ckpt/model_fixed.pt')
+args, args_list, lng_names = parse_args(ckpt='./ckpt/model.pt')
 
 tokenizer = T5Tokenizer.from_pretrained(args.model_name)
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -46,14 +46,6 @@ ling_collection_scaled = scaler.transform(ling_collection)
 
 model, ling_disc, sem_emb = get_model(args, tokenizer, device)
 
-# state = torch.load(args.ckpt, map_location=torch.device('cpu'))
-# model.load_state_dict(state['model'], strict=True)
-# model.eval()
-# ling_disc.eval()
-
-# state = torch.load(args.sem_ckpt, map_location=torch.device('cpu'))
-# sem_emb.load_state_dict(state['model'], strict=True)
-# sem_emb.eval()
 
 ############# Start demo code
 def round_ling(x):
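The commented-out block removed above was the old manual restore path: load the generator and semantic-embedding checkpoints with torch.load, copy the weights in with load_state_dict, and switch the modules to eval mode. A minimal sketch of that same pattern, assuming the restore now happens inside get_model (this diff does not show its internals, and the helper name restore_weights is hypothetical):

import torch

def restore_weights(model, ling_disc, sem_emb, args, device='cpu'):
    # Generator checkpoint: weights are stored under the 'model' key,
    # exactly as in the commented-out block this commit removes.
    state = torch.load(args.ckpt, map_location=torch.device(device))
    model.load_state_dict(state['model'], strict=True)

    # Semantic-embedding checkpoint, loaded the same way.
    state = torch.load(args.sem_ckpt, map_location=torch.device(device))
    sem_emb.load_state_dict(state['model'], strict=True)

    # Inference-only demo: put all modules in eval mode.
    model.eval()
    ling_disc.eval()
    sem_emb.eval()
    return model, ling_disc, sem_emb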
ckpt/model.json ADDED
@@ -0,0 +1,82 @@
+{
+    "data": "ling_conversion",
+    "data_sources": ["qqp", "mrpc", "stsb"],
+    "data_type": "text",
+    "kld_annealing": "cyclic",
+    "lingpred_annealing": "mono",
+    "ling_embed_type": "one-layer",
+    "combine_weight": 1,
+    "alpha_kld": 1,
+    "alpha_lingpred": 1,
+    "alpha_sem": 1,
+    "max_grad_norm": 10,
+    "sem_loss_tao": 0.5,
+    "sem_loss_eps": 1,
+    "ckpt": "./ckpt/model.pt",
+    "disc_type": "deberta",
+    "disc_ckpt": "./ckpt/ling_disc",
+    "sem_ckpt": "./ckpt/sem_emb.pt",
+    "lng_ids": null,
+    "lng_ids_idx": null,
+    "model_name": "google/flan-t5-base",
+    "aim_exp": "lingconv-0606",
+    "sem_loss_type": "dedicated",
+    "combine_method": "decoder_add_first",
+    "train_log": 200,
+    "val_log": 2000,
+    "batch_size": 80,
+    "eval_batch_size": 200,
+    "max_eval_samples": 1000,
+    "test_batch_size": 1,
+    "hidden_dim": 500,
+    "latent_dim": 150,
+    "lng_dim": 40,
+    "disc_lng_dim": 40,
+    "use_lora": false,
+    "lora_r": 64,
+    "gpu": "4",
+    "epochs": 20,
+    "grad_accumulation": 1,
+    "n_ica": 10,
+    "max_length": 200,
+    "total_steps": null,
+    "kld_const": 1,
+    "lr": 0.001,
+    "kl_weight": 0.1,
+    "weight_decay": 0.01,
+    "ling_dropout": 0.1,
+    "predict_fn": "logs/test.txt",
+    "save_predict": false,
+    "use_ica": false,
+    "pretrain_gen": false,
+    "pretrain_sem": false,
+    "pretrain_disc": false,
+    "linggen_type": "none",
+    "linggen_input": "s+l",
+    "aug_same": false,
+    "ling_vae": false,
+    "process_lingpred": false,
+    "fudge_lambda": 1.0,
+    "use_lingpred": false,
+    "ling2_only": true,
+    "cycle_loss": false,
+    "disc_loss": false,
+    "sem_loss": false,
+    "sim_loss": false,
+    "optuna": false,
+    "debug": false,
+    "demo": false,
+    "fudge": false,
+    "out_fn": "logs/default",
+    "eval_only": false,
+    "predict_with_feedback": false,
+    "feedback_param": "s",
+    "eval_ling": false,
+    "seed": 0,
+    "major_arg": 0,
+    "quantize_lng": false,
+    "quant_nbins": 20,
+    "src_lng": "ling",
+    "to_restore": [],
+    "disc_steps": 0
+}
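The commit message ("restore args json") together with the parse_args(ckpt='./ckpt/model.pt') call in app.py suggests that training arguments are restored from a JSON file saved next to the checkpoint. A minimal sketch of that idea, assuming the JSON path is derived by swapping the .pt suffix for .json and that the saved values simply populate a Namespace; the actual parse_args implementation is not part of this diff, and load_saved_args is a hypothetical helper:

import json
from argparse import Namespace
from pathlib import Path

def load_saved_args(ckpt_path='./ckpt/model.pt'):
    # Hypothetical restore step: read the args JSON stored alongside the
    # checkpoint (./ckpt/model.json for ./ckpt/model.pt) into a Namespace.
    args_path = Path(ckpt_path).with_suffix('.json')
    with open(args_path) as f:
        saved = json.load(f)
    args = Namespace(**saved)
    args.ckpt = ckpt_path  # keep the checkpoint path that was passed in
    return args

args = load_saved_args('./ckpt/model.pt')
print(args.model_name)  # -> "google/flan-t5-base", per ckpt/model.json above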