keshan committed
Commit 19f5167
1 parent: 987f694

adding flax to pt conversion script

Files changed (1)
  1. converter.py +31 -0
converter.py ADDED
@@ -0,0 +1,31 @@
# Earlier conversion attempt using the mc4 checkpoint, kept for reference:
# from transformers import AutoTokenizer, RobertaModel
#
# model = RobertaModel.from_pretrained('sinhala-roberta-mc4', from_flax=True)
# tokenizer = AutoTokenizer.from_pretrained('sinhala-roberta-mc4')
#
# tokenizer.save_pretrained('sinhala-roberta-mc4')
# model.save_pretrained('sinhala-roberta-mc4')

from transformers import RobertaForMaskedLM, FlaxRobertaForMaskedLM, AutoTokenizer
import torch
import numpy as np
import jax
import jax.numpy as jnp

# Run the conversion on CPU
jax.config.update('jax_platform_name', 'cpu')

MODEL_PATH = "sinhala-roberta-oscar"

# Load the Flax checkpoint and cast any bfloat16 params to float32,
# so the exported PyTorch weights are full precision
model = FlaxRobertaForMaskedLM.from_pretrained(MODEL_PATH)

def to_f32(t):
    return jax.tree_util.tree_map(
        lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t
    )

model.params = to_f32(model.params)
model.save_pretrained(MODEL_PATH)

# Reload the float32 Flax weights as a PyTorch model
pt_model = RobertaForMaskedLM.from_pretrained(MODEL_PATH, from_flax=True).to('cpu')

# Sanity check: run the same all-zero dummy batch (2 sequences of length 128)
# through both models and compare the printed logits by eye
input_ids = np.asarray(2 * [128 * [0]], dtype=np.int32)
input_ids_pt = torch.tensor(input_ids)
logits_pt = pt_model(input_ids_pt).logits
print(logits_pt)
logits_fx = model(input_ids).logits
print(logits_fx)

pt_model.save_pretrained(MODEL_PATH)

# also save the tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
tokenizer.save_pretrained(MODEL_PATH)
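
The script prints both sets of logits but leaves the comparison to the reader. A minimal sketch of an automated equivalence check is below, assuming converter.py has already run so that MODEL_PATH contains both the Flax and PyTorch weights; the file name sanity_check.py and the 1e-3 tolerance are illustrative assumptions, not part of this commit.

# sanity_check.py -- hypothetical follow-up script, not part of this commit
import numpy as np
import torch
from transformers import RobertaForMaskedLM, FlaxRobertaForMaskedLM

MODEL_PATH = "sinhala-roberta-oscar"  # assumed to match converter.py

# Load both backends of the converted checkpoint
fx_model = FlaxRobertaForMaskedLM.from_pretrained(MODEL_PATH)
pt_model = RobertaForMaskedLM.from_pretrained(MODEL_PATH)
pt_model.eval()

# The same all-zero dummy batch converter.py uses: 2 sequences of length 128
input_ids = np.zeros((2, 128), dtype=np.int32)

with torch.no_grad():
    logits_pt = pt_model(torch.tensor(input_ids, dtype=torch.long)).logits.numpy()
logits_fx = np.asarray(fx_model(input_ids).logits)

# Fail loudly if the two backends diverge beyond float32 noise
np.testing.assert_allclose(logits_pt, logits_fx, atol=1e-3)
print("max abs diff:", np.abs(logits_pt - logits_fx).max())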