# Requires transformers >= 4.21.0
# Sampling outputs may differ depending on your hardware.
from transformers import AutoTokenizer, TFAutoModelForCausalLM

model_checkpoint = "./distilgpt2-base-pretrained-he"  # local PyTorch checkpoint directory
save_directory = "tmp/tf/"  # where the TensorFlow export will be written
file_name = "tf_model.h5"  # standard filename for TensorFlow weights

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# Load the PyTorch weights and convert them to TensorFlow on the fly.
model = TFAutoModelForCausalLM.from_pretrained(model_checkpoint, from_pt=True)
# GPT-2-style models have no padding token; reuse the end-of-sequence token instead.
model.config.pad_token_id = model.config.eos_token_id
inputs = tokenizer(["צחוקים ושיגועים"], return_tensors="tf")

# Sample a continuation; the two-integer seed keeps TF's stateless sampling reproducible on a given setup.
generated = model.generate(**inputs, do_sample=True, seed=(42, 0))
print("Sampling output: ", tokenizer.decode(generated[0]))

# Save the converted TensorFlow weights and the tokenizer so they can be reloaded without from_pt.
model.save_pretrained(save_directory, file_name=file_name)
tokenizer.save_pretrained(save_directory)

# > Sampling output: <a Hebrew continuation of the prompt; exact text varies across hardware and seeds>
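
# Optional sanity check (the reloaded_* names below are illustrative, not part of the export above):
# the exported directory should now contain config.json, tf_model.h5, and the tokenizer files,
# so it can be reloaded as a native TensorFlow checkpoint without the from_pt conversion step.
reloaded_tokenizer = AutoTokenizer.from_pretrained(save_directory)
reloaded_model = TFAutoModelForCausalLM.from_pretrained(save_directory)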