|
|
|
|
|
from transformers import AutoTokenizer, TFAutoModelForCausalLM |
|
|
|
# Local directory holding a distilgpt2 model pretrained on Hebrew
# (PyTorch weights — see from_pt=True below).
model_checkpoint = "./distilgpt2-base-pretrained-he"

# Output directory for the TensorFlow export of the model + tokenizer.
save_directory = "tmp/tf/"

# Intended filename for the TF weights file.
# NOTE(review): recent versions of `save_pretrained` have no `file_name`
# parameter (the TF weights name is fixed as "tf_model.h5"); verify this
# kwarg is actually honored by the transformers version in use.
file_name = "tf_model.h5"


# Load the tokenizer from the local checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Load the model as a TensorFlow model, converting from the PyTorch
# weights on the fly (from_pt=True).
model = TFAutoModelForCausalLM.from_pretrained(model_checkpoint, from_pt=True)

# GPT-2 has no pad token; reuse EOS as pad — presumably to silence the
# "pad_token_id not set" warning during generation. TODO confirm intent.
model.config.pad_token_id = model.config.eos_token_id

# Tokenize the Hebrew prompt ("laughs and craziness") as TF tensors.
inputs = tokenizer(["צחוקים ושיגועים"], return_tensors="tf")


# Sampled (stochastic) generation; `seed=(42, 0)` is the stateless seed
# pair used by the TF generation backend for reproducible sampling —
# NOTE(review): this kwarg is TF-specific; verify it is supported by the
# installed transformers version.
generated = model.generate(**inputs, do_sample=True, seed=(42, 0))

# Decode and print the first (only) generated sequence; special tokens
# are intentionally not skipped here.
print("Sampling output: ", tokenizer.decode(generated[0]))


# Persist the TF model and tokenizer to the export directory
# (save_pretrained creates the directory if needed).
model.save_pretrained(save_directory, file_name=file_name)

tokenizer.save_pretrained(save_directory)
|
|
|
|
|
|
|
|