Hebrew-GPT2-345M-Stage
An undertrained GPT-2-based Hebrew text generation model, which I trained briefly in 2020 on text from "Bama Hadasha" ("במה חדשה"). A GGUF version is available here.
Dataset
Around 10% of the text from stage.co.il
LM Studio
A configuration scheme for LM Studio is available here
Usage with Transformers - sample code
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
from transformers import pipeline, set_seed
import random
# Hugging Face Hub id; the same id is used for the model and its tokenizer.
model_id = "Norod78/Hebrew-GPT2-345M-Stage"

# Sampling settings forwarded to the pipeline by text_generation().
max_length = 256
top_k = 70
top_p = 0.92
temperature = 1.0

text_generator = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=model_id,
    device_map="auto",
)

# Seeds stay within the unsigned 32-bit range; the first one is random.
max_seed = 2**32 - 1
global_seed = random.randint(0, max_seed)
def text_generation(input_text=''):
    """Sample one continuation for ``input_text`` and return it cleaned up.

    Each call advances the module-level ``global_seed`` by one (wrapping to 0
    once it reaches ``max_seed``) and reseeds with ``set_seed`` before
    sampling, so consecutive calls use different seeds.

    Args:
        input_text: Prompt handed to the pipeline. ``None`` or an empty
            value is replaced by a single newline.

    Returns:
        The ``generated_text`` of the first returned sequence with the
        ``<|startoftext|>`` marker and carriage returns removed, newline
        pairs collapsed, tabs turned into spaces, ``<|pad|>`` shown as
        `` * ``, doubled double-quotes reduced to one, and surrounding
        whitespace stripped.
    """
    global global_seed

    global_seed = global_seed + 1
    if global_seed >= max_seed:
        global_seed = 0

    if not input_text:
        input_text = "\n"

    set_seed(global_seed)
    generated_text = text_generator(
        input_text,
        max_length=max_length,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        do_sample=True,
        repetition_penalty=1.4,
        num_return_sequences=1,
    )

    # Applied in this exact order; each replacement is a single pass.
    replacements = (
        ("<|startoftext|>", ""),
        ("\r", ""),
        ("\n\n", "\n"),
        ("\t", " "),
        ("<|pad|>", " * "),
        ("\"\"", "\""),
    )
    parsed_text = generated_text[0]["generated_text"]
    for old, new in replacements:
        parsed_text = parsed_text.replace(old, new)
    return parsed_text.strip()
def main():
    """Generate one sample for a fixed Hebrew title prompt and print it."""
    # The prefix is Hebrew for "Title of the piece: "; the title itself reads
    # "Green grasshoppers hopping in the field".
    title = "חגבים ירוקים מקפצים בשדה"
    prompt_text = "\n\n שם היצירה: " + title
    output = text_generation(input_text=prompt_text)
    print("result : \n" + str(output))


if __name__ == '__main__':
    main()
- Downloads last month
- 854
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.