Norod78 committed on
Commit f59d548
1 Parent(s): 187c509

Update README.md


Updated sample code: replaced the sample prompt, moved the pip install comment below the import, reduced max_len from 512 to 50, and restored the \n escape sequences that had been flattened into literal line breaks.

Files changed (1)
  1. README.md +8 -14
README.md CHANGED
@@ -33,15 +33,16 @@ The Open Super-large Crawled ALMAnaCH coRpus is a huge multilingual corpus obtai
 
 ```python
 
-#pip install tokenizers==0.10.3 transformers==4.8.0
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
+#pip install tokenizers==0.10.3 transformers==4.8.0
+
 tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
 model = AutoModelForCausalLM.from_pretrained("Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id)
 
-prompt_text = "אני אוהב שוקולד ועוגות"
-max_len = 512
+prompt_text = "הנבחרת האולימפית של ישראל זכתה השנה"
+max_len = 50
 sample_output_num = 3
 seed = 1000
 
@@ -80,10 +81,7 @@ if input_ids != None:
 print("Updated max_len = " + str(max_len))
 
 stop_token = "<|endoftext|>"
-new_lines = "\
-\
-\
-"
+new_lines = "\n\n\n"
 
 sample_outputs = model.generate(
 input_ids,
@@ -94,9 +92,7 @@ sample_outputs = model.generate(
 num_return_sequences=sample_output_num
 )
 
-print(100 * '-' + "\
-\t\tOutput\
-" + 100 * '-')
+print(100 * '-' + "\n\t\tOutput\n" + 100 * '-')
 for i, sample_output in enumerate(sample_outputs):
 
 text = tokenizer.decode(sample_output, skip_special_tokens=True)
@@ -107,9 +103,7 @@ for i, sample_output in enumerate(sample_outputs):
 # Remove all text after 3 newlines
 text = text[: text.find(new_lines) if new_lines else None]
 
-print("\
-{}: {}".format(i, text))
-print("\
-" + 100 * '-')
+print("\n{}: {}".format(i, text))
+print("\n" + 100 * '-')
 
 ```
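
For reference, here is a minimal, self-contained sketch that assembles the updated sample into one runnable script. The diff above elides README lines 48-79, so the `set_seed` call, the `tokenizer.encode` step, and the sampling arguments passed to `generate` are assumptions filling those gaps rather than the commit's exact code; everything else follows the new side of the diff. The new `prompt_text` is Hebrew for roughly "Israel's Olympic team won this year" (the old prompt read "I love chocolate and cakes").

```python
# A minimal sketch assembling the updated sample from this diff into one script.
# README lines 48-79 are elided above, so the seeding, the encode step, and the
# sampling arguments to generate() below are assumptions, not the commit's code.

#pip install tokenizers==0.10.3 transformers==4.8.0

from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
model = AutoModelForCausalLM.from_pretrained(
    "Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id
)

prompt_text = "הנבחרת האולימפית של ישראל זכתה השנה"  # "Israel's Olympic team won this year"
max_len = 50
sample_output_num = 3
seed = 1000

set_seed(seed)  # assumed stand-in for the elided seeding code

# Assumed encode step; the diff only shows input_ids being checked against None
input_ids = tokenizer.encode(prompt_text, return_tensors="pt")

new_lines = "\n\n\n"

sample_outputs = model.generate(
    input_ids,
    do_sample=True,      # assumption: the diff elides generate()'s sampling flags
    max_length=max_len,  # assumption, consistent with the max_len variable above
    num_return_sequences=sample_output_num,
)

print(100 * '-' + "\n\t\tOutput\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
    text = tokenizer.decode(sample_output, skip_special_tokens=True)
    # Remove all text after 3 newlines, guarding against find() returning -1
    idx = text.find(new_lines)
    if idx >= 0:
        text = text[:idx]
    print("\n{}: {}".format(i, text))
    print("\n" + 100 * '-')
```

Lowering `max_len` from 512 to 50 keeps the three sampled continuations short enough to scan, and writing `new_lines` as `"\n\n\n"` restores the intended triple-newline marker: in the old text the `\n` escapes appear to have been flattened into backslash line continuations, which leave the string literal without any actual newline characters.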