Norod78 committed on
Commit f59d548
1 Parent(s): 187c509

Update README.md


Updated sample code: replaced the sample prompt, moved the pip install comment below the import, reduced max_len from 512 to 50, and restored the \n escape sequences that had been flattened into literal line breaks.

Files changed (1)
  1. README.md +8 -14
README.md CHANGED
@@ -33,15 +33,16 @@ The Open Super-large Crawled ALMAnaCH coRpus is a huge multilingual corpus obtai
 
 ```python
 
-#pip install tokenizers==0.10.3 transformers==4.8.0
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
+#pip install tokenizers==0.10.3 transformers==4.8.0
+
 tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
 model = AutoModelForCausalLM.from_pretrained("Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id)
 
-prompt_text = "אני אוהב שוקולד ועוגות"
-max_len = 512
+prompt_text = "הנבחרת האולימפית של ישראל זכתה השנה"
+max_len = 50
 sample_output_num = 3
 seed = 1000
 
@@ -80,10 +81,7 @@ if input_ids != None:
 print("Updated max_len = " + str(max_len))
 
 stop_token = "<|endoftext|>"
-new_lines = "\
-\
-\
-"
+new_lines = "\n\n\n"
 
 sample_outputs = model.generate(
 input_ids,
@@ -94,9 +92,7 @@ sample_outputs = model.generate(
 num_return_sequences=sample_output_num
 )
 
-print(100 * '-' + "\
-\t\tOutput\
-" + 100 * '-')
+print(100 * '-' + "\n\t\tOutput\n" + 100 * '-')
 for i, sample_output in enumerate(sample_outputs):
 
 text = tokenizer.decode(sample_output, skip_special_tokens=True)
@@ -107,9 +103,7 @@ for i, sample_output in enumerate(sample_outputs):
 # Remove all text after 3 newlines
 text = text[: text.find(new_lines) if new_lines else None]
 
-print("\
-{}: {}".format(i, text))
-print("\
-" + 100 * '-')
+print("\n{}: {}".format(i, text))
+print("\n" + 100 * '-')
 
 ```
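
For reference, here is a minimal, self-contained sketch that assembles the updated sample into one runnable script. The diff above elides README lines 48-79, so the `set_seed` call, the `tokenizer.encode` step, and the sampling arguments passed to `generate` are assumptions filling those gaps rather than the commit's exact code; everything else follows the new side of the diff. The new `prompt_text` is Hebrew for roughly "Israel's Olympic team won this year" (the old prompt read "I love chocolate and cakes").

```python
# A minimal sketch assembling the updated sample from this diff into one script.
# README lines 48-79 are elided above, so the seeding, the encode step, and the
# sampling arguments to generate() below are assumptions, not the commit's code.

#pip install tokenizers==0.10.3 transformers==4.8.0

from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

tokenizer = AutoTokenizer.from_pretrained("Norod78/distilgpt2-base-pretrained-he")
model = AutoModelForCausalLM.from_pretrained(
    "Norod78/distilgpt2-base-pretrained-he", pad_token_id=tokenizer.eos_token_id
)

prompt_text = "הנבחרת האולימפית של ישראל זכתה השנה"  # "Israel's Olympic team won this year"
max_len = 50
sample_output_num = 3
seed = 1000

set_seed(seed)  # assumed stand-in for the elided seeding code

# Assumed encode step; the diff only shows input_ids being checked against None
input_ids = tokenizer.encode(prompt_text, return_tensors="pt")

new_lines = "\n\n\n"

sample_outputs = model.generate(
    input_ids,
    do_sample=True,      # assumption: the diff elides generate()'s sampling flags
    max_length=max_len,  # assumption, consistent with the max_len variable above
    num_return_sequences=sample_output_num,
)

print(100 * '-' + "\n\t\tOutput\n" + 100 * '-')
for i, sample_output in enumerate(sample_outputs):
    text = tokenizer.decode(sample_output, skip_special_tokens=True)
    # Remove all text after 3 newlines, guarding against find() returning -1
    idx = text.find(new_lines)
    if idx >= 0:
        text = text[:idx]
    print("\n{}: {}".format(i, text))
    print("\n" + 100 * '-')
```

Lowering `max_len` from 512 to 50 keeps the three sampled continuations short enough to scan, and writing `new_lines` as `"\n\n\n"` restores the intended triple-newline marker: in the old text the `\n` escapes appear to have been flattened into backslash line continuations, which leave the string literal without any actual newline characters.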