Update README.md
README.md (changed)
```diff
@@ -46,11 +46,13 @@ tokenizer = AutoTokenizer.from_pretrained("pszemraj/pythia-6.9b-HC3")
 
 model = AutoModelForCausalLM.from_pretrained(
     "pszemraj/pythia-6.9b-HC3", load_in_8bit=True, device_map="auto"
-)
+)  # shards are ~4GB each, there are eight total
 
 prompt = "I was wondering how much wood a woodchuck could chuck? <answer>"
 inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-outputs = model.generate(
+outputs = model.generate(
+    **inputs, max_new_tokens=300
+)  # default generation config (+ 300 tokens)
 result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
 result = result.split("<end_answer>")[0].strip()
 
```
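For reference, the updated README example assembled into a self-contained sketch. The imports are added here for convenience; it assumes `transformers`, `accelerate`, and `bitsandbytes` are installed and a CUDA GPU is available, everything else is taken directly from the diff above.

```python
# Minimal sketch of the updated README example; assumes transformers,
# accelerate, and bitsandbytes are installed and a CUDA GPU is available.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("pszemraj/pythia-6.9b-HC3")
model = AutoModelForCausalLM.from_pretrained(
    "pszemraj/pythia-6.9b-HC3", load_in_8bit=True, device_map="auto"
)  # shards are ~4GB each, there are eight total

prompt = "I was wondering how much wood a woodchuck could chuck? <answer>"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(
    **inputs, max_new_tokens=300
)  # default generation config (+ 300 tokens)

result = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
result = result.split("<end_answer>")[0].strip()  # keep only the answer span
print(result)
```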