monuminu committed on
Commit
c8dd642
1 Parent(s): 1e1b98d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +21 -0
README.md CHANGED
@@ -1,3 +1,24 @@
1
  ---
2
  license: llama2
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: llama2
3
  ---
4
+ ```python
5
+ import torch
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
7
+
8
+ tokenizer = AutoTokenizer.from_pretrained("monuminu/indo-instruct-llama2-32k")
9
+ model = AutoModelForCausalLM.from_pretrained(
10
+ "monuminu/indo-instruct-llama2-32k",
11
+ device_map="auto",
12
+ torch_dtype=torch.float16,
13
+ load_in_8bit=True,
14
+ rope_scaling={"type": "dynamic", "factor": 2} # allows handling of longer inputs
15
+ )
16
+
17
+ prompt = "### User:\nThomas is healthy, but he has to go to the hospital. What could be the reasons?\n\n### Assistant:\n"
18
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
19
+ del inputs["token_type_ids"]
20
+ streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
21
+
22
+ output = model.generate(**inputs, streamer=streamer, use_cache=True, max_new_tokens=float('inf'))
23
+ output_text = tokenizer.decode(output[0], skip_special_tokens=True)
24
+ ```