---
datasets:
- Open-Orca/OpenOrca
language:
- en
library_name: transformers
pipeline_tag: text-generation
---

# Overview

This is an unreleased, untested, unfinished beta.

# Inference

Remove the `.to('cuda')` calls to run unaccelerated on CPU.
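If you would rather keep one script for both cases, a minimal device-selection sketch (our own addition, not part of the original card) picks CUDA when available and falls back to CPU:

```python
import torch

# Use the GPU when one is available, otherwise fall back to CPU.
# With this, the hard-coded .to('cuda') calls below become .to(device).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
```
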
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Load the model weights in bfloat16 and move them to the GPU.
model = AutoModelForCausalLM.from_pretrained("Open-Orca/oo-phi-1_5",
                                             trust_remote_code=True,
                                             torch_dtype=torch.bfloat16
                                             ).to('cuda')
# The tokenizer takes no dtype argument; trust_remote_code is still required.
tokenizer = AutoTokenizer.from_pretrained("Open-Orca/oo-phi-1_5",
                                          trust_remote_code=True)

sys_prompt = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning. " \
             "I am an assistant who thinks through their answers step-by-step to be sure I always get the right answer. " \
             "I think more clearly if I write out my thought process in a scratchpad manner first; therefore, I always " \
             "explain background context, assumptions, and step-by-step thinking BEFORE trying to answer a question."
prompt = "Tell me about yourself please."

# Assemble the prompt in ChatML format: a system turn, a user turn, then an
# open assistant turn for the model to complete.
prefix = "<|im_start|>"
suffix = "<|im_end|>\n"
sys_format = prefix + "system\n" + sys_prompt + suffix
user_format = prefix + "user\n" + prompt + suffix
assistant_format = prefix + "assistant\n"
input_text = sys_format + user_format + assistant_format

# Near-greedy sampling: temperature 0.01 makes do_sample almost deterministic,
# with nucleus sampling and a mild repetition penalty.
generation_config = GenerationConfig(
    max_length=512, temperature=0.01, top_p=0.95, repetition_penalty=1.1,
    do_sample=True, use_cache=True,
    eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id,
    transformers_version="4.33.1")

# Tokenize, move the inputs to the same device as the model, and generate.
inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=False).to('cuda')
outputs = model.generate(**inputs, generation_config=generation_config)

# Decode the full sequence (prompt plus completion) and print it.
text = tokenizer.batch_decode(outputs)[0]
print(text)
```
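
The decoded output contains the prompt as well as the completion, so you may want only the assistant's reply. A minimal post-processing sketch, assuming the model closes its turn with the same `<|im_end|>` tag used in the prompt format above:

```python
# Keep only the text between the assistant header and the closing ChatML tag.
# Assumes the model emits "<|im_end|>" at the end of its reply.
reply = text.split("<|im_start|>assistant\n", 1)[-1].split("<|im_end|>", 1)[0].strip()
print(reply)
```

For token-by-token output, `transformers` also provides `TextStreamer`; passing `streamer=TextStreamer(tokenizer)` to `model.generate` prints tokens as they are generated.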