Michael Brunzel
committed on
Commit
•
ec2cda7
1
Parent(s):
d12b838
Add Flash Attention 2
Browse files
- handler.py +6 -1
handler.py
CHANGED
@@ -36,7 +36,12 @@ class EndpointHandler:
|
|
36 |
# load model and processor from path
|
37 |
self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
|
38 |
# attn_implementation="flash_attention_2"
|
39 |
-
self.model = AutoPeftModelForCausalLM.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
self.template = {
|
42 |
"prompt_input": """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n""",
|
|
|
36 |
# load model and processor from path
|
37 |
self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
|
38 |
# attn_implementation="flash_attention_2"
|
39 |
+
self.model = AutoPeftModelForCausalLM.from_pretrained(
|
40 |
+
"MichaelAI23/mistral_7B_v0_2_Textmarker",
|
41 |
+
device_map="auto",
|
42 |
+
torch_dtype=torch.bfloat16,
|
43 |
+
attn_implementation="flash_attention_2"
|
44 |
+
) # load_in_4bit=True
|
45 |
|
46 |
self.template = {
|
47 |
"prompt_input": """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n""",
|