Michael Brunzel committed on
Commit
ec2cda7
1 Parent(s): d12b838

Add Flash Attention 2

Browse files
Files changed (1) hide show
  1. handler.py +6 -1
handler.py CHANGED
@@ -36,7 +36,12 @@ class EndpointHandler:
36
  # load model and processor from path
37
  self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
38
  # attn_implementation="flash_attention_2"
39
- self.model = AutoPeftModelForCausalLM.from_pretrained("MichaelAI23/mistral_7B_v0_2_Textmarker", device_map="auto") # load_in_4bit=True
 
 
 
 
 
40
 
41
  self.template = {
42
  "prompt_input": """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n""",
 
36
  # load model and processor from path
37
  self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
38
  # attn_implementation="flash_attention_2"
39
+ self.model = AutoPeftModelForCausalLM.from_pretrained(
40
+ "MichaelAI23/mistral_7B_v0_2_Textmarker",
41
+ device_map="auto",
42
+ torch_dtype=torch.bfloat16,
43
+ attn_implementation="flash_attention_2"
44
+ ) # load_in_4bit=True
45
 
46
  self.template = {
47
  "prompt_input": """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n""",