Files changed (1) hide show
  1. README.md +112 -0
README.md ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: mit
5
+ library_name: transformers
6
+ tags:
7
+ - axolotl
8
+ - finetune
9
+ - dpo
10
+ - microsoft
11
+ - phi
12
+ - pytorch
13
+ - phi-3
14
+ - nlp
15
+ - code
16
+ - chatml
17
+ base_model: microsoft/Phi-3-mini-4k-instruct
18
+ model_name: Phi-3-mini-4k-instruct-v0.3
19
+ pipeline_tag: text-generation
20
+ inference: false
21
+ model_creator: MaziyarPanahi
22
+ quantized_by: MaziyarPanahi
23
+ ---
24
+
25
+ <img src="./phi-3-instruct.webp" alt="Phi-3 Logo" width="500" style="margin-left: auto; margin-right: auto; display: block;"/>
26
+
27
+
28
+ # MaziyarPanahi/Phi-3-mini-4k-instruct-v0.3
29
+
30
+ This model is a DPO fine-tune of the `microsoft/Phi-3-mini-4k-instruct` model.
31
+
32
+ # ⚡ Quantized GGUF
33
+
34
+ coming soon
35
+
36
+ # 🏆 [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
37
+ coming soon
38
+
39
+ # Prompt Template
40
+
41
+ This model uses the `ChatML` prompt template:
42
+
43
+ ```
44
+ <|im_start|>system
45
+ {System}
46
+ <|im_end|>
47
+ <|im_start|>user
48
+ {User}
49
+ <|im_end|>
50
+ <|im_start|>assistant
51
+ {Assistant}
52
+ ```
53
+
54
+ # How to use
55
+
56
+ To use this model, pass `MaziyarPanahi/Phi-3-mini-4k-instruct-v0.3` as the model name to Hugging Face's
57
+ `transformers` library:
58
+
59
+ ```python
60
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
61
+ from transformers import pipeline
62
+ import torch
63
+
64
+ model_id = "MaziyarPanahi/Phi-3-mini-4k-instruct-v0.3"
65
+
66
+ model = AutoModelForCausalLM.from_pretrained(
67
+ model_id,
68
+ torch_dtype=torch.bfloat16,
69
+ device_map="auto",
70
+ trust_remote_code=True,
71
+ # attn_implementation="flash_attention_2"
72
+ )
73
+
74
+ tokenizer = AutoTokenizer.from_pretrained(
75
+ model_id,
76
+ trust_remote_code=True
77
+ )
78
+
79
+ streamer = TextStreamer(tokenizer)
80
+
81
+ messages = [
82
+ {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
83
+ {"role": "user", "content": "Who are you?"},
84
+ ]
85
+
86
+ # this should work perfectly for the model to stop generating
87
+ terminators = [
88
+ tokenizer.eos_token_id, # this should be <|im_end|>
89
+ tokenizer.convert_tokens_to_ids("<|assistant|>"), # sometimes model stops generating at <|assistant|>
90
+ tokenizer.convert_tokens_to_ids("<|end|>") # sometimes model stops generating at <|end|>
91
+ ]
92
+
93
+ pipe = pipeline(
94
+ "text-generation",
95
+ model=model,
96
+ tokenizer=tokenizer,
97
+ )
98
+
99
+ generation_args = {
100
+ "max_new_tokens": 500,
101
+ "return_full_text": False,
102
+ "temperature": 0.0,
103
+ "do_sample": False,
104
+ "streamer": streamer,
105
+ "eos_token_id": terminators,
106
+ }
107
+
108
+ output = pipe(messages, **generation_args)
109
+ print(output[0]['generated_text'])
110
+
111
+
112
+ ```