Update README.md (#3)
Browse files- Update README.md (5a9c30869df0d16eed946cba143225fd96936aee)
Co-authored-by: wen wen <wwwaj@users.noreply.huggingface.co>
README.md
CHANGED
@@ -104,9 +104,7 @@ tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
|
|
104 |
messages = [
|
105 |
{"role": "system", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
|
106 |
{"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
|
107 |
-
{"role": "
|
108 |
-
{"role": "system", "content": "1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey."},
|
109 |
-
{"role": "system", "content": "2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
|
110 |
{"role": "user", "content": "What about solving a 2x + 3 = 7 equation?"},
|
111 |
]
|
112 |
|
@@ -131,7 +129,7 @@ Note that by default the model uses flash attention which requires certain types
|
|
131 |
|
132 |
+ V100 or earlier generation GPUs: call `AutoModelForCausalLM.from_pretrained()` with `attn_implementation="eager"`
|
133 |
+ CPU: use the **GGUF** quantized models [4K](https://aka.ms/Phi3-mini-4k-instruct-gguf)
|
134 |
-
+ Optimized inference: use the **ONNX** models [4K](https://aka.ms/Phi3-mini-4k-instruct-onnx)
|
135 |
|
136 |
## Responsible AI Considerations
|
137 |
|
|
|
104 |
messages = [
|
105 |
{"role": "system", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."},
|
106 |
{"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
|
107 |
+
{"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
|
|
|
|
|
108 |
{"role": "user", "content": "What about solving a 2x + 3 = 7 equation?"},
|
109 |
]
|
110 |
|
|
|
129 |
|
130 |
+ V100 or earlier generation GPUs: call `AutoModelForCausalLM.from_pretrained()` with `attn_implementation="eager"`
|
131 |
+ CPU: use the **GGUF** quantized models [4K](https://aka.ms/Phi3-mini-4k-instruct-gguf)
|
132 |
+
+ Optimized inference on GPU, CPU, and Mobile: use the **ONNX** models [4K](https://aka.ms/Phi3-mini-4k-instruct-onnx)
|
133 |
|
134 |
## Responsible AI Considerations
|
135 |
|