Update README.md
README.md CHANGED
@@ -16,123 +16,55 @@ Mistral-7B-v0.3 has the following changes compared to [Mistral-7B-v0.2](https://
 - Supports v3 Tokenizer
 - Supports function calling
 
-## Installation
-
-```
-pip install mistral_inference
-```
-
-## Download
-
-```py
-from huggingface_hub import snapshot_download
-from pathlib import Path
-
-mistral_models_path = Path.home().joinpath('mistral_models', '7B-Instruct-v0.3')
-mistral_models_path.mkdir(parents=True, exist_ok=True)
-
-snapshot_download(repo_id="mistralai/Mistral-7B-Instruct-v0.3", allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"], local_dir=mistral_models_path)
-```
-
-### Chat
-
-After installing `mistral_inference`, a `mistral-chat` CLI command should be available in your environment. You can chat with the model using
-
-```
-mistral-chat $HOME/mistral_models/7B-Instruct-v0.3 --instruct --max_tokens 256
-```
-
-### Instruct following
-
-```py
-from mistral_inference.model import Transformer
-from mistral_inference.generate import generate
-
-from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
-from mistral_common.protocol.instruct.messages import UserMessage
-from mistral_common.protocol.instruct.request import ChatCompletionRequest
-
-
-tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
-model = Transformer.from_folder(mistral_models_path)
-
-completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])
-
-tokens = tokenizer.encode_chat_completion(completion_request).tokens
-
-out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
-result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
-
-print(result)
-```
-
-### Function calling
-
-```py
-from mistral_common.protocol.instruct.tool_calls import Function, Tool
-from mistral_inference.model import Transformer
-from mistral_inference.generate import generate
-
-from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
-from mistral_common.protocol.instruct.messages import UserMessage
-from mistral_common.protocol.instruct.request import ChatCompletionRequest
-
-
-tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
-model = Transformer.from_folder(mistral_models_path)
-
-completion_request = ChatCompletionRequest(
-    tools=[
-        Tool(
-            function=Function(
-                name="get_current_weather",
-                description="Get the current weather",
-                parameters={
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "The city and state, e.g. San Francisco, CA",
-                        },
-                        "format": {
-                            "type": "string",
-                            "enum": ["celsius", "fahrenheit"],
-                            "description": "The temperature unit to use. Infer this from the users location.",
-                        },
-                    },
-                    "required": ["location", "format"],
-                },
-            )
-        )
-    ],
-    messages=[
-        UserMessage(content="What's the weather like today in Paris?"),
-    ],
-)
-
-tokens = tokenizer.encode_chat_completion(completion_request).tokens
-
-out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
-result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
-
-print(result)
-```
-
-## Generate with `transformers`
-
-If you want to use Hugging Face `transformers` to generate text, you can do something like this.
-
-```py
-from transformers import pipeline
-
-messages = [
-    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
-    {"role": "user", "content": "Who are you?"},
-]
-chatbot = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.3")
-chatbot(messages)
+## Generate with `transformers`
+
+If you want to use Hugging Face `transformers` to generate text, you can do something like this.
+
+```py
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+pretrained_model_name = "thesven/Mistral-7B-Instruct-v0.3-GPTQ"
+device = "cuda:0"
+
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
+
+# Load the model with the specified configuration and move to device
+model = AutoModelForCausalLM.from_pretrained(
+    pretrained_model_name,
+    device_map="auto",
+)
+
+print(model)
+
+# Set EOS token ID
+model.eos_token_id = tokenizer.eos_token_id
+
+# Move model to the specified device
+model.to(device)
+
+# Define the input text
+input_text = "What is PEFT finetuning?"
+
+# Encode the input text
+input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
+
+# Generate output
+output = model.generate(input_ids, max_length=1000)
+
+# Decode the generated output
+decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)
+
+# Print the decoded output
+for i, sequence in enumerate(decoded_output):
+    print(f"Generated Sequence {i+1}: {sequence}")
+
+del model
+torch.cuda.empty_cache()
+
 ```
 
 ## Limitations
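One practical note on the `transformers` snippet added above: `thesven/Mistral-7B-Instruct-v0.3-GPTQ` is a GPTQ-quantized checkpoint, so `AutoModelForCausalLM.from_pretrained` generally needs a GPTQ backend (for example `optimum` with `auto-gptq`, or `gptqmodel` on newer stacks) installed next to `transformers`, and with `device_map="auto"` the weights are already placed on the GPU, so the extra `model.to(device)` is typically redundant. The snippet also encodes the raw prompt, while Mistral Instruct models are trained on the `[INST]` chat format. The sketch below is an editor's illustration rather than part of the commit: it assumes the repo ships the base Instruct model's chat template and shows the same flow using `apply_chat_template` and a `max_new_tokens` budget.

```py
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "thesven/Mistral-7B-Instruct-v0.3-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="auto" places the (GPTQ-quantized) weights on the available GPU(s);
# no explicit model.to(device) is needed afterwards.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# Wrap the prompt in the model's chat template ([INST] ... [/INST])
# instead of encoding the raw string. Assumes a chat template is present
# in the repo's tokenizer config.
messages = [{"role": "user", "content": "What is PEFT finetuning?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Budget new tokens directly instead of capping the total sequence length.
output = model.generate(input_ids, max_new_tokens=256)

# Decode only the tokens generated after the prompt.
print(tokenizer.decode(output[0, input_ids.shape[-1]:], skip_special_tokens=True))
```

Using `max_new_tokens` keeps the completion budget independent of the prompt length, whereas a fixed `max_length=1000` leaves less room for the answer as the prompt grows.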