Create model.yml
Browse files
model.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# BEGIN GENERAL GGUF METADATA
id: ministral # Model ID unique between models (author / quantization)
model: ministral # Model ID which is used for request construct - should be unique between models (author / quantization)
name: ministral # metadata.general.name
version: 1 # metadata.version

# END GENERAL GGUF METADATA

# BEGIN INFERENCE PARAMETERS
# BEGIN REQUIRED
stop: # tokenizer.ggml.eos_token_id
  # Quoted: an unquoted [/INST] would be parsed by YAML as a flow sequence
  # (a nested list ["/INST"]) rather than the literal stop string "[/INST]".
  - "[/INST]"
# END REQUIRED

# BEGIN OPTIONAL
stream: true # Stream responses token-by-token by default
top_p: 0.9 # Ranges: 0 to 1
temperature: 0.7 # Ranges: 0 to 1
frequency_penalty: 0 # Ranges: 0 to 1
presence_penalty: 0 # Ranges: 0 to 1
max_tokens: 4096 # Should default to the model context length
seed: -1 # -1 = random seed per request
dynatemp_range: 0
dynatemp_exponent: 1
top_k: 40
min_p: 0.05
tfs_z: 1 # 1 = tail-free sampling disabled
typ_p: 1 # 1 = typical-p sampling disabled
repeat_last_n: 64
repeat_penalty: 1
mirostat: false
mirostat_tau: 5
mirostat_eta: 0.100000001
penalize_nl: false
ignore_eos: false
n_probs: 0
min_keep: 0
# END OPTIONAL
# END INFERENCE PARAMETERS

# BEGIN MODEL LOAD PARAMETERS
# BEGIN REQUIRED
engine: llama-cpp # engine to run model
prompt_template: "[INST]{system_message}\n\n{prompt}[/INST]"
# END REQUIRED

# BEGIN OPTIONAL
ctx_len: 4096 # llama.context_length | 0 or undefined = loaded from model
ngl: 37 # Number of GPU layers | undefined = loaded from model
# END OPTIONAL
# END MODEL LOAD PARAMETERS