tinybiggames
committed on
Commit
•
fc4e47b
1
Parent(s):
981a5a0
Update README.md
Browse files
README.md
CHANGED
@@ -16,7 +16,6 @@ tags:
|
|
16 |
- axolotl
|
17 |
- llama-cpp
|
18 |
- gguf-my-repo
|
19 |
-
- LMEngine
|
20 |
base_model: NousResearch/Meta-Llama-3-8B
|
21 |
datasets:
|
22 |
- teknium/OpenHermes-2.5
|
@@ -39,13 +38,13 @@ model-index:
|
|
39 |
# tinybiggames/Hermes-2-Pro-Llama-3-8B-Q4_K_M-GGUF
|
40 |
This model was converted to GGUF format from [`NousResearch/Hermes-2-Pro-Llama-3-8B`](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) using llama.cpp via ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
|
41 |
Refer to the [original model card](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) for more details on the model.
|
42 |
-
## Use with tinyBigGAMES's [
|
43 |
|
44 |
|
45 |
How to configure LMEngine:
|
46 |
|
47 |
```Delphi
|
48 |
-
|
49 |
'C:/LLM/gguf', // path to model files
|
50 |
-1 // number of GPU layer, -1 to use all available layers
|
51 |
);
|
@@ -54,7 +53,7 @@ LME_InitConfig(
|
|
54 |
How to define model:
|
55 |
|
56 |
```Delphi
|
57 |
-
|
58 |
'hermes-2-pro-llama-3-8b.Q4_K_M', 8000, '<|im_start|>{role}\n{content}<|im_end|>\n',
|
59 |
'<|im_start|>assistant');
|
60 |
```
|
@@ -62,7 +61,7 @@ LME_DefineModel('hermes-2-pro-llama-3-8b.Q4_K_M.gguf',
|
|
62 |
How to add a message:
|
63 |
|
64 |
```Delphi
|
65 |
-
|
66 |
ROLE_USER, // role
|
67 |
'What is AI?' // content
|
68 |
);
|
@@ -80,17 +79,17 @@ var
|
|
80 |
LOutputTokens: Int32;
|
81 |
LTotalTokens: Int32;
|
82 |
|
83 |
-
if
|
84 |
begin
|
85 |
-
|
86 |
@LTotalTokens);
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
end
|
91 |
else
|
92 |
begin
|
93 |
-
|
94 |
-
|
95 |
end;
|
96 |
```
|
|
|
16 |
- axolotl
|
17 |
- llama-cpp
|
18 |
- gguf-my-repo
|
|
|
19 |
base_model: NousResearch/Meta-Llama-3-8B
|
20 |
datasets:
|
21 |
- teknium/OpenHermes-2.5
|
|
|
38 |
# tinybiggames/Hermes-2-Pro-Llama-3-8B-Q4_K_M-GGUF
|
39 |
This model was converted to GGUF format from [`NousResearch/Hermes-2-Pro-Llama-3-8B`](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) using llama.cpp via ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
|
40 |
Refer to the [original model card](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) for more details on the model.
|
41 |
+
## Use with tinyBigGAMES's [Inference](https://github.com/tinyBigGAMES) Libraries.
|
42 |
|
43 |
|
44 |
How to configure LMEngine:
|
45 |
|
46 |
```Delphi
|
47 |
+
InitConfig(
|
48 |
'C:/LLM/gguf', // path to model files
|
49 |
-1 // number of GPU layer, -1 to use all available layers
|
50 |
);
|
|
|
53 |
How to define model:
|
54 |
|
55 |
```Delphi
|
56 |
+
DefineModel('hermes-2-pro-llama-3-8b.Q4_K_M.gguf',
|
57 |
'hermes-2-pro-llama-3-8b.Q4_K_M', 8000, '<|im_start|>{role}\n{content}<|im_end|>\n',
|
58 |
'<|im_start|>assistant');
|
59 |
```
|
|
|
61 |
How to add a message:
|
62 |
|
63 |
```Delphi
|
64 |
+
AddMessage(
|
65 |
ROLE_USER, // role
|
66 |
'What is AI?' // content
|
67 |
);
|
|
|
79 |
LOutputTokens: Int32;
|
80 |
LTotalTokens: Int32;
|
81 |
|
82 |
+
if RunInference('hermes-2-pro-llama-3-8b.Q4_K_M', 1024) then
|
83 |
begin
|
84 |
+
GetInferenceStats(nil, @LTokenOutputSpeed, @LInputTokens, @LOutputTokens,
|
85 |
@LTotalTokens);
|
86 |
+
PrintLn('', FG_WHITE);
|
87 |
+
PrintLn('Tokens :: Input: %d, Output: %d, Total: %d, Speed: %3.1f t/s',
|
88 |
+
FG_BRIGHTYELLOW, LInputTokens, LOutputTokens, LTotalTokens, LTokenOutputSpeed);
|
89 |
end
|
90 |
else
|
91 |
begin
|
92 |
+
PrintLn('', LME_FG_WHITE);
|
93 |
+
PrintLn('Error: %s', FG_RED, GetError());
|
94 |
end;
|
95 |
```
|