CISCai committed
Commit a05ca96
Parent: a8fd1a1

Improve FIM example

Files changed (1): README.md (+21 -5)
README.md CHANGED
@@ -185,13 +185,29 @@ from llama_cpp import Llama
 
 # Completion API
 
+prompt = "def add("
+suffix = "\n    return sum\n\n"
+
 llm = Llama(model_path="./Codestral-22B-v0.1.IQ4_XS.gguf", n_gpu_layers=57, n_ctx=32768, spm_infill=True)
-print(llm.create_completion(
+output = llm.create_completion(
   temperature = 0.0,
-  repeat_penalty = 1.1,
-  prompt = "def add(",
-  suffix = " return sum"
-))
+  repeat_penalty = 1.0,
+  prompt = prompt,
+  suffix = suffix
+)
+
+# Models sometimes repeat suffix in response, attempt to filter that
+response = output["choices"][0]["text"]
+response_stripped = response.rstrip()
+unwanted_response_suffix = suffix.rstrip()
+unwanted_response_length = len(unwanted_response_suffix)
+
+filtered = False
+if unwanted_response_suffix and response_stripped[-unwanted_response_length:] == unwanted_response_suffix:
+    response = response_stripped[:-unwanted_response_length]
+    filtered = True
+
+print(f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{prompt}\033[32m{response}\033[0m{suffix}")
 ```
 
 #### Simple llama-cpp-python example function calling code
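For a quick sanity check of the suffix-filtering step added in this commit, the sketch below replays that logic on hard-coded strings, so no model needs to be loaded. The `response` value is made up for illustration and is not real model output; only the filtering code mirrors the README change.

```python
# Standalone replay of the suffix-filtering logic from the diff above.
# The response string is illustrative; in the real example it comes
# from llm.create_completion().
prompt = "def add("
suffix = "\n    return sum\n\n"

# Pretend the model echoed the suffix at the end of its completion.
response = "a, b):\n    sum = a + b\n    return sum"

response_stripped = response.rstrip()
unwanted_response_suffix = suffix.rstrip()
unwanted_response_length = len(unwanted_response_suffix)

filtered = False
if unwanted_response_suffix and response_stripped[-unwanted_response_length:] == unwanted_response_suffix:
    response = response_stripped[:-unwanted_response_length]
    filtered = True

print(filtered)        # True
print(repr(response))  # 'a, b):\n    sum = a + b'
```

Two details of the updated example worth noting: `repeat_penalty = 1.0` is the neutral value in llama.cpp, so the repetition penalty is effectively disabled rather than merely reduced, and the `\033[32m`/`\033[0m` escapes in the final print simply render the infilled completion in green so it stands out between the prompt and suffix.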