lukestanley committed on
Commit a96b492
1 Parent(s): 68a2a07

Makes URL more obvious, update comments, lowers temp

Files changed (1):
  utils.py +7 -8
utils.py CHANGED
@@ -2,14 +2,13 @@ import json
 from typing import Any, Dict, Union
 import requests
 
-from llama_cpp import (
-    json_schema_to_gbnf,
-)
+from llama_cpp import json_schema_to_gbnf
 
-# Only used directly to convert the JSON schema to GBNF,
-
-# The main interface is the HTTP server, not the library directly.
+# The llama_cpp Python HTTP server communicates with the AI model, similar
+# to the OpenAI API but adds a unique "grammar" parameter.
+# The real OpenAI API has other ways to set the output format.
 
+URL = "http://localhost:5834/v1/chat/completions"
 
 def llm_streaming(
     prompt: str, pydantic_model_class, return_pydantic_object=False
@@ -27,7 +26,7 @@ def llm_streaming(
         "stream": True,
         "max_tokens": 1000,
         "grammar": grammar,
-        "temperature": 1.0,
+        "temperature": 0.7,
         "messages": [{"role": "user", "content": prompt}],
     }
     headers = {
@@ -35,7 +34,7 @@ def llm_streaming(
     }
 
     response = requests.post(
-        "http://localhost:5834/v1/chat/completions",
+        URL,
        headers=headers,
        json=payload,
        stream=True,
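
For context, a minimal sketch of the flow this commit touches, assuming the llama_cpp HTTP server is running locally on port 5834 as in the diff. The Summary model and the prompt are hypothetical placeholders, and Pydantic v2 (model_json_schema, model_validate_json) is assumed; the sketch uses a non-streaming request to stay short:

import json

import requests
from llama_cpp import json_schema_to_gbnf
from pydantic import BaseModel

URL = "http://localhost:5834/v1/chat/completions"


class Summary(BaseModel):
    # Hypothetical example model; any Pydantic class with a JSON schema works.
    title: str
    points: list[str]


# Convert the model's JSON schema into a GBNF grammar, as utils.py does.
grammar = json_schema_to_gbnf(json.dumps(Summary.model_json_schema()))

payload = {
    "stream": False,  # non-streaming keeps this sketch short
    "max_tokens": 1000,
    "grammar": grammar,  # the llama_cpp-specific parameter noted in the diff
    "temperature": 0.7,
    "messages": [{"role": "user", "content": "Summarise GBNF grammars."}],
}
response = requests.post(
    URL, headers={"Content-Type": "application/json"}, json=payload
)
# The grammar constrains sampling, so the output parses cleanly into the model.
result = Summary.model_validate_json(
    response.json()["choices"][0]["message"]["content"]
)
print(result)

On the temperature change: since the grammar already guarantees syntactically valid output, lowering the temperature from 1.0 to 0.7 likely just reduces wording variance; it does not affect validity.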