NERDDISCO committed on
Commit
e0b5928
1 Parent(s): 9eae9de

feat: changed model to "WizardCoder-15B-1.0.ggmlv3.q4_0" and increased max_tokens to 1024

Browse files
Files changed (1) hide show
  1. main.py +2 -2
main.py CHANGED
@@ -15,7 +15,7 @@ from ctransformers import AutoModelForCausalLM
15
  from pydantic import BaseModel
16
 
17
  llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardCoder-15B-1.0-GGML",
18
- model_file="WizardCoder-15B-1.0.ggmlv3.q5_0.bin",
19
  model_type="starcoder",
20
  threads=8)
21
  app = fastapi.FastAPI(title="🪄WizardCoder💫")
@@ -52,7 +52,7 @@ class Message(BaseModel):
52
 
53
  class ChatCompletionRequest(BaseModel):
54
  messages: List[Message]
55
- max_tokens: int = 250
56
 
57
  @app.post("/v1/completions")
58
  async def completion(request: ChatCompletionRequestV0, response_mode=None):
 
15
  from pydantic import BaseModel
16
 
17
  llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardCoder-15B-1.0-GGML",
18
+ model_file="WizardCoder-15B-1.0.ggmlv3.q4_0.bin",
19
  model_type="starcoder",
20
  threads=8)
21
  app = fastapi.FastAPI(title="🪄WizardCoder💫")
 
52
 
53
  class ChatCompletionRequest(BaseModel):
54
  messages: List[Message]
55
+ max_tokens: int = 1024
56
 
57
  @app.post("/v1/completions")
58
  async def completion(request: ChatCompletionRequestV0, response_mode=None):