daniellefranca96 committed
Commit d899b2a
1 Parent(s): 33c2fff

Update main.py

Files changed (1): main.py +19 -4
main.py CHANGED
@@ -1,9 +1,20 @@
+from llama_cpp import Llama
 from fastapi import FastAPI
 from pydantic import BaseModel
 import requests
 from ctransformers import AutoModelForCausalLM
 
-llm = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v0.6", model_file="ggml-model-q4_0.gguf")
+llms = {
+    "tinyllama": {"name": "TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF", "file": "tinyllama-1.1b-1t-openorca.Q4_K_M.gguf", "suffix": "<|im_end|><|im_start|>assistant", "prefix": "<|im_start|>system You are a helpful assistant <|im_end|><|im_start|>user"},
+    "orca2": {"name": "TheBloke/Orca-2-7B-GGUF", "file": "orca-2-7b.Q4_K_M.gguf", "suffix": "<|im_end|><|im_start|>assistant", "prefix": "<|im_start|>system You are a helpful assistant<|im_end|><|im_start|>user "},
+    "zephyr": {"name": "TheBloke/zephyr-7B-beta-GGUF", "file": "zephyr-7b-beta.Q4_K_M.gguf", "suffix": "</s><|assistant|>", "prefix": "<|system|>You are a helpful assistant</s><|user|> "},
+    "mistral": {"name": "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "file": "mistral-7b-instruct-v0.1.Q4_K_M.gguf", "suffix": "[/INST]", "prefix": "<s>[INST] "},
+    "llama2": {"name": "TheBloke/Llama-2-7B-Chat-GGUF", "file": "llama-2-7b-chat.Q4_K_M.gguf", "suffix": "[/INST]", "prefix": "[INST] <<SYS>> You are a helpful assistant <</SYS>>"},
+    "solar": {"name": "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF", "file": "solar-10.7b-instruct-v1.0.Q4_K_M.gguf", "suffix": "\n### Assistant:\n", "prefix": "### User:\n"}
+}
+
+for k in llms.keys():
+    AutoModelForCausalLM.from_pretrained(llms[k]['name'], model_file=llms[k]['file'])
 
 #Pydantic object
 class validation(BaseModel):
@@ -13,11 +24,12 @@ app = FastAPI()
 
 @app.post("/llm_on_cpu")
 async def stream(item: validation):
-    prefix="""<|user|>
-    """
-    suffix="""<|endoftext|><|assistant|>"""
+    prefix=llms[item.llm]['prefix']
+    suffix=llms[item.llm]['suffix']
     user="""
 {prompt}"""
+
+    llm = AutoModelForCausalLM.from_pretrained(llms[item.llm]['name'], model_file=llms[item.llm]['file'])
 
     prompt = f"{prefix}{user.replace('{prompt}', item.prompt)}{suffix}"
     return llm(prompt)
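
For reference, a minimal client-side sketch of how the updated endpoint could be called. It assumes the (not shown) `validation` model declares both a `prompt` and an `llm` field, and that the app is served locally on uvicorn's default port; the URL and payload values below are illustrative, not part of the commit.

import requests

# Hypothetical local deployment; adjust host/port to wherever the app runs.
URL = "http://localhost:8000/llm_on_cpu"

# "llm" selects a key from the llms dict (e.g. "tinyllama", "zephyr", "llama2");
# "prompt" is wrapped in that model's prefix/suffix chat template server-side.
payload = {"llm": "tinyllama", "prompt": "What is the capital of France?"}

response = requests.post(URL, json=payload)
print(response.json())

Since ctransformers caches downloaded weights, the startup loop effectively pre-fetches each GGUF file, so the per-request from_pretrained call only reloads the selected model from local disk rather than re-downloading it.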