daniellefranca96 committed on
Commit
c51a031
1 Parent(s): e53fe12

Update main.py

Files changed (1)
  1. main.py +44 -14
main.py CHANGED
@@ -4,34 +4,64 @@ import requests
  from ctransformers import AutoModelForCausalLM

  llms = {
-     "tinyllama":{"name": "TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF", "file":"tinyllama-1.1b-1t-openorca.Q4_K_M.gguf", "suffix":"<|im_end|><|im_start|>assistant", "prefix":"<|im_start|>system You are a helpful assistant <|im_end|><|im_start|>user"},
-     "tinyllama2":{"name": "TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF", "file":"tinyllama-1.1b-1t-openorca.Q3_K_M.gguf", "suffix":"<|im_end|><|im_start|>assistant", "prefix":"<|im_start|>system You are a helpful assistant <|im_end|><|im_start|>user"},
-     "tinyllama3":{"name": "TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF", "file":"tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf", "suffix":"<|im_end|><|im_start|>assistant", "prefix":"<|im_start|>system You are a helpful assistant <|im_end|><|im_start|>user"},
-     "tinyllama4":{"name": "TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF", "file":"tinyllama-1.1b-chat-v0.3.Q2_K.gguf", "suffix":"<|im_end|><|im_start|>assistant", "prefix":"<|im_start|>system You are a helpful assistant <|im_end|><|im_start|>user"},
-     "solar":{"name": "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF", "file":"solar-10.7b-instruct-v1.0.Q4_K_M.gguf", "suffix":"\n### Assistant:\n", "prefix":"### User:\n"},
-     #"mixtral-moe":{"name": "TheBloke/Mixtral_7Bx2_MoE-GGUF", "file":"mixtral_7bx2_moe.Q4_K_M.gguf", "suffix":"\n### Assistant:\n", "prefix":"### User:\n"},
-     #"phi2":{"name": "TheBloke/phi-2-GGUF", "file":"phi-2.Q4_K_M.gguf", "suffix":"", "prefix":""}
  }

- for k in llms.keys():
-     AutoModelForCausalLM.from_pretrained(llms[k]['name'], model_file=llms[k]['file'])
-
  #Pydantic object
  class validation(BaseModel):
      prompt: str
      llm: str
  #Fast API
  app = FastAPI()

  @app.post("/llm_on_cpu")
  async def stream(item: validation):

-     prefix=llms[item.llm]['prefix']
-     suffix=llms[item.llm]['suffix']
      user="""
  {prompt}"""

-     llm = AutoModelForCausalLM.from_pretrained(llms[item.llm]['name'], model_file=llms[item.llm]['file'])

      prompt = f"{prefix}{user.replace('{prompt}', item.prompt)}{suffix}"
-     return llm(prompt)
 
  from ctransformers import AutoModelForCausalLM

  llms = {
+     "TinyLLama 1b 4_K_M 2048": {
+         "nctx": 2048,
+         "file": "tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf",
+         "prefix": "### Human:",
+         "suffix": "### Assistant:"
+     },
+     "TinyLLama 1b OpenOrca 4_K_M 2048": {
+         "nctx": 2048,
+         "file": "tinyllama-1.1b-1t-openorca.Q4_K_M.gguf",
+         "prefix": "<|im_start|>system You are a helpfull assistant<|im_end|><|im_start|>user",
+         "suffix": "<|im_end|><|im_start|>assistant"
+     },
+     "OpenLLama 3b 4_K_M 196k": {
+         "nctx": 80000,
+         "file": "open-llama-3b-v2-wizard-evol-instuct-v2-196k.Q4_K_M.gguf",
+         "prefix": "### HUMAN:",
+         "suffix": "### RESPONSE:"
+     },
+     "Phi-2 2.7b 4_K_M 2048": {
+         "nctx": 2048,
+         "file": "phi-2.Q4_K_M.gguf",
+         "prefix": "Instruct:",
+         "suffix": "Output:"
+     },
+     "Mixtral MOE 7bx2 4_K_M 32K": {
+         "nctx": 32000,
+         "file": "mixtral_7bx2_moe.Q4_K_M.gguf",
+         "prefix": "",
+         "suffix": ""
+     },
+     "Stable Zephyr 3b 4_K_M 4096": {
+         "nctx": 4096,
+         "file": "stablelm-zephyr-3b.Q4_K_M.gguf",
+         "prefix": "<|user|>",
+         "suffix": "<|endoftext|><|assistant|>"
+     }
  }

  #Pydantic object
  class validation(BaseModel):
      prompt: str
      llm: str
+
  #Fast API
  app = FastAPI()

  @app.post("/llm_on_cpu")
  async def stream(item: validation):

+     model = llms[item.llm]
+     prefix=model['prefix']
+     suffix=model['suffix']
+     nctx = model['nctx'] if 'nctx' in item.keys() else 1024
+     max_tokens = model['max_tokens'] if 'max_tokens' in item.keys() else 512
      user="""
  {prompt}"""

+     model = Llama(model_path="./"+model['file'], n_ctx=model['nctx'], verbose=False, n_threads=8)

      prompt = f"{prefix}{user.replace('{prompt}', item.prompt)}{suffix}"
+     return llm(prompt, max_tokens=max_tokens)
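
Note on the new handler: as committed it would fail at runtime. Llama is constructed but never imported (it comes from llama-cpp-python, not ctransformers, whose AutoModelForCausalLM import is now unused), the checks 'nctx' in item.keys() and 'max_tokens' in item.keys() inspect the request object instead of the selected model entry, and the last line calls llm, a name that no longer exists after the loaded model was assigned to model. Below is a minimal sketch of a corrected handler, not part of the commit, assuming llama-cpp-python is installed, the GGUF files sit next to main.py, and the llms dict from the diff above is in scope; the endpoint path, the 1024/512 defaults and the thread count are taken from the diff.

# Sketch of a corrected handler (hypothetical, assumes the llms dict above is defined earlier in the file).
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama   # llama-cpp-python provides the Llama class used below

app = FastAPI()

#Pydantic object
class validation(BaseModel):
    prompt: str
    llm: str

@app.post("/llm_on_cpu")
async def stream(item: validation):
    config = llms[item.llm]                      # model entry selected by the request
    prefix = config['prefix']
    suffix = config['suffix']
    nctx = config.get('nctx', 1024)              # read options from the model entry, not from item
    max_tokens = config.get('max_tokens', 512)   # no entry defines max_tokens, so 512 applies

    user = """
{prompt}"""

    # Load the local GGUF file with llama-cpp-python.
    llm = Llama(model_path="./" + config['file'], n_ctx=nctx, verbose=False, n_threads=8)

    prompt = f"{prefix}{user.replace('{prompt}', item.prompt)}{suffix}"
    return llm(prompt, max_tokens=max_tokens)    # call the object that was actually created

With the server running (for example via uvicorn main:app), a request could look like the following; the llm value is any key of the llms dict, and the host and port are uvicorn's defaults:

import requests

r = requests.post(
    "http://localhost:8000/llm_on_cpu",
    json={"prompt": "What is a GGUF file?", "llm": "Phi-2 2.7b 4_K_M 2048"},
)
print(r.json())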