Update main.py
Browse files
main.py
CHANGED
@@ -4,7 +4,7 @@ from pydantic import BaseModel
|
|
4 |
|
5 |
#Model loading
|
6 |
llm = AutoModelForCausalLM.from_pretrained("Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
|
7 |
-
model_type='llama',
|
8 |
max_new_tokens = 1096,
|
9 |
threads = 3,
|
10 |
)
|
@@ -21,7 +21,11 @@ app = FastAPI()
|
|
21 |
@app.post("/llm_on_cpu")
|
22 |
async def stream(item: validation):
|
23 |
system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
27 |
return llm(prompt)
|
|
|
4 |
|
5 |
# Model loading: runs once, as a top-level statement, when this module is imported.
llm = AutoModelForCausalLM.from_pretrained(
    "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
    # model_type='llama' is deliberately not passed here (disabled by this change).
    max_new_tokens=1096,
    threads=3,
)
|
|
|
21 |
@app.post("/llm_on_cpu")
async def stream(item: validation):
    """Wrap the request prompt in the Llama 3 instruct template and run the model.

    Args:
        item: Request body parsed as ``validation``; only ``item.prompt`` is
            read, and surrounding whitespace is stripped from it.

    Returns:
        Whatever ``llm(prompt)`` returns for the assembled prompt.
    """
    system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
    user_prompt = item.prompt.strip()

    # The template must begin directly with <|begin_of_text|>, and every header
    # (including the final assistant header that cues generation) is followed
    # by exactly one blank line. Adjacent literals are used instead of a
    # triple-quoted string so that neither the newline after the opening quotes
    # nor any source indentation ends up inside the prompt.
    prompt = (
        '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n'
        f'{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n'
        f'{user_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'
    )

    # NOTE(review): llm(prompt) is invoked synchronously inside an async
    # handler — confirm this does not stall other requests while generating.
    return llm(prompt)
|