LLaMa3-assistant / main.py
Do0rMaMu's picture
Create main.py
2b25d5e verified
raw
history blame
No virus
894 Bytes
from transformers import AutoModelForCausalLM
from fastapi import FastAPI, Form
from pydantic import BaseModel
# Model loading: done once at import time so every request reuses the same
# in-memory model object.
# NOTE(review): a local .gguf file together with the `max_new_tokens` and
# `threads` keyword arguments looks like the `ctransformers` loader API rather
# than Hugging Face `transformers` (which is what this file imports) --
# confirm which package is actually intended.
llm = AutoModelForCausalLM.from_pretrained(
    "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf", max_new_tokens=1096, threads=3
)
# Pydantic request schema: the JSON body accepted by the /generate_response
# endpoint. Both fields are required strings.
class Validation(BaseModel):
    # Text inserted into the "user" turn of the prompt.
    user_prompt: str
    # Instruction text inserted into the "system" turn of the prompt.
    system_prompt: str
# FastAPI application instance; routes are registered on it with decorators
# such as @app.post.
app = FastAPI()
# Endpoint for generating responses
@app.post("/generate_response")
async def generate_response(item: Validation):
    """Generate a model response for a system prompt and a user prompt.

    The two request fields are wrapped in Llama 3 chat-template special
    tokens (system turn, user turn, then an opened assistant turn) and the
    resulting prompt string is handed to the module-level ``llm``.

    Args:
        item: Request body carrying ``system_prompt`` and ``user_prompt``.

    Returns:
        Whatever ``llm.generate`` returns for the assembled prompt.
    """
    # The final header must contain the literal role name "assistant".
    # It was previously written as the f-string placeholder `{assistant}`,
    # which referenced an undefined variable and raised NameError on every
    # request.
    # NOTE(review): the spaces/newlines around the headers differ from the
    # published Llama 3 template (which puts "\n\n" after each
    # <|end_header_id|>); they are kept exactly as they were so the prompt
    # seen by the model is otherwise unchanged -- confirm whether to
    # normalise.
    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|> \n "
        f"{item.system_prompt}<|eot_id|> \n "
        f"<|start_header_id|>user<|end_header_id|>{item.user_prompt} <|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>"
    )
    # Sampling is enabled explicitly via do_sample=True.
    # NOTE(review): this passes a raw string to `generate`; verify that the
    # loaded model object accepts text here rather than token ids.
    return llm.generate(prompt, do_sample=True)