from transformers import AutoModelForCausalLM
from fastapi import FastAPI, Form
from pydantic import BaseModel

# Model loading.
# NOTE(review): these kwargs (`threads`, a GGUF file path, `max_new_tokens` at
# load time) match the `ctransformers` AutoModelForCausalLM API, not Hugging
# Face `transformers` — confirm the intended import; with `transformers` this
# call will reject `threads` and cannot load a bare .gguf file this way.
llm = AutoModelForCausalLM.from_pretrained(
    "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",
    max_new_tokens=1096,
    threads=3,
)


class Validation(BaseModel):
    """Request body for /generate_response."""

    user_prompt: str    # The end user's message.
    system_prompt: str  # System instruction that frames the assistant's behavior.


# FastAPI application
app = FastAPI()


@app.post("/generate_response")
async def generate_response(item: Validation):
    """Generate a completion for the given system/user prompts.

    Builds a Llama-3 chat-format prompt and returns the model's raw output.
    """
    # Official Llama-3 instruct template. The original code interpolated an
    # undefined variable `{assistant}` here (NameError on every request) and
    # inserted stray spaces/newlines between the special tokens; both fixed.
    prompt = (
        "<|begin_of_text|>"
        f"<|start_header_id|>system<|end_header_id|>\n\n{item.system_prompt}<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n{item.user_prompt}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    # NOTE(review): with HF `transformers`, `generate()` expects token IDs, not
    # a raw string — this call only works as written with `ctransformers`-style
    # wrappers. Verify against the actual model class in use.
    return llm.generate(prompt, do_sample=True)