from huggingface_hub import InferenceClient
from fastapi import FastAPI
from pydantic import BaseModel

# The client is bound to the model once here; no need to repeat `model=`
# on every chat_completion call.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
client = InferenceClient(model=MODEL_ID)


class Message(BaseModel):
    """One chat turn sent by the frontend (role + text content)."""
    role: str
    content: str


# FIX: the system prompt must use role "system", not "assistant" — with
# "assistant" the instruct model reads it as one of its own prior replies
# instead of as instructions.
SYSTEM_PROMPT = {
    "role": "system",
    "content": (
        "You are a helpful assistant called Awerbot. You will help visitors "
        "of a website with their inquiries, Awersoft is a Software "
        "Development company, to contact to Awersoft the visitors should "
        "use contact@awersoft.com"
    ),
}

app = FastAPI()


@app.post('/chat')
async def chat(messages: list[Message]):
    """Complete a chat turn for the given conversation.

    Args:
        messages: the full conversation so far, as role/content pairs.
            The system prompt is prepended server-side.

    Returns:
        The raw ChatCompletionOutput from the inference endpoint.
    """
    # FIX: build a per-request conversation instead of appending to a
    # module-level `history` list. The old global grew without bound and
    # mixed messages from unrelated requests/users into one conversation.
    #
    # FIX: honor the role declared on each Message instead of forcing
    # 'user' — clients replaying prior assistant turns need them labeled
    # correctly. (Callers that only ever sent role='user' are unaffected.)
    conversation = [SYSTEM_PROMPT]
    conversation.extend(
        {'role': message.role, 'content': message.content}
        for message in messages
    )

    output = client.chat_completion(
        messages=conversation,
        max_tokens=100,
        # FIX: `stop` expects a list of strings, not a bare string.
        stop=["assistant"],
    )
    return output