import asyncio

from huggingface_hub import InferenceClient
from fastapi import FastAPI
from pydantic import BaseModel

# Model served by the Hugging Face Inference API for every chat request.
_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Token used as the stop sequence and trimmed from the end of the reply.
_STOP_WORD = "assistant"

client = InferenceClient(model=_MODEL_ID)


class Message(BaseModel):
    """A single chat turn as received in the request body."""

    # Speaker of the turn; forwarded unchanged to the model.
    role: str
    # Text of the turn; forwarded unchanged to the model.
    content: str


# NOTE(review): `system` is not referenced by `chat` below, which sends
# `_CHAT_SYSTEM_MESSAGE` instead (same text without the 100-character
# instruction). Kept as-is in case other modules import it; confirm which
# prompt is actually intended.
system = [
    {
        "role": "system",
        "content": "You are a helpful assistant called Awerbot. Never say your are an assistant. You will help visitors of a website with their inquiries, Awersoft is a Software Development company, to contact to Awersoft the visitors should use contact@awersoft.com. You must generate responses shorter than 100 characters",
    }
]

# System prompt that is prepended to every conversation sent by `chat`.
_CHAT_SYSTEM_MESSAGE = {
    "role": "system",
    "content": "You are a helpful assistant called Awerbot. Never say your are an assistant. You will help visitors of a website with their inquiries, Awersoft is a Software Development company, to contact to Awersoft the visitors should use contact@awersoft.com",
}

app = FastAPI()


def _strip_trailing_assistant(content: str) -> str:
    """Return *content* with a trailing "assistant" removed from its last word.

    The text is split on whitespace and re-joined with single spaces, so runs
    of whitespace (including newlines) are collapsed. If the last word is
    exactly "assistant" it is dropped entirely instead of leaving a dangling
    space. Empty or whitespace-only input yields an empty string.
    """
    words = content.split()
    if words and words[-1].endswith(_STOP_WORD):
        trimmed = words[-1][: -len(_STOP_WORD)]
        if trimmed:
            words[-1] = trimmed
        else:
            words.pop()
    return " ".join(words)


@app.post('/chat')
async def chat(messages: list[Message]):
    """Answer a visitor conversation with the Awerbot model.

    Args:
        messages: Conversation turns sent by the client, oldest first. They
            are appended after the fixed system prompt.

    Returns:
        The first choice's message object from the completion response, with
        its ``content`` cleaned of a trailing "assistant" token.
    """
    inputs = [dict(_CHAT_SYSTEM_MESSAGE)]
    inputs.extend(
        {'role': message.role, 'content': message.content}
        for message in messages
    )

    # `chat_completion` is a blocking network call; run it in a worker thread
    # so the event loop can keep serving other requests meanwhile.
    output = await asyncio.to_thread(
        client.chat_completion,
        model=_MODEL_ID,
        messages=inputs,
        max_tokens=100,
        stop=[_STOP_WORD],  # the API documents `stop` as a list of strings
    )

    output_message = output["choices"][0]["message"]

    # Presumably the model sometimes leaks the next role header ("assistant")
    # at the end of its reply; trim it. Skip when there is no text at all so
    # an empty or missing reply does not raise.
    content = output_message["content"]
    if content:
        output_message["content"] = _strip_trailing_assistant(content)

    return output_message