import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from fastapi import FastAPI
from pydantic import BaseModel

torch.random.manual_seed(0)


class Message(BaseModel):
    role: str
    content: str


model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cpu",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")

# The behaviour-setting prompt belongs to the "system" role, not "assistant".
history = [
    {
        "role": "system",
        "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user.",
    },
]

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Greedy decoding: with do_sample=False the temperature value is ignored
# (recent transformers versions warn about it), so it is omitted here.
generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    "do_sample": False,
}


def chat(messages: list[Message]) -> str:
    # Append the incoming user messages to the conversation history.
    # Note: `history` is a module-level list, so every client shares one
    # conversation and it grows without bound across requests.
    for message in messages:
        history.append({"role": "user", "content": message.content})
    generated_text = pipe(history, **generation_args)
    print("Generated Text", generated_text)
    # return_full_text=False means the pipeline returns only the newly
    # generated reply, which we record and send back to the caller.
    reply = generated_text[0]["generated_text"]
    history.append({"role": "assistant", "content": reply})
    return reply


app = FastAPI()


@app.post("/chat")
async def root(messages: list[Message]):
    return chat(messages)
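
# --- Usage ---
# A minimal sketch of how to run and exercise the endpoint. The filename
# (main.py) and port are assumptions, not part of the original; uvicorn is
# assumed to be installed alongside fastapi:
#
#   uvicorn main:app --port 8000
#
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '[{"role": "user", "content": "What is the capital of France?"}]'
#
# The request body is a JSON list of Message objects; the response is the
# model's reply as a plain JSON string.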