import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from fastapi import FastAPI
from pydantic import BaseModel

torch.random.manual_seed(0)


class Message(BaseModel):
    role: str
    content: str


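# device_map="cpu" pins the model to the CPU here; "auto" (or "cuda") would
# place it on a GPU when one is available.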
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cpu",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
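
# NOTE: `history` is module-level state, so every request served by this
# process appends to one shared conversation; concurrent clients will
# interleave turns.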
|
|
history = [ |
|
{"role": "assistant", "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user."}, |
|
] |
|
|
|
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

generation_args = {
    "max_new_tokens": 500,
    "return_full_text": False,
    # do_sample=False selects greedy decoding; the original temperature=0.0 is
    # dropped because it is unused (and warned about) when sampling is off.
    "do_sample": False,
}


def chat(messages: list[Message]) -> str:
    # Store each incoming message under its own role instead of hard-coding
    # "user", so forwarded assistant or system turns are preserved.
    for message in messages:
        history.append({"role": message.role, "content": message.content})

    generated_text = pipe(history, **generation_args)
    print("Generated Text", generated_text)

    reply = generated_text[0]["generated_text"]
    history.append({"role": "assistant", "content": reply})
    return reply


app = FastAPI()


@app.post("/chat")
def chat_endpoint(messages: list[Message]):
    # A plain (non-async) handler lets FastAPI run this blocking model call in
    # its threadpool instead of stalling the event loop.
    return chat(messages)
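

# A minimal sketch of how this service might be launched and exercised. It
# assumes uvicorn is installed and that this file is saved as main.py; neither
# detail is given above.
#
#   uvicorn main:app --host 127.0.0.1 --port 8000
#
#   curl -X POST http://127.0.0.1:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '[{"role": "user", "content": "What is the capital of France?"}]'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)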