aibot / main.py
xset's picture
debug things
77f5a26
raw
history blame
2.23 kB
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import numpy as np
import logging
# Проверка версии NumPy
assert np.__version__.startswith('1.'), f"Несовместимая версия NumPy: {np.__version__}"
app = FastAPI(root_path="/")
class RequestData(BaseModel):
prompt: str
max_tokens: int = 50
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
try:
# Загрузка модели с явным указанием device_map
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
torch_dtype=torch.float32,
device_map="auto",
low_cpu_mem_usage=True
)
# Создаем pipeline без указания device
generator = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer
)
except Exception as e:
print(f"Ошибка загрузки модели: {str(e)}")
generator = None
logging.basicConfig(level=logging.INFO)
@app.on_event("startup")
async def startup_event():
routes = [route.path for route in app.routes]
print(f"Registered routes: {routes}")
@app.get("/")
async def root_health_check():
return {"status": "ok"}
@app.post("/")
async def generate_text(request: RequestData):
print("Incoming path:", request.url.path)
print("Headers:", dict(request.headers))
body = await request.body()
print("Raw body:", body)
if not request.prompt:
return JSONResponse(content={"status": "ok"})
if not generator:
raise HTTPException(status_code=503, detail="Модель не загружена")
try:
output = generator(
request.prompt,
max_new_tokens=min(request.max_tokens, 100),
do_sample=False,
num_beams=1,
temperature=0.7,
)
return {"response": output[0]["generated_text"]}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health_check():
return {"status": "ok" if generator else "unavailable"}