Spaces:

xset
/

aibot

Sleeping

App Files Files Community

aibot / main.py

xset

debug things

77f5a26 7 months ago

raw

history blame

2.23 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	import torch
	import numpy as np
	import logging

	# Fail fast when an unsupported NumPy major version is installed.
	# An explicit raise is used instead of `assert` so the check still runs
	# when Python is started with -O (which strips assert statements).
	if not np.__version__.startswith('1.'):
	    raise RuntimeError(f"Несовместимая версия NumPy: {np.__version__}")

	# NOTE(review): root_path is normally the path prefix added by a proxy;
	# "/" is an unusual value for it — confirm this is intentional.
	app = FastAPI(root_path="/")

	# Request body accepted by the text-generation endpoint (POST "/").
	class RequestData(BaseModel):
	    # Text handed to the generator as the prompt.
	    prompt: str
	    # Requested number of new tokens; defaults to 50. The POST handler
	    # additionally caps the value at 100 before generating.
	    max_tokens: int = 50

	# Hugging Face model identifier loaded at import time.
	MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

	# Configure logging before the model load so that a load failure is
	# reported through the logging system together with its traceback.
	logging.basicConfig(level=logging.INFO)

	try:
	    # Load the tokenizer and the model with an explicit device_map.
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	    model = AutoModelForCausalLM.from_pretrained(
	        MODEL_NAME,
	        torch_dtype=torch.float32,
	        device_map="auto",
	        low_cpu_mem_usage=True,
	    )

	    # Build the pipeline without passing a device argument.
	    generator = pipeline(
	        "text-generation",
	        model=model,
	        tokenizer=tokenizer,
	    )
	except Exception as e:
	    # Deliberate best-effort start-up: the app still comes up with
	    # generator = None, which the request handlers check before use.
	    # logging.exception records the full traceback, not just the message.
	    logging.exception("Ошибка загрузки модели: %s", e)
	    generator = None

	@app.on_event("startup")
	async def startup_event():
	    """Print the path of every route registered on the app at startup."""
	    registered_paths = []
	    for registered in app.routes:
	        registered_paths.append(registered.path)
	    print(f"Registered routes: {registered_paths}")

	# GET "/" probe: always answers with a fixed "ok" status, regardless of
	# whether the model was loaded.
	@app.get("/")
	async def root_health_check():
	    payload = {"status": "ok"}
	    return payload

	# POST "/" text-generation endpoint.
	#
	# Parameters:
	#   request: the validated JSON body (RequestData), i.e. `prompt` and
	#            `max_tokens`. It is NOT the HTTP request object.
	# Returns:
	#   {"status": "ok"} for an empty prompt, otherwise
	#   {"response": <generated text>}.
	# Raises:
	#   HTTPException 503 when the model failed to load (generator is falsy).
	#   HTTPException 500 when generation itself raises.
	#
	# NOTE(review): the generator call is synchronous and runs inside an
	# async handler; presumably it blocks the event loop while generating —
	# confirm whether that is acceptable for this deployment.
	@app.post("/")
	async def generate_text(request: RequestData):
	    # The body model has no url/headers/body() attributes, so accessing
	    # them raised AttributeError on every call. Log the parsed fields
	    # instead (lengths only, to keep prompt text out of the logs).
	    logging.info(
	        "Generate request: prompt_chars=%d max_tokens=%d",
	        len(request.prompt),
	        request.max_tokens,
	    )

	    if not request.prompt:
	        # Empty prompt is answered like a probe. A plain dict is returned
	        # because JSONResponse is not imported in this file; FastAPI
	        # serializes the dict to the same JSON payload.
	        return {"status": "ok"}

	    if not generator:
	        raise HTTPException(status_code=503, detail="Модель не загружена")

	    # Keep the token budget within 1..100: the upper cap was already
	    # present, the lower bound guards against zero or negative values.
	    new_tokens = max(1, min(request.max_tokens, 100))

	    try:
	        output = generator(
	            request.prompt,
	            max_new_tokens=new_tokens,
	            # Greedy decoding; `temperature` is omitted because it only
	            # applies when sampling is enabled.
	            do_sample=False,
	            num_beams=1,
	        )
	        return {"response": output[0]["generated_text"]}
	    except Exception as e:
	        # Record the traceback server-side and keep the cause chained.
	        logging.exception("Text generation failed")
	        raise HTTPException(status_code=500, detail=str(e)) from e

	# GET "/health": reports "ok" when the generator pipeline is available
	# (truthy) and "unavailable" otherwise.
	@app.get("/health")
	async def health_check():
	    if generator:
	        state = "ok"
	    else:
	        state = "unavailable"
	    return {"status": state}