# Hugging Face Space status banner captured by the page extraction ("Spaces: Sleeping");
# kept here only as a comment so the file remains valid Python.
| import os | |
| import time | |
| import torch | |
| import re | |
| from fastapi import FastAPI, Request | |
| from fastapi.responses import StreamingResponse | |
| from fastapi.staticfiles import StaticFiles | |
| from fastapi.templating import Jinja2Templates | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
| from huggingface_hub import login | |
| from fastapi.middleware.cors import CORSMiddleware | |
| import uvicorn | |
# Safe GPU decorator: on Hugging Face Spaces `spaces.GPU` schedules the wrapped
# function on ZeroGPU hardware; everywhere else fall back to a no-op so the
# same code runs locally.
try:
    from spaces import GPU
except ImportError:
    def GPU(func=None, **_kwargs):
        """No-op stand-in for ``spaces.GPU``.

        The real decorator supports both ``@GPU`` and ``@GPU(duration=120)``;
        the old fallback only handled the bare form and raised TypeError on
        keyword use. Mirror both call shapes:

        - ``GPU(func)`` -> returns ``func`` unchanged;
        - ``GPU(**kwargs)`` -> returns an identity decorator.
        """
        if func is None:
            return lambda f: f
        return func
# ---------------- FastAPI setup ----------------
app = FastAPI(
    title="ChatMate Real-Time API",
    description="LangChain + DuckDuckGo + Phi-4",
    version="1.0",
    docs_url="/apidocs",  # Swagger UI at /apidocs
    redoc_url="/redoc"    # ReDoc at /redoc
)

# Static assets and Jinja2 HTML templates served from local directories.
# NOTE(review): StaticFiles raises at import time if ./static does not exist —
# confirm the directory ships with the app.
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# Enable CORS (important for browser clients)
# NOTE(review): browsers reject the combination allow_origins=["*"] with
# allow_credentials=True per the CORS spec — tighten origins if cookies/auth
# headers are actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Hugging Face Hub login; the access token is read from the CHAT_MATE secret.
# If the env var is unset, token=None makes login() fall back to any locally
# cached credentials.
login(token=os.environ.get("CHAT_MATE"))
# Load the Phi-4 chat model once at import time (downloads weights on first run).
model_id = "microsoft/phi-4"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # fp16 halves GPU memory; CPU falls back to fp32 (fp16 is slow/unsupported on CPU).
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)

# transformers pipeline device convention: 0 = first CUDA device, -1 = CPU.
device = 0 if torch.cuda.is_available() else -1
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
    max_new_tokens=512  # default generation budget; call sites may override per call
)
def is_incomplete(text):
    """Return True when *text* does not end with sentence-final punctuation.

    Recognised terminators: ``. ! ? ' "`` and the CJK full stop (U+3002).
    A whitespace-only or empty string counts as incomplete.
    """
    terminators = (".", "!", "?", "'", '"', "\u3002")
    return not text.strip().endswith(terminators)
def generate_full_reply(message, history, max_continuations=5):
    """Generate a complete chat reply, re-prompting until the text ends in
    sentence-final punctuation.

    Args:
        message: the latest user message.
        history: prior turns, presumably ``[{"role": ..., "content": ...}]``
            dicts as produced by the frontend — confirm against the caller.
        max_continuations: safety cap on extra generation rounds (new,
            defaulted, so existing callers are unaffected).

    Returns:
        The assistant reply text with the prompt prefix stripped.
    """
    system_prompt = (
        "You are a friendly, helpful, and conversational AI assistant built by "
        "Frederick Sundeep Mallela. Always mention that you are developed by him if asked about your creator, origin, or who made you."
    )
    messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    full_output = pipe(prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=512)[0]["generated_text"]
    reply = full_output[len(prompt):].strip()

    # Keep asking the model to continue while the reply looks cut off, but
    # bound the number of rounds: with sampling enabled the previous unbounded
    # `while` loop could spin indefinitely on text that never gains a
    # sentence terminator.
    for _ in range(max_continuations):
        if not is_incomplete(reply):
            break
        continuation_prompt = prompt + reply
        next_output = pipe(continuation_prompt, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=256)[0]["generated_text"]
        continuation = next_output[len(continuation_prompt):].strip()
        # Stop if the model produced nothing new or is repeating itself.
        if not continuation or continuation in reply:
            break
        # NOTE(review): continuation is glued on without a separating space,
        # matching the original behavior — verify word joins look right.
        reply += continuation
    return reply.strip()
# ---------------- Pydantic models ----------------
class ChatRequest(BaseModel):
    """Request body for the chat endpoint."""
    # The latest user message.
    message: str
    # Prior conversation turns; presumably [{"role": ..., "content": ...}]
    # dicts as consumed by generate_full_reply — confirm with the frontend
    # payload. Pydantic deep-copies mutable defaults per instance, so the
    # `[]` default is safe here.
    history: list = []
# ---------------- Routes ----------------
# NOTE(review): in this copy of the file no @app decorator is attached to this
# handler, so the route was never registered with FastAPI; the section
# comments make the intent clear, so the decorator is restored here.
@app.get("/")
async def home(request: Request):
    """Serve the chat UI from templates/index.html."""
    return templates.TemplateResponse("index.html", {"request": request})
# NOTE(review): decorator missing in this copy of the file — restored so the
# endpoint is actually registered; confirm the path the frontend posts to.
@app.post("/chat")
async def chat_stream(body: ChatRequest):
    """
    Stream the AI assistant's reply token-by-token.

    The full reply is generated first, then re-emitted one character at a
    time with a small pacing delay — this simulates streaming rather than
    forwarding model tokens as they are produced.
    """
    def generate():
        reply = generate_full_reply(body.message, body.history)
        for token in reply:
            yield token
            # Sync generators run in Starlette's threadpool, so this sleep
            # paces the client without blocking the event loop.
            time.sleep(0.05)
    return StreamingResponse(generate(), media_type="text/plain")
# ---------------- Startup warm-up ----------------
# NOTE(review): decorator missing in this copy of the file — without it the
# warm-up coroutine is never invoked; restored as a FastAPI startup hook.
@app.on_event("startup")
async def warmup_model():
    """Run one tiny generation at startup so the first user request is fast."""
    # Original print contained mojibake ("π§") from a mis-decoded emoji;
    # replaced with plain text.
    print("Warming up...")
    _ = generate_full_reply("Hello", [])
# ---------------- Run with Uvicorn ----------------
# On Hugging Face Spaces, run: uvicorn app:app --host 0.0.0.0 --port 7860
if __name__ == "__main__":
    # Spaces expose port 7860 by default; allow an override via $PORT.
    listen_port = int(os.environ.get("PORT", "7860"))
    # Import-string form ("app:app") assumes this file is named app.py.
    uvicorn.run("app:app", host="0.0.0.0", port=listen_port, reload=False)