# app.py
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
import uvicorn
import os

MODEL_REPO = "QuantFactory/Qwen2.5-7B-Instruct-GGUF"
MODEL_FILE = "Qwen2.5-7B-Instruct.Q4_K_M.gguf"
MODEL_PATH = f"/home/user/app/data/cache/{MODEL_FILE}"

# Make sure the model file exists; download it from the Hub if it does not
if not os.path.exists(MODEL_PATH):
    from huggingface_hub import hf_hub_download
    os.makedirs("/home/user/app/data/cache", exist_ok=True)
    hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        local_dir="/home/user/app/data/cache",
    )

if os.path.exists(MODEL_PATH):
    print(f"Model found at {MODEL_PATH}")
else:
    print(f"Model not found at {MODEL_PATH}")

# Load the model (CPU-only inference: n_gpu_layers=0)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=16000,
    n_threads=6,
    n_gpu_layers=0,
    verbose=False,
)

# Plain system prompt text only; the ChatML tags (<|im_start|>/<|im_end|>)
# are added by format_prompt, so they must not be duplicated here.
SYSTEM_PROMPT = """You are Qwen, created by Alibaba Cloud. You are an AI development assistant. Follow these rules:
1. If the request is simple (single file, <50 lines), handle it directly
2. For complex requests (multiple files, >50 lines), just respond with "CODER"
3. Always check code for errors before sending
4. Never execute unsafe code"""

# API setup
app = FastAPI()


class ChatRequest(BaseModel):
    message: str
    history: list[list[str]] = []  # [[user, assistant], ...]


class ChatResponse(BaseModel):
    response: str
    updated_history: list[list[str]]


def format_prompt(messages):
    """Render (role, content) pairs into a ChatML prompt for Qwen."""
    chat = []
    for role, content in messages:
        if role == "system":
            chat.append(f"<|im_start|>system\n{content}<|im_end|>")
        elif role == "user":
            chat.append(f"<|im_start|>user\n{content}<|im_end|>")
        else:
            chat.append(f"<|im_start|>assistant\n{content}<|im_end|>")
    # Leave the final assistant turn open so the model continues from here
    chat.append("<|im_start|>assistant\n")
    return "\n".join(chat)


@app.post("/chat", response_model=ChatResponse)
def chat(req: ChatRequest):
    # Rebuild the full conversation: system prompt, prior turns, new message
    messages = [("system", SYSTEM_PROMPT.strip())]
    for user_msg, bot_msg in req.history:
        messages.append(("user", user_msg))
        messages.append(("assistant", bot_msg))
    messages.append(("user", req.message))

    prompt = format_prompt(messages)
    output = llm(
        prompt,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        repeat_penalty=1.05,
        stop=["<|im_end|>"],
    )
    reply = output["choices"][0]["text"].split("<|im_end|>")[0].strip()

    req.history.append([req.message, reply])
    return ChatResponse(response=reply, updated_history=req.history)
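

# Minimal entrypoint sketch so the `import uvicorn` above is actually used
# when the script is run directly. The host/port values are assumptions
# (7860 is the conventional port for a Hugging Face Space); adjust them to
# match the real deployment.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example call against the /chat endpoint (assuming the server runs locally
# on the port above):
#
#   curl -X POST http://localhost:7860/chat \
#     -H "Content-Type: application/json" \
#     -d '{"message": "Write a hello world script", "history": []}'
#
# The JSON response contains "response" (the assistant reply) and
# "updated_history", which can be passed back as "history" on the next call.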