import os

import torch
from fastapi import FastAPI, HTTPException
from peft import PeftModel
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use all available CPU cores for inference (os.cpu_count() can return None).
torch.set_num_threads(os.cpu_count() or 1)

HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Hugging Face token not found. Set the 'HF_TOKEN' environment variable.")

# Model name
# MODEL_NAME = "noideatt/Qwen2.5-0.5B-Writing"

# Load the base model and apply the fine-tuned adapter on top of it.
base_model = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-it", token=HF_TOKEN)
model = PeftModel.from_pretrained(base_model, "noideatt/Gemma-3-1b-it-writing", token=HF_TOKEN)
model.to("cpu")
model = torch.compile(model)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it", token=HF_TOKEN)

# FastAPI app
app = FastAPI()


@app.get("/")
def home():
    return {"message": "Welcome to the Writing Scoring Service"}


class GenerateRequest(BaseModel):
    prompt: str


@app.post("/generate")
def generate(request: GenerateRequest):
    instruction = """You are an experienced IELTS Writing examiner specializing in Task 2 assessments. Evaluate candidates' essays based on the four official IELTS Writing Task 2 scoring criteria:
1. Coherence and Cohesion (CC) - Are ideas logically organized and connected with appropriate linking words?
    - Logical structure (LS)
    - Introduction & conclusion present (ICP)
    - Supported main points (SMP)
    - Accurate linking words (ALW)
    - Variety in linking words (VILW)
2. Lexical Resource (LR) - Is the vocabulary diverse, precise, and used appropriately?
    - Varied vocabulary (VV)
    - Accurate spelling & word formation (ASWF)
3. Grammatical Range and Accuracy (GRA) - Is the grammar varied and used correctly?
    - Mix of complex & simple sentences (MCSS)
    - Clear and correct grammar (CCG)
4. Task Response (TA) - Does the essay fully address all parts of the question with relevant, well-developed ideas?
    - Complete response (CR)
    - Clear & comprehensive ideas (CCI)
    - Relevant & specific examples (RSE)
Provide a band score from 4.0 to 9.0 for each criterion."""

    messages = [
        {"role": "system", "content": [{"type": "text", "text": instruction}]},
        {"role": "user", "content": [{"type": "text", "text": request.prompt}]},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to("cpu")
    outputs = model.generate(**inputs, max_new_tokens=250, num_return_sequences=1)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The fine-tuned adapter is expected to introduce its assessment with
    # "###Analysis:"; guard against outputs that lack the marker so the
    # endpoint returns a clean error instead of an unhandled IndexError.
    if "###Analysis:" not in text:
        raise HTTPException(status_code=500, detail="Model output missing '###Analysis:' marker.")
    response = text.split("###Analysis:")[1]
    return {"generated_text": response}

# Run using: uvicorn app:app --host 0.0.0.0 --port 8000
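# Example call against a running instance (a minimal sketch; assumes the
# service was started with the uvicorn command above and is reachable on
# localhost:8000 -- the essay text below is a hypothetical placeholder):
#
#   curl -X POST http://localhost:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "<full IELTS Task 2 essay text>"}'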