import sys
import os

import torch
import gradio as gr
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login, snapshot_download

# Make the shared project utilities importable.
sys.path.append('../source')
import cefr_utils

# CEFR level descriptors, one per level (C2 highest, A1 lowest).
description = {
    "C2": "Has a good command of idiomatic expressions and colloquialisms with awareness of connotative levels of meaning. Can convey finer shades of meaning precisely by using, with reasonable accuracy, a wide range of modification devices. Can backtrack and restructure around a difficulty so smoothly that the interlocutor is hardly aware of it.",
    "C1": "Can express themselves fluently and spontaneously, almost effortlessly. Has a good command of a broad lexical repertoire allowing gaps to be readily overcome with circumlocutions. There is little obvious searching for expressions or avoidance strategies; only a conceptually difficult subject can hinder a natural, smooth flow of language.",
    "B2": "Can interact with a degree of fluency and spontaneity that makes regular interaction, and sustained relationships with users of the target language, quite possible without imposing strain on either party. Can highlight the personal significance of events and experiences, and account for and sustain views clearly by providing relevant explanations and arguments.",
    "B1": "Can communicate with some confidence on familiar routine and non-routine matters related to their interests and professional field. Can exchange, check and confirm information, deal with less routine situations and explain why something is a problem. Can express thoughts on more abstract, cultural topics such as films, books, music, etc.",
    "A2": "Can interact with reasonable ease in structured situations and short conversations, provided the other person helps if necessary. Can manage simple, routine exchanges without undue effort; can ask and answer questions and exchange ideas and information on familiar topics in predictable everyday situations.",
    "A1": "Can interact in a simple way but communication is totally dependent on repetition at a slower rate, rephrasing and repair. Can ask and answer simple questions, initiate and respond to simple statements in areas of immediate need or on very familiar topics.",
}
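
# Illustrative usage (a minimal sketch; not wired into the app below): the
# descriptor for any detected level can be looked up directly, e.g.:
#   print(description["B1"])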
def parse_response(response, marker="A: "):
    """Return the text after the answer marker, or the raw response if absent."""
    if marker in response:
        return response[response.index(marker) + len(marker):]
    return response


# --- CEFR Classifier ---
# Download the evaluator repo (model code + checkpoint) before importing from it.
token = os.environ.get("Token1")  # Hugging Face access token
login(token=token)
snapshot_download(repo_id="shivansarora/cefr-evaluator", local_dir="CEFR_evaluator")

from CEFR_evaluator.level_model import LevelEstimaterClassification

ckpt_path = "CEFR_evaluator/level_estimator.ckpt"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

cefr_model = LevelEstimaterClassification.load_from_checkpoint(
    ckpt_path,
    pretrained_model="bert-base-cased",
    lm_layer=11,
    num_labels=6,
    alpha=0.2,
    strict=False,
    with_loss_weight=False,
    corpus_path=None,
    test_corpus_path=None,
)
state_dict = torch.load(ckpt_path, map_location=device)["state_dict"]
cefr_model.load_state_dict(state_dict, strict=False)  # ignore extra keys
cefr_model.eval().to(device)

cefr_tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
labels = ["A1", "A2", "B1", "B2", "C1", "C2"]


def detect_cefr_level(text: str) -> str:
    """Detect the CEFR level of a given text."""
    inputs = cefr_tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = cefr_model(inputs)
    # The model may return a (loss, predictions, logits) tuple or predictions alone.
    if isinstance(outputs, tuple):
        _, preds, _ = outputs
    else:
        preds = outputs
    return labels[int(preds[0])]


# --- Response Generation ---
def get_response(prompt):
    response_list = cefr_utils.generate(llm_model, llm_tokenizer, [prompt])
    response_str = "".join(response_list) if isinstance(response_list, list) else str(response_list)
    return parse_response(response_str)


bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model_string = "meta-llama/Meta-Llama-3-8B-Instruct"
llm_tokenizer = AutoTokenizer.from_pretrained(model_string)
llm_tokenizer.pad_token = llm_tokenizer.eos_token  # Llama 3 defines no pad token by default
llm_model = AutoModelForCausalLM.from_pretrained(
    model_string, device_map="auto", quantization_config=bnb_config
)

conversation_history = []
MAX_TURNS = 5  # Limit the number of turns to keep the prompt context manageable
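
# Optional smoke test: a minimal sketch to confirm the classifier returns one
# of the six labels before the UI starts. The CEFR_SMOKE_TEST variable and the
# sample sentence are illustrative assumptions, not part of the original app.
if os.environ.get("CEFR_SMOKE_TEST"):
    _sample = "I like to read books about history and science."
    _level = detect_cefr_level(_sample)
    assert _level in labels, f"unexpected label: {_level}"
    print(f"[SMOKE TEST] detect_cefr_level({_sample!r}) -> {_level}")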
def to_gradio_history(history):
    """Convert the internal history to Gradio's (user, model) tuple format."""
    pairs = []
    for turn in history:
        if turn["role"] == "user":
            pairs.append((turn["text"], None))
        else:
            pairs[-1] = (pairs[-1][0], turn["text"])
    return pairs


def chat(user_input):
    global conversation_history

    # 1) Validate input.
    if not user_input.strip():
        return to_gradio_history(conversation_history), "Please enter a message."

    # 2) Detect the CEFR level of the user's message.
    detected_level = detect_cefr_level(user_input)
    print(f"[DEBUG] Detected CEFR = {detected_level} for context: {user_input}")

    # 3) Build the prompt from the detected level and recent turns.
    conversation_history.append({"role": "user", "text": user_input, "CEFR": detected_level})
    recent_turns = conversation_history[-MAX_TURNS * 2:]  # *2 because each turn has a user and a model entry
    item = {"context": recent_turns, "CEFR": detected_level, "response": ""}
    item = cefr_utils.get_CEFR_prompt(item, apply_chat_template=llm_tokenizer.apply_chat_template)
    print(f"[DEBUG] Prompt for response generation: {item['prompt']}")

    # 4) Generate a level-matched response.
    response = get_response(item['prompt'])
    print(f"[{detected_level}] {response}")

    # 5) Update conversation history and clear the textbox.
    conversation_history.append({"role": "model", "text": response, "CEFR": detected_level})
    return to_gradio_history(conversation_history), ""


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Adaptive CEFR chatbot")
    msg = gr.Textbox(placeholder="Type your message here...")
    msg.submit(chat, inputs=msg, outputs=[chatbot, msg])

demo.launch(server_name="0.0.0.0", server_port=7860, share=False, ssr_mode=False)