import streamlit as st
import torch
import yaml
from transformers import AutoTokenizer, AutoModelForCausalLM

# Set page config first
st.set_page_config(page_title="Coding Multiple Choice Q&A", layout="wide")

# Use the specified model
MODEL_PATH = "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4"


from coding_examples import CODING_EXAMPLES_BY_CATEGORY

# Flatten examples
CODING_EXAMPLES = []
for category, examples in CODING_EXAMPLES_BY_CATEGORY.items():
    for example in examples:
        example["category"] = category
        CODING_EXAMPLES.append(example)

class PromptCreator:
    def __init__(self, prompt_type="yaml"):
        self.prompt_type = prompt_type
        
    def format_choices(self, choices):
        if not choices: return ""
        if isinstance(choices, str): return choices
        return "\n".join(f"{chr(65 + i)}. {choice}" for i, choice in enumerate(choices))

    def get_max_letter(self, choices):
        if not choices: return "A"
        if isinstance(choices, str):
            num_choices = len([line for line in choices.split("\n") if line.strip()])
            return "A" if num_choices == 0 else chr(64 + num_choices)
        return chr(64 + len(choices))

    def create_inference_prompt(self, question, choices):
        if not question: return ""
        formatted_choices = self.format_choices(choices)
        max_letter = self.get_max_letter(choices)
        
        return f"""Question: {question}

Choices:
{formatted_choices}

Analyze this question step-by-step and provide a detailed explanation.
Your response MUST be in YAML format as follows:

understanding: |
  <your understanding of what the question is asking>
analysis: |
  <your analysis of each option>
reasoning: |
  <your step-by-step reasoning process>
conclusion: |
  <your final conclusion>
answer: <single letter A through {max_letter}>

The answer field MUST contain ONLY a single character letter."""

class QwenModelHandler:
    def __init__(self, model_path):
        with st.spinner("Loading model..."):
            try:
                # Explicitly disable quantization options
                self.tokenizer = AutoTokenizer.from_pretrained(
                    model_path,
                    trust_remote_code=True
                )
                
                # Load with standard precision on CPU
                from peft import PeftModel
                from transformers import AutoModelForCausalLM

                base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-1.5B-Instruct")
                self.model = PeftModel.from_pretrained(base_model, "tuandunghcmut/Qwen25_Coder_MultipleChoice_v4")
                # self.model = AutoModelForCausalLM.from_pretrained(
                #     model_path,
                #     torch_dtype=torch.float32,
                #     device_map="cpu",
                #     trust_remote_code=True,
                #     # Explicitly disable quantization
                #     load_in_8bit=False,
                #     load_in_4bit=False
                # )
                
                if self.tokenizer.pad_token is None and self.tokenizer.eos_token is not None:
                    self.tokenizer.pad_token = self.tokenizer.eos_token
            except Exception as e:
                st.error(f"Error: {str(e)}")
                raise

    def generate_response(self, prompt, max_tokens=512, temperature=0.7, 
                          top_p=0.9, top_k=50, repetition_penalty=1.0, 
                          do_sample=True):
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt")
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                    repetition_penalty=repetition_penalty,
                    do_sample=do_sample,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            if prompt in response:
                response = response[len(prompt):].strip()
            return response
        except Exception as e:
            return f"Error during generation: {str(e)}"

# Create prompt without requiring model
def create_prompt(question, choices):
    creator = PromptCreator(prompt_type="yaml")
    return creator.create_inference_prompt(question, choices)

def main():
    # Initialize session state
    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False
    if 'model_output' not in st.session_state:
        st.session_state.model_output = ""
    
    st.title("Coding Multiple Choice Q&A with YAML Reasoning")
    st.warning("⚠️ Running on CPU - model loading and inference will be slow")
    
    # Two-column layout
    col1, col2 = st.columns([4, 6])
    
    with col1:
        st.subheader("Examples")
        
        # Category selector
        category_options = ["All Categories"] + list(CODING_EXAMPLES_BY_CATEGORY.keys())
        selected_category = st.selectbox("Select a category", category_options)
        
        # Example selector
        if selected_category == "All Categories":
            example_options = [f"Example {i+1}: {ex['question']}" for i, ex in enumerate(CODING_EXAMPLES)]
        else:
            example_options = []
            start_idx = 0
            for cat, examples in CODING_EXAMPLES_BY_CATEGORY.items():
                if cat == selected_category:
                    example_options = [f"Example {start_idx+i+1}: {ex['question']}" for i, ex in enumerate(examples)]
                    break
                start_idx += len(examples)
        
        selected_example = st.selectbox("Select an example question", [""] + example_options)
        
        # Process selected example
        if selected_example:
            try:
                example_idx = int(selected_example.split(":")[0].split()[-1]) - 1
                example = CODING_EXAMPLES[example_idx]
                question = example["question"]
                choices = "\n".join(f"{chr(65+i)}. {choice}" for i, choice in enumerate(example["choices"]))
            except:
                question = ""
                choices = ""
        else:
            question = ""
            choices = ""
        
        st.subheader("Your Question")
        question_input = st.text_area("Question", value=question, height=100, 
                                     placeholder="Enter your coding question here...")
        
        choices_input = st.text_area("Choices", value=choices, height=150,
                                    placeholder="Enter each choice on a new line...")
        
        # Model Parameters
        temperature = st.slider("Temperature", 0.0, 1.0, 0.7, 0.1)
        
        with st.expander("Advanced Parameters"):
            max_tokens = st.slider("Max Tokens", 128, 1024, 512, 128)
            top_p = st.slider("Top-p", 0.1, 1.0, 0.9, 0.1)
            top_k = st.slider("Top-k", 1, 100, 50, 10)
            repetition_penalty = st.slider("Repetition Penalty", 1.0, 2.0, 1.1, 0.1)
            do_sample = st.checkbox("Enable Sampling", True)
        
        # Load model button
        if not st.session_state.model_loaded:
            if st.button("Load Model", type="primary"):
                try:
                    st.session_state.model_handler = QwenModelHandler(MODEL_PATH)
                    st.session_state.prompt_creator = PromptCreator("yaml")
                    st.session_state.model_loaded = True
                    # st.experimental_rerun()
                    st.rerun()
                except Exception as e:
                    st.error(f"Failed to load model: {str(e)}")
        
        # Generate button
        if st.session_state.model_loaded:
            generate_button = st.button("Generate Response", type="primary")
        else:
            st.info("Please load the model first")
            generate_button = False
    
    with col2:
        # Show prompt
        st.subheader("Model Input")
        if question_input and choices_input:
            prompt = create_prompt(question_input, choices_input)
            st.text_area("Prompt", value=prompt, height=200, disabled=True)
        else:
            st.text_area("Prompt", value="", height=200, disabled=True)
        
        # Results Area
        st.subheader("Model Response")
        st.text_area("Response", value=st.session_state.model_output, height=300)
        
        # YAML parsing
        if st.session_state.model_output:
            try:
                with st.expander("Raw Output"):
                    st.code(st.session_state.model_output, language="yaml")
                
                try:
                    yaml_data = yaml.safe_load(st.session_state.model_output)
                    with st.expander("Parsed Output", expanded=True):
                        st.json(yaml_data)
                except:
                    st.warning("Could not parse output as YAML")
            except:
                pass
    
    # Handle generation
    if generate_button and st.session_state.model_loaded:
        if not question_input or not choices_input:
            st.error("Please provide both a question and choices.")
        else:
            try:
                prompt = st.session_state.prompt_creator.create_inference_prompt(question_input, choices_input)
                with st.spinner("Generating response..."):
                    response = st.session_state.model_handler.generate_response(
                        prompt=prompt,
                        max_tokens=max_tokens,
                        temperature=temperature,
                        top_p=top_p,
                        top_k=top_k,
                        repetition_penalty=repetition_penalty,
                        do_sample=do_sample
                    )
                    st.session_state.model_output = response
                    st.experimental_rerun()
            except Exception as e:
                st.error(f"Error generating response: {e}")

if __name__ == "__main__":
    main()