import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
import traceback

# --- Configuration ---
# Running a VERY SMALL model LOCALLY on the CPU to avoid API errors.
# EXPECT SLOW PERFORMANCE AND LOWER QUALITY OUTPUT.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Max new tokens for generation
MAX_NEW_TOKENS = 512  # Keep lower for faster generation on CPU

# Default System Prompt - guides the AI's behavior
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant specialized in writing basic Luau code for the Roblox platform. Format Luau code blocks clearly using ```lua ... ```."

# --- Model and Tokenizer Loading (Local) ---
print(f"Attempting to load model LOCALLY: {MODEL_NAME}")
print("This may take some time and use significant RAM...")

model = None
pipe = None
tokenizer = None

try:
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    print("Tokenizer loaded.")

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,  # Use float32 for CPU compatibility
        # device_map="auto" may pick a GPU if one is detected; force CPU with
        # device_map="cpu" if "auto" causes issues on the basic tier.
        device_map="auto",
        # trust_remote_code=True   # Not typically needed for TinyLlama, but add if errors occur
        # low_cpu_mem_usage=True   # Can help when loading larger models
    )
    print("Model loaded successfully.")

    # --- Create the Text Generation Pipeline (Local) ---
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=0.7,  # Standard temperature
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id if tokenizer else 50256,  # Default EOS id for many models
    )
    print("Text generation pipeline created.")

except Exception as e:
    print("-----------------------------------------------------")
    print("ERROR during local model loading or pipeline creation.")
    print(f"Error Type: {type(e).__name__}")
    print(f"Error Details: {e}")
    print("Possible Causes:")
    print("  1. Model is too large for available RAM (16GB limit).")
    print("  2. Missing dependencies (check requirements.txt).")
    print("  3. Issues downloading model files.")
    print("Full Traceback:")
    print(traceback.format_exc())
    print("-----------------------------------------------------")
    # pipe stays None so the chat function below returns an error message

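# --- Optional: prompt assembly via the tokenizer's chat template ---
# Minimal alternative sketch to the manual string assembly in luau_coder_local()
# below. It assumes a transformers version with chat-template support (~4.34+)
# and that the checkpoint ships a template (the TinyLlama chat checkpoint does).
# Not wired into the app by default.
def build_prompt_with_template(tok, message, history, system_prompt=DEFAULT_SYSTEM_PROMPT):
    # Convert Gradio-style history ([[user, assistant], ...]) into role dicts
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # Returns a single prompt string ending with the assistant turn marker
    return tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
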
# --- Chatbot Logic Function (Local Execution) ---
def luau_coder_local(message: str, history: list[list[str]], system_prompt: str = DEFAULT_SYSTEM_PROMPT):
    # Check if the model/pipeline failed to load
    if pipe is None or tokenizer is None:
        return "Error: AI model could not be loaded locally. Check Space logs for details (likely memory limit or dependency issue)."

    # --- Assemble the Prompt for TinyLlama Chat (Zephyr-style chat format) ---
    # Format: <|system|>\nPROMPT\n<|user|>\nPROMPT\n<|assistant|>\nRESPONSE
    prompt_list = []
    current_system_prompt = system_prompt or DEFAULT_SYSTEM_PROMPT

    # Add system prompt first
    if current_system_prompt:
        prompt_list.append(f"<|system|>\n{current_system_prompt}")

    # Add history
    for user_msg, assistant_msg in history:
        prompt_list.append(f"<|user|>\n{user_msg}")
        if assistant_msg:  # Ensure assistant message exists
            prompt_list.append(f"<|assistant|>\n{assistant_msg}")

    # Add the current user message
    prompt_list.append(f"<|user|>\n{message}")

    # Add the final model turn marker, signaling the model to start generating
    prompt_list.append("<|assistant|>")  # Model generates after this

    # Combine the list into a single string prompt
    prompt = "\n".join(prompt_list)
    print(f"\n--- Generating Response Locally (prompt length: {len(prompt)}) ---")
    # print(f"Prompt Content:\n{prompt}")  # Uncomment for debugging

    try:
        # Generate response using the local pipeline
        # NOTE: This will be SLOW on CPU
        sequences = pipe(prompt)
        generated_text = sequences[0]['generated_text']
        print(f"Raw generated text received (length: {len(generated_text)})")

        # Extract the response part after the final marker
        response_start_index = generated_text.rfind('<|assistant|>')
        if response_start_index != -1:
            response = generated_text[response_start_index + len('<|assistant|>'):].strip()
        else:
            # Fallback if the marker wasn't found (shouldn't happen often)
            print("Warning: Could not find '<|assistant|>' marker in response.")
            response = generated_text  # Use the whole text; may include the prompt

        # Clean up the response text - remove the EOS token
        response_cleaned = response.replace(tokenizer.eos_token, "").strip()

        # Try to format as a Lua code block if needed
        if "```lua" not in response_cleaned and "```" in response_cleaned:
            response_formatted = response_cleaned.replace("```", "```lua", 1)
        elif "```" not in response_cleaned:
            # Basic check if it looks like code, then wrap it
            if any(kw in response_cleaned for kw in ["function", "local ", "game.", "script.", "Instance.new", "Vector3.new", ":Connect", "wait("]):
                response_formatted = f"```lua\n{response_cleaned}\n```"
            else:
                response_formatted = response_cleaned  # Assume plain text
        else:
            response_formatted = response_cleaned  # Assume already formatted correctly

        # Handle cases where the response might be empty after cleaning
        if not response_cleaned:
            # Return the raw generated text if cleaning left nothing
            # (maybe the EOS token was the only thing generated)
            if generated_text and generated_text != prompt:
                return generated_text.replace(tokenizer.eos_token, "").strip()
            else:
                return "Sorry, the AI generated an empty response."

        return response_formatted

    # Catch potential errors during generation
    except Exception as e:
        error_type = type(e).__name__
        print(f"Error during local generation ({error_type}): {e}")
        print(traceback.format_exc())  # Log the full error for debugging
        return f"Sorry, an unexpected error occurred during local generation ({error_type})."

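# Quick local smoke test of the chat function without the UI. The SMOKE_TEST
# environment variable is a hypothetical switch added here for illustration;
# normal Space startup is unaffected unless it is set to "1".
if os.environ.get("SMOKE_TEST") == "1":
    print(luau_coder_local("Make a part print 'Hi!' when touched", []))
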
# --- Gradio Interface Setup ---
print("Setting up Gradio Interface...")

chatbot_interface = gr.ChatInterface(
    fn=luau_coder_local,  # Connects the chat input to our LOCAL function
    title="🐌 Luau Code Assistant (Roblox - Local CPU Version)",  # Updated title
    description=f"Ask me for basic Luau code. Responses will be SLOW. (Powered by {MODEL_NAME} running locally)",  # Updated description
    chatbot=gr.Chatbot(height=600, render_markdown=True, bubble_full_width=False),
    textbox=gr.Textbox(placeholder="e.g., 'Make a part print Hello'", container=False, scale=7),
    additional_inputs=[gr.Textbox(value=DEFAULT_SYSTEM_PROMPT, label="System Prompt")],
    examples=[
        ["Make a part print 'Hi!' when touched"],
        ["How to make a variable in Luau?"],
        ["Change part color to red"],
    ],  # Simplified examples for a smaller model
    theme="soft",
)
print("Gradio Interface configured.")

# --- Launch the Application ---
if __name__ == "__main__":
    print("Launching Gradio app...")
    # queue() is still important for local generation since it can take time
    chatbot_interface.queue().launch()
    print("Gradio app should be running now.")
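# A minimal requirements.txt for this Space would likely need (an assumption,
# not pinned by this script):
#   gradio
#   transformers
#   torch
#   accelerate  # required when passing device_map="auto" to from_pretrained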