import streamlit as st
from groq import Groq
import base64
from io import BytesIO
from typing import List, Dict
import logging
import os
from PIL import Image
import numpy as np

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Groq client - the API key must come from the environment;
# never hardcode secrets in source
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")
client = Groq(api_key=api_key)

# Convert PIL image to base64 for the Groq API
def image_to_base64(image):
    try:
        if image is None:
            logger.error("No image provided to convert to base64")
            return None
        # Convert numpy array to PIL Image if necessary
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
        logger.info(f"Successfully converted image to base64 (length: {len(img_str)})")
        return img_str
    except Exception as e:
        logger.error(f"Error converting image to base64: {str(e)}")
        return None

# Classify query using Compound Beta
def classify_query(question: str) -> str:
    try:
        logger.info(f"Classifying query: '{question}'")
        response = client.chat.completions.create(
            model="compound-beta",
            messages=[
                {"role": "system", "content": "Determine if the following question requires image processing to answer. Respond with 'yes' or 'no'. Do not include any additional text or explanations."},
                {"role": "user", "content": question}
            ],
            max_tokens=5
        )
        classification = response.choices[0].message.content.strip().lower()
        logger.info(f"Query classification result: {classification}")
        return classification
    except Exception as e:
        logger.error(f"Error in query classification: {str(e)}")
        # Default to text-only on error
        return "no"

# Process image with Llama 4 Maverick
def process_image_with_maverick(image, question: str) -> str:
    try:
        logger.info("Starting image processing with Maverick")
        if image is None:
            logger.error("Image is None, cannot process with Maverick")
            return "Error: No image provided for analysis."

        image_base64 = image_to_base64(image)
        if image_base64 is None:
            logger.error("Failed to convert image to base64")
            return "Error: Could not process the image."

        logger.info("Sending request to Maverick model")
        # Create the message content with both text and image
        content = [
            {"type": "text", "text": question},
            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_base64}"}}
        ]
        logger.info(f"Content types in request: {[item['type'] for item in content]}")

        response = client.chat.completions.create(
            model="meta-llama/llama-4-maverick-17b-128e-instruct",
            messages=[
                {"role": "user", "content": content}
            ],
            max_tokens=200
        )
        maverick_answer = response.choices[0].message.content
        logger.info(f"Successfully received Maverick response of length: {len(maverick_answer)}")
        return maverick_answer
    except Exception as e:
        logger.error(f"Error in image processing with Maverick: {str(e)}")
        return f"Error in image processing: {str(e)}"

# Generate conversational response with DeepSeek-R1-Distill-Llama-70B
def generate_response(prompt: str, history: List[Dict[str, str]]) -> str:
    try:
        logger.info(f"Generating response using DeepSeek for prompt: '{prompt[:50]}...'")
        messages = [{"role": "system", "content": "You are a helpful conversational chatbot. Provide concise, relevant responses based on the input."}]
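        # history holds {"user": ..., "bot": ...} turns built up by
        # process_image_and_question; each turn is replayed as a
        # user/assistant message pair so the model sees the full context.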
        # Add conversation history
        for entry in history:
            messages.append({"role": "user", "content": entry["user"]})
            messages.append({"role": "assistant", "content": entry["bot"]})
        # Add current prompt
        messages.append({"role": "user", "content": prompt})

        logger.info(f"Sending request to DeepSeek with {len(messages)} messages")
        response = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            messages=messages,
            max_tokens=500
        )
        deepseek_response = response.choices[0].message.content.strip()
        logger.info(f"Successfully received DeepSeek response of length: {len(deepseek_response)}")
        return deepseek_response
    except Exception as e:
        logger.error(f"Error in response generation with DeepSeek: {str(e)}")
        return f"Error in response generation: {str(e)}"

# Process image and question
def process_image_and_question(image, question: str, history: List[Dict[str, str]]) -> tuple[str, List[Dict[str, str]]]:
    # Input validation
    if not question or question.strip() == "":
        return "Please provide a question.", history

    logger.info(f"Processing request - Image provided: {image is not None}, Question: '{question}'")

    # Check if image is present and valid
    image_valid = image is not None
    if image_valid:
        logger.info("Image is present, determining if image processing is needed")
        # Classify if we need image processing
        classification = classify_query(question)
        needs_image = classification == "yes"
        logger.info(f"Classification result: {classification}, Will use image: {needs_image}")
    else:
        logger.info("No image provided, proceeding with text-only processing")
        needs_image = False

    # Process based on classification
    if needs_image and image_valid:
        logger.info("Using Maverick for image analysis")
        maverick_answer = process_image_with_maverick(image, question)

        # Check if Maverick processing succeeded
        if maverick_answer and not maverick_answer.startswith("Error"):
            # Use Maverick's answer as context for DeepSeek
            prompt = f"Based on image analysis: {maverick_answer}\nPlease respond to the question: {question}"
            response = generate_response(prompt, history)
            # Only show the final response, not the image analysis
            output = f"**Response**: {response}"
        else:
            # Fall back to text-only if image processing failed
            logger.warning(f"Image processing failed, falling back to text-only: {maverick_answer}")
            response = generate_response(question, history)
            output = f"**Response**: {response}"
    else:
        # Text-only processing
        logger.info("Using DeepSeek for text-only processing")
        response = generate_response(question, history)
        output = f"**Response**: {response}"

    # Update history with just the question and final response
    history.append({"user": question, "bot": response})
    return output, history

# Streamlit app
def main():
    st.set_page_config(page_title="Conversational Image Recognition Chatbot", layout="wide")
    st.title("Conversational Image Recognition Chatbot")
    st.markdown("Ask a question with or without an image. The system will decide whether to analyze the image or respond directly.")
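    # Streamlit reruns this script on every widget interaction;
    # st.session_state persists across reruns, so the conversation survives.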
    # Initialize session state for chat history
    if 'history' not in st.session_state:
        st.session_state.history = []

    # Create a sidebar for image upload and question input
    with st.sidebar:
        st.header("Input")
        uploaded_file = st.file_uploader("Upload an image (optional)", type=["jpg", "jpeg", "png"])
        question = st.text_input("Ask a question")
        submit_button = st.button("Submit")
        clear_button = st.button("Clear History")

        # Display uploaded image if available
        if uploaded_file is not None:
            image = Image.open(uploaded_file)
            # use_container_width replaces the deprecated use_column_width
            st.image(image, caption="Uploaded Image", use_container_width=True)
        else:
            image = None

    # Process question on submit
    if submit_button and question:
        with st.spinner("Processing..."):
            output, st.session_state.history = process_image_and_question(
                image, question, st.session_state.history
            )

            # Add to chat display
            st.session_state.messages = st.session_state.get('messages', [])
            st.session_state.messages.append({"role": "user", "content": question})
            st.session_state.messages.append({"role": "assistant", "content": output})

    # Clear history on clear button
    if clear_button:
        st.session_state.history = []
        st.session_state.messages = []
        st.success("Conversation history cleared!")

    # Display chat history
    st.header("Conversation")
    for message in st.session_state.get('messages', []):
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

if __name__ == "__main__":
    main()
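
# Usage sketch (the filename "app.py" is an assumption, not fixed by this script):
#   export GROQ_API_KEY="<your-groq-api-key>"
#   streamlit run app.py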