import json
import io
import base64
import os, re
from langchain_google_vertexai.vision_models import VertexAIVisualQnAChat
from PIL import Image
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from groq import Groq
from flask import Flask, jsonify
from langgraph.prebuilt import create_react_agent

load_dotenv()
# os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

app = Flask(__name__)

'''#initialize groq client 
client = Groq(api_key=groq_api_key)
print(f"client:{client}") '''

static_image_path = os.path.join("images", "page2_print.jfif")

llm = ChatGroq(
    model="meta-llama/llama-4-maverick-17b-128e-instruct",
    temperature=0,
    max_tokens=None,
)

@app.route("/", methods=["GET"])
def analyze_static_image():
    if not os.path.exists(static_image_path):
        return jsonify({"error": f"Image not found"})

    # Load image and convert to base64 string
    image_path = r"images\page2_print.jfif"
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()
    img_base64 = base64.b64encode(image_bytes).decode("utf-8")

    # # Construct image content block
    # image_content_block = {
    #     "type": "image_url",
    #     "image_url": {
    #         # "url": f"data:image/jpeg;base64,{image_data_url}"
    #         "url": f"data:image/jpeg;base64,{img_base64}"
    #     }
    # }

    # SET A SYSTEM PROMPT
    system_prompt = """
        You are an expert in visual scene understanding.
        Your Job is to analyze an image and respond with structured json like This : 
        - Any number of "Sprites": These refer to distinct characters, animals, or objects in the image that are **in front of the background** (e.g., cat, ball, crab, person, etc.).
        {
            "Sprite 1": {
                "name": "Cat",
                "description":"An orange cartoon cat with a cheerful expression, shown jumping playfully."
            },
            "Backdrop":{
                "name":"Beach Scene",
                "description":"A serene beach with sand, blue water, and a clear sky."
            }
        }
        Guidelines:
        - Focus only the images given in Square Shape.
        - Don't Consider Blank areas in Image as "Backdrop".
        - Do NOT classify the background scene as a sprite.
        - All characters or objects placed in the foreground should be "Sprites".
        - Use 'Sprite 1', 'Sprite 2', etc. for character or figures.
        - Use 'Backdrop' for environmental setting or Background behind Sprite.
        - Don't include generic summary or explanation outside the fields.
        Return only valid JSON.
        """

    # Compose message using LangChain's HumanMessage
    content = [
        {
            "type": "text",
            "text": "Analyze the image and describe the backdrops and characters as per instruction."
        },
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{img_base64}"
            }
        }
    ]

    agent = create_react_agent(
        model = llm, 
        tools = [],
        prompt = system_prompt
    )
    
    # agent_executor = AgentExecutor(agent=agent, tools=[])
    # Pass the human prompt + system message
    # messages = [system_prompt, *human_prompt]
    # messages = [system_prompt, *human_prompt]
    
    # call the LLM
    try:
        # response = llm.invoke(messages)
        # response = agent.invoke({"input":human_prompt})
        response = agent.invoke({"messages": [{"role": "user", "content":content}]})
        print(response)
        
        raw_response = response["messages"][-1].content
        
        cleaned_json_str = re.sub(r"^```json\s*|\s*```$", "", raw_response.strip(), flags=re.DOTALL)
        try:
            detected_info = json.loads(cleaned_json_str)
        except json.JSONDecodeError as e:
            # If parsing fails, fallback to raw string or handle error
            print("JSON parsing error:", e)
            detected_info = cleaned_json_str  # or handle as needed
        # Extract the answer text from the response
        # detected_info = response.content
        # detected_info = raw_response
    except Exception as e:
        return jsonify({"error": str(e)}), 500

    # Save the detected information to a JSON file
    result = {
        "image_path": image_path,
        "detected_info": detected_info,
    }

    # Save JSON result
    with open("detected_image_info.json", "w") as f:
        json.dump(result, f, indent=4)
    print("Detection results saved to detected_image_info.json")
    return jsonify(result)

if __name__ == "__main__":
    app.run(debug=True)


'''#build the chat messages
messages = [
    {
        "role":"system",
        "content":"you are an expert image analyzer. Describe backdrops and sprite/character in the image."
    },
    {
        "role":"user",
        "content":[
            {
                "type":"text",
                "text":"Describe image in detail. What backdrops and characters are present ?"
            },
            image_content_block
        ]
    }
]'''

'''# create completion with Groq 
response = client.chat.completions.create(
    model = "meta-llama/llama-4-maverick-17b-128e-instruct",
    messages=messages, 
    temperature=0,
    max_tokens=1024,
    top_p=1,
    stream=False
)
print(f"\n\n========RESPONSE CHOICES : {response}\n\n")
# extract the result 
detected_info = response.choices[0].message.content
print(f"DETECTED_INFO : {detected_info}")


# save output to json
output_data = {
    "image_path":image_path,
    "detected_info":detected_info
}
print(f"output_data : {output_data}")

with open("detected_image_info.json", "w") as f:
    json.dump(output_data, f, indent=4)

print("✅ Detection results saved to detected_image_info.json")'''


# # Define the question to detect objects and characters in the image
# question = "What objects and characters are present in this image?"

# messages = [HumanMessage(content=[image_content_block, question])]
# print(messages)
# Invoke the model with the image and question
# response = llm.invoke({"image": image_content_block, "question": question})