Scratch_Vision_Game_test_dup

Sleeping

App Files Files Community

prthm11 committed on Sep 22

Commit

4805ab5

verified ·

1 Parent(s): 9a0e352

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -519

app.py CHANGED Viewed

@@ -2,44 +2,32 @@ from flask import Flask, request, jsonify, render_template, send_from_directory,
 import cv2, json,base64,io,os,tempfile,logging, re
 import numpy as np
 from unstructured.partition.pdf import partition_pdf
-from PIL import Image
-# from imutils.perspective import four_point_transform
 from dotenv import load_dotenv
-import pytesseract
 from werkzeug.utils import secure_filename
 from langchain_groq import ChatGroq
 from langgraph.prebuilt import create_react_agent
 from pdf2image import convert_from_path, convert_from_bytes
-from concurrent.futures import ThreadPoolExecutor
-from pdf2image.exceptions import PDFInfoNotInstalledError
 from typing import Dict, TypedDict, Optional, Any
 from langgraph.graph import StateGraph, END
 import uuid
 import shutil, time, functools
-from langchain_experimental.open_clip.open_clip import OpenCLIPEmbeddings
-from langchain_core.utils.utils import secret_from_env
-# from matplotlib.offsetbox import OffsetImage, AnnotationBbox
 from io import BytesIO
 from pathlib import Path
 import os
 from utils.block_relation_builder import block_builder, separate_scripts, transform_logic_to_action_flow, analyze_opcode_counts
-from langchain.chat_models import ChatOpenAI
-from langchain_openai import ChatOpenAI
-from pydantic import Field, SecretStr
 from difflib import get_close_matches
 import torch
 from transformers import AutoImageProcessor, AutoModel
-import faiss
-from sentence_transformers import SentenceTransformer
-# image tools
-from PIL import Image, ImageOps, ImageEnhance
 import cv2
 # hashing & image-match
 from imagededup.methods import PHash
 from image_match.goldberg import ImageSignature
 # --- Config (tune threads as needed) ---
 # DINOv2 model id
 DINOV2_MODEL = "facebook/dinov2-small"
@@ -63,7 +51,6 @@ dinov2_model.eval()
 phash = PHash()
 gis = ImageSignature()
 def log_execution_time(func):
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
@@ -98,12 +85,6 @@ llm = ChatGroq(
 app = Flask(__name__)
-# ============================== #
-#     TESSERACT CONFIGURATION    #
-# ============================== #
-pytesseract.pytesseract.tesseract_cmd = (r'/usr/bin/tesseract')
-# poppler_path = r"C:\poppler\Library\bin"
 backdrop_images_path = r"app\blocks\Backdrops"
 sprite_images_path = r"app\blocks\sprites"
 code_blocks_image_path = r"app\blocks\code_blocks"
@@ -119,11 +100,6 @@ SPRITE_DIR       = BLOCKS_DIR / "sprites"
 CODE_BLOCKS_DIR  = BLOCKS_DIR / "code_blocks"
 # === new: outputs rooted under BASE_DIR ===
 OUTPUT_DIR       = BASE_DIR / "outputs"
-INDEX_PATH = os.path.join(BLOCKS_DIR, "faiss_index.bin")
-PATHS_JSON_PATH = os.path.join(BLOCKS_DIR, "image_paths.json")
-# DETECTED_IMAGE_DIR = OUTPUT_DIR / "DETECTED_IMAGE"
-# SCANNED_IMAGE_DIR  = OUTPUT_DIR / "SCANNED_IMAGE"
-# JSON_DIR           = OUTPUT_DIR / "EXTRACTED_JSON"
 # Global variables to hold the model and index, loaded only once.
 MODEL = None
@@ -139,9 +115,6 @@ for d in (
     SPRITE_DIR,
     CODE_BLOCKS_DIR,
     OUTPUT_DIR,
-    # DETECTED_IMAGE_DIR,
-    # SCANNED_IMAGE_DIR,
-    # JSON_DIR,
 ):
     d.mkdir(parents=True, exist_ok=True)
@@ -195,108 +168,19 @@ RULES:
 4. If you can't find the data, use "Unknown" for name_variable and "No pseudocode found" for pseudocode
 """
-# debugger and resolver agent for Scratch 3.0
 # Main agent of the system for Scratch 3.0
 agent = create_react_agent(
     model=llm,
     tools=[], # No specific tools are defined here, but could be added later
     prompt=SYSTEM_PROMPT
 )
-# agent_2 = create_react_agent(
-#     model=llm2,
-#     tools=[], # No specific tools are defined here, but could be added later
-#     prompt=SYSTEM_PROMPT
-# )
 agent_json_resolver = create_react_agent(
     model=llm,
     tools=[], # No specific tools are defined here, but could be added later
     prompt=SYSTEM_PROMPT_JSON_CORRECTOR
 )
-def load_model_and_index():
-    """
-    Loads the SentenceTransformer model, FAISS index, and image paths into global variables.
-    This function is called once on the first run to avoid reloading heavy assets.
-    """
-    global MODEL, FAISS_INDEX, IMAGE_PATHS
-    # This check ensures we only load everything once
-    if MODEL is None:
-        logger.info("Loading CLIP model 'clip-ViT-L-14' for the first time...")
-        MODEL = SentenceTransformer('clip-ViT-L-14')
-        logger.info("✅ CLIP model loaded.")
-        logger.info(f"Loading FAISS index from: {INDEX_PATH}")
-        FAISS_INDEX = faiss.read_index(INDEX_PATH)
-        logger.info("✅ FAISS index loaded.")
-        logger.info(f"Loading image paths from: {PATHS_JSON_PATH}")
-        with open(PATHS_JSON_PATH, "r") as f:
-            IMAGE_PATHS = json.load(f)
-        logger.info("✅ Image paths loaded.")
-import torch
-from transformers import AutoImageProcessor, AutoModel
-import numpy as np
-from PIL import Image
-from pathlib import Path
-from io import BytesIO
-import json
-# def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
-#     """
-#     Lazy-initialize DINOv2 processor & model (call once before embedding).
-#     """
-#     global _dinov2_processor, _dinov2_model
-#     if _dinov2_processor is None or _dinov2_model is None:
-#         _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
-#         _dinov2_model = AutoModel.from_pretrained(model_name)
-#         _dinov2_model.eval().to(device)
-# def embed_bytesio_list(bytesio_list, batch_size: int = 8):
-#     """
-#     Accepts a list of BytesIO objects (each contains an image).
-#     Returns: np.ndarray shape (N, D) of L2-normalized embeddings (dtype float32).
-#     """
-#     if _dinov2_processor is None or _dinov2_model is None:
-#         init_dinov2()
-#     imgs = []
-#     for b in bytesio_list:
-#         with Image.open(b) as original_img:
-#             # Create a new image with a white background in RGB mode
-#             final_img = Image.new("RGB", original_img.size, (255, 255, 255))
-#             # Paste the original image onto the white background, using the alpha channel as a mask if it exists
-#             if original_img.mode == 'RGBA':
-#                 final_img.paste(original_img, mask=original_img.split()[-1])
-#             else:
-#                 final_img.paste(original_img)
-#             imgs.append(final_img.copy())
-#     embs = []
-#     for i in range(0, len(imgs), batch_size):
-#         batch = imgs[i: i + batch_size]
-#         inputs = _dinov2_processor(images=batch, return_tensors="pt")
-#         inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
-#         with torch.no_grad():
-#             out = _dinov2_model(**inputs)
-#             cls = out.last_hidden_state[:, 0, :]  # (B, D)
-#             cls = torch.nn.functional.normalize(cls, p=2, dim=1)
-#             embs.append(cls.cpu().numpy())
-#     if not embs:
-#         return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)
-#     return np.vstack(embs).astype(np.float32)
-# def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
-#     """
-#     Row-wise L2 normalization for numpy arrays.
-#     """
-#     norm = np.linalg.norm(a, axis=1, keepdims=True)
-#     return a / (norm + eps)
 # -----------------------
 #  SERIALIZABLE HELPER
 # -----------------------
@@ -457,8 +341,7 @@ def cosine_similarity(a, b):
 from collections import defaultdict
 import math
-def choose_top_candidates(embedding_results, phash_results, imgmatch_results, top_k=10,
-                          method_weights=(0.5, 0.3, 0.2), verbose=True):
     """
     embedding_results: list of (path, emb_sim) where emb_sim roughly in [-1,1] (we'll clamp to 0..1)
     phash_results: list of (path, hamming, ph_sim) where ph_sim in [0,1]
@@ -600,8 +483,7 @@ def is_subpath(path: str, base: str) -> bool:
     try:
         p = os.path.normpath(os.path.abspath(path))
         b = os.path.normpath(os.path.abspath(base))
-        if os.name == "nt":
-            p = p.lower(); b = b.lower()
         return os.path.commonpath([p, b]) == b
     except Exception:
         return False
@@ -619,7 +501,6 @@ def _load_block_catalog(block_type: str) -> Dict:
         catalog = json.loads(text)       # will raise JSONDecodeError if malformed
         logger.info(f"Successfully loaded block catalog from {catalog_path}")
         return catalog
     except FileNotFoundError:
         logger.error(f"Error: Block catalog file not found at {catalog_path}")
     except json.JSONDecodeError as e:
@@ -634,8 +515,7 @@ def get_block_by_opcode(catalog_data: dict, opcode: str) -> dict | None:
     Returns the block dict or None if not found.
     """
     for block in catalog_data["blocks"]:
-        if block.get("op_code") == opcode:
-            return block
     return None
 # Helper function to find a block in all catalogs by opcode
@@ -646,8 +526,7 @@ def find_block_in_all(opcode: str, all_catalogs: list[dict]) -> dict | None:
     """
     for catalog in all_catalogs:
         blk = get_block_by_opcode(catalog, opcode)
-        if blk is not None:
-            return blk
     return None
 def variable_intialization(project_data):
@@ -655,24 +534,18 @@ def variable_intialization(project_data):
     Updates variable and broadcast definitions in a Scratch project JSON,
     populating the 'variables' and 'broadcasts' sections of the Stage target
     and extracting initial values for variables.
-    Args:
-        project_data (dict): The loaded JSON data of the Scratch project.
-    Returns:
-        dict: The updated project JSON data.
     """
     stage_target = None
     for target in project_data['targets']:
-        if target.get('isStage'):
             stage_target = target
             break
     if stage_target is None:
         print("Error: Stage target not found in the project data.")
         return project_data
     # Ensure 'variables' and 'broadcasts' exist in the Stage target
     if "variables" not in stage_target:
         stage_target["variables"] = {}
@@ -741,14 +614,9 @@ def deduplicate_variables(project_data):
     """
     Removes duplicate variable entries in the 'variables' dictionary of the Stage target,
     prioritizing entries with non-empty values.
-    Args:
-        project_data (dict): The loaded JSON data of the Scratch project.
-    Returns:
-        dict: The updated project JSON data with deduplicated variables.
     """
     stage_target = None
     for target in project_data['targets']:
         if target.get('isStage'):
@@ -762,36 +630,17 @@ def deduplicate_variables(project_data):
     if "variables" not in stage_target:
         return project_data # No variables to deduplicate
-    # Use a temporary dictionary to store the preferred variable entry by name
-    # Format: {variable_name: [variable_id, variable_name, variable_value]}
     resolved_variables = {}
     for var_id, var_info in stage_target["variables"].items():
         var_name = var_info[0]
         var_value = var_info[1]
-        if var_name not in resolved_variables:
-            # If the variable name is not yet seen, add it
-            resolved_variables[var_name] = [var_id, var_name, var_value]
-        else:
-            # If the variable name is already seen, decide which one to keep
-            existing_id, existing_name, existing_value = resolved_variables[var_name]
-            # Prioritize the entry with a non-empty value
-            if var_value != "" and existing_value == "":
-                resolved_variables[var_name] = [var_id, var_name, var_value]
-            # If both have non-empty values, or both are empty, keep the current one (arbitrary choice, but consistent)
-            # The current logic will effectively keep the last one encountered that has a value,
-            # or the very last one if all are empty.
-            elif var_value != "" and existing_value != "":
-                 # If there are multiple non-empty values for the same variable name
-                 # this keeps the one from the most recent iteration.
-                 # For the given example, this will correctly keep "5".
-                resolved_variables[var_name] = [var_id, var_name, var_value]
-            elif var_value == "" and existing_value == "":
-                # If both are empty, just keep the current one (arbitrary)
-                resolved_variables[var_name] = [var_id, var_name, var_value]
     # Reconstruct the 'variables' dictionary using the resolved entries
     new_variables_dict = {}
@@ -800,9 +649,7 @@ def deduplicate_variables(project_data):
         var_name_to_keep = var_data[1]
         var_value_to_keep = var_data[2]
         new_variables_dict[var_id_to_keep] = [var_name_to_keep, var_value_to_keep]
     stage_target["variables"] = new_variables_dict
     return project_data
 def variable_adder_main(project_data):
@@ -819,81 +666,81 @@ def variable_adder_main(project_data):
         print(f"Error error in the variable initialization opcodes: {e}")
-# --- Global variable for the block catalog ---
-ALL_SCRATCH_BLOCKS_CATALOG = {}
-BLOCK_CATALOG_PATH = "blocks" # Define the path to your JSON file
-HAT_BLOCKS_PATH = "hat_blocks" # Path to the hat blocks JSON file
-STACK_BLOCKS_PATH = "stack_blocks" # Path to the stack blocks JSON file
-REPORTER_BLOCKS_PATH = "reporter_blocks" # Path to the reporter blocks JSON file
-BOOLEAN_BLOCKS_PATH = "boolean_blocks" # Path to the boolean blocks JSON file
-C_BLOCKS_PATH = "c_blocks" # Path to the C blocks JSON file
-CAP_BLOCKS_PATH = "cap_blocks" # Path to the cap blocks JSON file
-# Load the block catalogs from their respective JSON files
-hat_block_data = _load_block_catalog(HAT_BLOCKS_PATH)
-hat_description = hat_block_data["description"]
-#hat_description = hat_block_data.get("description", "No description available")
-# hat_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in hat_block_data["blocks"]])
-hat_opcodes_functionalities = "\n".join([
-    # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    for block in hat_block_data.get("blocks", [])
-]) if isinstance(hat_block_data.get("blocks"), list) else "    No blocks information available."
-#hat_opcodes_functionalities = os.path.join(BLOCKS_DIR, "hat_blocks.txt")
-print("Hat blocks loaded successfully.", hat_description)
-boolean_block_data = _load_block_catalog(BOOLEAN_BLOCKS_PATH)
-boolean_description = boolean_block_data["description"]
-# boolean_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in boolean_block_data["blocks"]])
-boolean_opcodes_functionalities = "\n".join([
-    # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    for block in boolean_block_data.get("blocks", [])
-]) if isinstance(boolean_block_data.get("blocks"), list) else "    No blocks information available."
-#boolean_opcodes_functionalities = os.path.join(BLOCKS_DIR, "boolean_blocks.txt")
-c_block_data = _load_block_catalog(C_BLOCKS_PATH)
-c_description = c_block_data["description"]
-# c_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in c_block_data["blocks"]])
-c_opcodes_functionalities = "\n".join([
-    # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    for block in c_block_data.get("blocks", [])
-]) if isinstance(c_block_data.get("blocks"), list) else "    No blocks information available."
-#c_opcodes_functionalities = os.path.join(BLOCKS_DIR, "c_blocks.txt")
-cap_block_data = _load_block_catalog(CAP_BLOCKS_PATH)
-cap_description = cap_block_data["description"]
-# cap_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in cap_block_data["blocks"]])
-cap_opcodes_functionalities = "\n".join([
-    # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    for block in cap_block_data.get("blocks", [])
-]) if isinstance(cap_block_data.get("blocks"), list) else "    No blocks information available."
-#cap_opcodes_functionalities = os.path.join(BLOCKS_DIR, "cap_blocks.txt")
-reporter_block_data = _load_block_catalog(REPORTER_BLOCKS_PATH)
-reporter_description = reporter_block_data["description"]
-# reporter_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in reporter_block_data["blocks"]])
-reporter_opcodes_functionalities = "\n".join([
-    # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    for block in reporter_block_data.get("blocks", [])
-]) if isinstance(reporter_block_data.get("blocks"), list) else "    No blocks information available."
-#reporter_opcodes_functionalities = os.path.join(BLOCKS_DIR, "reporter_blocks.txt")
-stack_block_data = _load_block_catalog(STACK_BLOCKS_PATH)
-stack_description = stack_block_data["description"]
-# stack_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in stack_block_data["blocks"]])
-stack_opcodes_functionalities = "\n".join([
-    # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
-    for block in stack_block_data.get("blocks", [])
-]) if isinstance(stack_block_data.get("blocks"), list) else "    No blocks information available."
-#stack_opcodes_functionalities = os.path.join(BLOCKS_DIR, "stack_blocks.txt")
-# This makes ALL_SCRATCH_BLOCKS_CATALOG available globally
-ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
 def extract_json_from_llm_response(raw_response: str) -> dict:
     """
@@ -1288,19 +1135,6 @@ end
         print(f"result:\n\n {result}")
     except json.JSONDecodeError as error_json:
-        # If JSON parsing fails, use the json resolver agent
-        # correction_prompt = (
-        #     "Your task is to correct the provided JSON string to ensure it is **syntactically perfect and adheres strictly to JSON rules**.\n"
-        #     "It must be a JSON object with `refined_logic` (string) and `block_relationships` (array of objects).\n"
-        #     f"- **Error Details**: {error_json}\n\n"
-        #     "**Strict Instructions for your response:**\n"
-        #     "1. **ONLY** output the corrected JSON. Do not include any other text or explanations.\n"
-        #     "2. Ensure all keys and string values are enclosed in **double quotes**. Escape internal quotes (`\\`).\n"
-        #     "3. No trailing commas. Correct nesting.\n\n"
-        #     "Here is the problematic JSON string to correct:\n"
-        #     f"```json\n{llm_output_raw}\n```\n"
-        #     "Corrected JSON:\n"
-        # )
         correction_prompt = f"""
 Fix this malformed response and return only the corrected JSON:
@@ -1317,7 +1151,6 @@ Extract the sprite name and pseudocode, then return in this exact format:
         try:
             correction_response = agent_json_resolver.invoke({"messages": [{"role": "user", "content": correction_prompt}]})
             corrected_output = extract_json_from_llm_response(correction_response['messages'][-1].content)
-            #block_relationships = corrected_output.get("block_relationships", [])
             result = corrected_output
             print(f"result:\n\n {result}")
         except Exception as e_corr:
@@ -1328,8 +1161,6 @@ Extract the sprite name and pseudocode, then return in this exact format:
     state["pseudo_code"] = result
     state["temp_pseudo_code"] += [result]
     Data = state["temp_pseudo_code"]
-    # with open("debug_state.json", "w", encoding="utf-8") as f:
-    #     json.dump(state, f, indent=2, ensure_ascii=False)
     print(f"[OVREALL REFINED PSEUDO CODE LOGIC]: {result}")
     print(f"[OVREALL LISTS OF LOGICS]: {Data}")
     logger.info("Plan refinement and block relation analysis completed for all plans.")
@@ -1464,14 +1295,7 @@ def overall_block_builder_node_2(state: GameState):
                     logger.info(f"Action blocks added for sprite '{sprite_name}' by OverallBlockBuilderNode.")
                 except Exception as e:
                     logger.error(f"Error generating blocks for sprite '{sprite_name}': {e}")
-                    # Consider adding more specific error handling here if a malformed output
-                    # from block_builder should cause a specific state change, but generally
-                    # avoid nulling the entire project_json.
-    state["project_json"] = project_json
-    # with open("debug_state.json", "w", encoding="utf-8") as f:
-    #     json.dump(state, f, indent=2, ensure_ascii=False)
     return state
 # Node 6: variable adder node
@@ -1553,15 +1377,11 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
         try:
             elements = partition_pdf(
-                # filename=str(pdf_path), # partition_pdf might expect a string
-                file=pdf_stream, # 'file=', inplace of 'filename'
                 strategy="hi_res",
                 extract_image_block_types=["Image"],
                 hi_res_model_name="yolox",
                 extract_image_block_to_payload=True,
-                # ocr_languages=ocr_lang,
-                # extract_images_in_pdf=False,
-                # extract_image_block_output_dir=r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\pdf_output"
             )
             print(f"ELEMENTS")
         except Exception as e:
@@ -1578,260 +1398,19 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
                 continue
             manipulated_json[f"Sprite {sprite_count}"] = {
-                # "id":auto_id,
-                # "name": name,
                 "base64": el["metadata"]["image_base64"],
                 "file-path": pdf_id,
-                # "description": description
             }
             sprite_count += 1
         return manipulated_json
     except Exception as e:
         raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
-# def similarity_matching(sprites_data: str, project_folder: str) -> str:
-#     logger.info("🔍 Running similarity matching…")
-#     os.makedirs(project_folder, exist_ok=True)
-#     # ----------------------------------------
-#     # CHANGED: define normalized base-paths so startswith() checks work
-#     backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
-#     sprite_base_path   = os.path.normpath(str(SPRITE_DIR))
-#     code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
-#     # ----------------------------------------
-#     project_json_path = os.path.join(project_folder, "project.json")
-#     # ==============================
-#     #    READ SPRITE METADATA
-#     # ==============================
-#     # with open(input_json_path, 'r') as f:
-#     #     sprites_data = json.load(f)
-#     sprite_ids, sprite_base64 = [], []
-#     for sid, sprite in sprites_data.items():
-#         sprite_ids.append(sid)
-#         # texts.append("This is " + sprite.get("description", sprite.get("name", "")))
-#         sprite_base64.append(sprite["base64"])
-#     sprite_images_bytes = []
-#     for b64 in sprite_base64:
-#         img = Image.open(BytesIO(base64.b64decode(b64.split(",")[-1]))).convert("RGB")
-#         buffer = BytesIO()
-#         img.save(buffer, format="PNG")
-#         buffer.seek(0)
-#         sprite_images_bytes.append(buffer)
-#     # =========================================
-#     #  Build the list of all candidate images
-#     # =========================================
-#     folder_image_paths = [
-# BACKDROP_DIR/"Baseball 2.sb3"/"7be1f5b3e682813dac1f297e52ff7dca.png",
-# BACKDROP_DIR/"Beach Malibu.sb3"/"050615fe992a00d6af0e664e497ebf53.png",
-# BACKDROP_DIR/"Bedroom 3.sb3"/"8cc0b88d53345b3e337e8f028a32a4e7.png",
-# BACKDROP_DIR/"Blue Sky.sb3"/"e7c147730f19d284bcd7b3f00af19bb6.png",
-# BACKDROP_DIR/"Castle 2.sb3"/"951765ee7f7370f120c9df20b577c22f.png",
-# BACKDROP_DIR/"Colorful City.sb3"/"04d18ddd1b85f0ea30beb14b8da49f60.png",
-# BACKDROP_DIR/"Hall.sb3"/"ea86ca30b346f27ca5faf1254f6a31e3.png",
-# BACKDROP_DIR/"Jungle.sb3"/"f4f908da19e2753f3ed679d7b37650ca.png",
-# BACKDROP_DIR/"Soccer.sb3"/"04a63154f04b09494354090f7cc2f1b9.png",
-# BACKDROP_DIR/"Theater.sb3"/"c2b097bc5cdb6a14ef5485202bc5ee76.png",
-# SPRITE_DIR/"Batter.sprite3"/"592ee9ab2aeefe65cb4fb95fcd046f33.png",
-# SPRITE_DIR/"Batter.sprite3"/"9d193bef6e3d6d8eba6d1470b8bf9351.png",
-# SPRITE_DIR/"Batter.sprite3"/"baseball_sprite_motion_1.png",
-# SPRITE_DIR/"Batter.sprite3"/"bd4fc003528acfa847e45ff82f346eee.png",
-# SPRITE_DIR/"Batter.sprite3"/"fdfde4bcbaca0f68e83fdf3f4ef0c660.png",
-# SPRITE_DIR/"Bear.sprite3"/"6f303e972f33fcb7ef36d0d8012d0975.png",
-# SPRITE_DIR/"Bear.sprite3"/"bear_motion_2.png",
-# SPRITE_DIR/"Bear.sprite3"/"deef1eaa96d550ae6fc11524a1935024.png",
-# SPRITE_DIR/"Beetle.sprite3"/"46d0dfd4ae7e9bfe3a6a2e35a4905eae.png",
-# SPRITE_DIR/"Butterfly 1.sprite3"/"34b76c1835c6a7fc2c47956e49bb0f52.png",
-# SPRITE_DIR/"Butterfly 1.sprite3"/"49c9f952007d870a046cff93b6e5e098.png",
-# SPRITE_DIR/"Butterfly 1.sprite3"/"fe98df7367e314d9640bfaa54fc239be.png",
-# SPRITE_DIR/"Cat.sprite3"/"0fb9be3e8397c983338cb71dc84d0b25.png",
-# SPRITE_DIR/"Cat.sprite3"/"bcf454acf82e4504149f7ffe07081dbc.png",
-# SPRITE_DIR/"Centaur.sprite3"/"2373556e776cad3ba4d6ee04fc34550b.png",
-# SPRITE_DIR/"Centaur.sprite3"/"c00ffa6c5dd0baf9f456b897ff974377.png",
-# SPRITE_DIR/"Centaur.sprite3"/"d722329bd9373ad80625e5be6d52f3ed.png",
-# SPRITE_DIR/"Centaur.sprite3"/"d7aa990538915b7ef1f496d7e8486ade.png",
-# SPRITE_DIR/"City Bus.sprite3"/"7d7e26014a346b894db8ab1819f2167f.png",
-# SPRITE_DIR/"City Bus.sprite3"/"e9694adbff9422363e2ea03166015393.png",
-# SPRITE_DIR/"Crab.sprite3"/"49839aa1b0feed02a3c759db5f8dee71.png",
-# SPRITE_DIR/"Crab.sprite3"/"bear_element.png",
-# SPRITE_DIR/"Crab.sprite3"/"f7cdd2acbc6d7559d33be8675059c79e.png",
-# SPRITE_DIR/"Glow-G.sprite3"/"56839bc48957869d980c6f9b6f5a2a91.png",
-# SPRITE_DIR/"Jordyn.sprite3"/"00c8c464c19460df693f8d5ae69afdab.png",
-# SPRITE_DIR/"Jordyn.sprite3"/"768c4601174f0dfcb96b3080ccc3a192.png",
-# SPRITE_DIR/"Jordyn.sprite3"/"a7cc1e5f02b58ecc8095cfc18eef0289.png",
-# SPRITE_DIR/"Jordyn.sprite3"/"db4d97cbf24e2b8af665bfbf06f67fa0.png",
-# SPRITE_DIR/"Soccer Ball.sprite3"/"5d973d7a3a8be3f3bd6e1cd0f73c32b5.png",
-# SPRITE_DIR/"Soccer Ball.sprite3"/"cat_football.png",
-# SPRITE_DIR/"Star.sprite3"/"551629f2a64c1f3703e57aaa133effa6.png",
-# SPRITE_DIR/"Wizard.sprite3"/"55ba51188af86ca16ef30267e874c1ed.png",
-# SPRITE_DIR/"Wizard.sprite3"/"91d495085eb4d02a375c42f6318071e7.png",
-# SPRITE_DIR/"Wizard.sprite3"/"df943c9894ee4b9df8c5893ce30c2a5f.png",
-# # CODE_BLOCKS_DIR/"client_code_block_1.jpg",
-# # CODE_BLOCKS_DIR/"client_code_block_2.jpg",
-# CODE_BLOCKS_DIR/"script1.JPG",
-# CODE_BLOCKS_DIR/"script2.JPG",
-# CODE_BLOCKS_DIR/"script3.JPG",
-# CODE_BLOCKS_DIR/"script4.JPG",
-# CODE_BLOCKS_DIR/"script5.JPG",
-# CODE_BLOCKS_DIR/"script6.JPG",
-# CODE_BLOCKS_DIR/"script7.JPG",
-# CODE_BLOCKS_DIR/"script8.JPG",
-# CODE_BLOCKS_DIR/"script9.JPG",
-# CODE_BLOCKS_DIR/"static_white.png"]
-#     folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
-#     # =========================================
-#     # -----------------------------------------
-#     #  Load reference embeddings from JSON
-#     # -----------------------------------------
-#     with open(f"{BLOCKS_DIR}/dinov2_embeddings.json", "r") as f:
-#         embedding_json = json.load(f)
-#     # ============================== #
-#     #      EMBED SPRITE IMAGES       #
-#     # ============================== #
-#     # ensure model is initialized (fast no-op after first call)
-#     init_dinov2()
-#     # embed the incoming sprite BytesIO images (same data structure you already use)
-#     sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8)  # shape (N, D)
-#     # load reference embeddings from JSON (they must be numeric lists)
-#     img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
-#     # normalize both sides (important — stored embeddings may not be normalized)
-#     sprite_matrix = l2_normalize_rows(sprite_matrix)
-#     img_matrix = l2_normalize_rows(img_matrix)
-#     # =========================================
-#     #  Compute similarities & pick best match
-#     # =========================================
-#     similarity = np.matmul(sprite_matrix, img_matrix.T)
-#     most_similar_indices = np.argmax(similarity, axis=1)
-#     # =========================================
-#     #  Copy matched sprite assets + collect data
-#     # =========================================
-#     project_data   = []
-#     copied_folders = set()
-#     for sprite_idx, matched_idx in enumerate(most_similar_indices):
-#         matched_image_path = folder_image_paths[matched_idx]
-#         matched_folder     = os.path.dirname(matched_image_path)
-#         # CHANGED: use our new normalized sprite_base_path
-#         if not matched_folder.startswith(sprite_base_path):
-#             continue
-#         if matched_folder in copied_folders:
-#             continue
-#         copied_folders.add(matched_folder)
-#         logger.info(f"Matched sprite: {matched_image_path}")
-#         sprite_json_path = os.path.join(matched_folder, 'sprite.json')
-#         if not os.path.exists(sprite_json_path):
-#             logger.warning(f"No sprite.json in {matched_folder}")
-#             continue
-#         with open(sprite_json_path, 'r') as f:
-#             sprite_info = json.load(f)
-#         # copy all non‐matched files
-#         for fname in os.listdir(matched_folder):
-#             if fname in (os.path.basename(matched_image_path), 'sprite.json'):
-#                 continue
-#             shutil.copy2(os.path.join(matched_folder, fname),
-#                          os.path.join(project_folder, fname))
-#         project_data.append(sprite_info)
-#     # =========================================
-#     #  Copy matched backdrop assets + collect
-#     # =========================================
-#     backdrop_data = []
-#     copied_backdrop_folders = set()
-#     for backdrop_idx, matched_idx in enumerate(most_similar_indices):
-#         matched_image_path = folder_image_paths[matched_idx]
-#         matched_folder     = os.path.dirname(matched_image_path)
-#         matched_filename = os.path.basename(matched_image_path)
-#         # CHANGED: use our new normalized backdrop_base_path
-#         if not matched_folder.startswith(backdrop_base_path):
-#             continue
-#         # skip if backdrop folder already processed
-#         if matched_folder in copied_backdrop_folders:
-#             continue
-#         copied_backdrop_folders.add(matched_folder)
-#         logger.info(f"Matched backdrop: {matched_image_path}")
-#         # 1) Copy the matched backdrop image itself
-#         try:
-#             shutil.copy2(
-#                 matched_image_path,
-#                 os.path.join(project_folder, matched_filename)
-#             )
-#             logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
-#         except Exception as e:
-#             logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
-#         # copy non‐matched files
-#         for fname in os.listdir(matched_folder):
-#             # if fname in (os.path.basename(matched_image_path), 'project.json'):
-#             if fname in {matched_filename, 'project.json'}:
-#                 continue
-#             # shutil.copy2(os.path.join(matched_folder, fname),
-#             #              os.path.join(project_folder, fname))
-#             src = os.path.join(matched_folder, fname)
-#             dst = os.path.join(project_folder, fname)
-#             if os.path.isfile(src):
-#                 try:
-#                     shutil.copy2(src, dst)
-#                     logger.info(f"Copied additional backdrop asset {fname} to project folder")
-#                 except Exception as e:
-#                     logger.error(f"Failed to copy {src}: {e}")
-#         # append the stage‐target from its project.json
-#         pj = os.path.join(matched_folder, 'project.json')
-#         if os.path.exists(pj):
-#             with open(pj, 'r') as f:
-#                 bd_json = json.load(f)
-#             for tgt in bd_json.get("targets", []):
-#                 if tgt.get("isStage"):
-#                     backdrop_data.append(tgt)
-#         else:
-#             logger.warning(f"No project.json in {matched_folder}")
-#     # =========================================
-#     #  Merge into final Scratch project.json
-#     # =========================================
-#     final_project = {
-#         "targets": [], "monitors": [], "extensions": [],
-#         "meta": {
-#             "semver": "3.0.0",
-#             "vm": "11.3.0",
-#             "agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
-#         }
-#     }
-#     # sprites first
-#     for spr in project_data:
-#         if not spr.get("isStage", False):
-#             final_project["targets"].append(spr)
 ''' It appends all the list and paths from json files and pick the best match's path'''
 def similarity_matching(sprites_data: dict, project_folder: str, top_k: int = 1, min_similarity: float = None) -> str:
     print("🔍 Running similarity matching…")
     os.makedirs(project_folder, exist_ok=True)
-    # backdrop_base_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\Backdrops"
-    # sprite_base_path   = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\sprites"
-    # code_blocks_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\code_blocks"
     backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
     sprite_base_path   = os.path.normpath(str(SPRITE_DIR))
     code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))

 import cv2, json,base64,io,os,tempfile,logging, re
 import numpy as np
 from unstructured.partition.pdf import partition_pdf
+from PIL import Image, ImageOps, ImageEnhance
 from dotenv import load_dotenv
+# import pytesseract
 from werkzeug.utils import secure_filename
 from langchain_groq import ChatGroq
 from langgraph.prebuilt import create_react_agent
 from pdf2image import convert_from_path, convert_from_bytes
 from typing import Dict, TypedDict, Optional, Any
 from langgraph.graph import StateGraph, END
 import uuid
 import shutil, time, functools
 from io import BytesIO
 from pathlib import Path
 import os
 from utils.block_relation_builder import block_builder, separate_scripts, transform_logic_to_action_flow, analyze_opcode_counts
 from difflib import get_close_matches
 import torch
 from transformers import AutoImageProcessor, AutoModel
+from pathlib import Path
+from io import BytesIO
+import torch
+import json
 import cv2
 # hashing & image-match
 from imagededup.methods import PHash
 from image_match.goldberg import ImageSignature
 # --- Config (tune threads as needed) ---
 # DINOv2 model id
 DINOV2_MODEL = "facebook/dinov2-small"
 # Module-level hashing helpers, instantiated once when the module is imported:
 # PHash comes from imagededup.methods, ImageSignature from image_match.goldberg.
 phash = PHash()
 gis = ImageSignature()
 def log_execution_time(func):
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
 # Flask application object for this module.
 app = Flask(__name__)
 # NOTE(review): these raw strings use backslash separators, so they only name
 # nested directories on Windows. similarity_matching() in this file builds its
 # paths from BACKDROP_DIR / SPRITE_DIR / CODE_BLOCKS_DIR instead; confirm whether
 # these three names are still referenced anywhere.
 backdrop_images_path = r"app\blocks\Backdrops"
 sprite_images_path = r"app\blocks\sprites"
 code_blocks_image_path = r"app\blocks\code_blocks"
 CODE_BLOCKS_DIR  = BLOCKS_DIR / "code_blocks"
 # === new: outputs rooted under BASE_DIR ===
 OUTPUT_DIR       = BASE_DIR / "outputs"
 # Global variables to hold the model and index, loaded only once.
 MODEL = None
     SPRITE_DIR,
     CODE_BLOCKS_DIR,
     OUTPUT_DIR,
 ):
     d.mkdir(parents=True, exist_ok=True)
 4. If you can't find the data, use "Unknown" for name_variable and "No pseudocode found" for pseudocode
 """
 # Main agent for Scratch 3.0: a ReAct agent built on the module-level `llm`
 # and driven by SYSTEM_PROMPT.
 agent = create_react_agent(
     model=llm,
     tools=[], # No specific tools are defined here, but could be added later
     prompt=SYSTEM_PROMPT
 )
 # Second agent on the same `llm`, configured with SYSTEM_PROMPT_JSON_CORRECTOR.
 # Elsewhere in this file it is invoked with a correction prompt after a
 # json.JSONDecodeError, to obtain a repaired JSON response.
 agent_json_resolver = create_react_agent(
     model=llm,
     tools=[], # No specific tools are defined here, but could be added later
     prompt=SYSTEM_PROMPT_JSON_CORRECTOR
 )
 # -----------------------
 #  SERIALIZABLE HELPER
 # -----------------------
 from collections import defaultdict
 import math
+def choose_top_candidates(embedding_results, phash_results, imgmatch_results, top_k=10, method_weights=(0.5, 0.3, 0.2), verbose=True):
     """
     embedding_results: list of (path, emb_sim) where emb_sim roughly in [-1,1] (we'll clamp to 0..1)
     phash_results: list of (path, hamming, ph_sim) where ph_sim in [0,1]
     try:
         p = os.path.normpath(os.path.abspath(path))
         b = os.path.normpath(os.path.abspath(base))
+        if os.name == "nt": p = p.lower(); b = b.lower()
         return os.path.commonpath([p, b]) == b
     except Exception:
         return False
         catalog = json.loads(text)       # will raise JSONDecodeError if malformed
         logger.info(f"Successfully loaded block catalog from {catalog_path}")
         return catalog
     except FileNotFoundError:
         logger.error(f"Error: Block catalog file not found at {catalog_path}")
     except json.JSONDecodeError as e:
     Returns the block dict or None if not found.
     """
     for block in catalog_data["blocks"]:
+        if block.get("op_code") == opcode: return block
     return None
 # Helper function to find a block in all catalogs by opcode
     """
     for catalog in all_catalogs:
         blk = get_block_by_opcode(catalog, opcode)
+        if blk is not None: return blk
     return None
 def variable_intialization(project_data):
     Updates variable and broadcast definitions in a Scratch project JSON,
     populating the 'variables' and 'broadcasts' sections of the Stage target
     and extracting initial values for variables.
+    Args: project_data (dict): The loaded JSON data of the Scratch project.
+    Returns: dict: The updated project JSON data.
     """
     stage_target = None
     for target in project_data['targets']:
+        if target.get('isStage'):
             stage_target = target
             break
     if stage_target is None:
         print("Error: Stage target not found in the project data.")
         return project_data
     # Ensure 'variables' and 'broadcasts' exist in the Stage target
     if "variables" not in stage_target:
         stage_target["variables"] = {}
     """
     Removes duplicate variable entries in the 'variables' dictionary of the Stage target,
     prioritizing entries with non-empty values.
+    Args: project_data (dict): The loaded JSON data of the Scratch project.
+    Returns: dict: The updated project JSON data with deduplicated variables.
     """
     stage_target = None
     for target in project_data['targets']:
         if target.get('isStage'):
     if "variables" not in stage_target:
         return project_data # No variables to deduplicate
     resolved_variables = {}
     for var_id, var_info in stage_target["variables"].items():
         var_name = var_info[0]
         var_value = var_info[1]
+        # Keep exactly one [id, name, value] entry per variable name.
+        if var_name not in resolved_variables:
+            resolved_variables[var_name] = [var_id, var_name, var_value]
+        else:
+            existing_value = resolved_variables[var_name][2]
+            # A later duplicate replaces the stored entry in every case except
+            # one: an empty value never overwrites an existing non-empty value.
+            if var_value != "" or existing_value == "":
+                resolved_variables[var_name] = [var_id, var_name, var_value]
     # Reconstruct the 'variables' dictionary using the resolved entries
     new_variables_dict = {}
         var_name_to_keep = var_data[1]
         var_value_to_keep = var_data[2]
         new_variables_dict[var_id_to_keep] = [var_name_to_keep, var_value_to_keep]
     stage_target["variables"] = new_variables_dict
     return project_data
 def variable_adder_main(project_data):
         print(f"Error error in the variable initialization opcodes: {e}")
+# # --- Global variable for the block catalog ---
+# ALL_SCRATCH_BLOCKS_CATALOG = {}
+# BLOCK_CATALOG_PATH = "blocks" # Define the path to your JSON file
+# HAT_BLOCKS_PATH = "hat_blocks" # Path to the hat blocks JSON file
+# STACK_BLOCKS_PATH = "stack_blocks" # Path to the stack blocks JSON file
+# REPORTER_BLOCKS_PATH = "reporter_blocks" # Path to the reporter blocks JSON file
+# BOOLEAN_BLOCKS_PATH = "boolean_blocks" # Path to the boolean blocks JSON file
+# C_BLOCKS_PATH = "c_blocks" # Path to the C blocks JSON file
+# CAP_BLOCKS_PATH = "cap_blocks" # Path to the cap blocks JSON file
+# # Load the block catalogs from their respective JSON files
+# hat_block_data = _load_block_catalog(HAT_BLOCKS_PATH)
+# hat_description = hat_block_data["description"]
+# #hat_description = hat_block_data.get("description", "No description available")
+# # hat_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in hat_block_data["blocks"]])
+# hat_opcodes_functionalities = "\n".join([
+#     # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     for block in hat_block_data.get("blocks", [])
+# ]) if isinstance(hat_block_data.get("blocks"), list) else "    No blocks information available."
+# #hat_opcodes_functionalities = os.path.join(BLOCKS_DIR, "hat_blocks.txt")
+# print("Hat blocks loaded successfully.", hat_description)
+# boolean_block_data = _load_block_catalog(BOOLEAN_BLOCKS_PATH)
+# boolean_description = boolean_block_data["description"]
+# # boolean_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in boolean_block_data["blocks"]])
+# boolean_opcodes_functionalities = "\n".join([
+#     # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     for block in boolean_block_data.get("blocks", [])
+# ]) if isinstance(boolean_block_data.get("blocks"), list) else "    No blocks information available."
+# #boolean_opcodes_functionalities = os.path.join(BLOCKS_DIR, "boolean_blocks.txt")
+# c_block_data = _load_block_catalog(C_BLOCKS_PATH)
+# c_description = c_block_data["description"]
+# # c_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in c_block_data["blocks"]])
+# c_opcodes_functionalities = "\n".join([
+#     # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     for block in c_block_data.get("blocks", [])
+# ]) if isinstance(c_block_data.get("blocks"), list) else "    No blocks information available."
+# #c_opcodes_functionalities = os.path.join(BLOCKS_DIR, "c_blocks.txt")
+# cap_block_data = _load_block_catalog(CAP_BLOCKS_PATH)
+# cap_description = cap_block_data["description"]
+# # cap_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in cap_block_data["blocks"]])
+# cap_opcodes_functionalities = "\n".join([
+#     # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     for block in cap_block_data.get("blocks", [])
+# ]) if isinstance(cap_block_data.get("blocks"), list) else "    No blocks information available."
+# #cap_opcodes_functionalities = os.path.join(BLOCKS_DIR, "cap_blocks.txt")
+# reporter_block_data = _load_block_catalog(REPORTER_BLOCKS_PATH)
+# reporter_description = reporter_block_data["description"]
+# # reporter_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in reporter_block_data["blocks"]])
+# reporter_opcodes_functionalities = "\n".join([
+#     # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     for block in reporter_block_data.get("blocks", [])
+# ]) if isinstance(reporter_block_data.get("blocks"), list) else "    No blocks information available."
+# #reporter_opcodes_functionalities = os.path.join(BLOCKS_DIR, "reporter_blocks.txt")
+# stack_block_data = _load_block_catalog(STACK_BLOCKS_PATH)
+# stack_description = stack_block_data["description"]
+# # stack_opcodes_functionalities = "\n".join([f"    - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in stack_block_data["blocks"]])
+# stack_opcodes_functionalities = "\n".join([
+#     # f"    - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     f"    - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
+#     for block in stack_block_data.get("blocks", [])
+# ]) if isinstance(stack_block_data.get("blocks"), list) else "    No blocks information available."
+# #stack_opcodes_functionalities = os.path.join(BLOCKS_DIR, "stack_blocks.txt")
+# # This makes ALL_SCRATCH_BLOCKS_CATALOG available globally
+# ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
 def extract_json_from_llm_response(raw_response: str) -> dict:
     """
         print(f"result:\n\n {result}")
     except json.JSONDecodeError as error_json:
         correction_prompt = f"""
 Fix this malformed response and return only the corrected JSON:
         try:
             correction_response = agent_json_resolver.invoke({"messages": [{"role": "user", "content": correction_prompt}]})
             corrected_output = extract_json_from_llm_response(correction_response['messages'][-1].content)
             result = corrected_output
             print(f"result:\n\n {result}")
         except Exception as e_corr:
     state["pseudo_code"] = result
     state["temp_pseudo_code"] += [result]
     Data = state["temp_pseudo_code"]
     print(f"[OVREALL REFINED PSEUDO CODE LOGIC]: {result}")
     print(f"[OVREALL LISTS OF LOGICS]: {Data}")
     logger.info("Plan refinement and block relation analysis completed for all plans.")
                     logger.info(f"Action blocks added for sprite '{sprite_name}' by OverallBlockBuilderNode.")
                 except Exception as e:
                     logger.error(f"Error generating blocks for sprite '{sprite_name}': {e}")
+    state["project_json"] = project_json
     return state
 # Node 6: variable adder node
         try:
             elements = partition_pdf(
+                file=pdf_stream,
                 strategy="hi_res",
                 extract_image_block_types=["Image"],
                 hi_res_model_name="yolox",
                 extract_image_block_to_payload=True,
             )
             print(f"ELEMENTS")
         except Exception as e:
                 continue
             manipulated_json[f"Sprite {sprite_count}"] = {
                 "base64": el["metadata"]["image_base64"],
                 "file-path": pdf_id,
             }
             sprite_count += 1
         return manipulated_json
     except Exception as e:
         raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
 ''' It appends all the list and paths from json files and pick the best match's path'''
 def similarity_matching(sprites_data: dict, project_folder: str, top_k: int = 1, min_similarity: float = None) -> str:
     print("🔍 Running similarity matching…")
     os.makedirs(project_folder, exist_ok=True)
     backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
     sprite_base_path   = os.path.normpath(str(SPRITE_DIR))
     code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))