Update app.py
Browse files
app.py
CHANGED
|
@@ -2,44 +2,32 @@ from flask import Flask, request, jsonify, render_template, send_from_directory,
|
|
| 2 |
import cv2, json,base64,io,os,tempfile,logging, re
|
| 3 |
import numpy as np
|
| 4 |
from unstructured.partition.pdf import partition_pdf
|
| 5 |
-
from PIL import Image
|
| 6 |
-
# from imutils.perspective import four_point_transform
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
-
import pytesseract
|
| 9 |
from werkzeug.utils import secure_filename
|
| 10 |
from langchain_groq import ChatGroq
|
| 11 |
from langgraph.prebuilt import create_react_agent
|
| 12 |
from pdf2image import convert_from_path, convert_from_bytes
|
| 13 |
-
from concurrent.futures import ThreadPoolExecutor
|
| 14 |
-
from pdf2image.exceptions import PDFInfoNotInstalledError
|
| 15 |
from typing import Dict, TypedDict, Optional, Any
|
| 16 |
from langgraph.graph import StateGraph, END
|
| 17 |
import uuid
|
| 18 |
import shutil, time, functools
|
| 19 |
-
from langchain_experimental.open_clip.open_clip import OpenCLIPEmbeddings
|
| 20 |
-
from langchain_core.utils.utils import secret_from_env
|
| 21 |
-
# from matplotlib.offsetbox import OffsetImage, AnnotationBbox
|
| 22 |
from io import BytesIO
|
| 23 |
from pathlib import Path
|
| 24 |
import os
|
| 25 |
from utils.block_relation_builder import block_builder, separate_scripts, transform_logic_to_action_flow, analyze_opcode_counts
|
| 26 |
-
from langchain.chat_models import ChatOpenAI
|
| 27 |
-
from langchain_openai import ChatOpenAI
|
| 28 |
-
from pydantic import Field, SecretStr
|
| 29 |
from difflib import get_close_matches
|
| 30 |
import torch
|
| 31 |
from transformers import AutoImageProcessor, AutoModel
|
| 32 |
-
import
|
| 33 |
-
from
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
from PIL import Image, ImageOps, ImageEnhance
|
| 37 |
import cv2
|
| 38 |
-
|
| 39 |
# hashing & image-match
|
| 40 |
from imagededup.methods import PHash
|
| 41 |
from image_match.goldberg import ImageSignature
|
| 42 |
-
|
| 43 |
# --- Config (tune threads as needed) ---
|
| 44 |
# DINOv2 model id
|
| 45 |
DINOV2_MODEL = "facebook/dinov2-small"
|
|
@@ -63,7 +51,6 @@ dinov2_model.eval()
|
|
| 63 |
phash = PHash()
|
| 64 |
gis = ImageSignature()
|
| 65 |
|
| 66 |
-
|
| 67 |
def log_execution_time(func):
|
| 68 |
@functools.wraps(func)
|
| 69 |
def wrapper(*args, **kwargs):
|
|
@@ -98,12 +85,6 @@ llm = ChatGroq(
|
|
| 98 |
|
| 99 |
app = Flask(__name__)
|
| 100 |
|
| 101 |
-
# ============================== #
|
| 102 |
-
# TESSERACT CONFIGURATION #
|
| 103 |
-
# ============================== #
|
| 104 |
-
pytesseract.pytesseract.tesseract_cmd = (r'/usr/bin/tesseract')
|
| 105 |
-
|
| 106 |
-
# poppler_path = r"C:\poppler\Library\bin"
|
| 107 |
backdrop_images_path = r"app\blocks\Backdrops"
|
| 108 |
sprite_images_path = r"app\blocks\sprites"
|
| 109 |
code_blocks_image_path = r"app\blocks\code_blocks"
|
|
@@ -119,11 +100,6 @@ SPRITE_DIR = BLOCKS_DIR / "sprites"
|
|
| 119 |
CODE_BLOCKS_DIR = BLOCKS_DIR / "code_blocks"
|
| 120 |
# === new: outputs rooted under BASE_DIR ===
|
| 121 |
OUTPUT_DIR = BASE_DIR / "outputs"
|
| 122 |
-
INDEX_PATH = os.path.join(BLOCKS_DIR, "faiss_index.bin")
|
| 123 |
-
PATHS_JSON_PATH = os.path.join(BLOCKS_DIR, "image_paths.json")
|
| 124 |
-
# DETECTED_IMAGE_DIR = OUTPUT_DIR / "DETECTED_IMAGE"
|
| 125 |
-
# SCANNED_IMAGE_DIR = OUTPUT_DIR / "SCANNED_IMAGE"
|
| 126 |
-
# JSON_DIR = OUTPUT_DIR / "EXTRACTED_JSON"
|
| 127 |
|
| 128 |
# Global variables to hold the model and index, loaded only once.
|
| 129 |
MODEL = None
|
|
@@ -139,9 +115,6 @@ for d in (
|
|
| 139 |
SPRITE_DIR,
|
| 140 |
CODE_BLOCKS_DIR,
|
| 141 |
OUTPUT_DIR,
|
| 142 |
-
# DETECTED_IMAGE_DIR,
|
| 143 |
-
# SCANNED_IMAGE_DIR,
|
| 144 |
-
# JSON_DIR,
|
| 145 |
):
|
| 146 |
d.mkdir(parents=True, exist_ok=True)
|
| 147 |
|
|
@@ -195,108 +168,19 @@ RULES:
|
|
| 195 |
4. If you can't find the data, use "Unknown" for name_variable and "No pseudocode found" for pseudocode
|
| 196 |
"""
|
| 197 |
|
| 198 |
-
# debugger and resolver agent for Scratch 3.0
|
| 199 |
# Main agent of the system agent for Scratch 3.0
|
| 200 |
agent = create_react_agent(
|
| 201 |
model=llm,
|
| 202 |
tools=[], # No specific tools are defined here, but could be added later
|
| 203 |
prompt=SYSTEM_PROMPT
|
| 204 |
)
|
| 205 |
-
|
| 206 |
-
# model=llm2,
|
| 207 |
-
# tools=[], # No specific tools are defined here, but could be added later
|
| 208 |
-
# prompt=SYSTEM_PROMPT
|
| 209 |
-
# )
|
| 210 |
agent_json_resolver = create_react_agent(
|
| 211 |
model=llm,
|
| 212 |
tools=[], # No specific tools are defined here, but could be added later
|
| 213 |
prompt=SYSTEM_PROMPT_JSON_CORRECTOR
|
| 214 |
)
|
| 215 |
|
| 216 |
-
def load_model_and_index():
|
| 217 |
-
"""
|
| 218 |
-
Loads the SentenceTransformer model, FAISS index, and image paths into global variables.
|
| 219 |
-
This function is called once on the first run to avoid reloading heavy assets.
|
| 220 |
-
"""
|
| 221 |
-
global MODEL, FAISS_INDEX, IMAGE_PATHS
|
| 222 |
-
|
| 223 |
-
# This check ensures we only load everything once
|
| 224 |
-
if MODEL is None:
|
| 225 |
-
logger.info("Loading CLIP model 'clip-ViT-L-14' for the first time...")
|
| 226 |
-
MODEL = SentenceTransformer('clip-ViT-L-14')
|
| 227 |
-
logger.info("✅ CLIP model loaded.")
|
| 228 |
-
|
| 229 |
-
logger.info(f"Loading FAISS index from: {INDEX_PATH}")
|
| 230 |
-
FAISS_INDEX = faiss.read_index(INDEX_PATH)
|
| 231 |
-
logger.info("✅ FAISS index loaded.")
|
| 232 |
-
|
| 233 |
-
logger.info(f"Loading image paths from: {PATHS_JSON_PATH}")
|
| 234 |
-
with open(PATHS_JSON_PATH, "r") as f:
|
| 235 |
-
IMAGE_PATHS = json.load(f)
|
| 236 |
-
logger.info("✅ Image paths loaded.")
|
| 237 |
-
|
| 238 |
-
import torch
|
| 239 |
-
from transformers import AutoImageProcessor, AutoModel
|
| 240 |
-
import numpy as np
|
| 241 |
-
from PIL import Image
|
| 242 |
-
from pathlib import Path
|
| 243 |
-
from io import BytesIO
|
| 244 |
-
import json
|
| 245 |
-
|
| 246 |
-
# def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
|
| 247 |
-
# """
|
| 248 |
-
# Lazy-initialize DINOv2 processor & model (call once before embedding).
|
| 249 |
-
# """
|
| 250 |
-
# global _dinov2_processor, _dinov2_model
|
| 251 |
-
# if _dinov2_processor is None or _dinov2_model is None:
|
| 252 |
-
# _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
|
| 253 |
-
# _dinov2_model = AutoModel.from_pretrained(model_name)
|
| 254 |
-
# _dinov2_model.eval().to(device)
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
# def embed_bytesio_list(bytesio_list, batch_size: int = 8):
|
| 258 |
-
# """
|
| 259 |
-
# Accepts a list of BytesIO objects (each contains an image).
|
| 260 |
-
# Returns: np.ndarray shape (N, D) of L2-normalized embeddings (dtype float32).
|
| 261 |
-
# """
|
| 262 |
-
# if _dinov2_processor is None or _dinov2_model is None:
|
| 263 |
-
# init_dinov2()
|
| 264 |
-
|
| 265 |
-
# imgs = []
|
| 266 |
-
# for b in bytesio_list:
|
| 267 |
-
# with Image.open(b) as original_img:
|
| 268 |
-
# # Create a new image with a white background in RGB mode
|
| 269 |
-
# final_img = Image.new("RGB", original_img.size, (255, 255, 255))
|
| 270 |
-
# # Paste the original image onto the white background, using the alpha channel as a mask if it exists
|
| 271 |
-
# if original_img.mode == 'RGBA':
|
| 272 |
-
# final_img.paste(original_img, mask=original_img.split()[-1])
|
| 273 |
-
# else:
|
| 274 |
-
# final_img.paste(original_img)
|
| 275 |
-
# imgs.append(final_img.copy())
|
| 276 |
-
|
| 277 |
-
# embs = []
|
| 278 |
-
# for i in range(0, len(imgs), batch_size):
|
| 279 |
-
# batch = imgs[i: i + batch_size]
|
| 280 |
-
# inputs = _dinov2_processor(images=batch, return_tensors="pt")
|
| 281 |
-
# inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 282 |
-
# with torch.no_grad():
|
| 283 |
-
# out = _dinov2_model(**inputs)
|
| 284 |
-
# cls = out.last_hidden_state[:, 0, :] # (B, D)
|
| 285 |
-
# cls = torch.nn.functional.normalize(cls, p=2, dim=1)
|
| 286 |
-
# embs.append(cls.cpu().numpy())
|
| 287 |
-
|
| 288 |
-
# if not embs:
|
| 289 |
-
# return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)
|
| 290 |
-
|
| 291 |
-
# return np.vstack(embs).astype(np.float32)
|
| 292 |
-
|
| 293 |
-
# def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
|
| 294 |
-
# """
|
| 295 |
-
# Row-wise L2 normalization for numpy arrays.
|
| 296 |
-
# """
|
| 297 |
-
# norm = np.linalg.norm(a, axis=1, keepdims=True)
|
| 298 |
-
# return a / (norm + eps)
|
| 299 |
-
|
| 300 |
# -----------------------
|
| 301 |
# SERIALIZABLE HELPER
|
| 302 |
# -----------------------
|
|
@@ -457,8 +341,7 @@ def cosine_similarity(a, b):
|
|
| 457 |
from collections import defaultdict
|
| 458 |
import math
|
| 459 |
|
| 460 |
-
def choose_top_candidates(embedding_results, phash_results, imgmatch_results, top_k=10,
|
| 461 |
-
method_weights=(0.5, 0.3, 0.2), verbose=True):
|
| 462 |
"""
|
| 463 |
embedding_results: list of (path, emb_sim) where emb_sim roughly in [-1,1] (we'll clamp to 0..1)
|
| 464 |
phash_results: list of (path, hamming, ph_sim) where ph_sim in [0,1]
|
|
@@ -600,8 +483,7 @@ def is_subpath(path: str, base: str) -> bool:
|
|
| 600 |
try:
|
| 601 |
p = os.path.normpath(os.path.abspath(path))
|
| 602 |
b = os.path.normpath(os.path.abspath(base))
|
| 603 |
-
if os.name == "nt":
|
| 604 |
-
p = p.lower(); b = b.lower()
|
| 605 |
return os.path.commonpath([p, b]) == b
|
| 606 |
except Exception:
|
| 607 |
return False
|
|
@@ -619,7 +501,6 @@ def _load_block_catalog(block_type: str) -> Dict:
|
|
| 619 |
catalog = json.loads(text) # will raise JSONDecodeError if malformed
|
| 620 |
logger.info(f"Successfully loaded block catalog from {catalog_path}")
|
| 621 |
return catalog
|
| 622 |
-
|
| 623 |
except FileNotFoundError:
|
| 624 |
logger.error(f"Error: Block catalog file not found at {catalog_path}")
|
| 625 |
except json.JSONDecodeError as e:
|
|
@@ -634,8 +515,7 @@ def get_block_by_opcode(catalog_data: dict, opcode: str) -> dict | None:
|
|
| 634 |
Returns the block dict or None if not found.
|
| 635 |
"""
|
| 636 |
for block in catalog_data["blocks"]:
|
| 637 |
-
if block.get("op_code") == opcode:
|
| 638 |
-
return block
|
| 639 |
return None
|
| 640 |
|
| 641 |
# Helper function to find a block in all catalogs by opcode
|
|
@@ -646,8 +526,7 @@ def find_block_in_all(opcode: str, all_catalogs: list[dict]) -> dict | None:
|
|
| 646 |
"""
|
| 647 |
for catalog in all_catalogs:
|
| 648 |
blk = get_block_by_opcode(catalog, opcode)
|
| 649 |
-
if blk is not None:
|
| 650 |
-
return blk
|
| 651 |
return None
|
| 652 |
|
| 653 |
def variable_intialization(project_data):
|
|
@@ -655,24 +534,18 @@ def variable_intialization(project_data):
|
|
| 655 |
Updates variable and broadcast definitions in a Scratch project JSON,
|
| 656 |
populating the 'variables' and 'broadcasts' sections of the Stage target
|
| 657 |
and extracting initial values for variables.
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
project_data (dict): The loaded JSON data of the Scratch project.
|
| 661 |
-
|
| 662 |
-
Returns:
|
| 663 |
-
dict: The updated project JSON data.
|
| 664 |
"""
|
| 665 |
|
| 666 |
stage_target = None
|
| 667 |
for target in project_data['targets']:
|
| 668 |
-
if target.get('isStage'):
|
| 669 |
stage_target = target
|
| 670 |
break
|
| 671 |
-
|
| 672 |
if stage_target is None:
|
| 673 |
print("Error: Stage target not found in the project data.")
|
| 674 |
return project_data
|
| 675 |
-
|
| 676 |
# Ensure 'variables' and 'broadcasts' exist in the Stage target
|
| 677 |
if "variables" not in stage_target:
|
| 678 |
stage_target["variables"] = {}
|
|
@@ -741,14 +614,9 @@ def deduplicate_variables(project_data):
|
|
| 741 |
"""
|
| 742 |
Removes duplicate variable entries in the 'variables' dictionary of the Stage target,
|
| 743 |
prioritizing entries with non-empty values.
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
project_data (dict): The loaded JSON data of the Scratch project.
|
| 747 |
-
|
| 748 |
-
Returns:
|
| 749 |
-
dict: The updated project JSON data with deduplicated variables.
|
| 750 |
"""
|
| 751 |
-
|
| 752 |
stage_target = None
|
| 753 |
for target in project_data['targets']:
|
| 754 |
if target.get('isStage'):
|
|
@@ -762,36 +630,17 @@ def deduplicate_variables(project_data):
|
|
| 762 |
if "variables" not in stage_target:
|
| 763 |
return project_data # No variables to deduplicate
|
| 764 |
|
| 765 |
-
# Use a temporary dictionary to store the preferred variable entry by name
|
| 766 |
-
# Format: {variable_name: [variable_id, variable_name, variable_value]}
|
| 767 |
resolved_variables = {}
|
| 768 |
|
| 769 |
for var_id, var_info in stage_target["variables"].items():
|
| 770 |
var_name = var_info[0]
|
| 771 |
var_value = var_info[1]
|
| 772 |
|
| 773 |
-
if var_name not in resolved_variables:
|
| 774 |
-
|
| 775 |
-
resolved_variables[var_name] = [var_id, var_name, var_value]
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
existing_id, existing_name, existing_value = resolved_variables[var_name]
|
| 779 |
-
|
| 780 |
-
# Prioritize the entry with a non-empty value
|
| 781 |
-
if var_value != "" and existing_value == "":
|
| 782 |
-
resolved_variables[var_name] = [var_id, var_name, var_value]
|
| 783 |
-
# If both have non-empty values, or both are empty, keep the current one (arbitrary choice, but consistent)
|
| 784 |
-
# The current logic will effectively keep the last one encountered that has a value,
|
| 785 |
-
# or the very last one if all are empty.
|
| 786 |
-
elif var_value != "" and existing_value != "":
|
| 787 |
-
# If there are multiple non-empty values for the same variable name
|
| 788 |
-
# this keeps the one from the most recent iteration.
|
| 789 |
-
# For the given example, this will correctly keep "5".
|
| 790 |
-
resolved_variables[var_name] = [var_id, var_name, var_value]
|
| 791 |
-
elif var_value == "" and existing_value == "":
|
| 792 |
-
# If both are empty, just keep the current one (arbitrary)
|
| 793 |
-
resolved_variables[var_name] = [var_id, var_name, var_value]
|
| 794 |
-
|
| 795 |
|
| 796 |
# Reconstruct the 'variables' dictionary using the resolved entries
|
| 797 |
new_variables_dict = {}
|
|
@@ -800,9 +649,7 @@ def deduplicate_variables(project_data):
|
|
| 800 |
var_name_to_keep = var_data[1]
|
| 801 |
var_value_to_keep = var_data[2]
|
| 802 |
new_variables_dict[var_id_to_keep] = [var_name_to_keep, var_value_to_keep]
|
| 803 |
-
|
| 804 |
stage_target["variables"] = new_variables_dict
|
| 805 |
-
|
| 806 |
return project_data
|
| 807 |
|
| 808 |
def variable_adder_main(project_data):
|
|
@@ -819,81 +666,81 @@ def variable_adder_main(project_data):
|
|
| 819 |
print(f"Error error in the variable initialization opcodes: {e}")
|
| 820 |
|
| 821 |
|
| 822 |
-
# --- Global variable for the block catalog ---
|
| 823 |
-
ALL_SCRATCH_BLOCKS_CATALOG = {}
|
| 824 |
-
BLOCK_CATALOG_PATH = "blocks" # Define the path to your JSON file
|
| 825 |
-
HAT_BLOCKS_PATH = "hat_blocks" # Path to the hat blocks JSON file
|
| 826 |
-
STACK_BLOCKS_PATH = "stack_blocks" # Path to the stack blocks JSON file
|
| 827 |
-
REPORTER_BLOCKS_PATH = "reporter_blocks" # Path to the reporter blocks JSON file
|
| 828 |
-
BOOLEAN_BLOCKS_PATH = "boolean_blocks" # Path to the boolean blocks JSON file
|
| 829 |
-
C_BLOCKS_PATH = "c_blocks" # Path to the C blocks JSON file
|
| 830 |
-
CAP_BLOCKS_PATH = "cap_blocks" # Path to the cap blocks JSON file
|
| 831 |
-
|
| 832 |
-
# Load the block catalogs from their respective JSON files
|
| 833 |
-
hat_block_data = _load_block_catalog(HAT_BLOCKS_PATH)
|
| 834 |
-
hat_description = hat_block_data["description"]
|
| 835 |
-
#hat_description = hat_block_data.get("description", "No description available")
|
| 836 |
-
# hat_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in hat_block_data["blocks"]])
|
| 837 |
-
hat_opcodes_functionalities = "\n".join([
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
]) if isinstance(hat_block_data.get("blocks"), list) else " No blocks information available."
|
| 842 |
-
#hat_opcodes_functionalities = os.path.join(BLOCKS_DIR, "hat_blocks.txt")
|
| 843 |
-
print("Hat blocks loaded successfully.", hat_description)
|
| 844 |
-
|
| 845 |
-
boolean_block_data = _load_block_catalog(BOOLEAN_BLOCKS_PATH)
|
| 846 |
-
boolean_description = boolean_block_data["description"]
|
| 847 |
-
# boolean_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in boolean_block_data["blocks"]])
|
| 848 |
-
boolean_opcodes_functionalities = "\n".join([
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
]) if isinstance(boolean_block_data.get("blocks"), list) else " No blocks information available."
|
| 853 |
-
#boolean_opcodes_functionalities = os.path.join(BLOCKS_DIR, "boolean_blocks.txt")
|
| 854 |
-
|
| 855 |
-
c_block_data = _load_block_catalog(C_BLOCKS_PATH)
|
| 856 |
-
c_description = c_block_data["description"]
|
| 857 |
-
# c_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in c_block_data["blocks"]])
|
| 858 |
-
c_opcodes_functionalities = "\n".join([
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
]) if isinstance(c_block_data.get("blocks"), list) else " No blocks information available."
|
| 863 |
-
#c_opcodes_functionalities = os.path.join(BLOCKS_DIR, "c_blocks.txt")
|
| 864 |
-
|
| 865 |
-
cap_block_data = _load_block_catalog(CAP_BLOCKS_PATH)
|
| 866 |
-
cap_description = cap_block_data["description"]
|
| 867 |
-
# cap_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in cap_block_data["blocks"]])
|
| 868 |
-
cap_opcodes_functionalities = "\n".join([
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
]) if isinstance(cap_block_data.get("blocks"), list) else " No blocks information available."
|
| 873 |
-
#cap_opcodes_functionalities = os.path.join(BLOCKS_DIR, "cap_blocks.txt")
|
| 874 |
-
|
| 875 |
-
reporter_block_data = _load_block_catalog(REPORTER_BLOCKS_PATH)
|
| 876 |
-
reporter_description = reporter_block_data["description"]
|
| 877 |
-
# reporter_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in reporter_block_data["blocks"]])
|
| 878 |
-
reporter_opcodes_functionalities = "\n".join([
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
]) if isinstance(reporter_block_data.get("blocks"), list) else " No blocks information available."
|
| 883 |
-
#reporter_opcodes_functionalities = os.path.join(BLOCKS_DIR, "reporter_blocks.txt")
|
| 884 |
-
|
| 885 |
-
stack_block_data = _load_block_catalog(STACK_BLOCKS_PATH)
|
| 886 |
-
stack_description = stack_block_data["description"]
|
| 887 |
-
# stack_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in stack_block_data["blocks"]])
|
| 888 |
-
stack_opcodes_functionalities = "\n".join([
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
]) if isinstance(stack_block_data.get("blocks"), list) else " No blocks information available."
|
| 893 |
-
#stack_opcodes_functionalities = os.path.join(BLOCKS_DIR, "stack_blocks.txt")
|
| 894 |
-
|
| 895 |
-
# This makes ALL_SCRATCH_BLOCKS_CATALOG available globally
|
| 896 |
-
ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
|
| 897 |
|
| 898 |
def extract_json_from_llm_response(raw_response: str) -> dict:
|
| 899 |
"""
|
|
@@ -1288,19 +1135,6 @@ end
|
|
| 1288 |
print(f"result:\n\n {result}")
|
| 1289 |
|
| 1290 |
except json.JSONDecodeError as error_json:
|
| 1291 |
-
# If JSON parsing fails, use the json resolver agent
|
| 1292 |
-
# correction_prompt = (
|
| 1293 |
-
# "Your task is to correct the provided JSON string to ensure it is **syntactically perfect and adheres strictly to JSON rules**.\n"
|
| 1294 |
-
# "It must be a JSON object with `refined_logic` (string) and `block_relationships` (array of objects).\n"
|
| 1295 |
-
# f"- **Error Details**: {error_json}\n\n"
|
| 1296 |
-
# "**Strict Instructions for your response:**\n"
|
| 1297 |
-
# "1. **ONLY** output the corrected JSON. Do not include any other text or explanations.\n"
|
| 1298 |
-
# "2. Ensure all keys and string values are enclosed in **double quotes**. Escape internal quotes (`\\`).\n"
|
| 1299 |
-
# "3. No trailing commas. Correct nesting.\n\n"
|
| 1300 |
-
# "Here is the problematic JSON string to correct:\n"
|
| 1301 |
-
# f"```json\n{llm_output_raw}\n```\n"
|
| 1302 |
-
# "Corrected JSON:\n"
|
| 1303 |
-
# )
|
| 1304 |
correction_prompt = f"""
|
| 1305 |
Fix this malformed response and return only the corrected JSON:
|
| 1306 |
|
|
@@ -1317,7 +1151,6 @@ Extract the sprite name and pseudocode, then return in this exact format:
|
|
| 1317 |
try:
|
| 1318 |
correction_response = agent_json_resolver.invoke({"messages": [{"role": "user", "content": correction_prompt}]})
|
| 1319 |
corrected_output = extract_json_from_llm_response(correction_response['messages'][-1].content)
|
| 1320 |
-
#block_relationships = corrected_output.get("block_relationships", [])
|
| 1321 |
result = corrected_output
|
| 1322 |
print(f"result:\n\n {result}")
|
| 1323 |
except Exception as e_corr:
|
|
@@ -1328,8 +1161,6 @@ Extract the sprite name and pseudocode, then return in this exact format:
|
|
| 1328 |
state["pseudo_code"] = result
|
| 1329 |
state["temp_pseudo_code"] += [result]
|
| 1330 |
Data = state["temp_pseudo_code"]
|
| 1331 |
-
# with open("debug_state.json", "w", encoding="utf-8") as f:
|
| 1332 |
-
# json.dump(state, f, indent=2, ensure_ascii=False)
|
| 1333 |
print(f"[OVREALL REFINED PSEUDO CODE LOGIC]: {result}")
|
| 1334 |
print(f"[OVREALL LISTS OF LOGICS]: {Data}")
|
| 1335 |
logger.info("Plan refinement and block relation analysis completed for all plans.")
|
|
@@ -1464,14 +1295,7 @@ def overall_block_builder_node_2(state: GameState):
|
|
| 1464 |
logger.info(f"Action blocks added for sprite '{sprite_name}' by OverallBlockBuilderNode.")
|
| 1465 |
except Exception as e:
|
| 1466 |
logger.error(f"Error generating blocks for sprite '{sprite_name}': {e}")
|
| 1467 |
-
|
| 1468 |
-
# from block_builder should cause a specific state change, but generally
|
| 1469 |
-
# avoid nulling the entire project_json.
|
| 1470 |
-
|
| 1471 |
-
state["project_json"] = project_json
|
| 1472 |
-
# with open("debug_state.json", "w", encoding="utf-8") as f:
|
| 1473 |
-
# json.dump(state, f, indent=2, ensure_ascii=False)
|
| 1474 |
-
|
| 1475 |
return state
|
| 1476 |
|
| 1477 |
# Node 6: variable adder node
|
|
@@ -1553,15 +1377,11 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
|
| 1553 |
|
| 1554 |
try:
|
| 1555 |
elements = partition_pdf(
|
| 1556 |
-
|
| 1557 |
-
file=pdf_stream, # 'file=', inplace of 'filename'
|
| 1558 |
strategy="hi_res",
|
| 1559 |
extract_image_block_types=["Image"],
|
| 1560 |
hi_res_model_name="yolox",
|
| 1561 |
extract_image_block_to_payload=True,
|
| 1562 |
-
# ocr_languages=ocr_lang,
|
| 1563 |
-
# extract_images_in_pdf=False,
|
| 1564 |
-
# extract_image_block_output_dir=r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\pdf_output"
|
| 1565 |
)
|
| 1566 |
print(f"ELEMENTS")
|
| 1567 |
except Exception as e:
|
|
@@ -1578,260 +1398,19 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
|
|
| 1578 |
continue
|
| 1579 |
|
| 1580 |
manipulated_json[f"Sprite {sprite_count}"] = {
|
| 1581 |
-
# "id":auto_id,
|
| 1582 |
-
# "name": name,
|
| 1583 |
"base64": el["metadata"]["image_base64"],
|
| 1584 |
"file-path": pdf_id,
|
| 1585 |
-
# "description": description
|
| 1586 |
}
|
| 1587 |
sprite_count += 1
|
| 1588 |
return manipulated_json
|
| 1589 |
except Exception as e:
|
| 1590 |
raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
|
| 1591 |
-
|
| 1592 |
-
# def similarity_matching(sprites_data: str, project_folder: str) -> str:
|
| 1593 |
-
# logger.info("🔍 Running similarity matching…")
|
| 1594 |
-
# os.makedirs(project_folder, exist_ok=True)
|
| 1595 |
-
|
| 1596 |
-
# # ----------------------------------------
|
| 1597 |
-
# # CHANGED: define normalized base-paths so startswith() checks work
|
| 1598 |
-
# backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
|
| 1599 |
-
# sprite_base_path = os.path.normpath(str(SPRITE_DIR))
|
| 1600 |
-
# code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
|
| 1601 |
-
# # ----------------------------------------
|
| 1602 |
-
|
| 1603 |
-
# project_json_path = os.path.join(project_folder, "project.json")
|
| 1604 |
-
|
| 1605 |
-
# # ==============================
|
| 1606 |
-
# # READ SPRITE METADATA
|
| 1607 |
-
# # ==============================
|
| 1608 |
-
# # with open(input_json_path, 'r') as f:
|
| 1609 |
-
# # sprites_data = json.load(f)
|
| 1610 |
-
|
| 1611 |
-
# sprite_ids, sprite_base64 = [], []
|
| 1612 |
-
# for sid, sprite in sprites_data.items():
|
| 1613 |
-
# sprite_ids.append(sid)
|
| 1614 |
-
# # texts.append("This is " + sprite.get("description", sprite.get("name", "")))
|
| 1615 |
-
# sprite_base64.append(sprite["base64"])
|
| 1616 |
-
|
| 1617 |
-
# sprite_images_bytes = []
|
| 1618 |
-
# for b64 in sprite_base64:
|
| 1619 |
-
# img = Image.open(BytesIO(base64.b64decode(b64.split(",")[-1]))).convert("RGB")
|
| 1620 |
-
# buffer = BytesIO()
|
| 1621 |
-
# img.save(buffer, format="PNG")
|
| 1622 |
-
# buffer.seek(0)
|
| 1623 |
-
# sprite_images_bytes.append(buffer)
|
| 1624 |
-
|
| 1625 |
-
# # =========================================
|
| 1626 |
-
# # Build the list of all candidate images
|
| 1627 |
-
# # =========================================
|
| 1628 |
-
# folder_image_paths = [
|
| 1629 |
-
# BACKDROP_DIR/"Baseball 2.sb3"/"7be1f5b3e682813dac1f297e52ff7dca.png",
|
| 1630 |
-
# BACKDROP_DIR/"Beach Malibu.sb3"/"050615fe992a00d6af0e664e497ebf53.png",
|
| 1631 |
-
# BACKDROP_DIR/"Bedroom 3.sb3"/"8cc0b88d53345b3e337e8f028a32a4e7.png",
|
| 1632 |
-
# BACKDROP_DIR/"Blue Sky.sb3"/"e7c147730f19d284bcd7b3f00af19bb6.png",
|
| 1633 |
-
# BACKDROP_DIR/"Castle 2.sb3"/"951765ee7f7370f120c9df20b577c22f.png",
|
| 1634 |
-
# BACKDROP_DIR/"Colorful City.sb3"/"04d18ddd1b85f0ea30beb14b8da49f60.png",
|
| 1635 |
-
# BACKDROP_DIR/"Hall.sb3"/"ea86ca30b346f27ca5faf1254f6a31e3.png",
|
| 1636 |
-
# BACKDROP_DIR/"Jungle.sb3"/"f4f908da19e2753f3ed679d7b37650ca.png",
|
| 1637 |
-
# BACKDROP_DIR/"Soccer.sb3"/"04a63154f04b09494354090f7cc2f1b9.png",
|
| 1638 |
-
# BACKDROP_DIR/"Theater.sb3"/"c2b097bc5cdb6a14ef5485202bc5ee76.png",
|
| 1639 |
-
|
| 1640 |
-
# SPRITE_DIR/"Batter.sprite3"/"592ee9ab2aeefe65cb4fb95fcd046f33.png",
|
| 1641 |
-
# SPRITE_DIR/"Batter.sprite3"/"9d193bef6e3d6d8eba6d1470b8bf9351.png",
|
| 1642 |
-
# SPRITE_DIR/"Batter.sprite3"/"baseball_sprite_motion_1.png",
|
| 1643 |
-
# SPRITE_DIR/"Batter.sprite3"/"bd4fc003528acfa847e45ff82f346eee.png",
|
| 1644 |
-
# SPRITE_DIR/"Batter.sprite3"/"fdfde4bcbaca0f68e83fdf3f4ef0c660.png",
|
| 1645 |
-
# SPRITE_DIR/"Bear.sprite3"/"6f303e972f33fcb7ef36d0d8012d0975.png",
|
| 1646 |
-
# SPRITE_DIR/"Bear.sprite3"/"bear_motion_2.png",
|
| 1647 |
-
# SPRITE_DIR/"Bear.sprite3"/"deef1eaa96d550ae6fc11524a1935024.png",
|
| 1648 |
-
# SPRITE_DIR/"Beetle.sprite3"/"46d0dfd4ae7e9bfe3a6a2e35a4905eae.png",
|
| 1649 |
-
# SPRITE_DIR/"Butterfly 1.sprite3"/"34b76c1835c6a7fc2c47956e49bb0f52.png",
|
| 1650 |
-
# SPRITE_DIR/"Butterfly 1.sprite3"/"49c9f952007d870a046cff93b6e5e098.png",
|
| 1651 |
-
# SPRITE_DIR/"Butterfly 1.sprite3"/"fe98df7367e314d9640bfaa54fc239be.png",
|
| 1652 |
-
# SPRITE_DIR/"Cat.sprite3"/"0fb9be3e8397c983338cb71dc84d0b25.png",
|
| 1653 |
-
# SPRITE_DIR/"Cat.sprite3"/"bcf454acf82e4504149f7ffe07081dbc.png",
|
| 1654 |
-
# SPRITE_DIR/"Centaur.sprite3"/"2373556e776cad3ba4d6ee04fc34550b.png",
|
| 1655 |
-
# SPRITE_DIR/"Centaur.sprite3"/"c00ffa6c5dd0baf9f456b897ff974377.png",
|
| 1656 |
-
# SPRITE_DIR/"Centaur.sprite3"/"d722329bd9373ad80625e5be6d52f3ed.png",
|
| 1657 |
-
# SPRITE_DIR/"Centaur.sprite3"/"d7aa990538915b7ef1f496d7e8486ade.png",
|
| 1658 |
-
# SPRITE_DIR/"City Bus.sprite3"/"7d7e26014a346b894db8ab1819f2167f.png",
|
| 1659 |
-
# SPRITE_DIR/"City Bus.sprite3"/"e9694adbff9422363e2ea03166015393.png",
|
| 1660 |
-
# SPRITE_DIR/"Crab.sprite3"/"49839aa1b0feed02a3c759db5f8dee71.png",
|
| 1661 |
-
# SPRITE_DIR/"Crab.sprite3"/"bear_element.png",
|
| 1662 |
-
# SPRITE_DIR/"Crab.sprite3"/"f7cdd2acbc6d7559d33be8675059c79e.png",
|
| 1663 |
-
# SPRITE_DIR/"Glow-G.sprite3"/"56839bc48957869d980c6f9b6f5a2a91.png",
|
| 1664 |
-
# SPRITE_DIR/"Jordyn.sprite3"/"00c8c464c19460df693f8d5ae69afdab.png",
|
| 1665 |
-
# SPRITE_DIR/"Jordyn.sprite3"/"768c4601174f0dfcb96b3080ccc3a192.png",
|
| 1666 |
-
# SPRITE_DIR/"Jordyn.sprite3"/"a7cc1e5f02b58ecc8095cfc18eef0289.png",
|
| 1667 |
-
# SPRITE_DIR/"Jordyn.sprite3"/"db4d97cbf24e2b8af665bfbf06f67fa0.png",
|
| 1668 |
-
# SPRITE_DIR/"Soccer Ball.sprite3"/"5d973d7a3a8be3f3bd6e1cd0f73c32b5.png",
|
| 1669 |
-
# SPRITE_DIR/"Soccer Ball.sprite3"/"cat_football.png",
|
| 1670 |
-
# SPRITE_DIR/"Star.sprite3"/"551629f2a64c1f3703e57aaa133effa6.png",
|
| 1671 |
-
# SPRITE_DIR/"Wizard.sprite3"/"55ba51188af86ca16ef30267e874c1ed.png",
|
| 1672 |
-
# SPRITE_DIR/"Wizard.sprite3"/"91d495085eb4d02a375c42f6318071e7.png",
|
| 1673 |
-
# SPRITE_DIR/"Wizard.sprite3"/"df943c9894ee4b9df8c5893ce30c2a5f.png",
|
| 1674 |
-
|
| 1675 |
-
# # CODE_BLOCKS_DIR/"client_code_block_1.jpg",
|
| 1676 |
-
# # CODE_BLOCKS_DIR/"client_code_block_2.jpg",
|
| 1677 |
-
# CODE_BLOCKS_DIR/"script1.JPG",
|
| 1678 |
-
# CODE_BLOCKS_DIR/"script2.JPG",
|
| 1679 |
-
# CODE_BLOCKS_DIR/"script3.JPG",
|
| 1680 |
-
# CODE_BLOCKS_DIR/"script4.JPG",
|
| 1681 |
-
# CODE_BLOCKS_DIR/"script5.JPG",
|
| 1682 |
-
# CODE_BLOCKS_DIR/"script6.JPG",
|
| 1683 |
-
# CODE_BLOCKS_DIR/"script7.JPG",
|
| 1684 |
-
# CODE_BLOCKS_DIR/"script8.JPG",
|
| 1685 |
-
# CODE_BLOCKS_DIR/"script9.JPG",
|
| 1686 |
-
# CODE_BLOCKS_DIR/"static_white.png"]
|
| 1687 |
-
# folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
|
| 1688 |
-
# # =========================================
|
| 1689 |
-
|
| 1690 |
-
# # -----------------------------------------
|
| 1691 |
-
# # Load reference embeddings from JSON
|
| 1692 |
-
# # -----------------------------------------
|
| 1693 |
-
# with open(f"{BLOCKS_DIR}/dinov2_embeddings.json", "r") as f:
|
| 1694 |
-
# embedding_json = json.load(f)
|
| 1695 |
-
|
| 1696 |
-
# # ============================== #
|
| 1697 |
-
# # EMBED SPRITE IMAGES #
|
| 1698 |
-
# # ============================== #
|
| 1699 |
-
# # ensure model is initialized (fast no-op after first call)
|
| 1700 |
-
# init_dinov2()
|
| 1701 |
-
|
| 1702 |
-
# # embed the incoming sprite BytesIO images (same data structure you already use)
|
| 1703 |
-
# sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8) # shape (N, D)
|
| 1704 |
-
|
| 1705 |
-
# # load reference embeddings from JSON (they must be numeric lists)
|
| 1706 |
-
# img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
|
| 1707 |
-
|
| 1708 |
-
# # normalize both sides (important — stored embeddings may not be normalized)
|
| 1709 |
-
# sprite_matrix = l2_normalize_rows(sprite_matrix)
|
| 1710 |
-
# img_matrix = l2_normalize_rows(img_matrix)
|
| 1711 |
-
|
| 1712 |
-
# # =========================================
|
| 1713 |
-
# # Compute similarities & pick best match
|
| 1714 |
-
# # =========================================
|
| 1715 |
-
# similarity = np.matmul(sprite_matrix, img_matrix.T)
|
| 1716 |
-
# most_similar_indices = np.argmax(similarity, axis=1)
|
| 1717 |
-
|
| 1718 |
-
# # =========================================
|
| 1719 |
-
# # Copy matched sprite assets + collect data
|
| 1720 |
-
# # =========================================
|
| 1721 |
-
# project_data = []
|
| 1722 |
-
# copied_folders = set()
|
| 1723 |
-
|
| 1724 |
-
# for sprite_idx, matched_idx in enumerate(most_similar_indices):
|
| 1725 |
-
# matched_image_path = folder_image_paths[matched_idx]
|
| 1726 |
-
# matched_folder = os.path.dirname(matched_image_path)
|
| 1727 |
-
|
| 1728 |
-
# # CHANGED: use our new normalized sprite_base_path
|
| 1729 |
-
# if not matched_folder.startswith(sprite_base_path):
|
| 1730 |
-
# continue
|
| 1731 |
-
|
| 1732 |
-
# if matched_folder in copied_folders:
|
| 1733 |
-
# continue
|
| 1734 |
-
# copied_folders.add(matched_folder)
|
| 1735 |
-
# logger.info(f"Matched sprite: {matched_image_path}")
|
| 1736 |
-
|
| 1737 |
-
# sprite_json_path = os.path.join(matched_folder, 'sprite.json')
|
| 1738 |
-
# if not os.path.exists(sprite_json_path):
|
| 1739 |
-
# logger.warning(f"No sprite.json in {matched_folder}")
|
| 1740 |
-
# continue
|
| 1741 |
-
|
| 1742 |
-
# with open(sprite_json_path, 'r') as f:
|
| 1743 |
-
# sprite_info = json.load(f)
|
| 1744 |
-
# # copy all non‐matched files
|
| 1745 |
-
# for fname in os.listdir(matched_folder):
|
| 1746 |
-
# if fname in (os.path.basename(matched_image_path), 'sprite.json'):
|
| 1747 |
-
# continue
|
| 1748 |
-
# shutil.copy2(os.path.join(matched_folder, fname),
|
| 1749 |
-
# os.path.join(project_folder, fname))
|
| 1750 |
-
# project_data.append(sprite_info)
|
| 1751 |
-
|
| 1752 |
-
# # =========================================
|
| 1753 |
-
# # Copy matched backdrop assets + collect
|
| 1754 |
-
# # =========================================
|
| 1755 |
-
# backdrop_data = []
|
| 1756 |
-
# copied_backdrop_folders = set()
|
| 1757 |
-
# for backdrop_idx, matched_idx in enumerate(most_similar_indices):
|
| 1758 |
-
# matched_image_path = folder_image_paths[matched_idx]
|
| 1759 |
-
# matched_folder = os.path.dirname(matched_image_path)
|
| 1760 |
-
# matched_filename = os.path.basename(matched_image_path)
|
| 1761 |
-
|
| 1762 |
-
# # CHANGED: use our new normalized backdrop_base_path
|
| 1763 |
-
# if not matched_folder.startswith(backdrop_base_path):
|
| 1764 |
-
# continue
|
| 1765 |
-
|
| 1766 |
-
# # skip if backdrop folder already processed
|
| 1767 |
-
# if matched_folder in copied_backdrop_folders:
|
| 1768 |
-
# continue
|
| 1769 |
-
# copied_backdrop_folders.add(matched_folder)
|
| 1770 |
-
|
| 1771 |
-
# logger.info(f"Matched backdrop: {matched_image_path}")
|
| 1772 |
-
|
| 1773 |
-
# # 1) Copy the matched backdrop image itself
|
| 1774 |
-
# try:
|
| 1775 |
-
# shutil.copy2(
|
| 1776 |
-
# matched_image_path,
|
| 1777 |
-
# os.path.join(project_folder, matched_filename)
|
| 1778 |
-
# )
|
| 1779 |
-
# logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
|
| 1780 |
-
# except Exception as e:
|
| 1781 |
-
# logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
|
| 1782 |
|
| 1783 |
-
# # copy non‐matched files
|
| 1784 |
-
# for fname in os.listdir(matched_folder):
|
| 1785 |
-
# # if fname in (os.path.basename(matched_image_path), 'project.json'):
|
| 1786 |
-
# if fname in {matched_filename, 'project.json'}:
|
| 1787 |
-
# continue
|
| 1788 |
-
# # shutil.copy2(os.path.join(matched_folder, fname),
|
| 1789 |
-
# # os.path.join(project_folder, fname))
|
| 1790 |
-
# src = os.path.join(matched_folder, fname)
|
| 1791 |
-
# dst = os.path.join(project_folder, fname)
|
| 1792 |
-
# if os.path.isfile(src):
|
| 1793 |
-
# try:
|
| 1794 |
-
# shutil.copy2(src, dst)
|
| 1795 |
-
# logger.info(f"Copied additional backdrop asset {fname} to project folder")
|
| 1796 |
-
# except Exception as e:
|
| 1797 |
-
# logger.error(f"Failed to copy {src}: {e}")
|
| 1798 |
-
|
| 1799 |
-
# # append the stage‐target from its project.json
|
| 1800 |
-
# pj = os.path.join(matched_folder, 'project.json')
|
| 1801 |
-
# if os.path.exists(pj):
|
| 1802 |
-
# with open(pj, 'r') as f:
|
| 1803 |
-
# bd_json = json.load(f)
|
| 1804 |
-
# for tgt in bd_json.get("targets", []):
|
| 1805 |
-
# if tgt.get("isStage"):
|
| 1806 |
-
# backdrop_data.append(tgt)
|
| 1807 |
-
# else:
|
| 1808 |
-
# logger.warning(f"No project.json in {matched_folder}")
|
| 1809 |
-
|
| 1810 |
-
|
| 1811 |
-
# # =========================================
|
| 1812 |
-
# # Merge into final Scratch project.json
|
| 1813 |
-
# # =========================================
|
| 1814 |
-
# final_project = {
|
| 1815 |
-
# "targets": [], "monitors": [], "extensions": [],
|
| 1816 |
-
# "meta": {
|
| 1817 |
-
# "semver": "3.0.0",
|
| 1818 |
-
# "vm": "11.3.0",
|
| 1819 |
-
# "agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
|
| 1820 |
-
# }
|
| 1821 |
-
# }
|
| 1822 |
-
# # sprites first
|
| 1823 |
-
# for spr in project_data:
|
| 1824 |
-
# if not spr.get("isStage", False):
|
| 1825 |
-
# final_project["targets"].append(spr)
|
| 1826 |
''' It collects all the lists and paths from the JSON files and picks the best match's path. '''
|
| 1827 |
-
|
| 1828 |
def similarity_matching(sprites_data: dict, project_folder: str, top_k: int = 1, min_similarity: float = None) -> str:
|
| 1829 |
print("🔍 Running similarity matching…")
|
| 1830 |
os.makedirs(project_folder, exist_ok=True)
|
| 1831 |
|
| 1832 |
-
# backdrop_base_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\Backdrops"
|
| 1833 |
-
# sprite_base_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\sprites"
|
| 1834 |
-
# code_blocks_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\code_blocks"
|
| 1835 |
backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
|
| 1836 |
sprite_base_path = os.path.normpath(str(SPRITE_DIR))
|
| 1837 |
code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
|
|
|
|
| 2 |
import cv2, json,base64,io,os,tempfile,logging, re
|
| 3 |
import numpy as np
|
| 4 |
from unstructured.partition.pdf import partition_pdf
|
| 5 |
+
from PIL import Image, ImageOps, ImageEnhance
|
|
|
|
| 6 |
from dotenv import load_dotenv
|
| 7 |
+
# import pytesseract
|
| 8 |
from werkzeug.utils import secure_filename
|
| 9 |
from langchain_groq import ChatGroq
|
| 10 |
from langgraph.prebuilt import create_react_agent
|
| 11 |
from pdf2image import convert_from_path, convert_from_bytes
|
|
|
|
|
|
|
| 12 |
from typing import Dict, TypedDict, Optional, Any
|
| 13 |
from langgraph.graph import StateGraph, END
|
| 14 |
import uuid
|
| 15 |
import shutil, time, functools
|
|
|
|
|
|
|
|
|
|
| 16 |
from io import BytesIO
|
| 17 |
from pathlib import Path
|
| 18 |
import os
|
| 19 |
from utils.block_relation_builder import block_builder, separate_scripts, transform_logic_to_action_flow, analyze_opcode_counts
|
|
|
|
|
|
|
|
|
|
| 20 |
from difflib import get_close_matches
|
| 21 |
import torch
|
| 22 |
from transformers import AutoImageProcessor, AutoModel
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from io import BytesIO
|
| 25 |
+
import torch
|
| 26 |
+
import json
|
|
|
|
| 27 |
import cv2
|
|
|
|
| 28 |
# hashing & image-match
|
| 29 |
from imagededup.methods import PHash
|
| 30 |
from image_match.goldberg import ImageSignature
|
|
|
|
| 31 |
# --- Config (tune threads as needed) ---
|
| 32 |
# DINOv2 model id
|
| 33 |
DINOV2_MODEL = "facebook/dinov2-small"
|
|
|
|
| 51 |
phash = PHash()
|
| 52 |
gis = ImageSignature()
|
| 53 |
|
|
|
|
| 54 |
def log_execution_time(func):
|
| 55 |
@functools.wraps(func)
|
| 56 |
def wrapper(*args, **kwargs):
|
|
|
|
| 85 |
|
| 86 |
app = Flask(__name__)
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
backdrop_images_path = r"app\blocks\Backdrops"
|
| 89 |
sprite_images_path = r"app\blocks\sprites"
|
| 90 |
code_blocks_image_path = r"app\blocks\code_blocks"
|
|
|
|
| 100 |
CODE_BLOCKS_DIR = BLOCKS_DIR / "code_blocks"
|
| 101 |
# === new: outputs rooted under BASE_DIR ===
|
| 102 |
OUTPUT_DIR = BASE_DIR / "outputs"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
# Global variables to hold the model and index, loaded only once.
|
| 105 |
MODEL = None
|
|
|
|
| 115 |
SPRITE_DIR,
|
| 116 |
CODE_BLOCKS_DIR,
|
| 117 |
OUTPUT_DIR,
|
|
|
|
|
|
|
|
|
|
| 118 |
):
|
| 119 |
d.mkdir(parents=True, exist_ok=True)
|
| 120 |
|
|
|
|
| 168 |
4. If you can't find the data, use "Unknown" for name_variable and "No pseudocode found" for pseudocode
|
| 169 |
"""
|
| 170 |
|
|
|
|
| 171 |
# Main agent of the system for Scratch 3.0
|
| 172 |
agent = create_react_agent(
|
| 173 |
model=llm,
|
| 174 |
tools=[], # No specific tools are defined here, but could be added later
|
| 175 |
prompt=SYSTEM_PROMPT
|
| 176 |
)
|
| 177 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
agent_json_resolver = create_react_agent(
|
| 179 |
model=llm,
|
| 180 |
tools=[], # No specific tools are defined here, but could be added later
|
| 181 |
prompt=SYSTEM_PROMPT_JSON_CORRECTOR
|
| 182 |
)
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
# -----------------------
|
| 185 |
# SERIALIZABLE HELPER
|
| 186 |
# -----------------------
|
|
|
|
| 341 |
from collections import defaultdict
|
| 342 |
import math
|
| 343 |
|
| 344 |
+
def choose_top_candidates(embedding_results, phash_results, imgmatch_results, top_k=10, method_weights=(0.5, 0.3, 0.2), verbose=True):
|
|
|
|
| 345 |
"""
|
| 346 |
embedding_results: list of (path, emb_sim) where emb_sim roughly in [-1,1] (we'll clamp to 0..1)
|
| 347 |
phash_results: list of (path, hamming, ph_sim) where ph_sim in [0,1]
|
|
|
|
| 483 |
try:
|
| 484 |
p = os.path.normpath(os.path.abspath(path))
|
| 485 |
b = os.path.normpath(os.path.abspath(base))
|
| 486 |
+
if os.name == "nt": p = p.lower(); b = b.lower()
|
|
|
|
| 487 |
return os.path.commonpath([p, b]) == b
|
| 488 |
except Exception:
|
| 489 |
return False
|
|
|
|
| 501 |
catalog = json.loads(text) # will raise JSONDecodeError if malformed
|
| 502 |
logger.info(f"Successfully loaded block catalog from {catalog_path}")
|
| 503 |
return catalog
|
|
|
|
| 504 |
except FileNotFoundError:
|
| 505 |
logger.error(f"Error: Block catalog file not found at {catalog_path}")
|
| 506 |
except json.JSONDecodeError as e:
|
|
|
|
| 515 |
Returns the block dict or None if not found.
|
| 516 |
"""
|
| 517 |
for block in catalog_data["blocks"]:
|
| 518 |
+
if block.get("op_code") == opcode: return block
|
|
|
|
| 519 |
return None
|
| 520 |
|
| 521 |
# Helper function to find a block in all catalogs by opcode
|
|
|
|
| 526 |
"""
|
| 527 |
for catalog in all_catalogs:
|
| 528 |
blk = get_block_by_opcode(catalog, opcode)
|
| 529 |
+
if blk is not None: return blk
|
|
|
|
| 530 |
return None
|
| 531 |
|
| 532 |
def variable_intialization(project_data):
|
|
|
|
| 534 |
Updates variable and broadcast definitions in a Scratch project JSON,
|
| 535 |
populating the 'variables' and 'broadcasts' sections of the Stage target
|
| 536 |
and extracting initial values for variables.
|
| 537 |
+
Args: project_data (dict): The loaded JSON data of the Scratch project.
|
| 538 |
+
Returns: dict: The updated project JSON data.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
"""
|
| 540 |
|
| 541 |
stage_target = None
|
| 542 |
for target in project_data['targets']:
|
| 543 |
+
if target.get('isStage'):
|
| 544 |
stage_target = target
|
| 545 |
break
|
|
|
|
| 546 |
if stage_target is None:
|
| 547 |
print("Error: Stage target not found in the project data.")
|
| 548 |
return project_data
|
|
|
|
| 549 |
# Ensure 'variables' and 'broadcasts' exist in the Stage target
|
| 550 |
if "variables" not in stage_target:
|
| 551 |
stage_target["variables"] = {}
|
|
|
|
| 614 |
"""
|
| 615 |
Removes duplicate variable entries in the 'variables' dictionary of the Stage target,
|
| 616 |
prioritizing entries with non-empty values.
|
| 617 |
+
Args: project_data (dict): The loaded JSON data of the Scratch project.
|
| 618 |
+
Returns: dict: The updated project JSON data with deduplicated variables.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 619 |
"""
|
|
|
|
| 620 |
stage_target = None
|
| 621 |
for target in project_data['targets']:
|
| 622 |
if target.get('isStage'):
|
|
|
|
| 630 |
if "variables" not in stage_target:
|
| 631 |
return project_data # No variables to deduplicate
|
| 632 |
|
|
|
|
|
|
|
| 633 |
resolved_variables = {}
|
| 634 |
|
| 635 |
for var_id, var_info in stage_target["variables"].items():
|
| 636 |
var_name = var_info[0]
|
| 637 |
var_value = var_info[1]
|
| 638 |
|
| 639 |
+
if var_name not in resolved_variables: resolved_variables[var_name] = [var_id, var_name, var_value]
|
| 640 |
+
else: existing_id, existing_name, existing_value = resolved_variables[var_name]
|
| 641 |
+
if var_value != "" and existing_value == "": resolved_variables[var_name] = [var_id, var_name, var_value]
|
| 642 |
+
elif var_value != "" and existing_value != "": resolved_variables[var_name] = [var_id, var_name, var_value]
|
| 643 |
+
elif var_value == "" and existing_value == "": resolved_variables[var_name] = [var_id, var_name, var_value]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
|
| 645 |
# Reconstruct the 'variables' dictionary using the resolved entries
|
| 646 |
new_variables_dict = {}
|
|
|
|
| 649 |
var_name_to_keep = var_data[1]
|
| 650 |
var_value_to_keep = var_data[2]
|
| 651 |
new_variables_dict[var_id_to_keep] = [var_name_to_keep, var_value_to_keep]
|
|
|
|
| 652 |
stage_target["variables"] = new_variables_dict
|
|
|
|
| 653 |
return project_data
|
| 654 |
|
| 655 |
def variable_adder_main(project_data):
|
|
|
|
| 666 |
print(f"Error error in the variable initialization opcodes: {e}")
|
| 667 |
|
| 668 |
|
| 669 |
+
# # --- Global variable for the block catalog ---
|
| 670 |
+
# ALL_SCRATCH_BLOCKS_CATALOG = {}
|
| 671 |
+
# BLOCK_CATALOG_PATH = "blocks" # Define the path to your JSON file
|
| 672 |
+
# HAT_BLOCKS_PATH = "hat_blocks" # Path to the hat blocks JSON file
|
| 673 |
+
# STACK_BLOCKS_PATH = "stack_blocks" # Path to the stack blocks JSON file
|
| 674 |
+
# REPORTER_BLOCKS_PATH = "reporter_blocks" # Path to the reporter blocks JSON file
|
| 675 |
+
# BOOLEAN_BLOCKS_PATH = "boolean_blocks" # Path to the boolean blocks JSON file
|
| 676 |
+
# C_BLOCKS_PATH = "c_blocks" # Path to the C blocks JSON file
|
| 677 |
+
# CAP_BLOCKS_PATH = "cap_blocks" # Path to the cap blocks JSON file
|
| 678 |
+
|
| 679 |
+
# # Load the block catalogs from their respective JSON files
|
| 680 |
+
# hat_block_data = _load_block_catalog(HAT_BLOCKS_PATH)
|
| 681 |
+
# hat_description = hat_block_data["description"]
|
| 682 |
+
# #hat_description = hat_block_data.get("description", "No description available")
|
| 683 |
+
# # hat_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in hat_block_data["blocks"]])
|
| 684 |
+
# hat_opcodes_functionalities = "\n".join([
|
| 685 |
+
# # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 686 |
+
# f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 687 |
+
# for block in hat_block_data.get("blocks", [])
|
| 688 |
+
# ]) if isinstance(hat_block_data.get("blocks"), list) else " No blocks information available."
|
| 689 |
+
# #hat_opcodes_functionalities = os.path.join(BLOCKS_DIR, "hat_blocks.txt")
|
| 690 |
+
# print("Hat blocks loaded successfully.", hat_description)
|
| 691 |
+
|
| 692 |
+
# boolean_block_data = _load_block_catalog(BOOLEAN_BLOCKS_PATH)
|
| 693 |
+
# boolean_description = boolean_block_data["description"]
|
| 694 |
+
# # boolean_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in boolean_block_data["blocks"]])
|
| 695 |
+
# boolean_opcodes_functionalities = "\n".join([
|
| 696 |
+
# # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 697 |
+
# f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 698 |
+
# for block in boolean_block_data.get("blocks", [])
|
| 699 |
+
# ]) if isinstance(boolean_block_data.get("blocks"), list) else " No blocks information available."
|
| 700 |
+
# #boolean_opcodes_functionalities = os.path.join(BLOCKS_DIR, "boolean_blocks.txt")
|
| 701 |
+
|
| 702 |
+
# c_block_data = _load_block_catalog(C_BLOCKS_PATH)
|
| 703 |
+
# c_description = c_block_data["description"]
|
| 704 |
+
# # c_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in c_block_data["blocks"]])
|
| 705 |
+
# c_opcodes_functionalities = "\n".join([
|
| 706 |
+
# # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 707 |
+
# f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 708 |
+
# for block in c_block_data.get("blocks", [])
|
| 709 |
+
# ]) if isinstance(c_block_data.get("blocks"), list) else " No blocks information available."
|
| 710 |
+
# #c_opcodes_functionalities = os.path.join(BLOCKS_DIR, "c_blocks.txt")
|
| 711 |
+
|
| 712 |
+
# cap_block_data = _load_block_catalog(CAP_BLOCKS_PATH)
|
| 713 |
+
# cap_description = cap_block_data["description"]
|
| 714 |
+
# # cap_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in cap_block_data["blocks"]])
|
| 715 |
+
# cap_opcodes_functionalities = "\n".join([
|
| 716 |
+
# # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 717 |
+
# f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 718 |
+
# for block in cap_block_data.get("blocks", [])
|
| 719 |
+
# ]) if isinstance(cap_block_data.get("blocks"), list) else " No blocks information available."
|
| 720 |
+
# #cap_opcodes_functionalities = os.path.join(BLOCKS_DIR, "cap_blocks.txt")
|
| 721 |
+
|
| 722 |
+
# reporter_block_data = _load_block_catalog(REPORTER_BLOCKS_PATH)
|
| 723 |
+
# reporter_description = reporter_block_data["description"]
|
| 724 |
+
# # reporter_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in reporter_block_data["blocks"]])
|
| 725 |
+
# reporter_opcodes_functionalities = "\n".join([
|
| 726 |
+
# # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 727 |
+
# f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 728 |
+
# for block in reporter_block_data.get("blocks", [])
|
| 729 |
+
# ]) if isinstance(reporter_block_data.get("blocks"), list) else " No blocks information available."
|
| 730 |
+
# #reporter_opcodes_functionalities = os.path.join(BLOCKS_DIR, "reporter_blocks.txt")
|
| 731 |
+
|
| 732 |
+
# stack_block_data = _load_block_catalog(STACK_BLOCKS_PATH)
|
| 733 |
+
# stack_description = stack_block_data["description"]
|
| 734 |
+
# # stack_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in stack_block_data["blocks"]])
|
| 735 |
+
# stack_opcodes_functionalities = "\n".join([
|
| 736 |
+
# # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 737 |
+
# f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
|
| 738 |
+
# for block in stack_block_data.get("blocks", [])
|
| 739 |
+
# ]) if isinstance(stack_block_data.get("blocks"), list) else " No blocks information available."
|
| 740 |
+
# #stack_opcodes_functionalities = os.path.join(BLOCKS_DIR, "stack_blocks.txt")
|
| 741 |
+
|
| 742 |
+
# # This makes ALL_SCRATCH_BLOCKS_CATALOG available globally
|
| 743 |
+
# ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
|
| 744 |
|
| 745 |
def extract_json_from_llm_response(raw_response: str) -> dict:
|
| 746 |
"""
|
|
|
|
| 1135 |
print(f"result:\n\n {result}")
|
| 1136 |
|
| 1137 |
except json.JSONDecodeError as error_json:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1138 |
correction_prompt = f"""
|
| 1139 |
Fix this malformed response and return only the corrected JSON:
|
| 1140 |
|
|
|
|
| 1151 |
try:
|
| 1152 |
correction_response = agent_json_resolver.invoke({"messages": [{"role": "user", "content": correction_prompt}]})
|
| 1153 |
corrected_output = extract_json_from_llm_response(correction_response['messages'][-1].content)
|
|
|
|
| 1154 |
result = corrected_output
|
| 1155 |
print(f"result:\n\n {result}")
|
| 1156 |
except Exception as e_corr:
|
|
|
|
| 1161 |
state["pseudo_code"] = result
|
| 1162 |
state["temp_pseudo_code"] += [result]
|
| 1163 |
Data = state["temp_pseudo_code"]
|
|
|
|
|
|
|
| 1164 |
print(f"[OVREALL REFINED PSEUDO CODE LOGIC]: {result}")
|
| 1165 |
print(f"[OVREALL LISTS OF LOGICS]: {Data}")
|
| 1166 |
logger.info("Plan refinement and block relation analysis completed for all plans.")
|
|
|
|
| 1295 |
logger.info(f"Action blocks added for sprite '{sprite_name}' by OverallBlockBuilderNode.")
|
| 1296 |
except Exception as e:
|
| 1297 |
logger.error(f"Error generating blocks for sprite '{sprite_name}': {e}")
|
| 1298 |
+
state["project_json"] = project_json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1299 |
return state
|
| 1300 |
|
| 1301 |
# Node 6: variable adder node
|
|
|
|
| 1377 |
|
| 1378 |
try:
|
| 1379 |
elements = partition_pdf(
|
| 1380 |
+
file=pdf_stream,
|
|
|
|
| 1381 |
strategy="hi_res",
|
| 1382 |
extract_image_block_types=["Image"],
|
| 1383 |
hi_res_model_name="yolox",
|
| 1384 |
extract_image_block_to_payload=True,
|
|
|
|
|
|
|
|
|
|
| 1385 |
)
|
| 1386 |
print(f"ELEMENTS")
|
| 1387 |
except Exception as e:
|
|
|
|
| 1398 |
continue
|
| 1399 |
|
| 1400 |
manipulated_json[f"Sprite {sprite_count}"] = {
|
|
|
|
|
|
|
| 1401 |
"base64": el["metadata"]["image_base64"],
|
| 1402 |
"file-path": pdf_id,
|
|
|
|
| 1403 |
}
|
| 1404 |
sprite_count += 1
|
| 1405 |
return manipulated_json
|
| 1406 |
except Exception as e:
|
| 1407 |
raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1408 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1409 |
''' It collects all the lists and paths from the JSON files and picks the best match's path. '''
|
|
|
|
| 1410 |
def similarity_matching(sprites_data: dict, project_folder: str, top_k: int = 1, min_similarity: float = None) -> str:
|
| 1411 |
print("🔍 Running similarity matching…")
|
| 1412 |
os.makedirs(project_folder, exist_ok=True)
|
| 1413 |
|
|
|
|
|
|
|
|
|
|
| 1414 |
backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
|
| 1415 |
sprite_base_path = os.path.normpath(str(SPRITE_DIR))
|
| 1416 |
code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
|