prthm11 committed on
Commit
4805ab5
·
verified ·
1 Parent(s): 9a0e352

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -519
app.py CHANGED
@@ -2,44 +2,32 @@ from flask import Flask, request, jsonify, render_template, send_from_directory,
2
  import cv2, json,base64,io,os,tempfile,logging, re
3
  import numpy as np
4
  from unstructured.partition.pdf import partition_pdf
5
- from PIL import Image
6
- # from imutils.perspective import four_point_transform
7
  from dotenv import load_dotenv
8
- import pytesseract
9
  from werkzeug.utils import secure_filename
10
  from langchain_groq import ChatGroq
11
  from langgraph.prebuilt import create_react_agent
12
  from pdf2image import convert_from_path, convert_from_bytes
13
- from concurrent.futures import ThreadPoolExecutor
14
- from pdf2image.exceptions import PDFInfoNotInstalledError
15
  from typing import Dict, TypedDict, Optional, Any
16
  from langgraph.graph import StateGraph, END
17
  import uuid
18
  import shutil, time, functools
19
- from langchain_experimental.open_clip.open_clip import OpenCLIPEmbeddings
20
- from langchain_core.utils.utils import secret_from_env
21
- # from matplotlib.offsetbox import OffsetImage, AnnotationBbox
22
  from io import BytesIO
23
  from pathlib import Path
24
  import os
25
  from utils.block_relation_builder import block_builder, separate_scripts, transform_logic_to_action_flow, analyze_opcode_counts
26
- from langchain.chat_models import ChatOpenAI
27
- from langchain_openai import ChatOpenAI
28
- from pydantic import Field, SecretStr
29
  from difflib import get_close_matches
30
  import torch
31
  from transformers import AutoImageProcessor, AutoModel
32
- import faiss
33
- from sentence_transformers import SentenceTransformer
34
-
35
- # image tools
36
- from PIL import Image, ImageOps, ImageEnhance
37
  import cv2
38
-
39
  # hashing & image-match
40
  from imagededup.methods import PHash
41
  from image_match.goldberg import ImageSignature
42
-
43
  # --- Config (tune threads as needed) ---
44
  # DINOv2 model id
45
  DINOV2_MODEL = "facebook/dinov2-small"
@@ -63,7 +51,6 @@ dinov2_model.eval()
63
  phash = PHash()
64
  gis = ImageSignature()
65
 
66
-
67
  def log_execution_time(func):
68
  @functools.wraps(func)
69
  def wrapper(*args, **kwargs):
@@ -98,12 +85,6 @@ llm = ChatGroq(
98
 
99
  app = Flask(__name__)
100
 
101
- # ============================== #
102
- # TESSERACT CONFIGURATION #
103
- # ============================== #
104
- pytesseract.pytesseract.tesseract_cmd = (r'/usr/bin/tesseract')
105
-
106
- # poppler_path = r"C:\poppler\Library\bin"
107
  backdrop_images_path = r"app\blocks\Backdrops"
108
  sprite_images_path = r"app\blocks\sprites"
109
  code_blocks_image_path = r"app\blocks\code_blocks"
@@ -119,11 +100,6 @@ SPRITE_DIR = BLOCKS_DIR / "sprites"
119
  CODE_BLOCKS_DIR = BLOCKS_DIR / "code_blocks"
120
  # === new: outputs rooted under BASE_DIR ===
121
  OUTPUT_DIR = BASE_DIR / "outputs"
122
- INDEX_PATH = os.path.join(BLOCKS_DIR, "faiss_index.bin")
123
- PATHS_JSON_PATH = os.path.join(BLOCKS_DIR, "image_paths.json")
124
- # DETECTED_IMAGE_DIR = OUTPUT_DIR / "DETECTED_IMAGE"
125
- # SCANNED_IMAGE_DIR = OUTPUT_DIR / "SCANNED_IMAGE"
126
- # JSON_DIR = OUTPUT_DIR / "EXTRACTED_JSON"
127
 
128
  # Global variables to hold the model and index, loaded only once.
129
  MODEL = None
@@ -139,9 +115,6 @@ for d in (
139
  SPRITE_DIR,
140
  CODE_BLOCKS_DIR,
141
  OUTPUT_DIR,
142
- # DETECTED_IMAGE_DIR,
143
- # SCANNED_IMAGE_DIR,
144
- # JSON_DIR,
145
  ):
146
  d.mkdir(parents=True, exist_ok=True)
147
 
@@ -195,108 +168,19 @@ RULES:
195
  4. If you can't find the data, use "Unknown" for name_variable and "No pseudocode found" for pseudocode
196
  """
197
 
198
- # debugger and resolver agent for Scratch 3.0
199
  # Main agent of the system agent for Scratch 3.0
200
  agent = create_react_agent(
201
  model=llm,
202
  tools=[], # No specific tools are defined here, but could be added later
203
  prompt=SYSTEM_PROMPT
204
  )
205
- # agent_2 = create_react_agent(
206
- # model=llm2,
207
- # tools=[], # No specific tools are defined here, but could be added later
208
- # prompt=SYSTEM_PROMPT
209
- # )
210
  agent_json_resolver = create_react_agent(
211
  model=llm,
212
  tools=[], # No specific tools are defined here, but could be added later
213
  prompt=SYSTEM_PROMPT_JSON_CORRECTOR
214
  )
215
 
216
- def load_model_and_index():
217
- """
218
- Loads the SentenceTransformer model, FAISS index, and image paths into global variables.
219
- This function is called once on the first run to avoid reloading heavy assets.
220
- """
221
- global MODEL, FAISS_INDEX, IMAGE_PATHS
222
-
223
- # This check ensures we only load everything once
224
- if MODEL is None:
225
- logger.info("Loading CLIP model 'clip-ViT-L-14' for the first time...")
226
- MODEL = SentenceTransformer('clip-ViT-L-14')
227
- logger.info("✅ CLIP model loaded.")
228
-
229
- logger.info(f"Loading FAISS index from: {INDEX_PATH}")
230
- FAISS_INDEX = faiss.read_index(INDEX_PATH)
231
- logger.info("✅ FAISS index loaded.")
232
-
233
- logger.info(f"Loading image paths from: {PATHS_JSON_PATH}")
234
- with open(PATHS_JSON_PATH, "r") as f:
235
- IMAGE_PATHS = json.load(f)
236
- logger.info("✅ Image paths loaded.")
237
-
238
- import torch
239
- from transformers import AutoImageProcessor, AutoModel
240
- import numpy as np
241
- from PIL import Image
242
- from pathlib import Path
243
- from io import BytesIO
244
- import json
245
-
246
- # def init_dinov2(model_name: str = DINOV2_MODEL, device: torch.device = DEVICE):
247
- # """
248
- # Lazy-initialize DINOv2 processor & model (call once before embedding).
249
- # """
250
- # global _dinov2_processor, _dinov2_model
251
- # if _dinov2_processor is None or _dinov2_model is None:
252
- # _dinov2_processor = AutoImageProcessor.from_pretrained(model_name)
253
- # _dinov2_model = AutoModel.from_pretrained(model_name)
254
- # _dinov2_model.eval().to(device)
255
-
256
-
257
- # def embed_bytesio_list(bytesio_list, batch_size: int = 8):
258
- # """
259
- # Accepts a list of BytesIO objects (each contains an image).
260
- # Returns: np.ndarray shape (N, D) of L2-normalized embeddings (dtype float32).
261
- # """
262
- # if _dinov2_processor is None or _dinov2_model is None:
263
- # init_dinov2()
264
-
265
- # imgs = []
266
- # for b in bytesio_list:
267
- # with Image.open(b) as original_img:
268
- # # Create a new image with a white background in RGB mode
269
- # final_img = Image.new("RGB", original_img.size, (255, 255, 255))
270
- # # Paste the original image onto the white background, using the alpha channel as a mask if it exists
271
- # if original_img.mode == 'RGBA':
272
- # final_img.paste(original_img, mask=original_img.split()[-1])
273
- # else:
274
- # final_img.paste(original_img)
275
- # imgs.append(final_img.copy())
276
-
277
- # embs = []
278
- # for i in range(0, len(imgs), batch_size):
279
- # batch = imgs[i: i + batch_size]
280
- # inputs = _dinov2_processor(images=batch, return_tensors="pt")
281
- # inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
282
- # with torch.no_grad():
283
- # out = _dinov2_model(**inputs)
284
- # cls = out.last_hidden_state[:, 0, :] # (B, D)
285
- # cls = torch.nn.functional.normalize(cls, p=2, dim=1)
286
- # embs.append(cls.cpu().numpy())
287
-
288
- # if not embs:
289
- # return np.zeros((0, _dinov2_model.config.hidden_size), dtype=np.float32)
290
-
291
- # return np.vstack(embs).astype(np.float32)
292
-
293
- # def l2_normalize_rows(a: np.ndarray, eps: float = 1e-12) -> np.ndarray:
294
- # """
295
- # Row-wise L2 normalization for numpy arrays.
296
- # """
297
- # norm = np.linalg.norm(a, axis=1, keepdims=True)
298
- # return a / (norm + eps)
299
-
300
  # -----------------------
301
  # SERIALIZABLE HELPER
302
  # -----------------------
@@ -457,8 +341,7 @@ def cosine_similarity(a, b):
457
  from collections import defaultdict
458
  import math
459
 
460
- def choose_top_candidates(embedding_results, phash_results, imgmatch_results, top_k=10,
461
- method_weights=(0.5, 0.3, 0.2), verbose=True):
462
  """
463
  embedding_results: list of (path, emb_sim) where emb_sim roughly in [-1,1] (we'll clamp to 0..1)
464
  phash_results: list of (path, hamming, ph_sim) where ph_sim in [0,1]
@@ -600,8 +483,7 @@ def is_subpath(path: str, base: str) -> bool:
600
  try:
601
  p = os.path.normpath(os.path.abspath(path))
602
  b = os.path.normpath(os.path.abspath(base))
603
- if os.name == "nt":
604
- p = p.lower(); b = b.lower()
605
  return os.path.commonpath([p, b]) == b
606
  except Exception:
607
  return False
@@ -619,7 +501,6 @@ def _load_block_catalog(block_type: str) -> Dict:
619
  catalog = json.loads(text) # will raise JSONDecodeError if malformed
620
  logger.info(f"Successfully loaded block catalog from {catalog_path}")
621
  return catalog
622
-
623
  except FileNotFoundError:
624
  logger.error(f"Error: Block catalog file not found at {catalog_path}")
625
  except json.JSONDecodeError as e:
@@ -634,8 +515,7 @@ def get_block_by_opcode(catalog_data: dict, opcode: str) -> dict | None:
634
  Returns the block dict or None if not found.
635
  """
636
  for block in catalog_data["blocks"]:
637
- if block.get("op_code") == opcode:
638
- return block
639
  return None
640
 
641
  # Helper function to find a block in all catalogs by opcode
@@ -646,8 +526,7 @@ def find_block_in_all(opcode: str, all_catalogs: list[dict]) -> dict | None:
646
  """
647
  for catalog in all_catalogs:
648
  blk = get_block_by_opcode(catalog, opcode)
649
- if blk is not None:
650
- return blk
651
  return None
652
 
653
  def variable_intialization(project_data):
@@ -655,24 +534,18 @@ def variable_intialization(project_data):
655
  Updates variable and broadcast definitions in a Scratch project JSON,
656
  populating the 'variables' and 'broadcasts' sections of the Stage target
657
  and extracting initial values for variables.
658
-
659
- Args:
660
- project_data (dict): The loaded JSON data of the Scratch project.
661
-
662
- Returns:
663
- dict: The updated project JSON data.
664
  """
665
 
666
  stage_target = None
667
  for target in project_data['targets']:
668
- if target.get('isStage'):
669
  stage_target = target
670
  break
671
-
672
  if stage_target is None:
673
  print("Error: Stage target not found in the project data.")
674
  return project_data
675
-
676
  # Ensure 'variables' and 'broadcasts' exist in the Stage target
677
  if "variables" not in stage_target:
678
  stage_target["variables"] = {}
@@ -741,14 +614,9 @@ def deduplicate_variables(project_data):
741
  """
742
  Removes duplicate variable entries in the 'variables' dictionary of the Stage target,
743
  prioritizing entries with non-empty values.
744
-
745
- Args:
746
- project_data (dict): The loaded JSON data of the Scratch project.
747
-
748
- Returns:
749
- dict: The updated project JSON data with deduplicated variables.
750
  """
751
-
752
  stage_target = None
753
  for target in project_data['targets']:
754
  if target.get('isStage'):
@@ -762,36 +630,17 @@ def deduplicate_variables(project_data):
762
  if "variables" not in stage_target:
763
  return project_data # No variables to deduplicate
764
 
765
- # Use a temporary dictionary to store the preferred variable entry by name
766
- # Format: {variable_name: [variable_id, variable_name, variable_value]}
767
  resolved_variables = {}
768
 
769
  for var_id, var_info in stage_target["variables"].items():
770
  var_name = var_info[0]
771
  var_value = var_info[1]
772
 
773
- if var_name not in resolved_variables:
774
- # If the variable name is not yet seen, add it
775
- resolved_variables[var_name] = [var_id, var_name, var_value]
776
- else:
777
- # If the variable name is already seen, decide which one to keep
778
- existing_id, existing_name, existing_value = resolved_variables[var_name]
779
-
780
- # Prioritize the entry with a non-empty value
781
- if var_value != "" and existing_value == "":
782
- resolved_variables[var_name] = [var_id, var_name, var_value]
783
- # If both have non-empty values, or both are empty, keep the current one (arbitrary choice, but consistent)
784
- # The current logic will effectively keep the last one encountered that has a value,
785
- # or the very last one if all are empty.
786
- elif var_value != "" and existing_value != "":
787
- # If there are multiple non-empty values for the same variable name
788
- # this keeps the one from the most recent iteration.
789
- # For the given example, this will correctly keep "5".
790
- resolved_variables[var_name] = [var_id, var_name, var_value]
791
- elif var_value == "" and existing_value == "":
792
- # If both are empty, just keep the current one (arbitrary)
793
- resolved_variables[var_name] = [var_id, var_name, var_value]
794
-
795
 
796
  # Reconstruct the 'variables' dictionary using the resolved entries
797
  new_variables_dict = {}
@@ -800,9 +649,7 @@ def deduplicate_variables(project_data):
800
  var_name_to_keep = var_data[1]
801
  var_value_to_keep = var_data[2]
802
  new_variables_dict[var_id_to_keep] = [var_name_to_keep, var_value_to_keep]
803
-
804
  stage_target["variables"] = new_variables_dict
805
-
806
  return project_data
807
 
808
  def variable_adder_main(project_data):
@@ -819,81 +666,81 @@ def variable_adder_main(project_data):
819
  print(f"Error error in the variable initialization opcodes: {e}")
820
 
821
 
822
- # --- Global variable for the block catalog ---
823
- ALL_SCRATCH_BLOCKS_CATALOG = {}
824
- BLOCK_CATALOG_PATH = "blocks" # Define the path to your JSON file
825
- HAT_BLOCKS_PATH = "hat_blocks" # Path to the hat blocks JSON file
826
- STACK_BLOCKS_PATH = "stack_blocks" # Path to the stack blocks JSON file
827
- REPORTER_BLOCKS_PATH = "reporter_blocks" # Path to the reporter blocks JSON file
828
- BOOLEAN_BLOCKS_PATH = "boolean_blocks" # Path to the boolean blocks JSON file
829
- C_BLOCKS_PATH = "c_blocks" # Path to the C blocks JSON file
830
- CAP_BLOCKS_PATH = "cap_blocks" # Path to the cap blocks JSON file
831
-
832
- # Load the block catalogs from their respective JSON files
833
- hat_block_data = _load_block_catalog(HAT_BLOCKS_PATH)
834
- hat_description = hat_block_data["description"]
835
- #hat_description = hat_block_data.get("description", "No description available")
836
- # hat_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in hat_block_data["blocks"]])
837
- hat_opcodes_functionalities = "\n".join([
838
- # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
839
- f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
840
- for block in hat_block_data.get("blocks", [])
841
- ]) if isinstance(hat_block_data.get("blocks"), list) else " No blocks information available."
842
- #hat_opcodes_functionalities = os.path.join(BLOCKS_DIR, "hat_blocks.txt")
843
- print("Hat blocks loaded successfully.", hat_description)
844
-
845
- boolean_block_data = _load_block_catalog(BOOLEAN_BLOCKS_PATH)
846
- boolean_description = boolean_block_data["description"]
847
- # boolean_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in boolean_block_data["blocks"]])
848
- boolean_opcodes_functionalities = "\n".join([
849
- # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
850
- f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
851
- for block in boolean_block_data.get("blocks", [])
852
- ]) if isinstance(boolean_block_data.get("blocks"), list) else " No blocks information available."
853
- #boolean_opcodes_functionalities = os.path.join(BLOCKS_DIR, "boolean_blocks.txt")
854
-
855
- c_block_data = _load_block_catalog(C_BLOCKS_PATH)
856
- c_description = c_block_data["description"]
857
- # c_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in c_block_data["blocks"]])
858
- c_opcodes_functionalities = "\n".join([
859
- # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
860
- f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
861
- for block in c_block_data.get("blocks", [])
862
- ]) if isinstance(c_block_data.get("blocks"), list) else " No blocks information available."
863
- #c_opcodes_functionalities = os.path.join(BLOCKS_DIR, "c_blocks.txt")
864
-
865
- cap_block_data = _load_block_catalog(CAP_BLOCKS_PATH)
866
- cap_description = cap_block_data["description"]
867
- # cap_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in cap_block_data["blocks"]])
868
- cap_opcodes_functionalities = "\n".join([
869
- # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
870
- f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
871
- for block in cap_block_data.get("blocks", [])
872
- ]) if isinstance(cap_block_data.get("blocks"), list) else " No blocks information available."
873
- #cap_opcodes_functionalities = os.path.join(BLOCKS_DIR, "cap_blocks.txt")
874
-
875
- reporter_block_data = _load_block_catalog(REPORTER_BLOCKS_PATH)
876
- reporter_description = reporter_block_data["description"]
877
- # reporter_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in reporter_block_data["blocks"]])
878
- reporter_opcodes_functionalities = "\n".join([
879
- # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
880
- f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
881
- for block in reporter_block_data.get("blocks", [])
882
- ]) if isinstance(reporter_block_data.get("blocks"), list) else " No blocks information available."
883
- #reporter_opcodes_functionalities = os.path.join(BLOCKS_DIR, "reporter_blocks.txt")
884
-
885
- stack_block_data = _load_block_catalog(STACK_BLOCKS_PATH)
886
- stack_description = stack_block_data["description"]
887
- # stack_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in stack_block_data["blocks"]])
888
- stack_opcodes_functionalities = "\n".join([
889
- # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
890
- f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
891
- for block in stack_block_data.get("blocks", [])
892
- ]) if isinstance(stack_block_data.get("blocks"), list) else " No blocks information available."
893
- #stack_opcodes_functionalities = os.path.join(BLOCKS_DIR, "stack_blocks.txt")
894
-
895
- # This makes ALL_SCRATCH_BLOCKS_CATALOG available globally
896
- ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
897
 
898
  def extract_json_from_llm_response(raw_response: str) -> dict:
899
  """
@@ -1288,19 +1135,6 @@ end
1288
  print(f"result:\n\n {result}")
1289
 
1290
  except json.JSONDecodeError as error_json:
1291
- # If JSON parsing fails, use the json resolver agent
1292
- # correction_prompt = (
1293
- # "Your task is to correct the provided JSON string to ensure it is **syntactically perfect and adheres strictly to JSON rules**.\n"
1294
- # "It must be a JSON object with `refined_logic` (string) and `block_relationships` (array of objects).\n"
1295
- # f"- **Error Details**: {error_json}\n\n"
1296
- # "**Strict Instructions for your response:**\n"
1297
- # "1. **ONLY** output the corrected JSON. Do not include any other text or explanations.\n"
1298
- # "2. Ensure all keys and string values are enclosed in **double quotes**. Escape internal quotes (`\\`).\n"
1299
- # "3. No trailing commas. Correct nesting.\n\n"
1300
- # "Here is the problematic JSON string to correct:\n"
1301
- # f"```json\n{llm_output_raw}\n```\n"
1302
- # "Corrected JSON:\n"
1303
- # )
1304
  correction_prompt = f"""
1305
  Fix this malformed response and return only the corrected JSON:
1306
 
@@ -1317,7 +1151,6 @@ Extract the sprite name and pseudocode, then return in this exact format:
1317
  try:
1318
  correction_response = agent_json_resolver.invoke({"messages": [{"role": "user", "content": correction_prompt}]})
1319
  corrected_output = extract_json_from_llm_response(correction_response['messages'][-1].content)
1320
- #block_relationships = corrected_output.get("block_relationships", [])
1321
  result = corrected_output
1322
  print(f"result:\n\n {result}")
1323
  except Exception as e_corr:
@@ -1328,8 +1161,6 @@ Extract the sprite name and pseudocode, then return in this exact format:
1328
  state["pseudo_code"] = result
1329
  state["temp_pseudo_code"] += [result]
1330
  Data = state["temp_pseudo_code"]
1331
- # with open("debug_state.json", "w", encoding="utf-8") as f:
1332
- # json.dump(state, f, indent=2, ensure_ascii=False)
1333
  print(f"[OVREALL REFINED PSEUDO CODE LOGIC]: {result}")
1334
  print(f"[OVREALL LISTS OF LOGICS]: {Data}")
1335
  logger.info("Plan refinement and block relation analysis completed for all plans.")
@@ -1464,14 +1295,7 @@ def overall_block_builder_node_2(state: GameState):
1464
  logger.info(f"Action blocks added for sprite '{sprite_name}' by OverallBlockBuilderNode.")
1465
  except Exception as e:
1466
  logger.error(f"Error generating blocks for sprite '{sprite_name}': {e}")
1467
- # Consider adding more specific error handling here if a malformed output
1468
- # from block_builder should cause a specific state change, but generally
1469
- # avoid nulling the entire project_json.
1470
-
1471
- state["project_json"] = project_json
1472
- # with open("debug_state.json", "w", encoding="utf-8") as f:
1473
- # json.dump(state, f, indent=2, ensure_ascii=False)
1474
-
1475
  return state
1476
 
1477
  # Node 6: variable adder node
@@ -1553,15 +1377,11 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
1553
 
1554
  try:
1555
  elements = partition_pdf(
1556
- # filename=str(pdf_path), # partition_pdf might expect a string
1557
- file=pdf_stream, # 'file=', inplace of 'filename'
1558
  strategy="hi_res",
1559
  extract_image_block_types=["Image"],
1560
  hi_res_model_name="yolox",
1561
  extract_image_block_to_payload=True,
1562
- # ocr_languages=ocr_lang,
1563
- # extract_images_in_pdf=False,
1564
- # extract_image_block_output_dir=r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\pdf_output"
1565
  )
1566
  print(f"ELEMENTS")
1567
  except Exception as e:
@@ -1578,260 +1398,19 @@ def extract_images_from_pdf(pdf_stream: io.BytesIO):
1578
  continue
1579
 
1580
  manipulated_json[f"Sprite {sprite_count}"] = {
1581
- # "id":auto_id,
1582
- # "name": name,
1583
  "base64": el["metadata"]["image_base64"],
1584
  "file-path": pdf_id,
1585
- # "description": description
1586
  }
1587
  sprite_count += 1
1588
  return manipulated_json
1589
  except Exception as e:
1590
  raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
1591
-
1592
- # def similarity_matching(sprites_data: str, project_folder: str) -> str:
1593
- # logger.info("🔍 Running similarity matching…")
1594
- # os.makedirs(project_folder, exist_ok=True)
1595
-
1596
- # # ----------------------------------------
1597
- # # CHANGED: define normalized base-paths so startswith() checks work
1598
- # backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
1599
- # sprite_base_path = os.path.normpath(str(SPRITE_DIR))
1600
- # code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
1601
- # # ----------------------------------------
1602
-
1603
- # project_json_path = os.path.join(project_folder, "project.json")
1604
-
1605
- # # ==============================
1606
- # # READ SPRITE METADATA
1607
- # # ==============================
1608
- # # with open(input_json_path, 'r') as f:
1609
- # # sprites_data = json.load(f)
1610
-
1611
- # sprite_ids, sprite_base64 = [], []
1612
- # for sid, sprite in sprites_data.items():
1613
- # sprite_ids.append(sid)
1614
- # # texts.append("This is " + sprite.get("description", sprite.get("name", "")))
1615
- # sprite_base64.append(sprite["base64"])
1616
-
1617
- # sprite_images_bytes = []
1618
- # for b64 in sprite_base64:
1619
- # img = Image.open(BytesIO(base64.b64decode(b64.split(",")[-1]))).convert("RGB")
1620
- # buffer = BytesIO()
1621
- # img.save(buffer, format="PNG")
1622
- # buffer.seek(0)
1623
- # sprite_images_bytes.append(buffer)
1624
-
1625
- # # =========================================
1626
- # # Build the list of all candidate images
1627
- # # =========================================
1628
- # folder_image_paths = [
1629
- # BACKDROP_DIR/"Baseball 2.sb3"/"7be1f5b3e682813dac1f297e52ff7dca.png",
1630
- # BACKDROP_DIR/"Beach Malibu.sb3"/"050615fe992a00d6af0e664e497ebf53.png",
1631
- # BACKDROP_DIR/"Bedroom 3.sb3"/"8cc0b88d53345b3e337e8f028a32a4e7.png",
1632
- # BACKDROP_DIR/"Blue Sky.sb3"/"e7c147730f19d284bcd7b3f00af19bb6.png",
1633
- # BACKDROP_DIR/"Castle 2.sb3"/"951765ee7f7370f120c9df20b577c22f.png",
1634
- # BACKDROP_DIR/"Colorful City.sb3"/"04d18ddd1b85f0ea30beb14b8da49f60.png",
1635
- # BACKDROP_DIR/"Hall.sb3"/"ea86ca30b346f27ca5faf1254f6a31e3.png",
1636
- # BACKDROP_DIR/"Jungle.sb3"/"f4f908da19e2753f3ed679d7b37650ca.png",
1637
- # BACKDROP_DIR/"Soccer.sb3"/"04a63154f04b09494354090f7cc2f1b9.png",
1638
- # BACKDROP_DIR/"Theater.sb3"/"c2b097bc5cdb6a14ef5485202bc5ee76.png",
1639
-
1640
- # SPRITE_DIR/"Batter.sprite3"/"592ee9ab2aeefe65cb4fb95fcd046f33.png",
1641
- # SPRITE_DIR/"Batter.sprite3"/"9d193bef6e3d6d8eba6d1470b8bf9351.png",
1642
- # SPRITE_DIR/"Batter.sprite3"/"baseball_sprite_motion_1.png",
1643
- # SPRITE_DIR/"Batter.sprite3"/"bd4fc003528acfa847e45ff82f346eee.png",
1644
- # SPRITE_DIR/"Batter.sprite3"/"fdfde4bcbaca0f68e83fdf3f4ef0c660.png",
1645
- # SPRITE_DIR/"Bear.sprite3"/"6f303e972f33fcb7ef36d0d8012d0975.png",
1646
- # SPRITE_DIR/"Bear.sprite3"/"bear_motion_2.png",
1647
- # SPRITE_DIR/"Bear.sprite3"/"deef1eaa96d550ae6fc11524a1935024.png",
1648
- # SPRITE_DIR/"Beetle.sprite3"/"46d0dfd4ae7e9bfe3a6a2e35a4905eae.png",
1649
- # SPRITE_DIR/"Butterfly 1.sprite3"/"34b76c1835c6a7fc2c47956e49bb0f52.png",
1650
- # SPRITE_DIR/"Butterfly 1.sprite3"/"49c9f952007d870a046cff93b6e5e098.png",
1651
- # SPRITE_DIR/"Butterfly 1.sprite3"/"fe98df7367e314d9640bfaa54fc239be.png",
1652
- # SPRITE_DIR/"Cat.sprite3"/"0fb9be3e8397c983338cb71dc84d0b25.png",
1653
- # SPRITE_DIR/"Cat.sprite3"/"bcf454acf82e4504149f7ffe07081dbc.png",
1654
- # SPRITE_DIR/"Centaur.sprite3"/"2373556e776cad3ba4d6ee04fc34550b.png",
1655
- # SPRITE_DIR/"Centaur.sprite3"/"c00ffa6c5dd0baf9f456b897ff974377.png",
1656
- # SPRITE_DIR/"Centaur.sprite3"/"d722329bd9373ad80625e5be6d52f3ed.png",
1657
- # SPRITE_DIR/"Centaur.sprite3"/"d7aa990538915b7ef1f496d7e8486ade.png",
1658
- # SPRITE_DIR/"City Bus.sprite3"/"7d7e26014a346b894db8ab1819f2167f.png",
1659
- # SPRITE_DIR/"City Bus.sprite3"/"e9694adbff9422363e2ea03166015393.png",
1660
- # SPRITE_DIR/"Crab.sprite3"/"49839aa1b0feed02a3c759db5f8dee71.png",
1661
- # SPRITE_DIR/"Crab.sprite3"/"bear_element.png",
1662
- # SPRITE_DIR/"Crab.sprite3"/"f7cdd2acbc6d7559d33be8675059c79e.png",
1663
- # SPRITE_DIR/"Glow-G.sprite3"/"56839bc48957869d980c6f9b6f5a2a91.png",
1664
- # SPRITE_DIR/"Jordyn.sprite3"/"00c8c464c19460df693f8d5ae69afdab.png",
1665
- # SPRITE_DIR/"Jordyn.sprite3"/"768c4601174f0dfcb96b3080ccc3a192.png",
1666
- # SPRITE_DIR/"Jordyn.sprite3"/"a7cc1e5f02b58ecc8095cfc18eef0289.png",
1667
- # SPRITE_DIR/"Jordyn.sprite3"/"db4d97cbf24e2b8af665bfbf06f67fa0.png",
1668
- # SPRITE_DIR/"Soccer Ball.sprite3"/"5d973d7a3a8be3f3bd6e1cd0f73c32b5.png",
1669
- # SPRITE_DIR/"Soccer Ball.sprite3"/"cat_football.png",
1670
- # SPRITE_DIR/"Star.sprite3"/"551629f2a64c1f3703e57aaa133effa6.png",
1671
- # SPRITE_DIR/"Wizard.sprite3"/"55ba51188af86ca16ef30267e874c1ed.png",
1672
- # SPRITE_DIR/"Wizard.sprite3"/"91d495085eb4d02a375c42f6318071e7.png",
1673
- # SPRITE_DIR/"Wizard.sprite3"/"df943c9894ee4b9df8c5893ce30c2a5f.png",
1674
-
1675
- # # CODE_BLOCKS_DIR/"client_code_block_1.jpg",
1676
- # # CODE_BLOCKS_DIR/"client_code_block_2.jpg",
1677
- # CODE_BLOCKS_DIR/"script1.JPG",
1678
- # CODE_BLOCKS_DIR/"script2.JPG",
1679
- # CODE_BLOCKS_DIR/"script3.JPG",
1680
- # CODE_BLOCKS_DIR/"script4.JPG",
1681
- # CODE_BLOCKS_DIR/"script5.JPG",
1682
- # CODE_BLOCKS_DIR/"script6.JPG",
1683
- # CODE_BLOCKS_DIR/"script7.JPG",
1684
- # CODE_BLOCKS_DIR/"script8.JPG",
1685
- # CODE_BLOCKS_DIR/"script9.JPG",
1686
- # CODE_BLOCKS_DIR/"static_white.png"]
1687
- # folder_image_paths = [os.path.normpath(str(p)) for p in folder_image_paths]
1688
- # # =========================================
1689
-
1690
- # # -----------------------------------------
1691
- # # Load reference embeddings from JSON
1692
- # # -----------------------------------------
1693
- # with open(f"{BLOCKS_DIR}/dinov2_embeddings.json", "r") as f:
1694
- # embedding_json = json.load(f)
1695
-
1696
- # # ============================== #
1697
- # # EMBED SPRITE IMAGES #
1698
- # # ============================== #
1699
- # # ensure model is initialized (fast no-op after first call)
1700
- # init_dinov2()
1701
-
1702
- # # embed the incoming sprite BytesIO images (same data structure you already use)
1703
- # sprite_matrix = embed_bytesio_list(sprite_images_bytes, batch_size=8) # shape (N, D)
1704
-
1705
- # # load reference embeddings from JSON (they must be numeric lists)
1706
- # img_matrix = np.array([img["embeddings"] for img in embedding_json], dtype=np.float32)
1707
-
1708
- # # normalize both sides (important — stored embeddings may not be normalized)
1709
- # sprite_matrix = l2_normalize_rows(sprite_matrix)
1710
- # img_matrix = l2_normalize_rows(img_matrix)
1711
-
1712
- # # =========================================
1713
- # # Compute similarities & pick best match
1714
- # # =========================================
1715
- # similarity = np.matmul(sprite_matrix, img_matrix.T)
1716
- # most_similar_indices = np.argmax(similarity, axis=1)
1717
-
1718
- # # =========================================
1719
- # # Copy matched sprite assets + collect data
1720
- # # =========================================
1721
- # project_data = []
1722
- # copied_folders = set()
1723
-
1724
- # for sprite_idx, matched_idx in enumerate(most_similar_indices):
1725
- # matched_image_path = folder_image_paths[matched_idx]
1726
- # matched_folder = os.path.dirname(matched_image_path)
1727
-
1728
- # # CHANGED: use our new normalized sprite_base_path
1729
- # if not matched_folder.startswith(sprite_base_path):
1730
- # continue
1731
-
1732
- # if matched_folder in copied_folders:
1733
- # continue
1734
- # copied_folders.add(matched_folder)
1735
- # logger.info(f"Matched sprite: {matched_image_path}")
1736
-
1737
- # sprite_json_path = os.path.join(matched_folder, 'sprite.json')
1738
- # if not os.path.exists(sprite_json_path):
1739
- # logger.warning(f"No sprite.json in {matched_folder}")
1740
- # continue
1741
-
1742
- # with open(sprite_json_path, 'r') as f:
1743
- # sprite_info = json.load(f)
1744
- # # copy all non‐matched files
1745
- # for fname in os.listdir(matched_folder):
1746
- # if fname in (os.path.basename(matched_image_path), 'sprite.json'):
1747
- # continue
1748
- # shutil.copy2(os.path.join(matched_folder, fname),
1749
- # os.path.join(project_folder, fname))
1750
- # project_data.append(sprite_info)
1751
-
1752
- # # =========================================
1753
- # # Copy matched backdrop assets + collect
1754
- # # =========================================
1755
- # backdrop_data = []
1756
- # copied_backdrop_folders = set()
1757
- # for backdrop_idx, matched_idx in enumerate(most_similar_indices):
1758
- # matched_image_path = folder_image_paths[matched_idx]
1759
- # matched_folder = os.path.dirname(matched_image_path)
1760
- # matched_filename = os.path.basename(matched_image_path)
1761
-
1762
- # # CHANGED: use our new normalized backdrop_base_path
1763
- # if not matched_folder.startswith(backdrop_base_path):
1764
- # continue
1765
-
1766
- # # skip if backdrop folder already processed
1767
- # if matched_folder in copied_backdrop_folders:
1768
- # continue
1769
- # copied_backdrop_folders.add(matched_folder)
1770
-
1771
- # logger.info(f"Matched backdrop: {matched_image_path}")
1772
-
1773
- # # 1) Copy the matched backdrop image itself
1774
- # try:
1775
- # shutil.copy2(
1776
- # matched_image_path,
1777
- # os.path.join(project_folder, matched_filename)
1778
- # )
1779
- # logger.info(f"✅ Copied matched backdrop image {matched_filename} to {project_folder}")
1780
- # except Exception as e:
1781
- # logger.error(f"❌ Failed to copy matched backdrop {matched_image_path}: {e}")
1782
 
1783
- # # copy non‐matched files
1784
- # for fname in os.listdir(matched_folder):
1785
- # # if fname in (os.path.basename(matched_image_path), 'project.json'):
1786
- # if fname in {matched_filename, 'project.json'}:
1787
- # continue
1788
- # # shutil.copy2(os.path.join(matched_folder, fname),
1789
- # # os.path.join(project_folder, fname))
1790
- # src = os.path.join(matched_folder, fname)
1791
- # dst = os.path.join(project_folder, fname)
1792
- # if os.path.isfile(src):
1793
- # try:
1794
- # shutil.copy2(src, dst)
1795
- # logger.info(f"Copied additional backdrop asset {fname} to project folder")
1796
- # except Exception as e:
1797
- # logger.error(f"Failed to copy {src}: {e}")
1798
-
1799
- # # append the stage‐target from its project.json
1800
- # pj = os.path.join(matched_folder, 'project.json')
1801
- # if os.path.exists(pj):
1802
- # with open(pj, 'r') as f:
1803
- # bd_json = json.load(f)
1804
- # for tgt in bd_json.get("targets", []):
1805
- # if tgt.get("isStage"):
1806
- # backdrop_data.append(tgt)
1807
- # else:
1808
- # logger.warning(f"No project.json in {matched_folder}")
1809
-
1810
-
1811
- # # =========================================
1812
- # # Merge into final Scratch project.json
1813
- # # =========================================
1814
- # final_project = {
1815
- # "targets": [], "monitors": [], "extensions": [],
1816
- # "meta": {
1817
- # "semver": "3.0.0",
1818
- # "vm": "11.3.0",
1819
- # "agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
1820
- # }
1821
- # }
1822
- # # sprites first
1823
- # for spr in project_data:
1824
- # if not spr.get("isStage", False):
1825
- # final_project["targets"].append(spr)
1826
  ''' Appends all the lists and paths from the JSON files and picks the best match's path. '''
1827
-
1828
  def similarity_matching(sprites_data: dict, project_folder: str, top_k: int = 1, min_similarity: float = None) -> str:
1829
  print("🔍 Running similarity matching…")
1830
  os.makedirs(project_folder, exist_ok=True)
1831
 
1832
- # backdrop_base_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\Backdrops"
1833
- # sprite_base_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\sprites"
1834
- # code_blocks_path = r"D:\DEV PATEL\2025\scratch_VLM\scratch_agent\blocks\code_blocks"
1835
  backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
1836
  sprite_base_path = os.path.normpath(str(SPRITE_DIR))
1837
  code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))
 
2
  import cv2, json,base64,io,os,tempfile,logging, re
3
  import numpy as np
4
  from unstructured.partition.pdf import partition_pdf
5
+ from PIL import Image, ImageOps, ImageEnhance
 
6
  from dotenv import load_dotenv
7
+ # import pytesseract
8
  from werkzeug.utils import secure_filename
9
  from langchain_groq import ChatGroq
10
  from langgraph.prebuilt import create_react_agent
11
  from pdf2image import convert_from_path, convert_from_bytes
 
 
12
  from typing import Dict, TypedDict, Optional, Any
13
  from langgraph.graph import StateGraph, END
14
  import uuid
15
  import shutil, time, functools
 
 
 
16
  from io import BytesIO
17
  from pathlib import Path
18
  import os
19
  from utils.block_relation_builder import block_builder, separate_scripts, transform_logic_to_action_flow, analyze_opcode_counts
 
 
 
20
  from difflib import get_close_matches
21
  import torch
22
  from transformers import AutoImageProcessor, AutoModel
23
+ from pathlib import Path
24
+ from io import BytesIO
25
+ import torch
26
+ import json
 
27
  import cv2
 
28
  # hashing & image-match
29
  from imagededup.methods import PHash
30
  from image_match.goldberg import ImageSignature
 
31
  # --- Config (tune threads as needed) ---
32
  # DINOv2 model id
33
  DINOV2_MODEL = "facebook/dinov2-small"
 
51
  phash = PHash()
52
  gis = ImageSignature()
53
 
 
54
  def log_execution_time(func):
55
  @functools.wraps(func)
56
  def wrapper(*args, **kwargs):
 
85
 
86
  app = Flask(__name__)
87
 
 
 
 
 
 
 
88
  backdrop_images_path = r"app\blocks\Backdrops"
89
  sprite_images_path = r"app\blocks\sprites"
90
  code_blocks_image_path = r"app\blocks\code_blocks"
 
100
  CODE_BLOCKS_DIR = BLOCKS_DIR / "code_blocks"
101
  # === new: outputs rooted under BASE_DIR ===
102
  OUTPUT_DIR = BASE_DIR / "outputs"
 
 
 
 
 
103
 
104
  # Global variables to hold the model and index, loaded only once.
105
  MODEL = None
 
115
  SPRITE_DIR,
116
  CODE_BLOCKS_DIR,
117
  OUTPUT_DIR,
 
 
 
118
  ):
119
  d.mkdir(parents=True, exist_ok=True)
120
 
 
168
  4. If you can't find the data, use "Unknown" for name_variable and "No pseudocode found" for pseudocode
169
  """
170
 
 
171
  # Main agent of the system: the agent for Scratch 3.0
172
  agent = create_react_agent(
173
  model=llm,
174
  tools=[], # No specific tools are defined here, but could be added later
175
  prompt=SYSTEM_PROMPT
176
  )
177
+
 
 
 
 
178
  agent_json_resolver = create_react_agent(
179
  model=llm,
180
  tools=[], # No specific tools are defined here, but could be added later
181
  prompt=SYSTEM_PROMPT_JSON_CORRECTOR
182
  )
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  # -----------------------
185
  # SERIALIZABLE HELPER
186
  # -----------------------
 
341
  from collections import defaultdict
342
  import math
343
 
344
+ def choose_top_candidates(embedding_results, phash_results, imgmatch_results, top_k=10, method_weights=(0.5, 0.3, 0.2), verbose=True):
 
345
  """
346
  embedding_results: list of (path, emb_sim) where emb_sim roughly in [-1,1] (we'll clamp to 0..1)
347
  phash_results: list of (path, hamming, ph_sim) where ph_sim in [0,1]
 
483
  try:
484
  p = os.path.normpath(os.path.abspath(path))
485
  b = os.path.normpath(os.path.abspath(base))
486
+ if os.name == "nt": p = p.lower(); b = b.lower()
 
487
  return os.path.commonpath([p, b]) == b
488
  except Exception:
489
  return False
 
501
  catalog = json.loads(text) # will raise JSONDecodeError if malformed
502
  logger.info(f"Successfully loaded block catalog from {catalog_path}")
503
  return catalog
 
504
  except FileNotFoundError:
505
  logger.error(f"Error: Block catalog file not found at {catalog_path}")
506
  except json.JSONDecodeError as e:
 
515
  Returns the block dict or None if not found.
516
  """
517
  for block in catalog_data["blocks"]:
518
+ if block.get("op_code") == opcode: return block
 
519
  return None
520
 
521
  # Helper function to find a block in all catalogs by opcode
 
526
  """
527
  for catalog in all_catalogs:
528
  blk = get_block_by_opcode(catalog, opcode)
529
+ if blk is not None: return blk
 
530
  return None
531
 
532
  def variable_intialization(project_data):
 
534
  Updates variable and broadcast definitions in a Scratch project JSON,
535
  populating the 'variables' and 'broadcasts' sections of the Stage target
536
  and extracting initial values for variables.
537
+ Args: project_data (dict): The loaded JSON data of the Scratch project.
538
+ Returns: dict: The updated project JSON data.
 
 
 
 
539
  """
540
 
541
  stage_target = None
542
  for target in project_data['targets']:
543
+ if target.get('isStage'):
544
  stage_target = target
545
  break
 
546
  if stage_target is None:
547
  print("Error: Stage target not found in the project data.")
548
  return project_data
 
549
  # Ensure 'variables' and 'broadcasts' exist in the Stage target
550
  if "variables" not in stage_target:
551
  stage_target["variables"] = {}
 
614
  """
615
  Removes duplicate variable entries in the 'variables' dictionary of the Stage target,
616
  prioritizing entries with non-empty values.
617
+ Args: project_data (dict): The loaded JSON data of the Scratch project.
618
+ Returns: dict: The updated project JSON data with deduplicated variables.
 
 
 
 
619
  """
 
620
  stage_target = None
621
  for target in project_data['targets']:
622
  if target.get('isStage'):
 
630
  if "variables" not in stage_target:
631
  return project_data # No variables to deduplicate
632
 
 
 
633
  resolved_variables = {}
634
 
635
  for var_id, var_info in stage_target["variables"].items():
636
  var_name = var_info[0]
637
  var_value = var_info[1]
638
 
639
+ if var_name not in resolved_variables: resolved_variables[var_name] = [var_id, var_name, var_value]
640
+ else: existing_id, existing_name, existing_value = resolved_variables[var_name]
641
+ if var_value != "" and existing_value == "": resolved_variables[var_name] = [var_id, var_name, var_value]
642
+ elif var_value != "" and existing_value != "": resolved_variables[var_name] = [var_id, var_name, var_value]
643
+ elif var_value == "" and existing_value == "": resolved_variables[var_name] = [var_id, var_name, var_value]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
 
645
  # Reconstruct the 'variables' dictionary using the resolved entries
646
  new_variables_dict = {}
 
649
  var_name_to_keep = var_data[1]
650
  var_value_to_keep = var_data[2]
651
  new_variables_dict[var_id_to_keep] = [var_name_to_keep, var_value_to_keep]
 
652
  stage_target["variables"] = new_variables_dict
 
653
  return project_data
654
 
655
  def variable_adder_main(project_data):
 
666
  print(f"Error error in the variable initialization opcodes: {e}")
667
 
668
 
669
+ # # --- Global variable for the block catalog ---
670
+ # ALL_SCRATCH_BLOCKS_CATALOG = {}
671
+ # BLOCK_CATALOG_PATH = "blocks" # Define the path to your JSON file
672
+ # HAT_BLOCKS_PATH = "hat_blocks" # Path to the hat blocks JSON file
673
+ # STACK_BLOCKS_PATH = "stack_blocks" # Path to the stack blocks JSON file
674
+ # REPORTER_BLOCKS_PATH = "reporter_blocks" # Path to the reporter blocks JSON file
675
+ # BOOLEAN_BLOCKS_PATH = "boolean_blocks" # Path to the boolean blocks JSON file
676
+ # C_BLOCKS_PATH = "c_blocks" # Path to the C blocks JSON file
677
+ # CAP_BLOCKS_PATH = "cap_blocks" # Path to the cap blocks JSON file
678
+
679
+ # # Load the block catalogs from their respective JSON files
680
+ # hat_block_data = _load_block_catalog(HAT_BLOCKS_PATH)
681
+ # hat_description = hat_block_data["description"]
682
+ # #hat_description = hat_block_data.get("description", "No description available")
683
+ # # hat_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in hat_block_data["blocks"]])
684
+ # hat_opcodes_functionalities = "\n".join([
685
+ # # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
686
+ # f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
687
+ # for block in hat_block_data.get("blocks", [])
688
+ # ]) if isinstance(hat_block_data.get("blocks"), list) else " No blocks information available."
689
+ # #hat_opcodes_functionalities = os.path.join(BLOCKS_DIR, "hat_blocks.txt")
690
+ # print("Hat blocks loaded successfully.", hat_description)
691
+
692
+ # boolean_block_data = _load_block_catalog(BOOLEAN_BLOCKS_PATH)
693
+ # boolean_description = boolean_block_data["description"]
694
+ # # boolean_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in boolean_block_data["blocks"]])
695
+ # boolean_opcodes_functionalities = "\n".join([
696
+ # # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
697
+ # f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
698
+ # for block in boolean_block_data.get("blocks", [])
699
+ # ]) if isinstance(boolean_block_data.get("blocks"), list) else " No blocks information available."
700
+ # #boolean_opcodes_functionalities = os.path.join(BLOCKS_DIR, "boolean_blocks.txt")
701
+
702
+ # c_block_data = _load_block_catalog(C_BLOCKS_PATH)
703
+ # c_description = c_block_data["description"]
704
+ # # c_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in c_block_data["blocks"]])
705
+ # c_opcodes_functionalities = "\n".join([
706
+ # # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
707
+ # f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
708
+ # for block in c_block_data.get("blocks", [])
709
+ # ]) if isinstance(c_block_data.get("blocks"), list) else " No blocks information available."
710
+ # #c_opcodes_functionalities = os.path.join(BLOCKS_DIR, "c_blocks.txt")
711
+
712
+ # cap_block_data = _load_block_catalog(CAP_BLOCKS_PATH)
713
+ # cap_description = cap_block_data["description"]
714
+ # # cap_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in cap_block_data["blocks"]])
715
+ # cap_opcodes_functionalities = "\n".join([
716
+ # # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
717
+ # f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
718
+ # for block in cap_block_data.get("blocks", [])
719
+ # ]) if isinstance(cap_block_data.get("blocks"), list) else " No blocks information available."
720
+ # #cap_opcodes_functionalities = os.path.join(BLOCKS_DIR, "cap_blocks.txt")
721
+
722
+ # reporter_block_data = _load_block_catalog(REPORTER_BLOCKS_PATH)
723
+ # reporter_description = reporter_block_data["description"]
724
+ # # reporter_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in reporter_block_data["blocks"]])
725
+ # reporter_opcodes_functionalities = "\n".join([
726
+ # # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
727
+ # f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
728
+ # for block in reporter_block_data.get("blocks", [])
729
+ # ]) if isinstance(reporter_block_data.get("blocks"), list) else " No blocks information available."
730
+ # #reporter_opcodes_functionalities = os.path.join(BLOCKS_DIR, "reporter_blocks.txt")
731
+
732
+ # stack_block_data = _load_block_catalog(STACK_BLOCKS_PATH)
733
+ # stack_description = stack_block_data["description"]
734
+ # # stack_opcodes_functionalities = "\n".join([f" - Opcode: {block['op_code']}, functionality: {block['functionality']} example: standalone use: {block['example_standalone']}" for block in stack_block_data["blocks"]])
735
+ # stack_opcodes_functionalities = "\n".join([
736
+ # # f" - Opcode: {block.get('op_code', 'N/A')}, functionality: {block.get('functionality', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
737
+ # f" - Opcode: {block.get('op_code', 'N/A')}, example: standalone use {block.get('example_standalone', 'N/A')}"
738
+ # for block in stack_block_data.get("blocks", [])
739
+ # ]) if isinstance(stack_block_data.get("blocks"), list) else " No blocks information available."
740
+ # #stack_opcodes_functionalities = os.path.join(BLOCKS_DIR, "stack_blocks.txt")
741
+
742
+ # # This makes ALL_SCRATCH_BLOCKS_CATALOG available globally
743
+ # ALL_SCRATCH_BLOCKS_CATALOG = _load_block_catalog(BLOCK_CATALOG_PATH)
744
 
745
  def extract_json_from_llm_response(raw_response: str) -> dict:
746
  """
 
1135
  print(f"result:\n\n {result}")
1136
 
1137
  except json.JSONDecodeError as error_json:
 
 
 
 
 
 
 
 
 
 
 
 
 
1138
  correction_prompt = f"""
1139
  Fix this malformed response and return only the corrected JSON:
1140
 
 
1151
  try:
1152
  correction_response = agent_json_resolver.invoke({"messages": [{"role": "user", "content": correction_prompt}]})
1153
  corrected_output = extract_json_from_llm_response(correction_response['messages'][-1].content)
 
1154
  result = corrected_output
1155
  print(f"result:\n\n {result}")
1156
  except Exception as e_corr:
 
1161
  state["pseudo_code"] = result
1162
  state["temp_pseudo_code"] += [result]
1163
  Data = state["temp_pseudo_code"]
 
 
1164
  print(f"[OVREALL REFINED PSEUDO CODE LOGIC]: {result}")
1165
  print(f"[OVREALL LISTS OF LOGICS]: {Data}")
1166
  logger.info("Plan refinement and block relation analysis completed for all plans.")
 
1295
  logger.info(f"Action blocks added for sprite '{sprite_name}' by OverallBlockBuilderNode.")
1296
  except Exception as e:
1297
  logger.error(f"Error generating blocks for sprite '{sprite_name}': {e}")
1298
+ state["project_json"] = project_json
 
 
 
 
 
 
 
1299
  return state
1300
 
1301
  # Node 6: variable adder node
 
1377
 
1378
  try:
1379
  elements = partition_pdf(
1380
+ file=pdf_stream,
 
1381
  strategy="hi_res",
1382
  extract_image_block_types=["Image"],
1383
  hi_res_model_name="yolox",
1384
  extract_image_block_to_payload=True,
 
 
 
1385
  )
1386
  print(f"ELEMENTS")
1387
  except Exception as e:
 
1398
  continue
1399
 
1400
  manipulated_json[f"Sprite {sprite_count}"] = {
 
 
1401
  "base64": el["metadata"]["image_base64"],
1402
  "file-path": pdf_id,
 
1403
  }
1404
  sprite_count += 1
1405
  return manipulated_json
1406
  except Exception as e:
1407
  raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1408
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1409
  ''' Appends all the lists and paths from the JSON files and picks the best match's path. '''
 
1410
  def similarity_matching(sprites_data: dict, project_folder: str, top_k: int = 1, min_similarity: float = None) -> str:
1411
  print("🔍 Running similarity matching…")
1412
  os.makedirs(project_folder, exist_ok=True)
1413
 
 
 
 
1414
  backdrop_base_path = os.path.normpath(str(BACKDROP_DIR))
1415
  sprite_base_path = os.path.normpath(str(SPRITE_DIR))
1416
  code_blocks_path = os.path.normpath(str(CODE_BLOCKS_DIR))