import gradio as gr
import os
import torch
import cadquery as cq
from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
from PIL import Image
import ast  # For safe evaluation of string-formatted lists
import datetime  # For timestamped output filenames

# --- CONFIGURATION (Keep as constants) ---
MODEL_PATH = "/raid/home/posahemanth/miniconda3/Sai/FinalYearProject/1000_gpusoutput"
OUTPUT_DIRECTORY = "/raid/home/posahemanth/miniconda3/Sai/FinalYearProject/Gradio_Output"  # Separate directory for Gradio-generated files
USE_FLASH_ATTENTION = True
PRE_TRAINED_MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)  # Ensure the output directory exists
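
# NOTE: the absolute paths above are machine-specific. One option (a sketch, not
# part of the original setup; CAD_MODEL_PATH / CAD_OUTPUT_DIR are hypothetical
# variable names) is to allow environment overrides:
#
#   MODEL_PATH = os.environ.get("CAD_MODEL_PATH", MODEL_PATH)
#   OUTPUT_DIRECTORY = os.environ.get("CAD_OUTPUT_DIR", OUTPUT_DIRECTORY)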


# --- MODEL LOADING (Global Scope) ---
# Load only once, outside the functions, to improve performance
try:
    config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True, local_files_only=True)
    config.attn_implementation = "flash_attention_2" if USE_FLASH_ATTENTION else "sdpa"
    config.num_logits_to_keep = 20

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        config=config,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16 if USE_FLASH_ATTENTION else torch.float32,
        local_files_only=True
    ).to("cuda").eval()  # .eval() is crucial for inference

    processor = AutoProcessor.from_pretrained(
        PRE_TRAINED_MODEL_NAME,
        trust_remote_code=True,
        local_files_only=False,
        config=config,
    )
except Exception as e:
    print(f"Error loading model/processor: {e}")
    raise  # Re-raise to halt execution
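
# Optional sanity check (an addition, assuming a startup log is desired):
# confirm the model landed on the GPU with the expected dtype.
print(f"Model loaded on {next(model.parameters()).device} "
      f"(dtype: {next(model.parameters()).dtype})")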


# --- CAPTION GENERATION ---
def generate_caption(image):
    """Generates a caption for the given image."""
    if image is None:
        return "Please upload an image."

    try:
        # Convert numpy array to PIL Image
        image = Image.fromarray(image).convert("RGB")
    except Exception as e:
        print(f"Error converting image: {e}")
        return "Error processing image."
    
    prompt = "Describe this image."
    user_message = {'role': 'user', 'content': f'<|image_1|>{prompt}'}
    prompt_tokenized = processor.tokenizer.apply_chat_template([user_message], tokenize=False, add_generation_prompt=True)
    inputs = processor(prompt_tokenized, images=[image], return_tensors='pt').to("cuda")

    try:
        with torch.no_grad(): # Ensure no gradients are calculated
            generated_ids = model.generate(
                **inputs,
                eos_token_id=processor.tokenizer.eos_token_id,
                max_new_tokens=512,
                num_logits_to_keep=20,
            )

        input_len = inputs.input_ids.size(1)
        generated_text = processor.decode(
            generated_ids[0, input_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        ).strip()
    except Exception as e:
        print(f"Error during generation: {e}")
        return "Error during caption generation."

    return generated_text
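
# Example standalone usage (a sketch; assumes a local test image "sample.png"):
#
#   import numpy as np
#   test_caption = generate_caption(np.array(Image.open("sample.png")))
#   print(test_caption)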

# --- CAD MODEL BUILDING ---
def build_model(sequence):
    """Builds a CAD model from a primitive sequence and returns the STEP file
    path, or an error string prefixed with "Error" on failure."""
    workplane = cq.Workplane("XY")
    model = None
    primitive = None

    if isinstance(sequence, str):
        try:
            sequence = ast.literal_eval(sequence)
        except (ValueError, SyntaxError):
            # "Error" prefix keeps this consistent with process_image's startswith check
            return "Error: invalid sequence format. Could not parse into a list."
    if not isinstance(sequence, list):
        return "Error: invalid sequence format. Expected a list."

    for step in sequence:
        index = step[0]
        if index == 0:  # Cube: length, width, height, location, axis
            _, length, width, height, loc_x, loc_y, loc_z, axis = step
            primitive = workplane.box(length, width, height).translate((loc_x, loc_y, loc_z))
        elif index == 1:  # Cylinder: height, radius, location, axis
            _, height, radius, loc_x, loc_y, loc_z, axis = step
            # CadQuery's Workplane.cylinder takes (height, radius) in that order
            primitive = workplane.cylinder(height, radius).translate((loc_x, loc_y, loc_z))
        elif index == 2:  # Sphere: radius, location, axis
            _, radius, loc_x, loc_y, loc_z, axis = step
            primitive = workplane.sphere(radius).translate((loc_x, loc_y, loc_z))

        if primitive is None:
            print(f"Skipping step {step}: no primitive has been created yet.")
            continue

        if index in (3, 4, 5):  # Boolean operations on the most recent primitive
            _, loc_x, loc_y, loc_z = step
            translated = primitive.translate((loc_x, loc_y, loc_z))
            if model is None:
                # Nothing to combine with yet; the translated primitive seeds the model
                model = translated
            elif index == 3:
                model = model.union(translated)
            elif index == 4:
                model = model.cut(translated)
            elif index == 5:
                model = model.intersect(translated)

        if model is None:
            model = primitive

    if model is None:
        return "Error: No valid CAD model was created."

    # Create a unique filename using a timestamp to avoid collisions across runs
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    model_name = f"generated_model_{timestamp}"
    step_file_path = os.path.join(OUTPUT_DIRECTORY, f"{model_name}.step")
    try:
        cq.exporters.export(model, step_file_path)
    except Exception as e:
        return f"Error exporting STEP file: {e}"
    return step_file_path
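
# Example of the expected sequence layout, inferred from the unpacking logic in
# build_model (the exact token order is an assumption, not a documented format):
#
#   [[0, 10, 10, 10, 0, 0, 0, 0],   # index 0: cube (length, width, height, x, y, z, axis)
#    [1, 12, 2, 0, 0, 0, 0],        # index 1: cylinder (height, radius, x, y, z, axis)
#    [4, 0, 0, 0]]                  # index 4: cut the last primitive from the model
#
# The same structure can also be passed as a string, e.g.
# build_model("[[0, 10, 10, 10, 0, 0, 0, 0]]"), thanks to ast.literal_eval.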


def process_image(image):
    """Runs the full pipeline: image -> generated sequence -> STEP file."""
    if image is None:
        return "Please upload an image first.", None

    caption = generate_caption(image)
    if not caption or caption.startswith("Error"):
        return caption, None

    # The fine-tuned model is expected to emit a list-formatted CAD sequence,
    # which build_model parses and converts to geometry.
    step_file_path = build_model(caption)
    if step_file_path.startswith("Error"):
        return step_file_path, None

    return "CAD model generated successfully!", step_file_path


# --- GRADIO INTERFACE ---

css = """
.container {
    max-width: 800px;
    margin: auto;
    padding: 20px;
    border: 2px solid #ddd;
    border-radius: 10px;
}
h1 {
    text-align: center;
    color: #333;
}
.description {
    text-align: center;
    margin-bottom: 20px;
}
.input-section, .output-section {
    margin-bottom: 20px;
    padding: 10px;
    border: 1px solid #ccc;
    border-radius: 5px;
}
.input-section h2, .output-section h2 {
    margin-top: 0;
    color: #555;
}
.output-section p {
    font-weight: bold;
}

"""

iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(label="Upload Image", type="numpy"),
    outputs=[
        gr.Textbox(label="Status"),  # Show status messages
        gr.File(label="Download STEP File")  # Download link for the file
    ],
    title="Image to CAD Converter",
    description="Upload an image of a mechanical drawing, and this app will attempt to generate a corresponding STEP CAD file.",
    css=css, # Apply the CSS
    allow_flagging="never",  # Disable flagging
    theme=gr.themes.Soft(),
)

# share=True exposes a temporary public gradio.live URL; drop it for local-only use.
iface.launch(share=True)