import gradio as gr
import os
import torch
import cadquery as cq
from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
from PIL import Image
import ast  # For safe evaluation of string-formatted lists
import datetime  # For timestamped output filenames

# --- CONFIGURATION (Keep as constants) ---
MODEL_PATH = "/raid/home/posahemanth/miniconda3/Sai/FinalYearProject/1000_gpusoutput"
OUTPUT_DIRECTORY = "/raid/home/posahemanth/miniconda3/Sai/FinalYearProject/Gradio_Output"  # Separate directory for Gradio-generated files
USE_FLASH_ATTENTION = True
PRE_TRAINED_MODEL_NAME = "microsoft/Phi-4-multimodal-instruct"
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)  # Ensure the output directory exists
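
# NOTE: the absolute paths above are machine-specific. One option (a sketch, not
# part of the original setup; CAD_MODEL_PATH / CAD_OUTPUT_DIR are hypothetical
# variable names) is to allow environment overrides:
#
#   MODEL_PATH = os.environ.get("CAD_MODEL_PATH", MODEL_PATH)
#   OUTPUT_DIRECTORY = os.environ.get("CAD_OUTPUT_DIR", OUTPUT_DIRECTORY)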


# --- MODEL LOADING (Global Scope) ---
# Load only once, outside the functions, to improve performance
try:
    config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True, local_files_only=True)
    config.attn_implementation = "flash_attention_2" if USE_FLASH_ATTENTION else "sdpa"
    config.num_logits_to_keep = 20

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        config=config,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16 if USE_FLASH_ATTENTION else torch.float32,
        local_files_only=True
    ).to("cuda").eval()  # .eval() is crucial for inference

    processor = AutoProcessor.from_pretrained(
        PRE_TRAINED_MODEL_NAME,
        trust_remote_code=True,
        local_files_only=False,
        config=config,
    )
except Exception as e:
    print(f"Error loading model/processor: {e}")
    raise  # Re-raise to halt execution
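
# Optional sanity check (an addition, assuming a startup log is desired):
# confirm the model landed on the GPU with the expected dtype.
print(f"Model loaded on {next(model.parameters()).device} "
      f"(dtype: {next(model.parameters()).dtype})")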


# --- CAPTION GENERATION ---
def generate_caption(image):
    """Generates a caption for the given image."""
    if image is None:
        return "Please upload an image."

    try:
        # Convert numpy array to PIL Image
        image = Image.fromarray(image).convert("RGB")
    except Exception as e:
        print(f"Error converting image: {e}")
        return "Error processing image."
    
    prompt = "Describe this image."
    user_message = {'role': 'user', 'content': f'<|image_1|>{prompt}'}
    prompt_tokenized = processor.tokenizer.apply_chat_template([user_message], tokenize=False, add_generation_prompt=True)
    inputs = processor(prompt_tokenized, images=[image], return_tensors='pt').to("cuda")

    try:
        with torch.no_grad(): # Ensure no gradients are calculated
            generated_ids = model.generate(
                **inputs,
                eos_token_id=processor.tokenizer.eos_token_id,
                max_new_tokens=512,
                num_logits_to_keep=20,
            )

        input_len = inputs.input_ids.size(1)
        generated_text = processor.decode(
            generated_ids[0, input_len:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        ).strip()
    except Exception as e:
        print(f"Error during generation: {e}")
        return "Error during caption generation."

    return generated_text
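
# Example standalone usage (a sketch; assumes a local test image "sample.png"):
#
#   import numpy as np
#   test_caption = generate_caption(np.array(Image.open("sample.png")))
#   print(test_caption)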

# --- CAD MODEL BUILDING ---
def build_model(sequence):
    """Builds a CAD model from a primitive sequence and returns the STEP file
    path, or an error string prefixed with "Error" on failure."""
    workplane = cq.Workplane("XY")
    model = None
    primitive = None

    if isinstance(sequence, str):
        try:
            sequence = ast.literal_eval(sequence)
        except (ValueError, SyntaxError):
            # "Error" prefix keeps this consistent with process_image's startswith check
            return "Error: invalid sequence format. Could not parse into a list."
    if not isinstance(sequence, list):
        return "Error: invalid sequence format. Expected a list."

    for step in sequence:
        index = step[0]
        if index == 0:  # Cube: length, width, height, location, axis
            _, length, width, height, loc_x, loc_y, loc_z, axis = step
            primitive = workplane.box(length, width, height).translate((loc_x, loc_y, loc_z))
        elif index == 1:  # Cylinder: height, radius, location, axis
            _, height, radius, loc_x, loc_y, loc_z, axis = step
            # CadQuery's Workplane.cylinder takes (height, radius) in that order
            primitive = workplane.cylinder(height, radius).translate((loc_x, loc_y, loc_z))
        elif index == 2:  # Sphere: radius, location, axis
            _, radius, loc_x, loc_y, loc_z, axis = step
            primitive = workplane.sphere(radius).translate((loc_x, loc_y, loc_z))

        if primitive is None:
            print(f"Skipping step {step}: no primitive has been created yet.")
            continue

        if index in (3, 4, 5):  # Boolean operations on the most recent primitive
            _, loc_x, loc_y, loc_z = step
            translated = primitive.translate((loc_x, loc_y, loc_z))
            if model is None:
                # Nothing to combine with yet; the translated primitive seeds the model
                model = translated
            elif index == 3:
                model = model.union(translated)
            elif index == 4:
                model = model.cut(translated)
            elif index == 5:
                model = model.intersect(translated)

        if model is None:
            model = primitive

    if model is None:
        return "Error: No valid CAD model was created."

    # Create a unique filename using a timestamp to avoid collisions across runs
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    model_name = f"generated_model_{timestamp}"
    step_file_path = os.path.join(OUTPUT_DIRECTORY, f"{model_name}.step")
    try:
        cq.exporters.export(model, step_file_path)
    except Exception as e:
        return f"Error exporting STEP file: {e}"
    return step_file_path
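
# Example of the expected sequence layout, inferred from the unpacking logic in
# build_model (the exact token order is an assumption, not a documented format):
#
#   [[0, 10, 10, 10, 0, 0, 0, 0],   # index 0: cube (length, width, height, x, y, z, axis)
#    [1, 12, 2, 0, 0, 0, 0],        # index 1: cylinder (height, radius, x, y, z, axis)
#    [4, 0, 0, 0]]                  # index 4: cut the last primitive from the model
#
# The same structure can also be passed as a string, e.g.
# build_model("[[0, 10, 10, 10, 0, 0, 0, 0]]"), thanks to ast.literal_eval.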


def process_image(image):
    """Runs the full pipeline: image -> generated sequence -> STEP file."""
    if image is None:
        return "Please upload an image first.", None

    caption = generate_caption(image)
    if not caption or caption.startswith("Error"):
        return caption, None

    # The fine-tuned model is expected to emit a list-formatted CAD sequence,
    # which build_model parses and converts to geometry.
    step_file_path = build_model(caption)
    if step_file_path.startswith("Error"):
        return step_file_path, None

    return "CAD model generated successfully!", step_file_path


# --- GRADIO INTERFACE ---

css = """
.container {
    max-width: 800px;
    margin: auto;
    padding: 20px;
    border: 2px solid #ddd;
    border-radius: 10px;
}
h1 {
    text-align: center;
    color: #333;
}
.description {
    text-align: center;
    margin-bottom: 20px;
}
.input-section, .output-section {
    margin-bottom: 20px;
    padding: 10px;
    border: 1px solid #ccc;
    border-radius: 5px;
}
.input-section h2, .output-section h2 {
    margin-top: 0;
    color: #555;
}
.output-section p {
    font-weight: bold;
}

"""

iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(label="Upload Image", type="numpy"),
    outputs=[
        gr.Textbox(label="Status"),  # Show status messages
        gr.File(label="Download STEP File")  # Download link for the file
    ],
    title="Image to CAD Converter",
    description="Upload an image of a mechanical drawing, and this app will attempt to generate a corresponding STEP CAD file.",
    css=css, # Apply the CSS
    allow_flagging="never",  # Disable flagging
    theme=gr.themes.Soft(),
)

# share=True exposes a temporary public gradio.live URL; drop it for local-only use.
iface.launch(share=True)