File size: 5,411 Bytes
547c16c
 
 
 
 
 
 
 
eee2032
547c16c
eee2032
 
 
 
 
 
 
 
 
 
 
547c16c
b4cb782
547c16c
 
b4cb782
547c16c
 
 
 
 
 
 
 
 
f0b450b
 
 
 
 
 
 
 
5af0d0a
f0b450b
 
 
 
5af0d0a
 
f0b450b
 
 
 
 
b91c702
f0b450b
 
 
 
 
 
 
 
01800d9
b91c702
 
f0b450b
 
 
 
 
 
 
 
0241c1e
547c16c
 
 
 
b4cb782
547c16c
 
 
 
07fddd1
461bfcc
547c16c
 
 
 
 
 
00c1960
 
 
 
 
 
 
 
 
b4cb782
 
 
 
 
 
 
 
 
 
 
 
 
9ed859c
b4cb782
 
547c16c
 
b4cb782
 
547c16c
 
b4cb782
 
 
 
547c16c
 
 
55a8bcb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import time
import base64
import io
from PIL import Image
from bs4 import BeautifulSoup
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage

def resize_and_encode_image(image_file, max_long_side=1024):
    """Downscale an image if needed and return it as a base64 PNG data URL.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (a path or a binary
            file-like object).
        max_long_side: Maximum allowed size, in pixels, of the longer edge.
            Images whose longer edge already fits are not resized.

    Returns:
        A string of the form ``"data:image/png;base64,<payload>"``.
    """
    # Modes Pillow's PNG writer can encode directly. Anything else (CMYK or
    # YCbCr JPEGs, for example) makes ``save(format="PNG")`` fail, so those
    # images are converted before encoding.
    png_modes = {"1", "L", "LA", "I", "I;16", "I;16B", "P", "RGB", "RGBA"}

    # The context manager releases the file handle Pillow opens when
    # ``image_file`` is a path.
    with Image.open(image_file) as source:
        img = source
        width, height = img.size

        if max(width, height) > max_long_side:
            # Scale the long edge to ``max_long_side`` and keep the aspect
            # ratio; clamp the short edge to 1 px because ``int()`` truncates
            # to 0 for extreme aspect ratios and ``resize`` rejects that.
            if width > height:
                new_width = max_long_side
                new_height = max(1, int(max_long_side * (height / width)))
            else:
                new_height = max_long_side
                new_width = max(1, int(max_long_side * (width / height)))
            img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

        if img.mode not in png_modes:
            # Best-effort conversion: keep transparency where the mode has it.
            img = img.convert("RGBA" if img.mode in ("PA", "RGBa") else "RGB")

        buffered = io.BytesIO()
        img.save(buffered, format="PNG")

    base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{base64_str}"

def beautify_html(html_code):
    """Return ``html_code`` re-serialized by BeautifulSoup's ``prettify``.

    The markup is parsed with the ``"html.parser"`` backend and the resulting
    tree is written back out as an indented string.
    """
    return BeautifulSoup(html_code, "html.parser").prettify()

def _response_text(content):
    """Flatten a chat message's ``content`` into one string."""
    if isinstance(content, str):
        return content
    if not isinstance(content, (list, tuple)):
        return ""
    # Content may also arrive as a list of blocks: plain strings and/or
    # dicts carrying a "text" entry. Non-text blocks are ignored.
    pieces = []
    for part in content:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and isinstance(part.get("text"), str):
            pieces.append(part["text"])
    return "".join(pieces)


def _extract_html_document(raw_text):
    """Strip markdown fences and surrounding chatter from a model reply."""
    text = raw_text.strip()
    lowered = text.lower()

    # A reply that already starts with the doctype is used untouched.
    if lowered.startswith("<!doctype html"):
        return text

    fence_at = lowered.find("```html")
    if fence_at != -1:
        start = fence_at + len("```html")
    else:
        start = lowered.find("<!doctype html")
        if start == -1:
            # Nothing recognisable to trim; let the caller validate it.
            return text

    # Cut at the closing fence (if any) so trailing backticks or commentary
    # after the code block do not end up inside the page.
    end = text.find("```", start)
    document = text[start:] if end == -1 else text[start:end]
    return document.strip()


def generate_html_css_from_image(image_file):
    """Ask Gemini to turn a UI screenshot into a single HTML+CSS document.

    The image is downscaled and base64-encoded, sent together with a fixed
    instruction prompt, and the reply is cleaned of markdown fences. The call
    is attempted up to five times, sleeping between attempts (5 s after an
    invalid reply or unexpected error, ``30 * attempt`` s after a rate-limit
    or server error). No sleep happens after the final attempt.

    Args:
        image_file: Screenshot to convert; passed to
            ``resize_and_encode_image``.

    Returns:
        The prettified HTML, guaranteed to start with ``<!DOCTYPE html>``, or
        ``None`` if no non-empty reply was ever received. If every attempt
        produced a reply lacking ``<html``/``<body`` tags, the most recently
        received reply is still returned as a best effort.
    """
    image_data_url = resize_and_encode_image(image_file)

    prompt_text = """
You are an expert front-end developer.

The input is a screenshot of a website UI. Carefully analyze its layout and generate accurate, semantic, and maintainable HTML and CSS.

Follow these professional guidelines:

1) Structure & Semantics:
- Use HTML5 semantic tags that match the visual hierarchy (e.g., <header>, <nav>, <main>, <section>, <article>, <aside>, <footer>)
- Use HTML5 non-semantic tags that match the visual hierarchy (e.g., <div>, <span>)
- Reflect layout grouping using appropriate containers and divs where needed

2) Layout & Responsiveness:
- Use Flexbox or CSS Grid for layout
- Include responsive breakpoints (laptop and desktop-first) with at least one media query
- Ensure layout adapts well to **laptop and desktop screen sizes (min-width: 1024px)** using responsive design techniques like media queries

3) CSS Practices:
- Keep CSS in a <style> block or separate file (no inline styles)
- Use class names that follow a clean naming convention (e.g., BEM or descriptive naming)
- Group CSS rules logically (layout, typography, components)
- Use `max-width` + `white-space` + `line-height` for text containers to ensure headings/subheadings **wrap exactly** as in screenshot (e.g., 1 line or 2 lines).

4) Accessibility & UX:
- Add accessible markup: alt text, ARIA roles, labels
- Ensure good contrast and keyboard navigability

5) Content & Comments:
- Use meaningful placeholder text (not lorem ipsum)
- Add short code comments to explain each major section
- Accurately match the **font-size, font-weight, text-size and line-height** of all visible text, especially major headings and hero titles.
- Use **CSS font-size in rem or px** to replicate the exact visual size of headings and subheadings from the screenshot.
- Ensure text blocks wrap correctly as in the screenshot (e.g., long headings on one line, subheadings on two lines if seen). Use **max-width, white-space, and line-height** to control wrapping.

6) Output:
- The output should be a complete single HTML file with embedded CSS
- Preserve the visual structure and content flow of the original screenshot as closely as possible
- Do not skip or summarize any sections

Assume this is for real production-ready front-end code generation from a web UI screenshot.
"""

    prompt = [
        HumanMessage(
            content=[
                {"type": "text", "text": prompt_text},
                {"type": "image_url", "image_url": {"url": image_data_url, "mime_type": "image/png"}}
            ]
        )
    ]

    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)
    max_retries = 5
    generated_code = None

    for attempt in range(1, max_retries + 1):
        try:
            response = llm.invoke(prompt)
        except Exception as e:
            # The provider's exception classes are not imported here, so
            # transient failures are recognised by their message text.
            details = str(e)
            if "ResourceExhausted" in details or "429" in details or "500" in details:
                delay = 30 * attempt  # linear back-off for rate limits
                failure = "Rate limit or server error."
                retry_note = f"Retrying in {delay} seconds..."
            else:
                delay = 5
                failure = f"Unexpected error: {e}."
                retry_note = "Retrying..."
        else:
            generated_code = _extract_html_document(_response_text(response.content))
            lowered = generated_code.lower()
            if "<html" in lowered and "<body" in lowered:
                break
            delay = 5
            failure = "Generated code missing HTML/BODY tags."
            retry_note = "Retrying..."

        if attempt == max_retries:
            # Nothing will be retried, so do not make the caller wait.
            print(f"Attempt {attempt}: {failure} No retries left.")
        else:
            print(f"Attempt {attempt}: {failure} {retry_note}")
            time.sleep(delay)

    if not generated_code:
        return None

    final_output = beautify_html(generated_code)
    if not final_output.strip().startswith("<!DOCTYPE html>"):
        final_output = "<!DOCTYPE html>\n" + final_output
    return final_output