File size: 5,411 Bytes
547c16c eee2032 547c16c eee2032 547c16c b4cb782 547c16c b4cb782 547c16c f0b450b 5af0d0a f0b450b 5af0d0a f0b450b b91c702 f0b450b 01800d9 b91c702 f0b450b 0241c1e 547c16c b4cb782 547c16c 07fddd1 461bfcc 547c16c 00c1960 b4cb782 9ed859c b4cb782 547c16c b4cb782 547c16c b4cb782 547c16c 55a8bcb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import time
import base64
import io
from PIL import Image
from bs4 import BeautifulSoup
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
def resize_and_encode_image(image_file, max_long_side=1024):
    """Downscale an image and return it as a base64 PNG data URL.

    The image is shrunk (never enlarged) so that its longer side is at most
    ``max_long_side`` pixels, preserving the aspect ratio, then re-encoded
    as PNG.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (path or file object).
        max_long_side: Upper bound, in pixels, for the longer image side.

    Returns:
        A ``data:image/png;base64,...`` string.
    """
    # Modes Pillow's PNG writer documents as writable; anything else
    # (e.g. CMYK or YCbCr from JPEG uploads) is converted to RGB before saving.
    png_writable_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}

    # The context manager releases the decoder's file handle when Pillow
    # opened the file itself.
    with Image.open(image_file) as source:
        img = source
        width, height = img.size
        if max(width, height) > max_long_side:
            if width > height:
                new_width = max_long_side
                new_height = int(max_long_side * (height / width))
            else:
                new_height = max_long_side
                new_width = int(max_long_side * (width / height))
            # Very elongated images can truncate the short side to 0, which
            # resize() rejects; keep at least one pixel.
            target_size = (max(1, new_width), max(1, new_height))
            img = img.resize(target_size, Image.Resampling.LANCZOS)

        if img.mode not in png_writable_modes:
            img = img.convert("RGB")

        buffered = io.BytesIO()
        img.save(buffered, format="PNG")

    base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{base64_str}"
def beautify_html(html_code):
    """Return ``html_code`` re-indented by BeautifulSoup's ``html.parser``."""
    return BeautifulSoup(html_code, "html.parser").prettify()
# Instruction text sent to the model together with the screenshot. Kept
# verbatim: any wording change can alter the generated markup.
_SCREENSHOT_TO_HTML_PROMPT = """
You are an expert front-end developer.
The input is a screenshot of a website UI. Carefully analyze its layout and generate accurate, semantic, and maintainable HTML and CSS.
Follow these professional guidelines:
1) Structure & Semantics:
- Use HTML5 semantic tags that match the visual hierarchy (e.g., <header>, <nav>, <main>, <section>, <article>, <aside>, <footer>)
- Use HTML5 non-semantic tags that match the visual hierarchy (e.g., <div>, <span>)
- Reflect layout grouping using appropriate containers and divs where needed
2) Layout & Responsiveness:
- Use Flexbox or CSS Grid for layout
- Include responsive breakpoints (laptop and desktop-first) with at least one media query
- Ensure layout adapts well to **laptop and desktop screen sizes (min-width: 1024px)** using responsive design techniques like media queries
3) CSS Practices:
- Keep CSS in a <style> block or separate file (no inline styles)
- Use class names that follow a clean naming convention (e.g., BEM or descriptive naming)
- Group CSS rules logically (layout, typography, components)
- Use `max-width` + `white-space` + `line-height` for text containers to ensure headings/subheadings **wrap exactly** as in screenshot (e.g., 1 line or 2 lines).
4) Accessibility & UX:
- Add accessible markup: alt text, ARIA roles, labels
- Ensure good contrast and keyboard navigability
5) Content & Comments:
- Use meaningful placeholder text (not lorem ipsum)
- Add short code comments to explain each major section
- Accurately match the **font-size, font-weight, text-size and line-height** of all visible text, especially major headings and hero titles.
- Use **CSS font-size in rem or px** to replicate the exact visual size of headings and subheadings from the screenshot.
- Ensure text blocks wrap correctly as in the screenshot (e.g., long headings on one line, subheadings on two lines if seen). Use **max-width, white-space, and line-height** to control wrapping.
6) Output:
- The output should be a complete single HTML file with embedded CSS
- Preserve the visual structure and content flow of the original screenshot as closely as possible
- Do not skip or summarize any sections
Assume this is for real production-ready front-end code generation from a web UI screenshot.
"""


def _message_text(content):
    """Flatten a chat-model reply's content into one plain string.

    Message content may be a plain string or a list of parts (strings or
    ``{"type": "text", "text": ...}`` dicts); non-text parts are ignored.
    """
    if isinstance(content, str):
        return content
    pieces = []
    for part in content or []:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and part.get("type") == "text":
            pieces.append(part.get("text", ""))
    return "".join(pieces)


def _extract_html_document(reply_text):
    """Strip Markdown fences and surrounding chatter from a model reply."""
    text = reply_text.strip()
    if text.startswith("```html"):
        text = text.removeprefix("```html").removesuffix("```").strip()
    elif not text.startswith("<!DOCTYPE html>"):
        # Reply opens with prose: keep everything from the doctype onward.
        doctype_at = text.find("<!DOCTYPE html>")
        if doctype_at != -1:
            text = text[doctype_at:]
    # Anything after the closing </html> (a leftover ``` fence, trailing
    # explanation) is not part of the document and would otherwise be
    # rendered as page text.
    closing_at = text.rfind("</html>")
    if closing_at != -1:
        text = text[: closing_at + len("</html>")]
    return text.strip()


def generate_html_css_from_image(image_file):
    """Ask Gemini to turn a UI screenshot into a single HTML+CSS document.

    The screenshot is downscaled and embedded as a data URL, sent with a
    fixed prompt, and the reply is cleaned of Markdown fences. A reply is
    accepted once it contains both ``<html`` and ``<body``; otherwise the
    call is retried up to five times, with a longer pause after rate-limit
    or server errors.

    Args:
        image_file: Screenshot to convert; passed to
            ``resize_and_encode_image``.

    Returns:
        The prettified HTML document (always starting with
        ``<!DOCTYPE html>``), or ``None`` if no attempt produced any text.
        If every attempt fails validation, the last reply received is still
        returned as a best effort.
    """
    image_data_url = resize_and_encode_image(image_file)
    prompt = [
        HumanMessage(
            content=[
                {"type": "text", "text": _SCREENSHOT_TO_HTML_PROMPT},
                {
                    "type": "image_url",
                    "image_url": {"url": image_data_url, "mime_type": "image/png"},
                },
            ]
        )
    ]
    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

    max_retries = 5
    generated_code = None
    for attempt in range(max_retries):
        try:
            response = llm.invoke(prompt)
            generated_code = _extract_html_document(_message_text(response.content))
            lowered = generated_code.lower()
            if "<html" in lowered and "<body" in lowered:
                break
            print(f"Attempt {attempt+1}: Generated code missing HTML/BODY tags. Retrying...")
            delay = 5
        except Exception as e:
            # Broad on purpose: any provider failure should lead to a retry
            # rather than abort the whole generation.
            if "ResourceExhausted" in str(e) or "429" in str(e) or "500" in str(e):
                delay = 30 * (attempt + 1)  # linear back-off for quota/server errors
                print(f"Attempt {attempt+1}: Rate limit or server error. Retrying in {delay} seconds...")
            else:
                delay = 5
                print(f"Attempt {attempt+1}: Unexpected error: {e}. Retrying...")
        # No point waiting when there is no further attempt to wait for.
        if attempt < max_retries - 1:
            time.sleep(delay)

    if not generated_code:
        return None

    final_output = beautify_html(generated_code)
    if not final_output.strip().startswith("<!DOCTYPE html>"):
        final_output = "<!DOCTYPE html>\n" + final_output
    return final_output