Spaces:
No application file
No application file
# import asyncio
import logging

import gradio as gr
from pyppeteer import launch
from pyppeteer.errors import ElementHandleError
async def highlight_element(page, selector):
    """Highlight the first element matching *selector* and return its styles.

    The element's background colour is set to yellow inside the page, then
    every computed style property is read back.

    Args:
        page: pyppeteer page (anything exposing awaitable ``querySelector``
            and ``evaluate``).
        selector: CSS selector of the element to highlight.

    Returns:
        A list of ``"property: value"`` strings for the element, or ``None``
        when no element matches or the in-page evaluation fails.
    """
    try:
        element = await page.querySelector(selector)
        if not element:
            return None
        await page.evaluate(
            'element => element.style.backgroundColor = "yellow"', element
        )
        return await page.evaluate(
            '(element) => { '
            'const computedStyles = window.getComputedStyle(element); '
            'return Array.from(computedStyles)'
            '.map(prop => `${prop}: ${computedStyles.getPropertyValue(prop)}`); '
            '}',
            element,
        )
    except ElementHandleError:
        # Highlighting is best-effort: record the failure instead of hiding it,
        # but keep returning None so callers are not broken.
        logging.getLogger(__name__).warning(
            "Could not highlight element %r", selector, exc_info=True
        )
        return None
# JavaScript evaluated inside the page by getStyles(). It receives a CSS
# selector, names every element of the matched subtree and returns a
# plain-text description of the tree together with selected computed styles.
# Kept as a raw string so the \n / \t escapes are interpreted by JavaScript.
_GET_STYLES_JS = r'''(selector) => {
    const contentTypeIndex = {};  // per-type counters used to build names such as "text3"
    const childs = {};            // container name -> names of its direct children
    const names = new Map();      // element -> generated name (kept off the DOM on purpose)
    let finalString = "";

    const desiredProperties = [
        "background-color", "box-sizing", "clear", "color", "display",
        "flex-direction", "float", "font-family", "font-size", "font-weight",
        "height", "line-height", "margin-bottom", "margin-left", "margin-right",
        "margin-top", "padding-bottom", "padding-left", "padding-right",
        "padding-top", "text-align", "width"
    ];

    function isTextTag(tagName) {
        // NOTE: tagName[0] === 'h' matches every tag starting with "h", not only h1-h6.
        return tagName === 'p' || tagName[0] === 'h' || tagName === 'span' ||
            tagName === 'div' || tagName === 'ul' || tagName === 'ol';
    }

    function isToggleInput(element, tagName) {
        return tagName === 'input' && (element.type === 'checkbox' || element.type === 'radio');
    }

    // Short description of what a leaf element holds (text, src, href, value, ...).
    function elementIdentifier(element) {
        const tagName = element.tagName.toLowerCase();
        if (isTextTag(tagName)) {
            return `Text Content = "${element.textContent}"`;
        } else if (tagName === 'img' || tagName === 'audio' || tagName === 'video') {
            return `src = "${element.src}"`;
        } else if (tagName === 'a' || tagName === 'link') {
            return `href = "${element.href}"`;
        } else if (isToggleInput(element, tagName)) {
            // Must be tested before the generic form branch, otherwise it is never reached.
            return `checked = "${element.checked}"`;
        } else if (tagName === 'input' || tagName === 'textarea' || tagName === 'select') {
            return `value = "${element.value}"`;
        } else if (tagName === 'table') {
            const cellContent = Array.from(element.querySelectorAll('td, th')).map(cell => cell.innerHTML);
            return `rows = "${element.rows.length}", cellContent = "${cellContent.join(', ')}"`;
        }
        // Fallback so unclassified tags never yield the text "undefined".
        return `Text Content = "${element.textContent}"`;
    }

    // Coarse category of a leaf element; also the prefix of its generated name.
    function contentType(element) {
        const tagName = element.tagName.toLowerCase();
        if (isTextTag(tagName)) {
            return "text";
        } else if (tagName === 'img') {
            return "image";
        } else if (tagName === 'audio') {
            return "audio";
        } else if (tagName === 'video') {
            return "video";
        } else if (tagName === 'a' || tagName === 'link') {
            return "link";
        } else if (isToggleInput(element, tagName)) {
            // Must be tested before the generic form branch, otherwise it is never reached.
            return "button";
        } else if (tagName === 'input' || tagName === 'textarea' || tagName === 'select') {
            return "form";
        } else if (tagName === 'table') {
            return "table";
        }
        // Fallback so unclassified tags never yield the text "undefined".
        return "element";
    }

    // First pass: give every element a name. Children are visited last-to-first,
    // and an element with exactly one child simply reuses that child's name.
    function nameElems(element) {
        for (let i = element.children.length - 1; i >= 0; i--) {
            nameElems(element.children[i]);
        }
        if (element.children.length === 1) {
            names.set(element, names.get(element.children[0]));
        } else if (element.tagName.toLowerCase() !== "link") {
            const s1 = element.children.length ? "container" : contentType(element);
            contentTypeIndex[s1] = (contentTypeIndex[s1] || 0) + 1;
            names.set(element, `${s1}${contentTypeIndex[s1]}`);
        }
    }

    // Second pass: describe each element after its children and append its styles.
    function postorderTraversal(element) {
        for (let i = element.children.length - 1; i >= 0; i--) {
            postorderTraversal(element.children[i]);
        }

        const name = names.get(element);
        const childCount = element.children.length;
        const isLink = element.tagName.toLowerCase() === "link";

        if (childCount > 1) {
            for (let i = childCount - 1; i >= 0; i--) {
                const childName = names.get(element.children[i]);
                if (childs[name]) {
                    childs[name].push(childName);
                } else {
                    childs[name] = [childName];
                }
            }
            finalString += `${childs[name]} are nested inside a container. Lets name this container as ${name}. The computed styles of ${name} are:-\n`;
        } else if (!childCount && !isLink) {
            finalString += `Lets name ${contentType(element)} with ${elementIdentifier(element)} as ${name}. The computed styles of ${name} are:-\n`;
        }

        // Single-child wrappers and <link> elements get no style block.
        if (childCount !== 1 && !isLink) {
            const computedStyles = getComputedStyle(element);
            const lines = desiredProperties.map(
                prop => `\t${prop}: ${computedStyles.getPropertyValue(prop)}`
            );
            finalString += `""${lines.join(`\n`)}""\n\n`;
        }
    }

    const rootElement = document.querySelector(selector);
    nameElems(rootElement);
    postorderTraversal(rootElement);
    return finalString;
}'''


async def getStyles(page, selector, text_file):
    """Describe the element tree under *selector* and write it to *text_file*.

    Waits for *selector* to appear, runs the in-page script above to build a
    plain-text description (generated element names, nesting, and a fixed set
    of computed style properties), then writes that text to *text_file*.

    Args:
        page: pyppeteer page (anything exposing awaitable ``waitForSelector``
            and ``evaluate``).
        selector: CSS selector of the root element to describe.
        text_file: Path of the text file to (over)write.

    Returns:
        None. On ``ElementHandleError`` the failure is logged and no file is
        written; any other exception propagates to the caller.
    """
    try:
        await page.waitForSelector(selector)
        styles = await page.evaluate(_GET_STYLES_JS, selector)
    except ElementHandleError:
        # Best-effort, as before, but no longer silent.
        logging.getLogger(__name__).warning(
            "Could not collect styles for %r", selector, exc_info=True
        )
        return None

    # Page text may contain any Unicode character; do not depend on the
    # platform's default encoding.
    with open(text_file, 'w', encoding='utf-8') as f:
        f.write(styles)
    return None
async def highlightAndStyles(url, selector, franklinHTML):
    """Open *url*, dump the styles under *selector* and screenshot the page.

    Steps, in order: launch a headless browser, load *url*, write the style
    description to ``styles.txt``, highlight the selected element, resize the
    viewport to 3072x1920 and save a full-page screenshot to
    ``screenshot.png``.

    Args:
        url: Address of the page to load.
        selector: CSS selector of the element to describe and highlight.
        franklinHTML: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(image_file, text_file)`` holding the two output paths.
    """
    text_file = "styles.txt"
    image_file = "screenshot.png"

    # pyppeteer's own signal handlers are disabled; with them enabled the
    # script only worked when run on the main thread.
    browser = await launch(handleSIGINT=False, handleSIGTERM=False, handleSIGHUP=False)
    try:
        page = await browser.newPage()
        await page.goto(url)
        await getStyles(page, selector, text_file)
        await highlight_element(page, selector)
        # NOTE(review): styles are collected before the viewport is enlarged,
        # so size-dependent values may not match the screenshot - confirm intent.
        await page.setViewport({"width": 3072, "height": 1920})
        await page.screenshot({'path': image_file, 'fullPage': True})
    finally:
        # Always shut the browser down, even if a step above raised, so no
        # browser process is left behind.
        await browser.close()
    return image_file, text_file
# def run_script(url, selector):
#     return await highlightAndStyles(url, selector)
# print("Screenshot and styles saved successfully!")
# Gradio front end: three text inputs are passed to highlightAndStyles, whose
# two return values are shown as an image and a downloadable file.
iface = gr.Interface(
    fn=highlightAndStyles,
    inputs=["text", "text", "text"],
    outputs=["image", "file"],
)
iface.launch()
# The default signal handlers caused the program to run only on the main thread of the main
# interpreter, and a KeyboardInterrupt took the program out of the main thread.
# Switching off those default handlers (see the launch() arguments) made it work.