theekshanamadumal
committed on
Commit
•
984bb90
1
Parent(s):
8c1f8f7
title view
Browse files
- app.py +11 -5
- extract.py +6 -3
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
from extract import
|
3 |
from PIL import Image
|
4 |
from io import BytesIO
|
5 |
|
@@ -20,12 +20,18 @@ def visualize(url):
|
|
20 |
# Fetch and display the website content
|
21 |
with st.spinner("loading website data ..."):
|
22 |
# innerHTML = get_innerHTML(url)
|
23 |
-
|
|
|
|
|
|
|
|
|
24 |
st.subheader("Website preview:")
|
25 |
-
if
|
26 |
-
st.image(
|
27 |
else:
|
28 |
-
st.error("Error: empty html")
|
|
|
|
|
29 |
|
30 |
except Exception as e:
|
31 |
st.error(f"Error: {e}")
|
|
|
1 |
import streamlit as st
|
2 |
+
from extract import take_webdata
|
3 |
from PIL import Image
|
4 |
from io import BytesIO
|
5 |
|
|
|
20 |
# Fetch and display the website content
|
21 |
with st.spinner("loading website data ..."):
|
22 |
# innerHTML = get_innerHTML(url)
|
23 |
+
html_image, html_content = take_webdata(url)
|
24 |
+
if html_content:
|
25 |
+
st.info(html_content)
|
26 |
+
else:
|
27 |
+
st.error("Error: empty html content")
|
28 |
st.subheader("Website preview:")
|
29 |
+
if html_image:
|
30 |
+
st.image(html_image)
|
31 |
else:
|
32 |
+
st.error("Error: empty html preview")
|
33 |
+
st.subheader("Website title:")
|
34 |
+
|
35 |
|
36 |
except Exception as e:
|
37 |
st.error(f"Error: {e}")
|
extract.py
CHANGED
@@ -3,7 +3,7 @@ from selenium.common.exceptions import WebDriverException
|
|
3 |
from PIL import Image
|
4 |
from io import BytesIO
|
5 |
|
6 |
-
def
|
7 |
options = webdriver.ChromeOptions()
|
8 |
options.add_argument('--headless')
|
9 |
options.add_argument('--no-sandbox')
|
@@ -14,11 +14,14 @@ def take_screenshot(url):
|
|
14 |
wd.set_window_size(1080, 720) # Adjust the window size here
|
15 |
wd.get(url)
|
16 |
wd.implicitly_wait(5)
|
|
|
|
|
17 |
screenshot = wd.get_screenshot_as_png()
|
|
|
18 |
except WebDriverException as e:
|
19 |
-
return Image.new('RGB', (1, 1))
|
20 |
finally:
|
21 |
if wd:
|
22 |
wd.quit()
|
23 |
|
24 |
-
return Image.open(BytesIO(screenshot))
|
|
|
3 |
from PIL import Image
|
4 |
from io import BytesIO
|
5 |
|
6 |
+
def take_webdata(url):
|
7 |
options = webdriver.ChromeOptions()
|
8 |
options.add_argument('--headless')
|
9 |
options.add_argument('--no-sandbox')
|
|
|
14 |
wd.set_window_size(1080, 720) # Adjust the window size here
|
15 |
wd.get(url)
|
16 |
wd.implicitly_wait(5)
|
17 |
+
# Get the page title
|
18 |
+
page_title = wd.title
|
19 |
screenshot = wd.get_screenshot_as_png()
|
20 |
+
|
21 |
except WebDriverException as e:
|
22 |
+
return Image.new('RGB', (1, 1)), page_title
|
23 |
finally:
|
24 |
if wd:
|
25 |
wd.quit()
|
26 |
|
27 |
+
return Image.open(BytesIO(screenshot)) , page_title
|