theekshanamadumal committed on
Commit
984bb90
1 Parent(s): 8c1f8f7

title view

Browse files
Files changed (2) hide show
  1. app.py +11 -5
  2. extract.py +6 -3
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- from extract import take_screenshot
3
  from PIL import Image
4
  from io import BytesIO
5
 
@@ -20,12 +20,18 @@ def visualize(url):
20
  # Fetch and display the website content
21
  with st.spinner("loading website data ..."):
22
  # innerHTML = get_innerHTML(url)
23
- innerHTML = take_screenshot(url)
 
 
 
 
24
  st.subheader("Website preview:")
25
- if innerHTML:
26
- st.image(innerHTML)
27
  else:
28
- st.error("Error: empty html")
 
 
29
 
30
  except Exception as e:
31
  st.error(f"Error: {e}")
 
1
  import streamlit as st
2
+ from extract import take_webdata
3
  from PIL import Image
4
  from io import BytesIO
5
 
 
20
  # Fetch and display the website content
21
  with st.spinner("loading website data ..."):
22
  # innerHTML = get_innerHTML(url)
23
+ html_image, html_content = take_webdata(url)
24
+ if html_content:
25
+ st.info(html_content)
26
+ else:
27
+ st.error("Error: empty html content")
28
  st.subheader("Website preview:")
29
+ if html_image:
30
+ st.image(html_image)
31
  else:
32
+ st.error("Error: empty html preview")
33
+ st.subheader("Website title:")
34
+
35
 
36
  except Exception as e:
37
  st.error(f"Error: {e}")
extract.py CHANGED
@@ -3,7 +3,7 @@ from selenium.common.exceptions import WebDriverException
3
  from PIL import Image
4
  from io import BytesIO
5
 
6
- def take_screenshot(url):
7
  options = webdriver.ChromeOptions()
8
  options.add_argument('--headless')
9
  options.add_argument('--no-sandbox')
@@ -14,11 +14,14 @@ def take_screenshot(url):
14
  wd.set_window_size(1080, 720) # Adjust the window size here
15
  wd.get(url)
16
  wd.implicitly_wait(5)
 
 
17
  screenshot = wd.get_screenshot_as_png()
 
18
  except WebDriverException as e:
19
- return Image.new('RGB', (1, 1))
20
  finally:
21
  if wd:
22
  wd.quit()
23
 
24
- return Image.open(BytesIO(screenshot))
 
3
  from PIL import Image
4
  from io import BytesIO
5
 
6
+ def take_webdata(url):
7
  options = webdriver.ChromeOptions()
8
  options.add_argument('--headless')
9
  options.add_argument('--no-sandbox')
 
14
  wd.set_window_size(1080, 720) # Adjust the window size here
15
  wd.get(url)
16
  wd.implicitly_wait(5)
17
+ # Get the page title
18
+ page_title = wd.title
19
  screenshot = wd.get_screenshot_as_png()
20
+
21
  except WebDriverException as e:
22
+ return Image.new('RGB', (1, 1)), page_title
23
  finally:
24
  if wd:
25
  wd.quit()
26
 
27
+ return Image.open(BytesIO(screenshot)) , page_title