Omnibus committed on
Commit
ae4e988
1 Parent(s): 1d13724

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -0
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import bs4
4
+
5
def sitemap(url):
    """Fetch a web page and summarise its text and hyperlinks.

    Args:
        url: Address of the page to download. ``None`` or an empty string
            skips the request entirely.

    Returns:
        ``None`` when no URL was supplied. Otherwise a list that is empty
        if the server did not answer with HTTP 200, or else holds the
        page's raw text, the ``"HTML fragments: "`` marker, and one dict
        per ``<a>`` tag with its ``title`` attribute, ``href`` attribute
        and string content.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    if not url:
        return None

    out = []
    # requests applies no timeout by default; bound the wait so that an
    # unresponsive host cannot block this handler indefinitely.
    source = requests.get(url, timeout=10)
    if source.status_code != 200:
        return out

    soup = bs4.BeautifulSoup(source.content, "html.parser")
    out.append(f"RAW TEXT RETURNED: {soup.text}")
    out.append("HTML fragments: ")
    for link in soup.find_all("a"):
        text = link.string
        out.append(
            {
                "LINK TITLE": link.get("title"),
                "URL": link.get("href"),
                # Convert to a plain str: a NavigableString keeps a
                # reference to the whole parse tree it came from.
                "STRING": None if text is None else str(text),
            }
        )
    # The original built this list but never returned it, so the caller
    # always received None.
    return out
21
# Minimal Gradio UI: a textbox for the URL, a trigger button and a JSON pane.
with gr.Blocks() as app:
    inp = gr.Textbox()
    btn = gr.Button()
    outp = gr.JSON()
    # On click, call sitemap with the textbox value and show its return
    # value in the JSON component.
    btn.click(fn=sitemap, inputs=inp, outputs=outp)

app.launch()