Omnibus committed on
Commit
437cf54
1 Parent(s): 6660108

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -15
app.py CHANGED
@@ -2,23 +2,33 @@ import gradio as gr
2
  import requests
3
  import bs4
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  def sitemap(url):
6
  if url != "" and url != None:
7
- out = []
8
- source = requests.get(url)
9
- if source.status_code ==200:
10
- #soup = bs4.BeautifulSoup(source.content,'lxml')
11
- soup = bs4.BeautifulSoup(source.content,'html.parser')
12
-
13
- rawp=(f'RAW TEXT RETURNED: {soup.text}')
14
- cnt=0
15
- cnt+=len(rawp)
16
- out.append(rawp)
17
- out.append("HTML fragments: ")
18
- q=("a","p","span","content","article")
19
- for p in soup.find_all("a"):
20
- out.append({"LINK TITLE":p.get('title'),"URL":p.get('href'),"STRING":p.string})
21
- return out
22
  with gr.Blocks() as app:
23
  inp=gr.Textbox()
24
  btn=gr.Button()
 
2
  import requests
3
  import bs4
4
 
5
def link_find(url):
    """Fetch *url* and list the page text and the anchor tags found on it.

    Args:
        url: Address of the page to download.

    Returns:
        On an HTTP 200 response, a list whose first item is the page text
        prefixed with ``RAW TEXT RETURNED: ``, whose second item is the
        marker string ``HTML fragments: ``, and whose remaining items are
        one dict per ``<a>`` tag with the keys ``LINK TITLE``, ``URL``,
        ``STRING`` and an empty ``TREE`` list.
        For any other status code, or when the request itself fails, the
        string ``"None"``.
    """
    try:
        # A timeout keeps one unresponsive host from hanging the whole crawl.
        source = requests.get(url, timeout=10)
    except requests.RequestException:
        # Covers unreachable hosts and non-HTTP hrefs (mailto:, javascript:);
        # reported with the same sentinel as a non-200 response.
        return "None"
    if source.status_code != 200:
        return "None"

    soup = bs4.BeautifulSoup(source.content, 'html.parser')
    out = [f'RAW TEXT RETURNED: {soup.text}', "HTML fragments: "]
    for p in soup.find_all("a"):
        out.append({
            "LINK TITLE": p.get('title'),
            "URL": p.get('href'),
            "STRING": p.string,
            "TREE": [],
        })
    return out


def sitemap(url):
    """Build a one-level link tree for the page at *url*.

    The page is scanned with ``link_find``; every link found on it is then
    scanned as well and that result is appended to the link's ``TREE`` list.

    Args:
        url: Address of the start page. ``""`` and ``None`` are ignored.

    Returns:
        ``None`` when *url* is empty or ``None``. Otherwise whatever
        ``link_find(url)`` returned: the string ``"None"`` on failure, or
        the result list with each link dict's ``TREE`` filled in. ``TREE``
        stays empty for links without an href or whose page could not be
        fetched.
    """
    if url is None or url == "":
        return None

    from urllib.parse import urljoin

    link1 = link_find(url)
    if not isinstance(link1, list):
        # link_find signals failure with the string "None"; pass it through
        # rather than iterating over its characters.
        return link1

    for ea in link1:
        # The first two items are plain strings (raw text and a marker);
        # only the dict items describe links.
        if not isinstance(ea, dict) or not ea.get('URL'):
            continue
        # hrefs are frequently relative; resolve them against the start page.
        out_list = link_find(urljoin(url, ea['URL']))
        if isinstance(out_list, list):
            # list.append returns None, so its result must not be assigned
            # back to ea['TREE'].
            ea['TREE'].append(out_list)
    return link1
 
 
 
 
 
 
 
 
 
 
32
  with gr.Blocks() as app:
33
  inp=gr.Textbox()
34
  btn=gr.Button()