Omnibus committed on
Commit
827c354
1 Parent(s): 573c6d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -15,12 +15,14 @@ def link_find(url):
15
  rawt=soup.text
16
  #out.append(rawp)
17
  #out.append("HTML fragments: ")
 
 
18
  q=("a","p","span","content","article")
19
  for p in soup.find_all("a"):
20
- out.append({"LINK TITLE":p.get('title'),"URL":p.get('href'),"STRING":p.string,"TEXT":rawt,"TREE":[]})
21
  else:
22
  pass
23
- return out
24
  #https://huggingface.co/spaces/Omnibus/crawl
25
 
26
  def sitemap(url,level):
 
15
  rawt=soup.text
16
  #out.append(rawp)
17
  #out.append("HTML fragments: ")
18
+ node1 = ({"URL":url,"TITLE":soup.title,"STRING":soup.description,"TEXT":rawt,"TREE":[]})
19
+
20
  q=("a","p","span","content","article")
21
  for p in soup.find_all("a"):
22
+ node1['TREE'].append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
23
  else:
24
  pass
25
+ return node1
26
  #https://huggingface.co/spaces/Omnibus/crawl
27
 
28
  def sitemap(url,level):