Omnibus committed on
Commit
366c803
1 Parent(s): 5e66ae2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -16,13 +16,16 @@ def link_find(url):
16
  rawt=soup.text
17
  #out.append(rawp)
18
  #out.append("HTML fragments: ")
19
- node1 = {"URL":url,"TITLE":soup.title,"STRING":soup.description,"TEXT":rawt,"TREE":[]}
20
- node2 = {"URL":url,"TREE":[]}
21
 
22
  q=("a","p","span","content","article")
23
  for p in soup.find_all("a"):
24
- node1['TREE'].append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
25
- node2['TREE'].append({"URL":p.get('href'),"TREE":[]})
 
 
 
26
  #out.append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
27
 
28
  else:
 
16
  rawt=soup.text
17
  #out.append(rawp)
18
  #out.append("HTML fragments: ")
19
+ node1 = {"URL":url,"TITLE":soup.title,"STRING":soup.description,"TEXT":rawt,"LINKS":[],"TREE":[]}
20
+ node2 = {"URL":url,"LINKS":[],"TREE":[]}
21
 
22
  q=("a","p","span","content","article")
23
  for p in soup.find_all("a"):
24
+ node1['LINKS'].append(p.get('href'))
25
+ node1['TREE'].append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","LINKS":[],"TREE":[]})
26
+ node2['TREE'].append({"URL":p.get('href'),"LINKS":[],"TREE":[]})
27
+ node2['LINKS'].append(p.get('href'))
28
+
29
  #out.append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
30
 
31
  else: