Update app.py
Browse files
app.py
CHANGED
@@ -15,12 +15,14 @@ def link_find(url):
|
|
15 |
rawt=soup.text
|
16 |
#out.append(rawp)
|
17 |
#out.append("HTML fragments: ")
|
|
|
|
|
18 |
q=("a","p","span","content","article")
|
19 |
for p in soup.find_all("a"):
|
20 |
-
|
21 |
else:
|
22 |
pass
|
23 |
-
return
|
24 |
#https://huggingface.co/spaces/Omnibus/crawl
|
25 |
|
26 |
def sitemap(url,level):
|
|
|
15 |
rawt=soup.text
|
16 |
#out.append(rawp)
|
17 |
#out.append("HTML fragments: ")
|
18 |
+
node1 = ({"URL":url,"TITLE":soup.title,"STRING":soup.description,"TEXT":rawt,"TREE":[]})
|
19 |
+
|
20 |
q=("a","p","span","content","article")
|
21 |
for p in soup.find_all("a"):
|
22 |
+
node1['TREE'].append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
|
23 |
else:
|
24 |
pass
|
25 |
+
return node1
|
26 |
#https://huggingface.co/spaces/Omnibus/crawl
|
27 |
|
28 |
def sitemap(url,level):
|