Omnibus committed on
Commit a57fdc7
1 Parent(s): 6c531ab

Update app.py

Files changed (1)
  1. app.py +22 -14
app.py CHANGED
@@ -17,22 +17,24 @@ def link_find(url):
     #out.append(rawp)
     #out.append("HTML fragments: ")
     node1 = {"URL":url,"TITLE":soup.title,"STRING":soup.description,"TEXT":rawt,"TREE":[]}
+    node2 = {"URL":url,"TREE":[]}
 
     q=("a","p","span","content","article")
     for p in soup.find_all("a"):
         node1['TREE'].append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
+        node2['TREE'].append({"URL":p.get('href'),"TREE":[]})
         #out.append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
 
     else:
         print("NO")
         pass
-    return node1
+    return node1,node2
     #https://huggingface.co/spaces/Omnibus/crawl
 
 def sitemap(url,level):
     uri=""
     if url != "" and url != None:
-        link1=link_find(url)
+        link1,link2=link_find(url)
         if level >=2:
             for i,ea in enumerate(link1['TREE']):
                 print(ea)
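For context, the new link_find contract in one self-contained sketch: the function now builds two parallel trees and returns both, the full node1 record plus a URL-only node2 mirror for the sitemap view. The requests/BeautifulSoup imports, the fetch step, and the rawt extraction are assumptions here; the diff shows only the tree-building lines.

import requests
from bs4 import BeautifulSoup

def link_find(url):
    # Fetch and parse (assumed; this part of app.py is outside the diff).
    soup = BeautifulSoup(requests.get(url, timeout=10).text, "html.parser")
    rawt = soup.get_text()
    # node1: the full record rendered in the main JSON output. soup.description
    # resolves to the first <description> tag, which is usually None for HTML.
    node1 = {"URL": url, "TITLE": str(soup.title), "STRING": str(soup.description),
             "TEXT": rawt, "TREE": []}
    # node2: the lightweight URL-only mirror this commit introduces.
    node2 = {"URL": url, "TREE": []}
    for p in soup.find_all("a"):
        node1["TREE"].append({"URL": p.get("href"), "TITLE": p.get("title"),
                              "STRING": p.string, "TEXT": "", "TREE": []})
        node2["TREE"].append({"URL": p.get("href"), "TREE": []})
    return node1, node2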
@@ -43,9 +45,10 @@ def sitemap(url,level):
                     uri3=uri2.split("/")[0]
                     uri=f'{uri1}//{uri3}'
                     print(uri)
-                    out_list=link_find(f"{uri}{ea['URL']}")
-                    #link1['TREE'][i]=out_list
-                    link1['TREE'].append(out_list)
+                    out_list1,out_list2=link_find(f"{uri}{ea['URL']}")
+                    link1['TREE'][i]=out_list1
+                    link2['TREE'][i]=out_list2
+                    #link1['TREE'].append(out_list)
 
                     if level>=3:
                         for n,na in enumerate(link1['TREE'][i]['TREE']):
@@ -57,14 +60,15 @@ def sitemap(url,level):
                                 uri33=uri22.split("/")[0]
                                 uri0=f'{uri11}//{uri33}'
                                 print(uri0)
-                                out_list1=link_find(f"{uri0}{na['URL']}")
-                                #link1['TREE'][i]['TREE'][n]=out_list1
-                                link1['TREE'][i]['TREE'].append(out_list1)
+                                out_list1,out_list2=link_find(f"{uri0}{na['URL']}")
+                                link1['TREE'][i]['TREE'][n]=out_list1
+                                link2['TREE'][i]['TREE'][n]=out_list2
+                                #link1['TREE'][i]['TREE'].append(out_list1)
                             except Exception as e:
                                 print (e)
             except Exception as e:
                 print (e)
-    return link1
+    return link1,link2
 
 
 
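Both sitemap hunks rebuild an absolute base as f'{uri1}//{uri3}' before re-crawling each href; the splits that define uri1/uri2 (and uri11/uri22) sit just outside the hunk context but evidently peel the scheme and host out of the parent URL. A standard-library equivalent, shown as a hedged alternative rather than the commit's code:

from urllib.parse import urljoin

# urljoin resolves relative hrefs ("page.html", "/path", "../up") against the
# parent URL, covering the cases the manual scheme-plus-host prefix handles.
def absolutize(parent_url, href):
    return urljoin(parent_url, href)

print(absolutize("https://example.com/docs/a", "/spaces/x"))
# -> https://example.com/spaces/x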
@@ -103,9 +107,13 @@ def sitemap_OG(url,level):
     return link1
 with gr.Blocks() as app:
     with gr.Row():
-        inp=gr.Textbox(label="URL")
-        level=gr.Slider(minimum=1,maximum=3,step=1,value=2)
-        btn=gr.Button()
-        outp=gr.JSON()
-        btn.click(sitemap,[inp,level],outp)
+        with gr.Column(scale=3):
+            with gr.Row():
+                inp=gr.Textbox(label="URL")
+                level=gr.Slider(minimum=1,maximum=3,step=1,value=2)
+                btn=gr.Button()
+            outp=gr.JSON()
+        with gr.Column(scale=1):
+            outmap=gr.JSON()
+    btn.click(sitemap,[inp,level],[outp,outmap])
 app.launch()
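The UI hunk relies on a Gradio behavior worth spelling out: an event handler that returns a tuple maps its elements onto the outputs list positionally, so sitemap's new (link1, link2) pair feeds outp and outmap in order. A minimal standalone sketch of that wiring, with two_views as a hypothetical stand-in for sitemap:

import gradio as gr

def two_views(url, level):
    full = {"URL": url, "level": level, "TREE": []}  # stands in for link1
    slim = {"URL": url, "TREE": []}                  # stands in for link2
    return full, slim  # two return values -> [outp, outmap], in order

with gr.Blocks() as demo:
    inp = gr.Textbox(label="URL")
    level = gr.Slider(minimum=1, maximum=3, step=1, value=2)
    btn = gr.Button()
    outp = gr.JSON()
    outmap = gr.JSON()
    btn.click(two_views, [inp, level], [outp, outmap])

demo.launch()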