Omnibus committed on
Commit
3b1a5cc
1 Parent(s): e30ed28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -3
app.py CHANGED
@@ -24,7 +24,7 @@ def link_find(url):
24
 
25
  else:
26
  pass
27
- return [node1]
28
  #https://huggingface.co/spaces/Omnibus/crawl
29
 
30
  def sitemap(url,level):
@@ -32,7 +32,7 @@ def sitemap(url,level):
32
  if url != "" and url != None:
33
  link1=link_find(url)
34
  if level >=2:
35
- for i,ea in enumerate(link1):
36
  print(ea)
37
  try:
38
  if not ea['URL'].startswith("http"):
@@ -44,7 +44,7 @@ def sitemap(url,level):
44
  out_list=link_find(f"{uri}{ea['URL']}")
45
  #link1[i]['TREE']=out_list
46
  if level>=3:
47
- for n,na in enumerate(link1[i]['TREE']):
48
  print(na)
49
  try:
50
  if not na['URL'].startswith("http"):
@@ -60,6 +60,42 @@ def sitemap(url,level):
60
  except Exception as e:
61
  print (e)
62
  return link1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  with gr.Blocks() as app:
64
  with gr.Row():
65
  inp=gr.Textbox(label="URL")
 
24
 
25
  else:
26
  pass
27
+ return node1
28
  #https://huggingface.co/spaces/Omnibus/crawl
29
 
30
  def sitemap(url,level):
 
32
  if url != "" and url != None:
33
  link1=link_find(url)
34
  if level >=2:
35
+ for i,ea in enumerate(link1['TREE']):
36
  print(ea)
37
  try:
38
  if not ea['URL'].startswith("http"):
 
44
  out_list=link_find(f"{uri}{ea['URL']}")
45
  #link1[i]['TREE']=out_list
46
  if level>=3:
47
+ for n,na in enumerate(link1['TREE'][i]['TREE']):
48
  print(na)
49
  try:
50
  if not na['URL'].startswith("http"):
 
60
  except Exception as e:
61
  print (e)
62
  return link1
63
+
64
+
65
+
66
def _site_root(url):
    """Return the ``scheme//host`` prefix of *url*, e.g. ``https://example.com``.

    Raises:
        ValueError: if *url* contains no ``//`` separator to split on.
    """
    scheme, sep, rest = url.partition("//")
    if not sep:
        raise ValueError(f"cannot derive site root from {url!r}")
    # Everything up to the first "/" after the separator is the host part.
    return f"{scheme}//{rest.split('/')[0]}"


def _absolute_link(url, link):
    """Return *link* as-is if it starts with ``http``, else prefix it with the site root of *url*."""
    if link.startswith("http"):
        return link
    root = _site_root(url)
    print(root)
    return f"{root}{link}"


def sitemap_OG(url,level):
    """Crawl *url* and attach up to two further levels of links under 'TREE'.

    NOTE(review): indentation was lost in the scraped source, so the nesting
    here is reconstructed from the statement order - confirm against app.py.

    The result of ``link_find(url)`` is iterated as a sequence of dicts that
    each carry a 'URL' key. With ``level >= 2`` every entry gets a 'TREE' key
    holding ``link_find`` of its URL; with ``level >= 3`` the same is done for
    every entry of that 'TREE'. Links that do not start with "http" are
    prefixed with the scheme and host of *url*.
    NOTE(review): this assumes ``link_find`` returns a list of dicts - verify,
    since its return shape is defined elsewhere in the file.

    Args:
        url: Page to start from. Empty string or None yields an empty list.
        level: Crawl depth; 2 expands the first level, 3 also the second.

    Returns:
        The (mutated) result of ``link_find(url)``, or ``[]`` when *url* is
        empty or None.
    """
    # Guard clause: previously `link1` was never bound for an empty URL.
    if url is None or url == "":
        return []

    link1 = link_find(url)
    if level >= 2:
        for i, ea in enumerate(link1):
            print(ea)
            try:
                # The prefix is resolved per link, so an absolute link never
                # inherits a root left over from an earlier relative link.
                link1[i]['TREE'] = link_find(_absolute_link(url, ea['URL']))
                if level >= 3:
                    for n, na in enumerate(link1[i]['TREE']):
                        print(na)
                        try:
                            link1[i]['TREE'][n]['TREE'] = link_find(
                                _absolute_link(url, na['URL'])
                            )
                        except Exception as e:
                            # Best-effort crawl: report and move on to the next link.
                            print (e)
            except Exception as e:
                # Best-effort crawl: report and move on to the next link.
                print (e)
    return link1
99
  with gr.Blocks() as app:
100
  with gr.Row():
101
  inp=gr.Textbox(label="URL")