Omnibus committed on
Commit
f0e1870
1 Parent(s): f79ce55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -32
app.py CHANGED
@@ -22,43 +22,43 @@ def link_find(url):
22
  return out
23
  #https://huggingface.co/spaces/Omnibus/crawl
24
 
25
- def sitemap(url):
26
  uri=""
27
  if url != "" and url != None:
28
  link1=link_find(url)
29
- for i,ea in enumerate(link1):
30
- print(ea)
31
- try:
32
- if not ea['URL'].startswith("http"):
33
- uri1=url.split("//")[0]
34
- uri2=url.split("//")[1]
35
- uri3=uri2.split("/")[0]
36
- uri=f'{uri1}//{uri3}'
37
- print(uri)
38
- out_list=link_find(f"{uri}{ea['URL']}")
39
- link1[i]['TREE']=out_list
40
- for n,na in enumerate(link1[i]['TREE']):
41
- print(na)
42
- try:
43
- if not na['URL'].startswith("http"):
44
- uri11=url.split("//")[0]
45
- uri22=url.split("//")[1]
46
- uri33=uri22.split("/")[0]
47
- uri0=f'{uri11}//{uri33}'
48
- print(uri0)
49
- out_list1=link_find(f"{uri0}{na['URL']}")
50
- link1[i]['TREE'][n]['TREE']=out_list1
51
- except Exception as e:
52
- print (e)
53
-
54
-
55
-
56
- except Exception as e:
57
- print (e)
58
  return link1
59
  with gr.Blocks() as app:
60
- inp=gr.Textbox()
 
61
  btn=gr.Button()
62
  outp=gr.JSON()
63
- btn.click(sitemap,inp,outp)
64
  app.launch()
 
22
  return out
23
  #https://huggingface.co/spaces/Omnibus/crawl
24
 
25
+ def sitemap(url,level):
26
  uri=""
27
  if url != "" and url != None:
28
  link1=link_find(url)
29
+ if level >=2:
30
+ for i,ea in enumerate(link1):
31
+ print(ea)
32
+ try:
33
+ if not ea['URL'].startswith("http"):
34
+ uri1=url.split("//")[0]
35
+ uri2=url.split("//")[1]
36
+ uri3=uri2.split("/")[0]
37
+ uri=f'{uri1}//{uri3}'
38
+ print(uri)
39
+ out_list=link_find(f"{uri}{ea['URL']}")
40
+ link1[i]['TREE']=out_list
41
+ if level>=3:
42
+ for n,na in enumerate(link1[i]['TREE']):
43
+ print(na)
44
+ try:
45
+ if not na['URL'].startswith("http"):
46
+ uri11=url.split("//")[0]
47
+ uri22=url.split("//")[1]
48
+ uri33=uri22.split("/")[0]
49
+ uri0=f'{uri11}//{uri33}'
50
+ print(uri0)
51
+ out_list1=link_find(f"{uri0}{na['URL']}")
52
+ link1[i]['TREE'][n]['TREE']=out_list1
53
+ except Exception as e:
54
+ print (e)
55
+ except Exception as e:
56
+ print (e)
 
57
  return link1
58
  with gr.Blocks() as app:
59
+ inp=gr.Textbox(label="URL")
60
+ level=gr.Slider(minimum=1,maximum=3,step=1,value=2)
61
  btn=gr.Button()
62
  outp=gr.JSON()
63
+ btn.click(sitemap,[inp,level],outp)
64
  app.launch()