Omnibus committed on
Commit
fc4ed6d
1 Parent(s): 383c28f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -3
app.py CHANGED
@@ -15,7 +15,9 @@ api=HfApi(token="")
15
  filename="urls"
16
  filename2="pages"
17
 
18
- def init():
 
 
19
  r = requests.get(f'{save_data}crawl/{filename}.json')
20
  print(f'status code main:: {r.status_code}')
21
  if r.status_code==200:
@@ -335,14 +337,22 @@ def sitemap(url,file_state,level):
335
  print (e)
336
  except Exception as e:
337
  print (e)
338
- #url_page=[]
339
  url_front=[]
 
340
  for ea_link in link2['TREE']:
341
  url_list=ea_link['URL'].split("/")
342
  url_front.append("".join(x for x in url_list[:3]))
 
343
  print(f'URL_FRONT:: {url_front}')
344
  #url_key=sort
345
- uri_key=sort_doc(url_front,file_state,8)
 
 
 
 
 
 
346
  ######## Save Database ########
347
  uid=uuid.uuid4()
348
  #for ea in list(uri_key.keys()):
 
15
  filename="urls"
16
  filename2="pages"
17
 
18
+ def init(filename=None):
19
+ if filename==None:
20
+ filename=filename
21
  r = requests.get(f'{save_data}crawl/{filename}.json')
22
  print(f'status code main:: {r.status_code}')
23
  if r.status_code==200:
 
337
  print (e)
338
  except Exception as e:
339
  print (e)
340
+ '''url_page=[]
341
  url_front=[]
342
+ url_json=[]
343
  for ea_link in link2['TREE']:
344
  url_list=ea_link['URL'].split("/")
345
  url_front.append("".join(x for x in url_list[:3]))
346
+ url_page.append("/".join(z for z in url_list[3:]))
347
  print(f'URL_FRONT:: {url_front}')
348
  #url_key=sort
349
+ for each_link in uri_key.keys():
350
+ out_file=init(f'{each_link}.json')
351
+
352
+ '''
353
+
354
+ uri_key=sort_doc(link2['TREE'],file_state,8)
355
+
356
  ######## Save Database ########
357
  uid=uuid.uuid4()
358
  #for ea in list(uri_key.keys()):