Update app.py
Browse files
app.py
CHANGED
@@ -15,7 +15,9 @@ api=HfApi(token="")
|
|
15 |
filename="urls"
|
16 |
filename2="pages"
|
17 |
|
18 |
-
def init():
|
|
|
|
|
19 |
r = requests.get(f'{save_data}crawl/{filename}.json')
|
20 |
print(f'status code main:: {r.status_code}')
|
21 |
if r.status_code==200:
|
@@ -335,14 +337,22 @@ def sitemap(url,file_state,level):
|
|
335 |
print (e)
|
336 |
except Exception as e:
|
337 |
print (e)
|
338 |
-
|
339 |
url_front=[]
|
|
|
340 |
for ea_link in link2['TREE']:
|
341 |
url_list=ea_link['URL'].split("/")
|
342 |
url_front.append("".join(x for x in url_list[:3]))
|
|
|
343 |
print(f'URL_FRONT:: {url_front}')
|
344 |
#url_key=sort
|
345 |
-
uri_key
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
######## Save Database ########
|
347 |
uid=uuid.uuid4()
|
348 |
#for ea in list(uri_key.keys()):
|
|
|
15 |
filename="urls"
|
16 |
filename2="pages"
|
17 |
|
18 |
+
def init(filename=None):
|
19 |
+
if filename==None:
|
20 |
+
filename=filename
|
21 |
r = requests.get(f'{save_data}crawl/{filename}.json')
|
22 |
print(f'status code main:: {r.status_code}')
|
23 |
if r.status_code==200:
|
|
|
337 |
print (e)
|
338 |
except Exception as e:
|
339 |
print (e)
|
340 |
+
'''url_page=[]
|
341 |
url_front=[]
|
342 |
+
url_json=[]
|
343 |
for ea_link in link2['TREE']:
|
344 |
url_list=ea_link['URL'].split("/")
|
345 |
url_front.append("".join(x for x in url_list[:3]))
|
346 |
+
url_page.append("/".join(z for z in url_list[3:]))
|
347 |
print(f'URL_FRONT:: {url_front}')
|
348 |
#url_key=sort
|
349 |
+
for each_link in uri_key.keys():
|
350 |
+
out_file=init(f'{each_link}.json')
|
351 |
+
|
352 |
+
'''
|
353 |
+
|
354 |
+
uri_key=sort_doc(link2['TREE'],file_state,8)
|
355 |
+
|
356 |
######## Save Database ########
|
357 |
uid=uuid.uuid4()
|
358 |
#for ea in list(uri_key.keys()):
|