Omnibus committed on
Commit
113888e
1 Parent(s): 1f7917b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -7
app.py CHANGED
@@ -2,13 +2,33 @@ import gradio as gr
2
  import requests
3
  import bs4
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  def sort_doc(in_list,steps_in=0,control=None):
6
  control_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ','char':'','leng':62}
7
  text=str(in_list)
8
-
9
- ########################################
10
- sen_list=in_list
11
- ######################################
12
  key_cnt=len(in_list)
13
  print(key_cnt)
14
  control_char=list(control_json['control'])
@@ -76,9 +96,6 @@ def sort_doc(in_list,steps_in=0,control=None):
76
  print(j)
77
  out_js = out_js+control_char[j]
78
  sen_obj=in_list[i]
79
- #sen_obj=proc_sen(sen_list,i)
80
-
81
- #json_out[out_js]={'nouns':ea}
82
  json_out[out_js]=sen_obj
83
  print ("#################")
84
  print (out_js)
@@ -186,7 +203,25 @@ def sitemap(url,level):
186
  except Exception as e:
187
  print (e)
188
  uri_key=sort_doc(link_box,8)
 
 
 
 
 
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  return link1,link2,uri_key
191
 
192
 
 
2
  import requests
3
  import bs4
4
 
5
+ ######## Load Database ########
6
+
7
+ from huggingface_hub import HfApi, upload_file
8
+ import json
9
+ import uuid
10
+ token=os.environ.get("HF_TOKEN")
11
+ username="omnibus"
12
+ dataset_name="tmp"
13
+ save_data=f'https://huggingface.co/datasets/{username}/{dataset_name}/raw/main/'
14
+ api=HfApi(token="")
15
+ filename="test"
16
+
17
+ r = requests.get(f'{save_data}crawl/{file_n}.json')
18
+ print(f'status code main:: {r.status_code}')
19
+ if r.status_code==200:
20
+ lod = json.loads(r.text)
21
+ #print(f'lod:: {lod}')
22
+ #lod[0]['comment']=lod[0]['comment']+1
23
+ #lod[0]['comment_list'].append({'user':persona[persona2]['name'],'datetime':'','comment':output,'reply_list':[]})
24
+ else:
25
+ lod={}
26
+
27
+ #############################
28
+
29
  def sort_doc(in_list,steps_in=0,control=None):
30
  control_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ','char':'','leng':62}
31
  text=str(in_list)
 
 
 
 
32
  key_cnt=len(in_list)
33
  print(key_cnt)
34
  control_char=list(control_json['control'])
 
96
  print(j)
97
  out_js = out_js+control_char[j]
98
  sen_obj=in_list[i]
 
 
 
99
  json_out[out_js]=sen_obj
100
  print ("#################")
101
  print (out_js)
 
203
  except Exception as e:
204
  print (e)
205
  uri_key=sort_doc(link_box,8)
206
+ ######## Save Database ########
207
+ uid=uuid.uuid4()
208
+ for ea in list(uri_key.keys()):
209
+ if not uri_key[ea] == x for x in list(lod.values()):
210
+ lod[ea]=uri_key[ea]
211
 
212
+ with open(f'{uid}.json', 'w') as f:
213
+ json_hist=json.dumps(uri_key, indent=4)
214
+ f.write(json_hist)
215
+ f.close()
216
+
217
+ upload_file(
218
+ path_or_fileobj =f"{uid}.json",
219
+ path_in_repo = f"crawl/{filename}.json",
220
+ repo_id =f"{username}/{dataset_name}",
221
+ repo_type = "dataset",
222
+ token=token,
223
+ )
224
+ #################################
225
  return link1,link2,uri_key
226
 
227