Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,115 @@ import gradio as gr
|
|
2 |
import requests
|
3 |
import bs4
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
def link_find(url):
|
6 |
out = []
|
7 |
source = requests.get(url)
|
|
|
2 |
import requests
|
3 |
import bs4
|
4 |
|
5 |
+
def _index_to_key(index, width, alphabet):
    """Encode a non-negative ``index`` in base ``len(alphabet)``.

    The result is left-padded with ``alphabet[0]`` to at least ``width``
    characters. It is never truncated: an index too large for ``width``
    digits yields a longer key, so distinct indexes always map to
    distinct keys. ``alphabet`` must contain at least two symbols.
    """
    base = len(alphabet)
    digits = []
    while True:
        index, remainder = divmod(index, base)
        digits.append(alphabet[remainder])
        # Always emit at least one digit, then keep padding up to ``width``.
        if index == 0 and len(digits) >= width:
            break
    return "".join(reversed(digits))


def sort_doc(text, steps_in=0, control=None):
    """Index the sentences of ``text`` under fixed-width alphabet keys.

    ``text`` is coerced to ``str`` and split with ``get_sen_list``; each
    sentence position is turned into an object with ``proc_sen``.  The
    objects are stored in a dict whose keys enumerate the sentences in
    order, written in base ``len(control_json['control'])`` using the
    characters of ``control_json['control']`` as digits (first key is the
    first character repeated ``steps`` times).

    Args:
        text: Document to process; converted with ``str()``.
        steps_in: Key width (number of digits).  When falsy, the width is
            derived from the sentence count by dividing it by the alphabet
            size until it drops below 1.
        control: Accepted for backward compatibility.  The values the
            previous implementation computed from it were never read, so
            it has no effect on the result.

    Returns:
        A ``(json_out, noun_list)`` tuple: the key -> sentence-object dict
        and the result of ``proc_nouns(json_out)``.  For a document with no
        sentences both are empty dicts.

    Raises:
        ValueError: If there are sentences to key but the alphabet in
            ``control_json['control']`` has fewer than two characters.
    """
    text = str(text)

    sen_list = get_sen_list(text)
    sen_obj_box = [proc_sen(sen_list, idx) for idx, _ in enumerate(sen_list)]
    key_cnt = len(sen_obj_box)
    print(key_cnt)

    json_out = {}
    noun_list = {}
    if not sen_obj_box:
        # NOTE(review): proc_nouns is only invoked once at least one sentence
        # has been keyed; confirm this matches the intended behaviour for
        # empty documents.
        return json_out, noun_list

    # Resolve the alphabet unconditionally.  It used to be bound only when
    # ``steps_in`` was falsy, so passing ``steps_in`` raised UnboundLocalError.
    control_char = list(control_json['control'])
    char_len = len(control_char)
    if char_len < 2:
        # A 0/1-symbol alphabet cannot produce distinct keys (and previously
        # caused a ZeroDivisionError or an endless loop).
        raise ValueError(
            "control_json['control'] must contain at least two characters"
        )

    if steps_in:
        steps = steps_in
    else:
        # Count how many digits are needed: divide until the value is < 1.
        steps = 0
        nx = key_cnt
        while nx >= 1:
            steps += 1
            nx = nx / char_len
        print("#######")
        print(steps)
        print(nx)
        print("#######")

    for idx, sen_obj in enumerate(sen_obj_box):
        # Derive the key straight from the index instead of hand-carrying a
        # digit counter; the old carry wrapped around at the leading digit
        # and could overwrite earlier entries with duplicate keys.
        out_js = _index_to_key(idx, steps, control_char)
        json_out[out_js] = sen_obj
        print("#################")
        print(out_js)
        print(sen_obj)
        print("#################")

    print("DONE")
    noun_list = proc_nouns(json_out)
    return json_out, noun_list
|
111 |
+
|
112 |
+
|
113 |
+
|
114 |
def link_find(url):
|
115 |
out = []
|
116 |
source = requests.get(url)
|