"""Gradio demo: index a pasted document by sentence and search it by noun.

The document is split into sentences with TextBlob; every sentence gets a
fixed-width base-62 key, and an inverted index maps each noun / noun phrase
to the keys of the sentences that contain it.
"""
from textblob import TextBlob
import gradio as gr
import math
import os

# Fetch the TextBlob corpora at start-up (needed for tagging / noun phrases).
os.system("python -m textblob.download_corpora")

# Alphabet used to build sentence keys (base 62).
control_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ','char':'','leng':62}
# Not used by the code in this file; kept for backward compatibility.
string_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN','char':'OPQRSTUVWXYZ','leng':50}
cont_list=list(string_json['control'])


def _tagged_nouns(blob):
    """Return the words of *blob* tagged ``NN``, in order, duplicates kept.

    ``blob.parse()`` is treated as whitespace-separated ``word/TAG/...``
    tokens (presumably with newlines between sentences -- verify against the
    TextBlob parser in use).  Tokens without a tag field, such as the single
    empty token produced by an empty string, are skipped instead of raising
    ``IndexError``.
    """
    found = []
    for token in blob.parse().split():
        fields = token.split("/")
        if len(fields) > 1 and fields[1] == "NN":
            found.append(fields[0])
    return found


def _key_width(count, base):
    """Return how many base-*base* digits the key for *count* items needs.

    This is the smallest ``w`` with ``base ** w > count`` (0 when *count*
    is 0), computed with integer arithmetic.
    """
    width = 0
    while count >= 1:
        width += 1
        count //= base
    return width


def _encode_key(index, width, alphabet):
    """Encode *index* as a zero-padded, *width*-digit string over *alphabet*."""
    base = len(alphabet)
    digits = []
    for _ in range(width):
        index, remainder = divmod(index, base)
        digits.append(alphabet[remainder])
    return "".join(reversed(digits))


def _noun_index(nouns):
    """Normalise the noun index to a plain dict.

    ``sort_doc`` returns the index wrapped in a one-element list, and that is
    what the JSON component hands back to the search callbacks.  A bare dict
    or ``None`` (nothing loaded yet) is accepted as well.
    """
    if isinstance(nouns, list):
        nouns = nouns[0] if nouns else {}
    return nouns or {}


def _match_query_nouns(query, nouns):
    """Map each ``NN`` word of *query* to the sentence keys it matches.

    A query noun matches every index entry whose text contains it as a
    substring.  Returns ``{query_noun: [sentence_key, ...]}``.
    """
    index = _noun_index(nouns)
    query_nouns = _tagged_nouns(TextBlob(str(query or "")))
    matches = {}
    for indexed_noun, sentence_keys in index.items():
        for query_noun in query_nouns:
            if query_noun in indexed_noun:
                # "or []" tolerates None values left behind by older indexes.
                matches.setdefault(str(query_noun), []).extend(sentence_keys or [])
    return matches


def get_sen_list(text):
    """Split *text* into sentences and return them as a list of ``str``."""
    return [str(sentence) for sentence in TextBlob(text).sentences]


def proc_sen(sen_list,cnt):
    """Build the record for sentence number *cnt* of *sen_list*.

    Returns a dict with:
      ``sen_num``     -- *cnt*
      ``sentence``    -- the sentence text
      ``noun_phrase`` -- a copy of TextBlob's noun phrases for the sentence
      ``nouns``       -- the distinct ``NN``-tagged words, in first-seen order
    """
    blob = TextBlob(sen_list[cnt])
    unique_nouns = list(dict.fromkeys(_tagged_nouns(blob)))
    return {
        'sen_num': cnt,
        'sentence': str(sen_list[cnt]),
        'noun_phrase': blob.noun_phrases.copy(),
        'nouns': unique_nouns,
    }


def proc_nouns(sen_list):
    """Build the inverted index ``{noun or noun phrase: [sentence_key, ...]}``.

    *sen_list* is the key -> record mapping produced by ``sort_doc``.
    Malformed records are reported with ``print`` and skipped (best effort).
    """
    print("get nouns")
    noun_list = {}
    for key in sen_list:
        try:
            record = sen_list[key]
            terms = list(record['nouns']) + list(record['noun_phrase'])
        except (KeyError, TypeError) as e:
            print(e)
            continue
        for term in terms:
            # list.append returns None, so the bucket must be mutated in
            # place and never reassigned from the call's result.
            bucket = noun_list.setdefault(str(term), [])
            # A term can be both a noun and a noun phrase of one sentence;
            # record that sentence only once.
            if not bucket or bucket[-1] != key:
                bucket.append(key)
    print("done nouns")
    return noun_list


def sort_doc(text,steps_in=0,control=None):
    """Index *text* by sentence.

    Parameters:
      text     -- the document; converted with ``str``.
      steps_in -- key width in base-62 digits; 0 (default) picks the width
                  from the number of sentences.  A width too small to give
                  every sentence a unique key is widened automatically.
      control  -- accepted for backward compatibility; has no effect on the
                  result.

    Returns ``(json_out, [noun_list])``:
      json_out  -- ``{key: record}`` in sentence order, where ``record`` is
                   the ``proc_sen`` dict whose ``sentence`` field is replaced
                   by a context window: up to three sentences before the
                   current one, the sentence itself and up to two after it.
      noun_list -- the inverted index from ``proc_nouns``, wrapped in a list.
    """
    text = str(text)
    sen_list = get_sen_list(text)
    sen_obj_box = [proc_sen(sen_list, idx) for idx in range(len(sen_list))]
    key_cnt = len(sen_obj_box)
    print(key_cnt)

    # Defined unconditionally so that passing steps_in no longer fails with
    # NameError.
    control_char = list(control_json['control'])
    char_len = len(control_char)
    needed = _key_width(key_cnt, char_len)
    steps = max(int(steps_in), needed) if steps_in else needed

    json_out = {}
    for i, sen_obj_out in enumerate(sen_obj_box):
        sen_obj_out['sentence'] = sen_list[max(i - 3, 0):i + 3]
        json_out[_encode_key(i, steps, control_char)] = sen_obj_out
    if sen_obj_box:
        print("DONE")

    noun_list = proc_nouns(json_out)
    return json_out, [noun_list]


def find_query(query,sen,nouns):
    """Search the index for the nouns in *query*.

    *sen* and *nouns* are the two values returned by ``sort_doc``.  Returns
    ``(noun_box, sen_box)``: the query-noun -> sentence-key mapping and a list
    of ``{'sen_num', 'sentence'}`` dicts, one per match.  Both are empty when
    no document is loaded or nothing matches.
    """
    sen = sen or {}
    noun_box = _match_query_nouns(query, nouns)
    sen_box = []
    for sentence_keys in noun_box.values():
        for vals in sentence_keys:
            record = sen.get(vals)
            if record is None:
                continue  # index entry without a matching sentence record
            sen_box.append({'sen_num': record['sen_num'], 'sentence': record['sentence']})
    return noun_box, sen_box


def find_query_sen(query,sen,nouns):
    """Search like ``find_query`` but return only the matched sentence text.

    Returns one line per match (the record's ``sentence`` field formatted
    with ``str``), or an empty string when nothing matches.
    """
    sen = sen or {}
    noun_box = _match_query_nouns(query, nouns)
    lines = []
    for sentence_keys in noun_box.values():
        for vals in sentence_keys:
            print (f'SENETENCE VALS ::: {vals}')
            record = sen.get(vals)
            if record is None:
                continue  # index entry without a matching sentence record
            lines.append(f"{record['sentence']}\n")
    return "".join(lines)


with gr.Blocks() as app:
    inp = gr.Textbox(label="Paste Text",lines=10)
    btn = gr.Button("Load Document")
    with gr.Row():
        query=gr.Textbox(label="Search query")
        search_btn=gr.Button("Search")
        search_btn2=gr.Button("Search2")
    out_box=gr.Textbox(label="Results")
    sen_box=gr.Textbox(label="Sentences")
    with gr.Row():
        with gr.Column(scale=2):
            sen=gr.JSON(label="Sentences")
        with gr.Column(scale=1):
            nouns=gr.JSON(label="Nouns")
    search_btn.click(find_query,[query,sen,nouns],[out_box,sen_box])
    search_btn2.click(find_query_sen,[query,sen,nouns],[out_box])
    btn.click(sort_doc,[inp],[sen,nouns])
app.launch()