Sa-m committed on
Commit
dd57fb3
1 Parent(s): f9871f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -3
app.py CHANGED
@@ -114,6 +114,33 @@ def concordance(text_Party,strng):
114
  s=result.getvalue().splitlines()
115
  return result.getvalue()
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
  def normalize(d, target=1.0):
119
  raw = sum(d.values())
@@ -356,8 +383,8 @@ def analysis(Manifesto,Search):
356
  fdist_Party=fDistance(text_Party)
357
  img4=fDistancePlot(text_Party)
358
  img5=DispersionPlot(text_Party)
359
-
360
- searchRes=concordance(text_Party,Search)
361
  searChRes=clean(searchRes)
362
  searChRes=searchRes.replace(Search,"\u0332".join(Search))
363
  return searChRes,fdist_Party,img1,img2,img3,img4,img5
@@ -373,7 +400,7 @@ plot3=gr.outputs.Image(label='Word Cloud')
373
  plot4=gr.outputs.Image(label='Frequency Distribution')
374
  plot5=gr.outputs.Image(label='Dispersion Plot')
375
 
376
- io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['manifestos/Bjp_Manifesto_2019.pdf','modi'],['manifestos/AAP_Manifesto_2019.pdf','delhi'],['manifestos/Congress_Manifesto_2019.pdf','safety']],theme='peach')
377
  io.launch(debug=True,share=False)
378
  #,examples=[['./Bjp_Manifesto_2019.pdf','india'],['./AAP_Manifesto_2019.pdf',],['./Congress_Manifesto_2019.pdf',]]
379
  #allow_screenshot=False, allow_flagging="never",
 
114
  s=result.getvalue().splitlines()
115
  return result.getvalue()
116
 
117
def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10):
    """
    Collect every phrase of ``tar_passage`` that contains ``target_word``.

    Workaround to capture, as a string, the kind of output given by the
    nltk concordance function.

    Note the argument order: the word to look for comes first, the text to
    search comes second.

    Args:
        target_word: single word to look for; matching is case-insensitive.
        tar_passage: the text to search, as one string.
        left_margin: number of tokens (words/punctuation) kept before the
            target word. The window is clamped at the beginning of the text.
        right_margin: number of tokens kept starting at the target word
            itself, i.e. the target word plus ``right_margin - 1`` tokens
            after it.

    Returns:
        One string holding all matching phrases, joined with ". ".
        An empty string when the target word does not occur.
    """
    # Split the passage into word/punctuation tokens.
    tokens = nltk.word_tokenize(tar_passage)

    # Index the token positions; the lower-casing key makes the lookup
    # case-insensitive for both the passage and the target word.
    index = nltk.ConcordanceIndex(tokens, key=lambda s: s.lower())

    phrases = []
    for offset in index.offsets(target_word):
        # Honour left_margin (previously a hard-coded 5) and never let the
        # window start before the first token.
        start = max(offset - left_margin, 0)
        window = tokens[start:offset + right_margin]
        # NOTE(review): every capital "Y" is stripped from the tokens, as in
        # the previous implementation -- presumably to remove an artifact of
        # the source text extraction; confirm it is still wanted.
        phrases.append(' '.join(tok.replace("Y", "") for tok in window))

    # Keep every match, including the last one (it used to be dropped).
    return '. '.join(phrases)
143
+
144
 
145
  def normalize(d, target=1.0):
146
  raw = sum(d.values())
 
383
  fdist_Party=fDistance(text_Party)
384
  img4=fDistancePlot(text_Party)
385
  img5=DispersionPlot(text_Party)
386
+ #concordance(text_Party,Search)
387
+ searchRes=get_all_phases_containing_tar_wrd(text_Party,Search)
388
  searChRes=clean(searchRes)
389
  searChRes=searchRes.replace(Search,"\u0332".join(Search))
390
  return searChRes,fdist_Party,img1,img2,img3,img4,img5
 
400
  plot4=gr.outputs.Image(label='Frequency Distribution')
401
  plot5=gr.outputs.Image(label='Dispersion Plot')
402
 
403
+ io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['manifestos/Bjp_Manifesto_2019.pdf','modi'],['manifestos/AAP_Manifesto_2019.pdf','delhi'],['manifestos/Congress_Manifesto_2019.pdf','safety']],theme='dark-peach')
404
  io.launch(debug=True,share=False)
405
  #,examples=[['./Bjp_Manifesto_2019.pdf','india'],['./AAP_Manifesto_2019.pdf',],['./Congress_Manifesto_2019.pdf',]]
406
  #allow_screenshot=False, allow_flagging="never",