Sa-m committed on
Commit
5f546a1
1 Parent(s): 785f00f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -55,14 +55,18 @@ nltk.download('words')
55
 
56
  """## PARSING FILES"""
57
 
 
 
 
 
 
 
58
  def Parsing(parsed_text):
59
  parsed_text=parsed_text.name
60
- raw_party =parser.from_file(parsed_text)
61
- raw_party = raw_party['content']
62
  return clean(raw_party)
63
 
64
 
65
-
66
  #Added more stopwords to avoid irrelevant terms
67
  stop_words = set(stopwords.words('english'))
68
  stop_words.update('ask','much','thank','etc.', 'e', 'We', 'In', 'ed','pa', 'This','also', 'A', 'fu','To','5','ing', 'er', '2')
 
55
 
56
  """## PARSING FILES"""
57
 
58
+ #def Parsing(parsed_text):
59
+ #parsed_text=parsed_text.name
60
+ #raw_party =parser.from_file(parsed_text)
61
+ # raw_party = raw_party['content']
62
+ # return clean(raw_party)
63
+
64
  def Parsing(parsed_text):
65
  parsed_text=parsed_text.name
66
+ raw_party =textract.process(parsed_text, encoding='ascii',method='pdfminer')
 
67
  return clean(raw_party)
68
 
69
 
 
70
  #Added more stopwords to avoid irrelevant terms
71
  stop_words = set(stopwords.words('english'))
72
  stop_words.update('ask','much','thank','etc.', 'e', 'We', 'In', 'ed','pa', 'This','also', 'A', 'fu','To','5','ing', 'er', '2')