Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -55,14 +55,18 @@ nltk.download('words')
|
|
55 |
|
56 |
"""## PARSING FILES"""
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
def Parsing(parsed_text):
    """Extract the text content of a file and return it cleaned.

    Args:
        parsed_text: Object whose ``.name`` attribute is the path of the
            file to parse (presumably an upload handle -- confirm with
            the caller).

    Returns:
        Whatever ``clean`` returns for the extracted content.
    """
    file_path = parsed_text.name
    # NOTE(review): ``parser`` is presumably tika's parser module -- confirm
    # against the file's imports.
    raw_party = parser.from_file(file_path)
    # The parse result is a mapping; the extracted text is under 'content'.
    raw_party = raw_party['content']
    return clean(raw_party)
|
63 |
|
64 |
|
65 |
-
|
66 |
# Added more stopwords to avoid irrelevant terms.
stop_words = set(stopwords.words('english'))
# set.update() treats every positional argument as an iterable of elements,
# so passing bare strings would add their individual characters ('a', 's',
# 'k', ...) rather than the words themselves. Pass one list of words instead.
stop_words.update([
    'ask', 'much', 'thank', 'etc.', 'e', 'We', 'In', 'ed', 'pa',
    'This', 'also', 'A', 'fu', 'To', '5', 'ing', 'er', '2',
])
|
|
|
55 |
|
56 |
"""## PARSING FILES"""
|
57 |
|
58 |
+
#def Parsing(parsed_text):
|
59 |
+
#parsed_text=parsed_text.name
|
60 |
+
#raw_party =parser.from_file(parsed_text)
|
61 |
+
# raw_party = raw_party['content']
|
62 |
+
# return clean(raw_party)
|
63 |
+
|
64 |
def Parsing(parsed_text):
    """Extract the text of a PDF with textract and return it cleaned.

    Args:
        parsed_text: Object whose ``.name`` attribute is the path of the
            file to read (presumably an upload handle -- confirm with
            the caller).

    Returns:
        Whatever ``clean`` returns for the extracted text.
    """
    file_path = parsed_text.name
    raw_party = textract.process(file_path, encoding='ascii', method='pdfminer')
    # textract.process returns a byte string encoded with ``encoding``.
    # NOTE(review): assumes ``clean`` expects ``str`` -- confirm; the guard
    # leaves the value untouched if it is already text.
    if isinstance(raw_party, bytes):
        raw_party = raw_party.decode('ascii', errors='ignore')
    return clean(raw_party)
|
68 |
|
69 |
|
|
|
70 |
# Added more stopwords to avoid irrelevant terms.
stop_words = set(stopwords.words('english'))
# set.update() treats every positional argument as an iterable of elements,
# so passing bare strings would add their individual characters ('a', 's',
# 'k', ...) rather than the words themselves. Pass one list of words instead.
stop_words.update([
    'ask', 'much', 'thank', 'etc.', 'e', 'We', 'In', 'ed', 'pa',
    'This', 'also', 'A', 'fu', 'To', '5', 'ing', 'er', '2',
])
|