Spaces:
Runtime error
Runtime error
samarthagarwal23
committed on
Commit
•
d1391ee
1
Parent(s):
981b258
Update app.py
Browse files
app.py
CHANGED
@@ -29,11 +29,12 @@ def read_pdf(file):
|
|
29 |
# We use BM25 as the retriever, which does the first round of candidate filtering based on word-level matching
|
30 |
|
31 |
def bm25_tokenizer(text):
|
|
|
32 |
tokenized_doc = []
|
33 |
for token in text.lower().split():
|
34 |
token = token.strip(string.punctuation)
|
35 |
|
36 |
-
if len(token) > 0 and token not in
|
37 |
tokenized_doc.append(token)
|
38 |
return tokenized_doc
|
39 |
|
|
|
29 |
# We use BM25 as the retriever, which does the first round of candidate filtering based on word-level matching
|
30 |
|
31 |
def bm25_tokenizer(text):
|
32 |
+
stop_w = ['a', 'the', 'am', 'is' , 'are', 'who', 'how', 'where', 'when', 'why']
|
33 |
tokenized_doc = []
|
34 |
for token in text.lower().split():
|
35 |
token = token.strip(string.punctuation)
|
36 |
|
37 |
+
if len(token) > 0 and token not in stop_w:
|
38 |
tokenized_doc.append(token)
|
39 |
return tokenized_doc
|
40 |
|