Spaces:
Runtime error
Runtime error
Commit
·
d1391ee
1
Parent(s):
981b258
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,11 +29,12 @@ def read_pdf(file):
|
|
| 29 |
# We use BM25 as the retriever, which does a first round of candidate filtering based on word-level matching
|
| 30 |
|
| 31 |
def bm25_tokenizer(text):
|
|
|
|
| 32 |
tokenized_doc = []
|
| 33 |
for token in text.lower().split():
|
| 34 |
token = token.strip(string.punctuation)
|
| 35 |
|
| 36 |
-
if len(token) > 0 and token not in
|
| 37 |
tokenized_doc.append(token)
|
| 38 |
return tokenized_doc
|
| 39 |
|
|
|
|
| 29 |
# We use BM25 as the retriever, which does a first round of candidate filtering based on word-level matching
|
| 30 |
|
| 31 |
# Words dropped before BM25 indexing/querying. Built once at import time as a
# frozenset so membership tests are O(1) and no list is re-created per call.
_BM25_STOP_WORDS = frozenset(
    {"a", "the", "am", "is", "are", "who", "how", "where", "when", "why"}
)


def bm25_tokenizer(text):
    """Split *text* into lowercase word tokens for BM25 matching.

    The text is lowercased and split on whitespace. Each piece then has
    leading and trailing punctuation (``string.punctuation``) stripped;
    punctuation inside a token (e.g. ``state-of-the-art``) is kept.
    Pieces that end up empty or that are in the stop-word set are dropped.

    Args:
        text: The string to tokenize.

    Returns:
        A list of the remaining tokens, in their original order.
    """
    # Strip punctuation first so that quoted or trailing-punctuated stop
    # words ("the," / "'who'") are still recognised and filtered out.
    stripped = (piece.strip(string.punctuation) for piece in text.lower().split())
    return [
        token
        for token in stripped
        if token and token not in _BM25_STOP_WORDS
    ]
|
| 40 |
|