samarthagarwal23 committed on
Commit
d1391ee
1 Parent(s): 981b258

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -29,11 +29,12 @@ def read_pdf(file):
29
  # We use BM25 as the retriever, which does the first round of candidate filtering based on word-level matching
30
 
31
  def bm25_tokenizer(text):
 
32
  tokenized_doc = []
33
  for token in text.lower().split():
34
  token = token.strip(string.punctuation)
35
 
36
- if len(token) > 0 and token not in _stop_words.ENGLISH_STOP_WORDS:
37
  tokenized_doc.append(token)
38
  return tokenized_doc
39
 
 
29
  # We use BM25 as the retriever, which does the first round of candidate filtering based on word-level matching
30
 
31
  def bm25_tokenizer(text):
32
+ stop_w = ['a', 'the', 'am', 'is' , 'are', 'who', 'how', 'where', 'when', 'why']
33
  tokenized_doc = []
34
  for token in text.lower().split():
35
  token = token.strip(string.punctuation)
36
 
37
+ if len(token) > 0 and token not in stop_w:
38
  tokenized_doc.append(token)
39
  return tokenized_doc
40