mhsvieira committed on
Commit
a319ac9
1 Parent(s): e539b70

Remove inflect

Browse files
Files changed (2) hide show
  1. extractor/_utils.py +5 -3
  2. extractor/extract.py +0 -3
extractor/_utils.py CHANGED
@@ -1,12 +1,12 @@
1
  import nmslib
2
  import numpy as np
3
  import streamlit as st
4
- import inflect
5
  import torch
6
 
7
  device = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
- p = inflect.engine()
10
 
11
  class FewDocumentsError(Exception):
12
  def __init__(self, documents, size, msg):
@@ -18,7 +18,9 @@ class FewDocumentsError(Exception):
18
  return repr(self.msg)
19
 
20
  def document_extraction(dataset, query, keywords, min_document_size, min_just_one_paragraph_size):
21
- word_in_text = lambda word, text: any([p.compare(word, w) for w in text.split()])
 
 
22
  lower_dataset = [document.lower() for document in dataset]
23
  lower_query = query.lower()
24
  lower_keywords = [keyword.lower() for keyword in keywords]
 
1
  import nmslib
2
  import numpy as np
3
  import streamlit as st
4
+ # import inflect
5
  import torch
6
 
7
  device = "cuda" if torch.cuda.is_available() else "cpu"
8
 
9
+ # p = inflect.engine()
10
 
11
  class FewDocumentsError(Exception):
12
  def __init__(self, documents, size, msg):
 
18
  return repr(self.msg)
19
 
20
  def document_extraction(dataset, query, keywords, min_document_size, min_just_one_paragraph_size):
21
+ # TODO: compare inflected forms
22
+ # word_in_text = lambda word, text: any([p.compare(word, w) for w in text.split()])
23
+ word_in_text = lambda word, text: word in set(text.split())
24
  lower_dataset = [document.lower() for document in dataset]
25
  lower_query = query.lower()
26
  lower_keywords = [keyword.lower() for keyword in keywords]
extractor/extract.py CHANGED
@@ -61,9 +61,6 @@ def extract(query: str, n: int=3, extracted_documents: list=None) -> str:
61
  number_of_similar_files=10
62
  )
63
 
64
- from pprint import pprint
65
- pprint(selected_paragraphs[:n])
66
-
67
  text = '\n'.join(selected_paragraphs[:n])
68
 
69
  return text
 
61
  number_of_similar_files=10
62
  )
63
 
 
 
 
64
  text = '\n'.join(selected_paragraphs[:n])
65
 
66
  return text