Spaces:
Runtime error
Runtime error
Remove inflect
Browse files
- extractor/_utils.py +5 -3
- extractor/extract.py +0 -3
extractor/_utils.py
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
import nmslib
|
2 |
import numpy as np
|
3 |
import streamlit as st
|
4 |
-
import inflect
|
5 |
import torch
|
6 |
|
7 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
8 |
|
9 |
-
p = inflect.engine()
|
10 |
|
11 |
class FewDocumentsError(Exception):
|
12 |
def __init__(self, documents, size, msg):
|
@@ -18,7 +18,9 @@ class FewDocumentsError(Exception):
|
|
18 |
return repr(self.msg)
|
19 |
|
20 |
def document_extraction(dataset, query, keywords, min_document_size, min_just_one_paragraph_size):
|
21 |
-
|
|
|
|
|
22 |
lower_dataset = [document.lower() for document in dataset]
|
23 |
lower_query = query.lower()
|
24 |
lower_keywords = [keyword.lower() for keyword in keywords]
|
|
|
1 |
import nmslib
|
2 |
import numpy as np
|
3 |
import streamlit as st
|
4 |
+
# import inflect
|
5 |
import torch
|
6 |
|
7 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
8 |
|
9 |
+
# p = inflect.engine()
|
10 |
|
11 |
class FewDocumentsError(Exception):
|
12 |
def __init__(self, documents, size, msg):
|
|
|
18 |
return repr(self.msg)
|
19 |
|
20 |
def document_extraction(dataset, query, keywords, min_document_size, min_just_one_paragraph_size):
|
21 |
+
# TODO: compare inflected forms
|
22 |
+
# word_in_text = lambda word, text: any([p.compare(word, w) for w in text.split()])
|
23 |
+
def word_in_text(word, text):
    """Return True when *word* equals one whitespace-separated token of *text*.

    Matching is exact: no inflection or case folding is applied here.
    """
    # A single membership test does not benefit from building a set first.
    return word in text.split()
|
24 |
lower_dataset = [document.lower() for document in dataset]
|
25 |
lower_query = query.lower()
|
26 |
lower_keywords = [keyword.lower() for keyword in keywords]
|
extractor/extract.py
CHANGED
@@ -61,9 +61,6 @@ def extract(query: str, n: int=3, extracted_documents: list=None) -> str:
|
|
61 |
number_of_similar_files=10
|
62 |
)
|
63 |
|
64 |
-
from pprint import pprint
|
65 |
-
pprint(selected_paragraphs[:n])
|
66 |
-
|
67 |
text = '\n'.join(selected_paragraphs[:n])
|
68 |
|
69 |
return text
|
|
|
61 |
number_of_similar_files=10
|
62 |
)
|
63 |
|
|
|
|
|
|
|
64 |
text = '\n'.join(selected_paragraphs[:n])
|
65 |
|
66 |
return text
|