Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -102,7 +102,17 @@ def extract_embeddings(text):
|
|
102 |
def extract_embeddings_batch(texts):
    """Apply ``extract_embeddings`` to every item of *texts*.

    Returns a list holding one result per item, in iteration order.
    """
    embeddings = []
    for item in texts:
        embeddings.append(extract_embeddings(item))
    return embeddings
|
104 |
|
105 |
-
def compress_semantically(input_text, word_reduction_factor=0.35
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
semantic_embeddings = extract_embeddings(input_text)
|
107 |
text_lang = detect_language_en_pt(input_text)
|
108 |
stopwords = en_stop_words if text_lang == 'en' else pt_stop_words
|
|
|
102 |
def extract_embeddings_batch(texts):
    """Apply ``extract_embeddings`` to every item of *texts*.

    Returns a list holding one result per item, in iteration order.
    """
    embeddings = []
    for item in texts:
        embeddings.append(extract_embeddings(item))
    return embeddings
|
104 |
|
105 |
+
def compress_semantically(input_text, word_reduction_factor=0.35):
|
106 |
+
|
107 |
+
num_samples = 500
|
108 |
+
word_count = input_text.split()
|
109 |
+
|
110 |
+
thresholds = [(1500, 80), (1000, 90), (700, 110), (500, 130), (250, 160)]
|
111 |
+
for threshold, value in thresholds:
|
112 |
+
if word_count > threshold:
|
113 |
+
num_samples = value
|
114 |
+
break
|
115 |
+
|
116 |
semantic_embeddings = extract_embeddings(input_text)
|
117 |
text_lang = detect_language_en_pt(input_text)
|
118 |
stopwords = en_stop_words if text_lang == 'en' else pt_stop_words
|