cnmoro committed on
Commit
b76aadb
1 Parent(s): 7838009

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -102,7 +102,17 @@ def extract_embeddings(text):
102
  def extract_embeddings_batch(texts):
103
  return [extract_embeddings(text) for text in texts]
104
 
105
- def compress_semantically(input_text, word_reduction_factor=0.35, num_samples=500):
 
 
 
 
 
 
 
 
 
 
106
  semantic_embeddings = extract_embeddings(input_text)
107
  text_lang = detect_language_en_pt(input_text)
108
  stopwords = en_stop_words if text_lang == 'en' else pt_stop_words
 
102
  def extract_embeddings_batch(texts):
103
  return [extract_embeddings(text) for text in texts]
104
 
105
+ def compress_semantically(input_text, word_reduction_factor=0.35):
106
+
107
+ num_samples = 500
108
+ word_count = input_text.split()
109
+
110
+ thresholds = [(1500, 80), (1000, 90), (700, 110), (500, 130), (250, 160)]
111
+ for threshold, value in thresholds:
112
+ if word_count > threshold:
113
+ num_samples = value
114
+ break
115
+
116
  semantic_embeddings = extract_embeddings(input_text)
117
  text_lang = detect_language_en_pt(input_text)
118
  stopwords = en_stop_words if text_lang == 'en' else pt_stop_words