Spaces:

divAIne
/

busy-module-text

Sleeping

App Files Community

EurekaPotato committed 13 days ago

Commit

6d92900

verified ·

1 Parent(s): cd2f19e

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

text_features.py +35 -17

text_features.py CHANGED Viewed

@@ -11,14 +11,15 @@ KEY IMPROVEMENTS:
 import numpy as np
 from typing import List, Dict, Tuple
 from transformers import pipeline
-from sentence_transformers import SentenceTransformer
 import re
 class TextFeatureExtractor:
     """Extract 9 text features for busy detection"""
-    def __init__(self, use_intent_model: bool = True):
         """
         Initialize NLP models
@@ -39,9 +40,22 @@ class TextFeatureExtractor:
         )
         print("[OK] Sentiment model loaded")
-        # Coherence model
-        self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
-        print("[OK] Coherence model loaded")
         # Always setup patterns — busy_keywords is needed by extract_marker_counts()
         self._setup_patterns()
@@ -278,8 +292,8 @@ class TextFeatureExtractor:
             if keyword in transcript_lower
         )
-        # Normalize by total words
-        cognitive_load = cognitive_load_count / total_words
         time_pressure = time_pressure_count / total_words
         deflection = deflection_count / total_words
@@ -319,16 +333,20 @@ class TextFeatureExtractor:
             return 0.5  # Neutral if no data (changed from 1.0 to be more conservative)
         try:
-            # Encode question and responses
-            question_embedding = self.coherence_model.encode(question, convert_to_tensor=True)
-            response_embeddings = self.coherence_model.encode(responses, convert_to_tensor=True)
-            # Calculate cosine similarity
-            from sentence_transformers import util
-            similarities = util.cos_sim(question_embedding, response_embeddings)[0]
-            # Average similarity as coherence score
-            coherence = float(np.mean(similarities.cpu().numpy()))
             return max(0.0, min(1.0, coherence))  # Clamp to [0, 1]
         except Exception as e:

 import numpy as np
 from typing import List, Dict, Tuple
 from transformers import pipeline
+from sentence_transformers import SentenceTransformer, CrossEncoder
 import re
 class TextFeatureExtractor:
     """Extract 9 text features for busy detection"""
+    def __init__(self, use_intent_model: bool = True, marker_alpha: float = 1.0, marker_beta: float = 1.0,
+                 coherence_model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
         """
         Initialize NLP models
         )
         print("[OK] Sentiment model loaded")
+        # Coherence model (cross-encoder for next-utterance plausibility)
+        self.coherence_model = None
+        self.coherence_model_is_cross = True
+        self.coherence_model_name = coherence_model_name
+        try:
+            self.coherence_model = CrossEncoder(self.coherence_model_name, device="cpu")
+            print(f"[OK] Coherence model loaded (CrossEncoder: {self.coherence_model_name})")
+        except Exception as e:
+            print(f"[WARN] CrossEncoder load failed: {e}")
+            self.coherence_model = SentenceTransformer('all-MiniLM-L6-v2')
+            self.coherence_model_is_cross = False
+            print("[OK] Coherence model loaded (SentenceTransformer fallback)")
+        # Marker smoothing
+        self.marker_alpha = float(marker_alpha)
+        self.marker_beta = float(marker_beta)
         # Always setup patterns — busy_keywords is needed by extract_marker_counts()
         self._setup_patterns()
             if keyword in transcript_lower
         )
+        # Normalize by total words with smoothing (cognitive load only)
+        cognitive_load = (cognitive_load_count + self.marker_alpha) / (total_words + self.marker_beta)
         time_pressure = time_pressure_count / total_words
         deflection = deflection_count / total_words
             return 0.5  # Neutral if no data (changed from 1.0 to be more conservative)
         try:
+            if self.coherence_model_is_cross:
+                pairs = [(question, response) for response in responses]
+                scores = self.coherence_model.predict(pairs)
+                scores = np.array(scores, dtype=np.float32)
+                if np.any(scores < 0.0) or np.any(scores > 1.0):
+                    scores = 1.0 / (1.0 + np.exp(-scores))
+                coherence = float(np.mean(scores))
+            else:
+                # Fallback: cosine similarity
+                question_embedding = self.coherence_model.encode(question, convert_to_tensor=True)
+                response_embeddings = self.coherence_model.encode(responses, convert_to_tensor=True)
+                from sentence_transformers import util
+                similarities = util.cos_sim(question_embedding, response_embeddings)[0]
+                coherence = float(np.mean(similarities.cpu().numpy()))
             return max(0.0, min(1.0, coherence))  # Clamp to [0, 1]
         except Exception as e: