Spaces:
Runtime error
● Perfekt! 🚀 Wir haben das System komplett umgestellt von statischen Listen zu intelligenter
Browse filessemantischer Erkennung:
🧠 Was ist jetzt anders:
Statt fester Listen:
# ALT (statisch):
if entity in ['conveyor', 'belt', 'machine']:
return 'industrial'
Jetzt intelligente Vektoren:
# NEU (intelligent):
similarity = cosine_similarity(
spacy_vector(entity),
spacy_vector('manufacturing equipment')
)
if similarity > 0.6:
return 'industrial'
🔥 Neue Features:
1. 🎯 Semantic Prototypes: Statt Listen verwenden wir Konzept-Kerne wie 'manufacturing',
'production', 'assembly'
2. 📊 Cosine Similarity: Berechnet echte semantische Ähnlichkeit zwischen Wörtern
3. 🧭 Contextual Classification: Wenn "Förderband" neben "Fabrik" steht → automatisch industrial
4. 🌍 Sprachunabhängig: Funktioniert mit jedem Wort in jeder Sprache
5. 🔄 Dreistufiges Fallback:
- Semantic → Contextual → Pattern-based
🎯 Ihr "conveyor" Beispiel:
Vorher:
- conveyor nicht in Liste → unknown
Jetzt:
- spacy_vector('conveyor') ähnlich spacy_vector('manufacturing equipment') → industrial ✅
Das System versteht jetzt Bedeutungen, nicht nur Wortlisten!
@@ -74,36 +74,22 @@ class RealGASMInterface:
|
|
74 |
self.tokenizer = None
|
75 |
self.last_gasm_results = None # Store last results for visualization
|
76 |
|
77 |
-
#
|
78 |
-
self.
|
79 |
-
'
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
'
|
88 |
-
'robotics': ['robot', 'arm', 'sensor', 'motor', 'actuator', 'controller', 'manipulator', 'gripper', 'joint'],
|
89 |
-
'scientific': ['detector', 'microscope', 'telescope', 'spectrometer', 'analyzer', 'probe', 'scanner'],
|
90 |
-
'industrial': ['reactor', 'turbine', 'compressor', 'pump', 'valve', 'conveyor', 'assembly', 'platform',
|
91 |
-
'machine', 'equipment', 'apparatus', 'device', 'unit', 'system', 'installation',
|
92 |
-
'sorting', 'sorter', 'belt', 'line', 'station', 'workstation', 'cell'],
|
93 |
-
'electronic': ['circuit', 'processor', 'memory', 'display', 'antenna', 'battery', 'capacitor']
|
94 |
-
},
|
95 |
-
'spatial_objects': {
|
96 |
-
'architectural': ['room', 'door', 'window', 'wall', 'floor', 'ceiling', 'corner'],
|
97 |
-
'locations': ['center', 'side', 'edge', 'surface', 'space', 'area', 'zone', 'place', 'position', 'spot'],
|
98 |
-
'natural': ['tree', 'rock', 'river', 'mountain', 'field', 'forest', 'lake']
|
99 |
-
},
|
100 |
-
'scientific_entities': {
|
101 |
-
'physics': ['atom', 'electron', 'proton', 'neutron', 'photon', 'molecule', 'particle'],
|
102 |
-
'chemistry': ['crystal', 'compound', 'solution', 'reaction', 'catalyst', 'polymer'],
|
103 |
-
'astronomy': ['satellite', 'planet', 'star', 'galaxy', 'comet', 'asteroid', 'orbit']
|
104 |
-
}
|
105 |
}
|
106 |
|
|
|
|
|
|
|
107 |
# Fallback patterns for when spaCy is not available
|
108 |
self.fallback_entity_patterns = [
|
109 |
# High-confidence patterns
|
@@ -250,18 +236,36 @@ class RealGASMInterface:
|
|
250 |
return self._is_in_semantic_categories(text)
|
251 |
|
252 |
def _is_in_semantic_categories(self, entity: str) -> bool:
|
253 |
-
"""Check if entity belongs to any
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
for item in items:
|
262 |
-
if item in entity_lower or entity_lower in item:
|
263 |
return True
|
264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
265 |
|
266 |
def _filter_entities_semantically(self, entities: List[str]) -> List[str]:
|
267 |
"""Filter entities based on semantic relevance"""
|
@@ -688,12 +692,13 @@ class RealGASMInterface:
|
|
688 |
logger.warning(f"Consistency verification failed: {consistency_error}")
|
689 |
consistency_results = {'warning': 'verification_failed'}
|
690 |
|
691 |
-
# Create entity data with real GASM positions
|
|
|
692 |
real_entities = []
|
693 |
-
for i, entity in enumerate(
|
694 |
real_entities.append({
|
695 |
'name': entity,
|
696 |
-
'type': self.classify_entity_type(entity),
|
697 |
'position': final_positions[i].tolist(),
|
698 |
'confidence': 0.95 # High confidence for real GASM results
|
699 |
})
|
@@ -718,44 +723,130 @@ class RealGASMInterface:
|
|
718 |
logger.error(f"Real GASM forward pass failed: {e}")
|
719 |
raise e
|
720 |
|
721 |
-
def
|
722 |
-
"""Classify entity type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
723 |
entity_lower = entity.lower()
|
724 |
|
725 |
-
#
|
726 |
-
for
|
727 |
-
for subcategory, items in subcategories.items():
|
728 |
-
if entity_lower in items:
|
729 |
-
if category == 'technical_objects':
|
730 |
-
if subcategory == 'robotics':
|
731 |
-
return 'robotic'
|
732 |
-
elif subcategory == 'industrial':
|
733 |
-
return 'industrial'
|
734 |
-
elif subcategory == 'scientific':
|
735 |
-
return 'scientific'
|
736 |
-
else:
|
737 |
-
return 'technical'
|
738 |
-
elif category == 'physical_objects':
|
739 |
-
return 'physical'
|
740 |
-
elif category == 'spatial_objects':
|
741 |
-
return 'spatial'
|
742 |
-
elif category == 'scientific_entities':
|
743 |
-
return 'scientific'
|
744 |
-
|
745 |
-
# Fallback patterns for backwards compatibility
|
746 |
-
if any(word in entity_lower for word in ['robot', 'arm', 'sensor', 'motor']):
|
747 |
return 'robotic'
|
748 |
-
elif any(word in entity_lower for word in ['conveyor', 'machine', 'equipment', 'system']):
|
749 |
-
return 'industrial'
|
750 |
-
elif any(word in entity_lower for word in ['
|
751 |
return 'scientific'
|
752 |
-
elif any(word in entity_lower for word in ['
|
753 |
-
return '
|
754 |
-
elif any(word in entity_lower for word in ['
|
|
|
|
|
755 |
return 'spatial'
|
|
|
|
|
756 |
else:
|
757 |
return 'unknown'
|
758 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
759 |
def process_with_real_gasm(
|
760 |
self,
|
761 |
text: str,
|
@@ -824,9 +915,10 @@ class RealGASMInterface:
|
|
824 |
) -> Dict[str, Any]:
|
825 |
"""Enhanced simulation when real GASM fails"""
|
826 |
try:
|
827 |
-
# Create realistic entity data
|
|
|
828 |
entity_data = []
|
829 |
-
for i, entity in enumerate(
|
830 |
# Generate more realistic positions based on text analysis
|
831 |
angle = (i * 2 * np.pi) / max(len(entities), 3)
|
832 |
radius = 2 + i * 0.3
|
@@ -839,7 +931,7 @@ class RealGASMInterface:
|
|
839 |
|
840 |
entity_data.append({
|
841 |
'name': entity,
|
842 |
-
'type': self.classify_entity_type(entity),
|
843 |
'position': position,
|
844 |
'confidence': min(0.9, 0.6 + len(entity) * 0.02)
|
845 |
})
|
|
|
74 |
self.tokenizer = None
|
75 |
self.last_gasm_results = None # Store last results for visualization
|
76 |
|
77 |
+
# Semantic prototype words for dynamic classification using word vectors
|
78 |
+
self.semantic_prototypes = {
|
79 |
+
'industrial': ['machine', 'equipment', 'factory', 'production', 'assembly', 'manufacturing'],
|
80 |
+
'robotic': ['robot', 'automation', 'mechanical', 'actuator', 'control', 'artificial'],
|
81 |
+
'scientific': ['research', 'analysis', 'measurement', 'laboratory', 'experiment', 'detection'],
|
82 |
+
'physical': ['object', 'material', 'substance', 'physical', 'tangible', 'solid'],
|
83 |
+
'spatial': ['location', 'position', 'space', 'area', 'place', 'region'],
|
84 |
+
'electronic': ['digital', 'electronic', 'circuit', 'computer', 'technology', 'device'],
|
85 |
+
'furniture': ['furniture', 'seating', 'desk', 'storage', 'household', 'interior'],
|
86 |
+
'tool': ['tool', 'instrument', 'implement', 'equipment', 'utility', 'apparatus'],
|
87 |
+
'vehicle': ['transportation', 'vehicle', 'travel', 'mobility', 'transport', 'automotive']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
}
|
89 |
|
90 |
+
# Similarity threshold for classification
|
91 |
+
self.similarity_threshold = 0.6
|
92 |
+
|
93 |
# Fallback patterns for when spaCy is not available
|
94 |
self.fallback_entity_patterns = [
|
95 |
# High-confidence patterns
|
|
|
236 |
return self._is_in_semantic_categories(text)
|
237 |
|
238 |
def _is_in_semantic_categories(self, entity: str) -> bool:
|
239 |
+
"""Check if entity belongs to any semantic category using vector similarity"""
|
240 |
+
if not SPACY_AVAILABLE or not nlp:
|
241 |
+
# Fallback to simple pattern matching
|
242 |
+
entity_lower = entity.lower().strip()
|
243 |
+
# Check against all prototype words
|
244 |
+
for category, prototypes in self.semantic_prototypes.items():
|
245 |
+
for prototype in prototypes:
|
246 |
+
if prototype in entity_lower or entity_lower in prototype:
|
|
|
|
|
247 |
return True
|
248 |
+
return False
|
249 |
+
|
250 |
+
try:
|
251 |
+
entity_doc = nlp(entity.lower().strip())
|
252 |
+
if not entity_doc.has_vector:
|
253 |
+
return False
|
254 |
+
|
255 |
+
# Check similarity with any category
|
256 |
+
for category, prototypes in self.semantic_prototypes.items():
|
257 |
+
for prototype in prototypes:
|
258 |
+
prototype_doc = nlp(prototype)
|
259 |
+
if prototype_doc.has_vector:
|
260 |
+
similarity = self._cosine_similarity(entity_doc.vector, prototype_doc.vector)
|
261 |
+
if similarity > self.similarity_threshold:
|
262 |
+
return True
|
263 |
+
|
264 |
+
return False
|
265 |
+
|
266 |
+
except Exception as e:
|
267 |
+
logger.warning(f"Semantic category check failed for '{entity}': {e}")
|
268 |
+
return False
|
269 |
|
270 |
def _filter_entities_semantically(self, entities: List[str]) -> List[str]:
|
271 |
"""Filter entities based on semantic relevance"""
|
|
|
692 |
logger.warning(f"Consistency verification failed: {consistency_error}")
|
693 |
consistency_results = {'warning': 'verification_failed'}
|
694 |
|
695 |
+
# Create entity data with real GASM positions using contextual classification
|
696 |
+
entity_names = [str(e) for e in entities[:len(final_positions)]]
|
697 |
real_entities = []
|
698 |
+
for i, entity in enumerate(entity_names):
|
699 |
real_entities.append({
|
700 |
'name': entity,
|
701 |
+
'type': self.classify_entity_type(entity, entity_names),
|
702 |
'position': final_positions[i].tolist(),
|
703 |
'confidence': 0.95 # High confidence for real GASM results
|
704 |
})
|
|
|
723 |
logger.error(f"Real GASM forward pass failed: {e}")
|
724 |
raise e
|
725 |
|
726 |
+
def classify_entity_type_semantic(self, entity: str) -> str:
|
727 |
+
"""Classify entity type using semantic similarity with spaCy vectors"""
|
728 |
+
if not SPACY_AVAILABLE or not nlp:
|
729 |
+
return self.classify_entity_type_fallback(entity)
|
730 |
+
|
731 |
+
try:
|
732 |
+
# Get entity vector
|
733 |
+
entity_doc = nlp(entity.lower())
|
734 |
+
if not entity_doc.has_vector:
|
735 |
+
return self.classify_entity_type_fallback(entity)
|
736 |
+
|
737 |
+
entity_vector = entity_doc.vector
|
738 |
+
|
739 |
+
best_category = 'unknown'
|
740 |
+
best_similarity = 0.0
|
741 |
+
|
742 |
+
# Compare with each category prototype
|
743 |
+
for category, prototypes in self.semantic_prototypes.items():
|
744 |
+
category_similarities = []
|
745 |
+
|
746 |
+
for prototype in prototypes:
|
747 |
+
prototype_doc = nlp(prototype)
|
748 |
+
if prototype_doc.has_vector:
|
749 |
+
# Calculate cosine similarity
|
750 |
+
similarity = self._cosine_similarity(entity_vector, prototype_doc.vector)
|
751 |
+
category_similarities.append(similarity)
|
752 |
+
|
753 |
+
# Use average similarity for this category
|
754 |
+
if category_similarities:
|
755 |
+
avg_similarity = sum(category_similarities) / len(category_similarities)
|
756 |
+
if avg_similarity > best_similarity and avg_similarity > self.similarity_threshold:
|
757 |
+
best_similarity = avg_similarity
|
758 |
+
best_category = category
|
759 |
+
|
760 |
+
return best_category
|
761 |
+
|
762 |
+
except Exception as e:
|
763 |
+
logger.warning(f"Semantic classification failed for '{entity}': {e}")
|
764 |
+
return self.classify_entity_type_fallback(entity)
|
765 |
+
|
766 |
+
def classify_entity_type_contextual(self, entity: str, context_entities: List[str]) -> str:
|
767 |
+
"""Enhanced classification using context from other entities"""
|
768 |
+
if not SPACY_AVAILABLE or not nlp:
|
769 |
+
return self.classify_entity_type_semantic(entity)
|
770 |
+
|
771 |
+
try:
|
772 |
+
# Get base classification
|
773 |
+
base_type = self.classify_entity_type_semantic(entity)
|
774 |
+
|
775 |
+
# If we got a good classification, use it
|
776 |
+
if base_type != 'unknown':
|
777 |
+
return base_type
|
778 |
+
|
779 |
+
# Try context-based classification
|
780 |
+
entity_doc = nlp(entity.lower())
|
781 |
+
if not entity_doc.has_vector:
|
782 |
+
return base_type
|
783 |
+
|
784 |
+
# Look for semantic relationships with context entities
|
785 |
+
context_types = []
|
786 |
+
for context_entity in context_entities:
|
787 |
+
if context_entity != entity:
|
788 |
+
context_type = self.classify_entity_type_semantic(context_entity)
|
789 |
+
if context_type != 'unknown':
|
790 |
+
context_types.append(context_type)
|
791 |
+
|
792 |
+
# If surrounded by industrial terms, likely industrial
|
793 |
+
if context_types:
|
794 |
+
most_common_type = max(set(context_types), key=context_types.count)
|
795 |
+
|
796 |
+
# Check if entity is semantically related to the dominant context
|
797 |
+
context_doc = nlp(' '.join([t for t in context_entities if t != entity]))
|
798 |
+
if context_doc.has_vector:
|
799 |
+
similarity = self._cosine_similarity(entity_doc.vector, context_doc.vector)
|
800 |
+
if similarity > 0.5: # Lower threshold for context
|
801 |
+
return most_common_type
|
802 |
+
|
803 |
+
return base_type
|
804 |
+
|
805 |
+
except Exception as e:
|
806 |
+
logger.warning(f"Contextual classification failed for '{entity}': {e}")
|
807 |
+
return self.classify_entity_type_semantic(entity)
|
808 |
+
|
809 |
+
def classify_entity_type_fallback(self, entity: str) -> str:
|
810 |
+
"""Fallback classification when spaCy is not available"""
|
811 |
entity_lower = entity.lower()
|
812 |
|
813 |
+
# Simple pattern matching as fallback
|
814 |
+
if any(word in entity_lower for word in ['robot', 'arm', 'sensor', 'motor', 'actuator']):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
815 |
return 'robotic'
|
816 |
+
elif any(word in entity_lower for word in ['conveyor', 'machine', 'equipment', 'system', 'factory', 'production']):
|
817 |
+
return 'industrial'
|
818 |
+
elif any(word in entity_lower for word in ['detector', 'microscope', 'analyzer', 'research', 'laboratory']):
|
819 |
return 'scientific'
|
820 |
+
elif any(word in entity_lower for word in ['computer', 'keyboard', 'monitor', 'screen', 'digital', 'electronic']):
|
821 |
+
return 'electronic'
|
822 |
+
elif any(word in entity_lower for word in ['table', 'chair', 'desk', 'bed', 'sofa', 'furniture']):
|
823 |
+
return 'furniture'
|
824 |
+
elif any(word in entity_lower for word in ['area', 'zone', 'space', 'place', 'location', 'position']):
|
825 |
return 'spatial'
|
826 |
+
elif any(word in entity_lower for word in ['ball', 'object', 'material', 'substance']):
|
827 |
+
return 'physical'
|
828 |
else:
|
829 |
return 'unknown'
|
830 |
|
831 |
+
def classify_entity_type(self, entity: str, context_entities: List[str] = None) -> str:
|
832 |
+
"""Main entity classification function with fallback chain"""
|
833 |
+
if context_entities:
|
834 |
+
return self.classify_entity_type_contextual(entity, context_entities)
|
835 |
+
else:
|
836 |
+
return self.classify_entity_type_semantic(entity)
|
837 |
+
|
838 |
+
def _cosine_similarity(self, vec1, vec2):
|
839 |
+
"""Compute cosine similarity between two vectors"""
|
840 |
+
try:
|
841 |
+
import numpy as np
|
842 |
+
# Normalize vectors
|
843 |
+
vec1_norm = vec1 / np.linalg.norm(vec1)
|
844 |
+
vec2_norm = vec2 / np.linalg.norm(vec2)
|
845 |
+
# Compute cosine similarity
|
846 |
+
return np.dot(vec1_norm, vec2_norm)
|
847 |
+
except:
|
848 |
+
return 0.0
|
849 |
+
|
850 |
def process_with_real_gasm(
|
851 |
self,
|
852 |
text: str,
|
|
|
915 |
) -> Dict[str, Any]:
|
916 |
"""Enhanced simulation when real GASM fails"""
|
917 |
try:
|
918 |
+
# Create realistic entity data with contextual classification
|
919 |
+
entity_names = [str(e) for e in entities]
|
920 |
entity_data = []
|
921 |
+
for i, entity in enumerate(entity_names):
|
922 |
# Generate more realistic positions based on text analysis
|
923 |
angle = (i * 2 * np.pi) / max(len(entities), 3)
|
924 |
radius = 2 + i * 0.3
|
|
|
931 |
|
932 |
entity_data.append({
|
933 |
'name': entity,
|
934 |
+
'type': self.classify_entity_type(entity, entity_names),
|
935 |
'position': position,
|
936 |
'confidence': min(0.9, 0.6 + len(entity) * 0.02)
|
937 |
})
|