update
app.py
CHANGED
@@ -11,9 +11,11 @@ import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+
 @dataclass
 class MarketingFeature:
     """Structure to hold marketing-relevant feature information"""
+
     feature_id: int
     name: str
     category: str

@@ -22,6 +24,7 @@ class MarketingFeature:
     layer: int
     threshold: float = 0.1
 
+
 # Define marketing-relevant features from Gemma Scope
 MARKETING_FEATURES = [
     MarketingFeature(

@@ -30,7 +33,7 @@ MARKETING_FEATURES = [
         category="technical",
         description="Detects technical and specialized terminology",
         interpretation_guide="High activation indicates strong technical focus",
-        layer=20
+        layer=20,
     ),
     MarketingFeature(
         feature_id=6680,

@@ -38,7 +41,7 @@ MARKETING_FEATURES = [
         category="technical",
         description="Identifies complex technical concepts",
         interpretation_guide="Consider simplifying language if activation is too high",
-        layer=20
+        layer=20,
     ),
     MarketingFeature(
         feature_id=2,

@@ -46,10 +49,11 @@ MARKETING_FEATURES = [
         category="seo",
         description="Identifies potential SEO keywords",
         interpretation_guide="High activation suggests strong SEO potential",
-        layer=20
+        layer=20,
     ),
 ]
 
+
 class MarketingAnalyzer:
     """Main class for analyzing marketing content using Gemma Scope"""
 
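As an orientation to the data structure being reformatted here: MARKETING_FEATURES is a flat list of MarketingFeature records, so extending the analyzer is just appending another entry. A hedged sketch (the field names come from the hunks above; the feature_id, category, and wording below are invented placeholders, not real Gemma Scope features):

# Hypothetical example of registering one more Gemma Scope feature with the
# analyzer; only the field names are taken from app.py, the values are invented.
MARKETING_FEATURES.append(
    MarketingFeature(
        feature_id=1234,                      # placeholder SAE feature index
        name="benefit_language",              # placeholder
        category="persuasion",                # placeholder category
        description="Flags benefit-led phrasing",
        interpretation_guide="Low activation may mean the copy is feature-led",
        layer=20,
        threshold=0.1,
    )
)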
@@ -67,8 +71,7 @@ class MarketingAnalyzer:
 
         # Initialize model and tokenizer with token from environment
         self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            device_map='auto'
+            model_name, device_map="auto"
         )
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 

@@ -87,25 +90,30 @@ class MarketingAnalyzer:
                 # Load SAE parameters for each feature
                 path = hf_hub_download(
                     repo_id=f"google/gemma-scope-{self.model_size}-pt-res",
-                    filename=f"layer_{feature.layer}/width_16k/average_l0_71/params.npz"
+                    filename=f"layer_{feature.layer}/width_16k/average_l0_71/params.npz",
                 )
                 params = np.load(path)
                 self.saes[feature.feature_id] = {
-                    'params': {k: torch.from_numpy(v).to(self.device) for k, v in params.items()},
-                    'feature': feature
+                    "params": {
+                        k: torch.from_numpy(v).to(self.device)
+                        for k, v in params.items()
+                    },
+                    "feature": feature,
                 }
                 logger.info(f"Loaded SAE for feature {feature.feature_id}")
             except Exception as e:
-                logger.error(f"Error loading SAE for feature {feature.feature_id}: {str(e)}")
+                logger.error(
+                    f"Error loading SAE for feature {feature.feature_id}: {str(e)}"
+                )
                 continue
 
     def analyze_content(self, text: str) -> Dict:
         """Analyze marketing content using loaded SAEs"""
         results = {
-            'text': text,
-            'features': {},
-            'categories': {},
-            'recommendations': []
+            "text": text,
+            "features": {},
+            "categories": {},
+            "recommendations": [],
        }
 
         try:
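For readers unfamiliar with the checkpoint downloaded in the hunk above: each Gemma Scope params.npz bundles one SAE's weights as NumPy arrays, and _apply_sae further down expects at least W_enc, b_enc, and threshold among them. A minimal standalone sketch of the same download-and-convert step, assuming the 2b checkpoints and layer 20 (matching the features defined above); the repo and filename follow the pattern shown in the diff:

import numpy as np
import torch
from huggingface_hub import hf_hub_download

# Fetch one Gemma Scope residual-stream SAE (same path pattern as app.py);
# "2b" and layer 20 are illustrative choices here.
path = hf_hub_download(
    repo_id="google/gemma-scope-2b-pt-res",
    filename="layer_20/width_16k/average_l0_71/params.npz",
)

params = np.load(path)
print(list(params.keys()))  # expect W_enc, b_enc, threshold, plus decoder weights

# Convert to torch tensors, as the hunk above does (kept on CPU for simplicity).
sae_params = {k: torch.from_numpy(v) for k, v in params.items()}
print({k: tuple(v.shape) for k, v in sae_params.items()})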
@@ -116,14 +124,12 @@ class MarketingAnalyzer:
 
             # Analyze each feature
             for feature_id, sae_data in self.saes.items():
-                feature = sae_data['feature']
+                feature = sae_data["feature"]
                 layer_output = outputs.hidden_states[feature.layer]
 
                 # Apply SAE
                 activations = self._apply_sae(
-                    layer_output,
-                    sae_data['params'],
-                    feature.threshold
+                    layer_output, sae_data["params"], feature.threshold
                 )
 
                 # Skip BOS token and handle empty activations

@@ -137,25 +143,24 @@ class MarketingAnalyzer:
 
                 # Record results
                 feature_result = {
-                    'name': feature.name,
-                    'category': feature.category,
-                    'activation_score': mean_activation,
-                    'max_activation': max_activation,
-                    'interpretation': self._interpret_activation(
-                        mean_activation,
-                        feature
-                    )
+                    "name": feature.name,
+                    "category": feature.category,
+                    "activation_score": mean_activation,
+                    "max_activation": max_activation,
+                    "interpretation": self._interpret_activation(
+                        mean_activation, feature
+                    ),
                 }
 
-                results['features'][feature_id] = feature_result
+                results["features"][feature_id] = feature_result
 
                 # Aggregate by category
-                if feature.category not in results['categories']:
-                    results['categories'][feature.category] = []
-                results['categories'][feature.category].append(feature_result)
+                if feature.category not in results["categories"]:
+                    results["categories"][feature.category] = []
+                results["categories"][feature.category].append(feature_result)
 
             # Generate recommendations
-            results['recommendations'] = self._generate_recommendations(results)
+            results["recommendations"] = self._generate_recommendations(results)
 
         except Exception as e:
             logger.error(f"Error analyzing content: {str(e)}")

@@ -167,18 +172,16 @@ class MarketingAnalyzer:
         self,
         activations: torch.Tensor,
         sae_params: Dict[str, torch.Tensor],
-        threshold: float
+        threshold: float,
     ) -> torch.Tensor:
         """Apply SAE to get feature activations"""
-        pre_acts = activations @ sae_params['W_enc'] + sae_params['b_enc']
-        mask = pre_acts > sae_params['threshold']
+        pre_acts = activations @ sae_params["W_enc"] + sae_params["b_enc"]
+        mask = pre_acts > sae_params["threshold"]
        acts = mask * torch.nn.functional.relu(pre_acts)
         return acts
 
     def _interpret_activation(
-        self,
-        activation: float,
-        feature: MarketingFeature
+        self, activation: float, feature: MarketingFeature
     ) -> str:
         """Interpret activation patterns for a feature"""
         if activation > 0.8:
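The two rewritten lines in _apply_sae are the whole encoder: a linear map into the SAE feature basis followed by a JumpReLU-style gate, where a feature only fires if its pre-activation clears a learned per-feature threshold. Note that the function's own threshold argument appears unused in the lines shown; the stored sae_params["threshold"] is what gates. A self-contained sketch with random tensors standing in for the real weights and hidden states (the unused argument is dropped here):

import torch

def apply_sae(activations: torch.Tensor, sae_params: dict) -> torch.Tensor:
    """Mirror of _apply_sae above: encode residual-stream activations into SAE features."""
    pre_acts = activations @ sae_params["W_enc"] + sae_params["b_enc"]
    mask = pre_acts > sae_params["threshold"]        # JumpReLU gate, per-feature threshold
    return mask * torch.nn.functional.relu(pre_acts)

# Toy shapes standing in for Gemma hidden states (d_model) and a 16k-feature SAE.
d_model, n_features, seq_len = 2304, 16384, 8
fake_params = {
    "W_enc": torch.randn(d_model, n_features) * 0.02,
    "b_enc": torch.zeros(n_features),
    "threshold": torch.full((n_features,), 0.1),
}
hidden = torch.randn(1, seq_len, d_model)            # (batch, seq, d_model)

acts = apply_sae(hidden, fake_params)                # (batch, seq, n_features)
print(acts.shape, (acts > 0).float().mean().item())  # fraction of active features

With real Gemma Scope weights only a small fraction of the 16k features fire on a given token; with the random placeholders above the run is just a shape check.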
@@ -195,13 +198,12 @@ class MarketingAnalyzer:
         try:
             # Get technical features
             tech_features = [
-                f for f in results['features'].values()
-                if f['category'] == 'technical'
+                f for f in results["features"].values() if f["category"] == "technical"
             ]
 
             # Calculate average technical score if we have features
             if tech_features:
-                tech_score = np.mean([f['activation_score'] for f in tech_features])
+                tech_score = np.mean([f["activation_score"] for f in tech_features])
 
                 if tech_score > 0.8:
                     recommendations.append(

@@ -216,6 +218,7 @@ class MarketingAnalyzer:
 
         return recommendations
 
+
 def create_gradio_interface():
     """Create Gradio interface for marketing analysis"""
     try:

@@ -227,7 +230,7 @@ def create_gradio_interface():
             inputs=gr.Textbox(),
             outputs=gr.Textbox(),
             title="Marketing Content Analyzer (Error)",
-            description="Failed to initialize. Please check if HF_TOKEN is properly set."
+            description="Failed to initialize. Please check if HF_TOKEN is properly set.",
         )
 
     def analyze(text):

@@ -238,31 +241,29 @@ def create_gradio_interface():
 
             # Overall category scores
             output += "Category Scores:\n"
-            for category, features in results['categories'].items():
+            for category, features in results["categories"].items():
                 if features:  # Check if we have features for this category
-                    avg_score = np.mean([f['activation_score'] for f in features])
+                    avg_score = np.mean([f["activation_score"] for f in features])
                     output += f"{category.title()}: {avg_score:.2f}\n"
 
             # Feature details
             output += "\nFeature Details:\n"
-            for feature_id, feature in results['features'].items():
+            for feature_id, feature in results["features"].items():
                 output += f"\n{feature['name']}:\n"
                 output += f"Score: {feature['activation_score']:.2f}\n"
                 output += f"Interpretation: {feature['interpretation']}\n"
 
             # Recommendations
-            if results['recommendations']:
+            if results["recommendations"]:
                 output += "\nRecommendations:\n"
-                for rec in results['recommendations']:
+                for rec in results["recommendations"]:
                     output += f"- {rec}\n"
 
             return output
 
         # Create interface with custom theming
         custom_theme = gr.themes.Soft(
-            primary_hue="indigo",
-            secondary_hue="blue",
-            neutral_hue="gray"
+            primary_hue="indigo", secondary_hue="blue", neutral_hue="gray"
         )
 
         interface = gr.Interface(

@@ -270,7 +271,7 @@ def create_gradio_interface():
             inputs=gr.Textbox(
                 lines=5,
                 placeholder="Enter your marketing content here...",
-                label="Marketing Content"
+                label="Marketing Content",
             ),
             outputs=gr.Textbox(label="Analysis Results"),
             title="Marketing Content Analyzer",

@@ -278,14 +279,14 @@ def create_gradio_interface():
             examples=[
                 ["WordLift is an AI-powered SEO tool"],
                 ["Our advanced machine learning algorithms optimize your content"],
-                ["Simple and effective website optimization"]
+                ["Simple and effective website optimization"],
             ],
-            theme=custom_theme
-            )
+            theme=custom_theme,
         )
 
         return interface
 
+
 if __name__ == "__main__":
     iface = create_gradio_interface()
-    iface.launch()
+    iface.launch()
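Taken together, the reformatted hunks fix the shape of the analysis payload that the Gradio callback renders. A hedged sketch of what MarketingAnalyzer.analyze_content returns (the key names are taken from the diff; every concrete value below is a placeholder for illustration):

# Illustrative only: keys mirror app.py, values are invented placeholders.
results = {
    "text": "WordLift is an AI-powered SEO tool",
    "features": {
        6680: {                                    # keyed by SAE feature_id
            "name": "complex technical concepts",  # placeholder name
            "category": "technical",
            "activation_score": 0.42,              # mean activation (placeholder)
            "max_activation": 0.91,                # placeholder
            "interpretation": "moderate technical focus",  # placeholder
        },
    },
    "categories": {
        "technical": [
            # the same feature_result dicts, grouped per category
        ],
    },
    "recommendations": [
        "Consider simplifying the language for a broader audience",  # placeholder
    ],
}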