Nu Appleblossom committed on
Commit 7b3b0f0
1 Parent(s): 1d40a54

back to last promising version with treebuild crashlog

Files changed (1)
  app.py +23 -36
app.py CHANGED
@@ -4,7 +4,6 @@ import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from safetensors import safe_open
 import os
-import io
 import requests
 import json
 import math
@@ -199,15 +198,14 @@ def produce_next_token_ids(input_ids, model, topk, sub_token_id):
     top_k_probs, top_k_ids = torch.topk(softmax_probs, k=topk, dim=-1)
     return top_k_ids[0], top_k_probs[0]
 
-
-def build_def_tree(input_ids, data, base_prompt, model, tokenizer, config, depth=0, max_depth=25, cumulative_prob=1.0, output_buffer=None):
+def build_def_tree(input_ids, data, base_prompt, model, tokenizer, config, depth=0, max_depth=25, cumulative_prob=1.0):
     if depth >= max_depth or cumulative_prob < config.CUTOFF:
         return
 
     current_prompt = tokenizer.decode(input_ids[0], skip_special_tokens=True)
 
-    # Print the current node information to the buffer
-    print(f"Depth {depth}: {current_prompt} PROB: {cumulative_prob:.4f}", file=output_buffer)
+    # Print the current cumulative definition being built
+    print("\n" + f"Depth {depth}: {current_prompt} PROB: {cumulative_prob}")
 
     top_k_ids, top_k_probs = produce_next_token_ids(input_ids, model, config.TOPK, config.SUB_TOKEN_ID)
 
@@ -224,6 +222,10 @@ def build_def_tree(input_ids, data, base_prompt, model, tokenizer, config, depth
             continue
 
         token_str = tokenizer.decode([token_id], skip_special_tokens=True)
+
+        # Add the token to the current definition being built and print it
+        updated_prompt = f"{current_prompt} {token_str}"
+        print(f"Token: {token_str}, Updated Definition: {updated_prompt}, Cumulative Probability: {new_cumulative_prob}")
 
         new_child = {
             "token_id": token_id,
@@ -233,11 +235,10 @@ def build_def_tree(input_ids, data, base_prompt, model, tokenizer, config, depth
         }
         data['children'].append(new_child)
 
-        build_def_tree(new_input_ids, new_child, base_prompt, model, tokenizer, config, depth=depth+1, max_depth=max_depth, cumulative_prob=new_cumulative_prob, output_buffer=output_buffer)
+        build_def_tree(new_input_ids, new_child, base_prompt, model, tokenizer, config, depth=depth+1, max_depth=max_depth, cumulative_prob=new_cumulative_prob)
 
 def generate_definition_tree(base_prompt, embedding, model, tokenizer, config):
     results_dict = {"token": "", "cumulative_prob": 1, "children": []}
-    output_buffer = io.StringIO()
 
     # Reset the token embedding
     token_embedding = torch.unsqueeze(embedding, dim=0).to(model.device)
@@ -248,14 +249,9 @@ def generate_definition_tree(base_prompt, embedding, model, tokenizer, config):
     model.reset_cache()
 
     input_ids = tokenizer.encode(base_prompt, return_tensors="pt").to(model.device)
-    build_def_tree(input_ids, results_dict, base_prompt, model, tokenizer, config, output_buffer=output_buffer)
-
-    tree_output = output_buffer.getvalue()
-    output_buffer.close()
-
-    return results_dict, tree_output
-
+    build_def_tree(input_ids, results_dict, base_prompt, model, tokenizer, config)
+
+    return results_dict
 
 
 def find_max_min_cumulative_weight(node, current_max=0, current_min=float('inf')):
@@ -370,7 +366,7 @@ def process_input(selected_sae, feature_number, weight_type, use_token_centroid,
             if w_enc is None or w_dec is None:
                 error_message = f"Failed to load SAE weights for {selected_sae}. Please try a different SAE or check your connection."
                 logger.error(error_message)
-                return error_message
+                return error_message, None
             w_enc_dict[selected_sae] = w_enc
             w_dec_dict[selected_sae] = w_dec
         else:
@@ -398,29 +394,30 @@ def process_input(selected_sae, feature_number, weight_type, use_token_centroid,
                 # Generate the top 500 list
                 result = ", ".join([f"'{token}': {value:.4f}" for token, value in closest_tokens_with_values])
                 logger.info("Returning top 500 list")
-                return result
+                return result, None
             else:
                 # Generate the top 100 list
                 token_list = [token for token, _ in closest_tokens_with_values[:100]]
                 result = f"100 tokens whose embeddings produce the smallest ratio (cos distance to feature vector)^m/(cos distance to token centroid)^n:\n\n"
-                result += f"[{', '.join(repr(token) for token in token_list)}]"
+
+                result += f"[{', '.join(repr(token) for token in token_list)}]\n"
                 logger.info("Returning top 100 tokens")
-                return result
+                return result, None
 
         elif mode == "definition tree generation":
             logger.info("Generating definition tree")
-            tree_data, tree_output = generate_definition_tree("definition tree", feature_vector, model, tokenizer, config)
+            tree_data = generate_definition_tree("definition tree", feature_vector, model, tokenizer, config)
 
             max_weight, min_weight = find_max_min_cumulative_weight(tree_data)
             tree_image = create_tree_diagram(tree_data, config, max_weight, min_weight)
 
-            return tree_output, tree_image
+            return None, tree_image
 
-        return "Mode not recognized or not implemented in this step."
+        return "Mode not recognized or not implemented in this step.", None
 
     except Exception as e:
         logger.error(f"Error in process_input: {str(e)}")
-        return f"Error: {str(e)}"
+        return f"Error: {str(e)}", None
     finally:
         del feature_vector
         del token_centroid
@@ -428,7 +425,6 @@ def process_input(selected_sae, feature_number, weight_type, use_token_centroid,
         del pca_direction
         torch.cuda.empty_cache()
 
-
 
 def trim_tree(trim_cutoff, tree_data):
     max_weight, min_weight = find_max_min_cumulative_weight(tree_data)
@@ -450,17 +446,8 @@ def gradio_interface():
 
     @spaces.GPU
     def update_output(selected_sae, feature_number, weight_type, use_token_centroid, scaling_factor, use_pca, pca_weight, num_exp, denom_exp, mode, progress=gr.Progress()):
-        result = process_input(selected_sae, feature_number, weight_type, use_token_centroid, scaling_factor, use_pca, pca_weight, num_exp, denom_exp, mode, top_500=False, progress=progress)
-
-        if mode == "definition tree generation":
-            for item in result:
-                if isinstance(item, tuple):
-                    yield item[0], item[1], None  # tree_text, tree_data, None
-                else:
-                    yield item, None, None  # Intermediate updates
-        else:
-            # For cosine distance token lists, result is not a generator
-            yield result, None, None
+        # Call process_input without generating the top 500 list initially
+        return process_input(selected_sae, feature_number, weight_type, use_token_centroid, scaling_factor, use_pca, pca_weight, num_exp, denom_exp, mode, top_500=False, progress=progress)
 
     @spaces.GPU
     def generate_top_500(selected_sae, feature_number, weight_type, use_token_centroid, scaling_factor, use_pca, pca_weight, num_exp, denom_exp, mode):
@@ -475,7 +462,7 @@ def gradio_interface():
         return trimmed_tree_image
 
     with gr.Blocks() as demo:
-        gr.Markdown("# Gemma-2B SAE Feature Explorer")
+        gr.Markdown("# Gemma-2B SAE Feature Explorer (back2crashlogs)")
 
         with gr.Row():
             with gr.Column(scale=2):
@@ -516,7 +503,7 @@ def gradio_interface():
         generate_btn.click(
             update_output,
             inputs=inputs,
-            outputs=[output_text, output_image, tree_data_state],
+            outputs=[output_text, output_image],
             show_progress="full"
         ).then(lambda: gr.update(visible=False, value=""), None, [output_500_text])