Nu Appleblossom committed
Commit c9aa04e
1 Parent(s): 95a8028

next attempt at tree functionality 6

Files changed (1)
  1. app.py +20 -6
app.py CHANGED
@@ -194,12 +194,26 @@ def produce_next_token_ids(input_ids, model, topk, sub_token_id):
     with torch.no_grad():
         outputs = model(input_ids)
     logits = outputs.logits
+
+    if logits.size(1) == 0:  # Check if there are logits to process
+        logger.error("Logits are empty. Cannot produce next token IDs.")
+        return None, None
+
     last_logits = logits[:, -1, :]
+
+    if last_logits.size(0) == 0 or last_logits.size(1) == 0:  # Check if last logits are valid
+        logger.error("Last logits are empty. Cannot produce next token IDs.")
+        return None, None
+
     last_logits[:, sub_token_id] = float('-inf')
     softmax_probs = torch.softmax(last_logits, dim=-1)
     top_k_probs, top_k_ids = torch.topk(softmax_probs, k=topk, dim=-1)
-    return top_k_ids[0], top_k_probs[0]

+    if top_k_ids.size(0) == 0 or top_k_probs.size(0) == 0:  # Check if we successfully got top-k IDs and probabilities
+        logger.error("Top-k IDs or probabilities are empty. Cannot produce next token IDs.")
+        return None, None
+
+    return top_k_ids[0], top_k_probs[0]

 def build_def_tree(input_ids, data, base_prompt, model, tokenizer, config, depth=0, max_depth=25, cumulative_prob=1.0, progress_callback=None):
     if depth >= max_depth or cumulative_prob < config.CUTOFF:
@@ -209,10 +223,10 @@ def build_def_tree(input_ids, data, base_prompt, model, tokenizer, config, depth
     if progress_callback:
         progress_callback(f"Depth {depth}: {current_prompt} PROB: {cumulative_prob}\n")

-    try:
-        top_k_ids, top_k_probs = produce_next_token_ids(input_ids, model, config.TOPK, config.SUB_TOKEN_ID)
-    except Exception as e:
-        logger.error(f"Error generating next token IDs at depth {depth}: {str(e)}")
+    top_k_ids, top_k_probs = produce_next_token_ids(input_ids, model, config.TOPK, config.SUB_TOKEN_ID)
+
+    if top_k_ids is None or top_k_probs is None:  # Ensure that top_k_ids and top_k_probs are valid before proceeding
+        logger.error(f"Failed to generate next token IDs at depth {depth}.")
         return

     for idx, token_id in enumerate(top_k_ids.tolist()):
@@ -247,7 +261,6 @@ def build_def_tree(input_ids, data, base_prompt, model, tokenizer, config, depth

         build_def_tree(new_input_ids, new_child, base_prompt, model, tokenizer, config, depth=depth+1, max_depth=max_depth, cumulative_prob=new_cumulative_prob, progress_callback=progress_callback)

-
 def generate_definition_tree(base_prompt, embedding, model, tokenizer, config, progress_callback=None):
     results_dict = {"token": "", "cumulative_prob": 1, "children": []}

@@ -267,6 +280,7 @@ def generate_definition_tree(base_prompt, embedding, model, tokenizer, config, p



+
 def find_max_min_cumulative_weight(node, current_max=0, current_min=float('inf')):
     current_max = max(current_max, node.get('cumulative_prob', 0))
     if node.get('cumulative_prob', 1) > 0:
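
Note on the pattern: this commit drops the try/except around produce_next_token_ids in favor of sentinel returns. The producer returns (None, None) whenever an intermediate tensor is empty, and build_def_tree prunes that branch after an explicit None check instead of catching an exception. Below is a minimal, self-contained sketch of the same pattern; the model's forward pass is stubbed with a random logits tensor, and produce_next_token_ids_sketch plus the fake shapes are illustrative, not part of app.py.

    import logging

    import torch

    logging.basicConfig(level=logging.ERROR)
    logger = logging.getLogger(__name__)

    def produce_next_token_ids_sketch(logits, topk, sub_token_id):
        # Guard style from the commit: return (None, None) on bad input
        # instead of raising, so a recursive caller can bail out cleanly.
        if logits.size(1) == 0:
            logger.error("Logits are empty. Cannot produce next token IDs.")
            return None, None
        last_logits = logits[:, -1, :]                # logits for the final position
        last_logits[:, sub_token_id] = float('-inf')  # ban the substitute token
        softmax_probs = torch.softmax(last_logits, dim=-1)
        top_k_probs, top_k_ids = torch.topk(softmax_probs, k=topk, dim=-1)
        return top_k_ids[0], top_k_probs[0]

    # Caller-side None check, as build_def_tree now does:
    fake_logits = torch.randn(1, 4, 32)  # (batch, seq_len, vocab); stands in for model(input_ids).logits
    ids, probs = produce_next_token_ids_sketch(fake_logits, topk=5, sub_token_id=0)
    if ids is None or probs is None:
        logger.error("Failed to generate next token IDs.")
    else:
        print(list(zip(ids.tolist(), probs.tolist())))

Compared with the old broad `except Exception`, the new guards only cover the empty-tensor cases; any other error raised inside the model call will now propagate instead of silently pruning the branch.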