LennardZuendorf committed · Commit 67a34bd · Parent: 6ff516d

feat/fix: fixing attention bug, fixing other mistral bugs
Changed files:
- explanation/attention.py +33 -16
- explanation/interpret_captum.py +7 -2
- explanation/interpret_shap.py +6 -2
- explanation/markup.py +14 -11
- explanation/plotting.py +2 -2
- main.py +28 -17
- pyproject.toml +1 -0
- utils/formatting.py +31 -8
- utils/modelling.py +11 -0
explanation/attention.py
CHANGED
@@ -2,7 +2,8 @@


 # internal imports
-from utils import formatting as fmt
+from utils import formatting as fmt, modelling as mdl
+from model import mistral
 from .markup import markup_text


@@ -10,36 +11,52 @@ from .markup import markup_text
 # and marked text based on attention
 def chat_explained(model, prompt):

-    model.set_config({"return_dict": True})
-
     # get encoded input
-    …
+    input_ids = model.TOKENIZER(
         prompt, return_tensors="pt", add_special_tokens=True
     ).input_ids
-    …
-    …
-    …
-    )
+
+    # generate output of the model
+    decoder_ids = model.MODEL.generate(input_ids, generation_config=model.CONFIG)

     # get input and output text as list of strings
-    …
-    …
-    …
-    decoder_text = fmt.format_tokens(
-        model.TOKENIZER.convert_ids_to_tokens(decoder_input_ids[0])
+    input_text = fmt.format_tokens(model.TOKENIZER.convert_ids_to_tokens(input_ids[0]))
+    output_text = fmt.format_tokens(
+        model.TOKENIZER.convert_ids_to_tokens(decoder_ids[0])
     )

-    …
+    # checking if model is mistral
+    if type(model.MODEL) == type(mistral.MODEL):
+
+        # get attention values for the input vectors
+        attention_output = model.MODEL(input_ids, output_attentions=True).attentions
+
+        # averaging attention across layers and heads
+        attention_output = mdl.format_mistral_attention(attention_output)
+        averaged_attention = fmt.avg_attention(attention_output, model="mistral")
+
+    # attention visualization for godel
+    else:
+        # get attention values for the input and output vectors
+        # using already generated input and output
+        attention_output = model.MODEL(
+            input_ids=input_ids,
+            decoder_input_ids=decoder_ids,
+            output_attentions=True,
+        )
+
+        # averaging attention across layers
+        averaged_attention = fmt.avg_attention(attention_output, model="godel")

     # format response text for clean output
-    response_text = fmt.format_output_text(…
+    response_text = fmt.format_output_text(output_text)
     # setting placeholder for iFrame graphic
     graphic = (
         "<div style='text-align: center; font-family:arial;'><h4>Attention"
         " Visualization doesn't support an interactive graphic.</h4></div>"
     )
     # creating marked text using markup_text function and attention
-    marked_text = markup_text(…
+    marked_text = markup_text(input_text, averaged_attention, variant="visualizer")

     # returning response, graphic and marked text array
     return response_text, graphic, marked_text, None
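For reference, a minimal self-contained sketch of the two averaging paths that chat_explained now dispatches between, using random tensors in place of real model outputs (all shapes here are illustrative assumptions, not taken from the commit):

import numpy as np
import torch

# Mistral path: after format_mistral_attention the tensor is assumed to be
# (layers, heads, seq, seq); averaging over axes (0, 1, 2) leaves one
# averaged weight per input token, which is what markup_text consumes.
mistral_attention = torch.rand(3, 8, 5, 5)  # dummy: 3 layers, 8 heads, 5 tokens
per_token = np.mean(mistral_attention.detach().numpy(), axis=(0, 1, 2))
print(per_token.shape)  # (5,)

# GODEL path: decoder_attentions[0][0] is assumed to be
# (heads, target_len, source_len); averaging over heads keeps the
# position-wise structure.
godel_attention = torch.rand(8, 4, 5)  # dummy: 8 heads, 4 output x 5 input tokens
per_position = np.mean(godel_attention.detach().numpy(), axis=0)
print(per_position.shape)  # (4, 5)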
explanation/interpret_captum.py
CHANGED
@@ -4,6 +4,7 @@ import torch

 # internal imports
 from utils import formatting as fmt
+from .plotting import plot_seq
 from .markup import markup_text


@@ -26,7 +27,7 @@ def cpt_extract_seq_att(attr):
 def chat_explained(model, prompt):
     model.set_config({})

-    # creating llm attribution class with KernelSHAP and
+    # creating llm attribution class with KernelSHAP and Mistral Model, Tokenizer
     llm_attribution = LLMAttribution(KernelShap(model.MODEL), model.TOKENIZER)

     # generation attribution
@@ -48,7 +49,11 @@ def chat_explained(model, prompt):
     graphic = """<div style='text-align: center; font-family:arial;'><h4>
     Intepretation with Captum doesn't support an interactive graphic.</h4></div>
     """
+    # create the explanation marked text array
     marked_text = markup_text(input_tokens, values, variant="captum")

+    # creating sequence attribution plot
+    plot = plot_seq(cpt_extract_seq_att(attribution_result), "KernelSHAP")
+
     # return response, graphic and marked_text array
-    return response_text, graphic, marked_text,
+    return response_text, graphic, marked_text, plot
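The body of cpt_extract_seq_att is not shown in these hunks. A plausible sketch of what it could feed to plot_seq, using a dummy stand-in for Captum's LLMAttributionResult (which exposes seq_attr and input_tokens) instead of a real attribution run; the values and the pairing logic are guesses:

from types import SimpleNamespace

import torch

# dummy stand-in for an LLMAttribution result; values are made up
attribution_result = SimpleNamespace(
    seq_attr=torch.tensor([0.1, 0.9, -0.2]),
    input_tokens=["Does", "money", "buy"],
)

def cpt_extract_seq_att(attr):
    # pair each input token with its sequence-level attribution value
    return list(zip(attr.input_tokens, attr.seq_attr.tolist()))

print(cpt_extract_seq_att(attribution_result))
# roughly [('Does', 0.1), ('money', 0.9), ('buy', -0.2)]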
explanation/interpret_shap.py
CHANGED
@@ -6,6 +6,7 @@ import torch

 # internal imports
 from utils import formatting as fmt
+from .plotting import plot_seq
 from .markup import markup_text

 # global variables
@@ -14,7 +15,7 @@ TEXT_MASKER = None


 # function to extract summarized sequence wise attribution
-def …
+def shap_extract_seq_att(shap_values):

     # extracting summed up shap values
     values = fmt.flatten_attribution(shap_values.values[0], 1)
@@ -78,5 +79,8 @@ def chat_explained(model, prompt):
     # create the response text
     response_text = fmt.format_output_text(shap_values.output_names)

+    # creating sequence attribution plot
+    plot = plot_seq(shap_extract_seq_att(shap_values), "PartitionSHAP")
+
     # return response, graphic and marked_text array
-    return response_text, graphic, marked_text,
+    return response_text, graphic, marked_text, plot
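A small sketch of what shap_extract_seq_att presumably does with shap_values.values[0], an (input tokens x output tokens) matrix: collapse the output axis so each input token gets one summed value. flatten_attribution is not shown in the hunks, so plain np.sum stands in for it here, and the matrix is made up:

import numpy as np

# made-up SHAP matrix: 3 input tokens x 2 output tokens
values_matrix = np.array([[0.2, -0.1], [0.5, 0.4], [-0.3, 0.1]])
input_tokens = ["Does", "money", "buy"]

# collapse axis 1 (the output tokens), leaving one value per input token
summed = np.sum(values_matrix, axis=1)
seq_values = list(zip(input_tokens, summed))
print(seq_values)  # roughly [('Does', 0.1), ('money', 0.9), ('buy', -0.2)]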
explanation/markup.py
CHANGED
@@ -25,12 +25,12 @@ def markup_text(input_text: list, text_values: ndarray, variant: str):
     min_val, max_val = np.min(text_values), np.max(text_values)

     # separate the threshold calculation for negative and positive values
-    # visualization negative thresholds are all 0 since
+    # visualization negative thresholds are all 0 since attention always positive
     if variant == "visualizer":
         neg_thresholds = np.linspace(
             0, 0, num=(len(bucket_tags) - 1) // 2 + 1, endpoint=False
         )[1:]
-    #
+    # standard config for 5 negative buckets
     else:
         neg_thresholds = np.linspace(
             min_val, 0, num=(len(bucket_tags) - 1) // 2 + 1, endpoint=False
@@ -45,16 +45,19 @@ def markup_text(input_text: list, text_values: ndarray, variant: str):

     # looping over each text snippet and attribution value
     for text, value in zip(input_text, text_values):
-        # setting inital bucket at lowest
-        bucket = "-5"

-        # …
-        …
-        # …
-        …
-        …
-        …
-        …
+        # validating text and skipping empty text/special tokens
+        if text not in ("", fmt.SPECIAL_TOKENS):
+            # setting initial bucket at lowest
+            bucket = "-5"
+
+            # looping over all bucket and their threshold
+            for i, threshold in zip(bucket_tags, thresholds):
+                # updating assigned bucket if value is above threshold
+                if value >= threshold:
+                    bucket = i
+            # finally adding text and bucket assignment to list of tuples
+            marked_text.append((text, str(bucket)))

     # returning list of marked text snippets as list of tuples
     return marked_text
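The restored loop is easier to follow in isolation. A toy version of the bucket assignment, with invented tags and thresholds (markup.py derives its real thresholds with np.linspace as shown above):

bucket_tags = ["-2", "-1", "+1", "+2"]  # invented; the real list has 10 tags
thresholds = [-1.0, -0.5, 0.0, 0.5]     # invented; np.linspace builds the real ones

def assign_bucket(value):
    # start at the lowest bucket, then climb while the value clears thresholds
    bucket = bucket_tags[0]
    for tag, threshold in zip(bucket_tags, thresholds):
        if value >= threshold:
            bucket = tag
    return bucket

print([assign_bucket(v) for v in (-0.8, 0.1, 0.7)])  # ['-2', '+1', '+2']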
explanation/plotting.py
CHANGED
@@ -5,7 +5,7 @@ import numpy as np
 import matplotlib.pyplot as plt


-def plot_seq(seq_values: list, method_model: tuple = ("", "")):
+def plot_seq(seq_values: list, method: str = ""):

     # Separate the tokens and their corresponding importance values
     tokens, importance = zip(*seq_values)
@@ -45,7 +45,7 @@ def plot_seq(seq_values: list, method_model: tuple = ("", "")):
     )

     plt.axhline(0, color="black", linewidth=1)
-    plt.title(f"Input Token Attribution with {…
+    plt.title(f"Input Token Attribution with {method}")
     plt.xlabel("Input Tokens", labelpad=0.5)
     plt.ylabel("Attribution")
     plt.xticks(x_positions, tokens, rotation=45)
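A usage sketch of the new signature. The drawing code between the two hunks is not part of the diff, so the plt.bar call below is an assumption; only the surrounding lines mirror the file:

import numpy as np
import matplotlib.pyplot as plt

def plot_seq(seq_values: list, method: str = ""):
    # separate the tokens and their corresponding importance values
    tokens, importance = zip(*seq_values)
    x_positions = np.arange(len(tokens))

    plt.bar(x_positions, importance)  # assumed; the diff omits the drawing code
    plt.axhline(0, color="black", linewidth=1)
    plt.title(f"Input Token Attribution with {method}")
    plt.xlabel("Input Tokens", labelpad=0.5)
    plt.ylabel("Attribution")
    plt.xticks(x_positions, tokens, rotation=45)
    return plt.gcf()

plot = plot_seq([("Does", 0.1), ("money", 0.9), ("buy", -0.2)], "KernelSHAP")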
main.py
CHANGED
@@ -155,7 +155,7 @@ with gr.Blocks(
     The explanations are based on 10 buckets that range between the
     lowest negative value (1 to 5) and the highest positive attribution value (6 to 10).
     **The legend shows the color for each bucket.**
-
+
     *HINT*: This works best in light mode.
     """)
     xai_text = gr.HighlightedText(
@@ -210,12 +210,34 @@ with gr.Blocks(
         gr.Examples(
             label="Example Questions",
             examples=[
-                ["Does money buy happiness?", "", "Mistral", "…
-                ["Does money buy happiness?", "", "Mistral", "…
+                ["Does money buy happiness?", "", "", "Mistral", "None"],
+                ["Does money buy happiness?", "", "", "Mistral", "SHAP"],
+                ["Does money buy happiness?", "", "", "Mistral", "Attention"],
+                [
+                    "Does money buy happiness?",
+                    "",
+                    (
+                        "Respond from the perspective of a billionaire enjoying"
+                        " life in Dubai"
+                    ),
+                    "Mistral",
+                    "None",
+                ],
+                [
+                    "Does money buy happiness?",
+                    "",
+                    (
+                        "Respond from the perspective of a billionaire enjoying"
+                        " life in Dubai"
+                    ),
+                    "Mistral",
+                    "SHAP",
+                ],
             ],
             inputs=[
                 user_prompt,
                 knowledge_input,
+                system_prompt,
                 model_selection,
                 xai_selection,
             ],
@@ -227,32 +249,21 @@ with gr.Blocks(
             label="Example Questions",
            examples=[
                 [
-                    "…
+                    "Does money buy happiness?",
                     (
                         "Black holes are created when a massive star's core"
                         " collapses after a supernova, forming an object with"
                         " gravity so intense that even light cannot escape."
                     ),
+                    "",
                     "GODEL",
                     "SHAP",
                 ],
-                [
-                    (
-                        "Explain the importance of the Rosetta Stone in"
-                        " understanding ancient languages."
-                    ),
-                    (
-                        "The Rosetta Stone, an ancient Egyptian artifact, was"
-                        " key in decoding hieroglyphs, featuring the same text"
-                        " in three scripts: hieroglyphs, Demotic, and Greek."
-                    ),
-                    "GODEL",
-                    "Attention",
-                ],
             ],
             inputs=[
                 user_prompt,
                 knowledge_input,
+                system_prompt,
                 model_selection,
                 xai_selection,
             ],
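The example rows gain an extra empty string because gr.Examples rows must match the inputs list one-to-one, and that list now includes system_prompt. A stripped-down sketch of the wiring; component labels and choice lists are guesses, not copied from main.py:

import gradio as gr

with gr.Blocks() as demo:
    user_prompt = gr.Textbox(label="User Prompt")
    knowledge_input = gr.Textbox(label="Knowledge")
    system_prompt = gr.Textbox(label="System Prompt")
    model_selection = gr.Radio(["Mistral", "GODEL"], label="Model")
    xai_selection = gr.Radio(["None", "SHAP", "Attention"], label="XAI")

    # each example row supplies one value per component listed in inputs
    gr.Examples(
        label="Example Questions",
        examples=[["Does money buy happiness?", "", "", "Mistral", "None"]],
        inputs=[user_prompt, knowledge_input, system_prompt,
                model_selection, xai_selection],
    )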
pyproject.toml
CHANGED
@@ -21,6 +21,7 @@ exclude = '''

 [tool.pylint.messages_control]
 disable = [
+    "unidiomatic-typecheck",
     "not-a-mapping",
     "arguments-differ",
     "attribute-defined-outside-init",
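unidiomatic-typecheck is the pylint message triggered by the exact-type comparison added in attention.py. A two-line illustration of the flagged pattern versus the idiom pylint would otherwise suggest (Dummy is a placeholder class):

class Dummy:
    pass

obj = Dummy()
type(obj) == type(Dummy())  # flagged as unidiomatic-typecheck, now allowed
isinstance(obj, Dummy)      # the isinstance idiom pylint normally recommends

Disabling the check project-wide rather than rewriting the comparison suggests the exact-type match on the model class is intentional here.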
utils/formatting.py
CHANGED
@@ -2,12 +2,31 @@

 # external imports
 import re
+import torch
 import numpy as np
 from numpy import ndarray


-# …
-…
+# globally defined tokens that are removed from the output
+SPECIAL_TOKENS = [
+    "[CLS]",
+    "[SEP]",
+    "[PAD]",
+    "[UNK]",
+    "[MASK]",
+    "▁",
+    "Ġ",
+    "</w>",
+    "<0x0A>",
+    "<0x0D>",
+    "<0x09>",
+    "<s>",
+    "</s>",
+]
+
+
+# function to format the model repose nicely
+# takes a list of strings and returning a combined string
 def format_output_text(output: list):

     # remove special tokens from list using other function
@@ -36,8 +55,6 @@ def format_output_text(output: list):

 # format the tokens by removing special tokens and special characters
 def format_tokens(tokens: list):
-    # define special tokens to remove
-    special_tokens = ["[CLS]", "[SEP]", "[PAD]", "[UNK]", "[MASK]", "▁", "Ġ", "</w>"]

     # initialize empty list
     updated_tokens = []
@@ -49,7 +66,7 @@ def format_tokens(tokens: list):
         t = t.lstrip("▁")

         # loop through special tokens list and remove from current token if matched
-        for s in special_tokens:
+        for s in SPECIAL_TOKENS:
             t = t.replace(s, "")

         # add token to list
@@ -70,6 +87,12 @@ def flatten_attention(values: ndarray, axis: int = 0):


 # function to get averaged decoder attention from attention values
-def avg_attention(attention_values):
-    …
-    …
+def avg_attention(attention_values, model: str):
+    # check if model is godel
+    if model == "godel":
+        # get attention values for the input and output vectors
+        attention = attention_values.decoder_attentions[0][0].detach().numpy()
+        return np.mean(attention, axis=0)
+    # extracting attention values for mistral
+    attention_np = attention_values.to(torch.device("cpu")).detach().numpy()
+    return np.mean(attention_np, axis=(0, 1, 2))
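A self-contained check of the token cleanup with the promoted SPECIAL_TOKENS list. The loop skeleton mirrors the hunks above; the sample tokens are made up:

SPECIAL_TOKENS = ["[CLS]", "[SEP]", "[PAD]", "[UNK]", "[MASK]",
                  "▁", "Ġ", "</w>", "<0x0A>", "<0x0D>", "<0x09>", "<s>", "</s>"]

def format_tokens(tokens: list):
    updated_tokens = []
    for t in tokens:
        # strip the sentencepiece word-boundary marker, then any special token
        t = t.lstrip("▁")
        for s in SPECIAL_TOKENS:
            t = t.replace(s, "")
        updated_tokens.append(t)
    return updated_tokens

print(format_tokens(["<s>", "▁Does", "▁money", "<0x0A>", "</s>"]))
# ['', 'Does', 'money', '', '']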
utils/modelling.py
CHANGED
@@ -97,3 +97,14 @@ def gpu_loading_config(max_memory: str = "15000MB"):
     )

     return n_gpus, max_memory, bnb_config
+
+
+# formatting mistral attention values
+# CREDIT: copied and adapted from BERTViz
+# see https://github.com/jessevig/bertviz
+def format_mistral_attention(attention_values):
+    squeezed = []
+    for layer_attention in attention_values:
+        layer_attention = layer_attention.squeeze(0)
+        squeezed.append(layer_attention)
+    return torch.stack(squeezed)
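A quick shape check for the new helper, assuming Hugging Face-style attentions (a tuple with one (batch=1, heads, seq, seq) tensor per layer; the dimensions below are illustrative):

import torch

def format_mistral_attention(attention_values):
    # drop the batch dimension per layer, then stack layers into one tensor
    squeezed = []
    for layer_attention in attention_values:
        squeezed.append(layer_attention.squeeze(0))
    return torch.stack(squeezed)

dummy_attentions = tuple(torch.rand(1, 8, 5, 5) for _ in range(3))  # 3 layers
print(format_mistral_attention(dummy_attentions).shape)  # torch.Size([3, 8, 5, 5])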