Spaces:

LennardZuendorf
/

thesis

Runtime error

App Files Files Community

LennardZuendorf committed on Feb 13, 2024

Commit

517fd4c

unverified ·

1 Parent(s): 1f063be

fix: final fix of attention

Browse files

Files changed (6) hide show

explanation/attention.py +2 -2
explanation/markup.py +1 -1
main.py +4 -4
model/mistral.py +2 -3
utils/formatting.py +2 -3
utils/modelling.py +7 -3

explanation/attention.py CHANGED Viewed

@@ -37,8 +37,8 @@ def chat_explained(model, prompt):
         attention_output = mdl.format_mistral_attention(attention_output)
         averaged_attention = fmt.avg_attention(attention_output, model="mistral")
-        response_text = fmt.format_output_text(output_text)
-        response_text = mistral.format_answer(response_text)
     # otherwise use attention visualization for godel
     else:

         attention_output = mdl.format_mistral_attention(attention_output)
         averaged_attention = fmt.avg_attention(attention_output, model="mistral")
+        output_text = fmt.format_output_text(output_text)
+        response_text = mistral.format_answer(output_text)
     # otherwise use attention visualization for godel
     else:

explanation/markup.py CHANGED Viewed

@@ -10,7 +10,7 @@ from utils import formatting as fmt
 # main function that assigns each text snippet a marked bucket
 def markup_text(input_text: list, text_values: ndarray, variant: str):
-    print(f"Marking up text {input_text} and {text_values} for {variant}.")
     # naming of the 11 buckets
     bucket_tags = ["-5", "-4", "-3", "-2", "-1", "0", "+1", "+2", "+3", "+4", "+5"]

 # main function that assigns each text snippet a marked bucket
 def markup_text(input_text: list, text_values: ndarray, variant: str):
+    print(f"Marking up text {input_text} for {variant}.")
     # naming of the 11 buckets
     bucket_tags = ["-5", "-4", "-3", "-2", "-1", "0", "+1", "+2", "+3", "+4", "+5"]

main.py CHANGED Viewed

@@ -252,8 +252,8 @@ with gr.Blocks(
                     ],
                     inputs=[
                         user_prompt,
-                        system_prompt,
                         xai_selection,
                         model_selection,
                         knowledge_input,
                     ],
@@ -266,6 +266,7 @@ with gr.Blocks(
                     examples=[
                         [
                             "Does money buy happiness?",
                             (
                                 "Some studies have found a correlation between income"
                                 " and happiness, but this relationship often has"
@@ -275,10 +276,10 @@ with gr.Blocks(
                             ),
                             "",
                             "GODEL",
-                            "SHAP",
                         ],
                         [
                             "Does money buy happiness?",
                             (
                                 "Some studies have found a correlation between income"
                                 " and happiness, but this relationship often has"
@@ -288,14 +289,13 @@ with gr.Blocks(
                             ),
                             "",
                             "GODEL",
-                            "Attention",
                         ],
                         [
                             "Does money buy happiness?",
                             "",
                             "",
                             "GODEL",
-                            "Attention",
                         ],
                     ],
                     inputs=[

                     ],
                     inputs=[
                         user_prompt,
                         xai_selection,
+                        system_prompt,
                         model_selection,
                         knowledge_input,
                     ],
                     examples=[
                         [
                             "Does money buy happiness?",
+                            "SHAP",
                             (
                                 "Some studies have found a correlation between income"
                                 " and happiness, but this relationship often has"
                             ),
                             "",
                             "GODEL",
                         ],
                         [
                             "Does money buy happiness?",
+                            "Attention",
                             (
                                 "Some studies have found a correlation between income"
                                 " and happiness, but this relationship often has"
                             ),
                             "",
                             "GODEL",
                         ],
                         [
                             "Does money buy happiness?",
+                            "Attention",
                             "",
                             "",
                             "GODEL",
                         ],
                     ],
                     inputs=[

model/mistral.py CHANGED Viewed

@@ -32,12 +32,11 @@ TOKENIZER = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
 # default model config
 CONFIG = GenerationConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
 base_config_dict = {
-    "temperature": 0.7,
-    "max_new_tokens": 64,
     "top_p": 0.9,
     "repetition_penalty": 1.2,
     "do_sample": True,
-    "seed": 42,
 }
 CONFIG.update(**base_config_dict)

 # default model config
 CONFIG = GenerationConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
 base_config_dict = {
+    "temperature": 1,
+    "max_new_tokens": 100,
     "top_p": 0.9,
     "repetition_penalty": 1.2,
     "do_sample": True,
 }
 CONFIG.update(**base_config_dict)

utils/formatting.py CHANGED Viewed

@@ -92,15 +92,14 @@ def avg_attention(attention_values, model: str):
     # check if model is godel
     if model == "godel":
         # get attention values for the input and output vectors
-        attention = attention_values.decoder_attentions[0][0].detach().numpy()
-        return np.mean(attention, axis=0)
     # extracting attention values for mistral
     attention = attention_values.to(torch.device("cpu")).detach().numpy()
     # removing the last dimension and transposing to get the correct shape
     attention = attention[:, :, :, 0]
-    attention = attention.transpose()
     # return the averaged attention values
     return np.mean(attention, axis=1)

     # check if model is godel
     if model == "godel":
         # get attention values for the input and output vectors
+        attention = attention_values.encoder_attentions[0][0].detach().numpy()
+        return np.mean(attention, axis=1)
     # extracting attention values for mistral
     attention = attention_values.to(torch.device("cpu")).detach().numpy()
     # removing the last dimension to get the correct shape
     attention = attention[:, :, :, 0]
     # return the averaged attention values
     return np.mean(attention, axis=1)

utils/modelling.py CHANGED Viewed

@@ -100,11 +100,15 @@ def gpu_loading_config(max_memory: str = "15000MB"):
 # formatting mistral attention values
-# CREDIT: copied and adapted from BERTViz
 # see https://github.com/jessevig/bertviz
-def format_mistral_attention(attention_values):
     squeezed = []
     for layer_attention in attention_values:
         layer_attention = layer_attention.squeeze(0)
         squeezed.append(layer_attention)
-    return torch.stack(squeezed).to(torch.device("cpu"))

 # formatting mistral attention values
+# CREDIT: copied from BERTViz
 # see https://github.com/jessevig/bertviz
+def format_mistral_attention(attention_values, layers=None, heads=None):
+    if layers:
+        attention_values = [attention_values[layer_index] for layer_index in layers]
     squeezed = []
     for layer_attention in attention_values:
         layer_attention = layer_attention.squeeze(0)
+        if heads:
+            layer_attention = layer_attention[heads]
         squeezed.append(layer_attention)
+    return torch.stack(squeezed)