LennardZuendorf committed on
Commit
f301e04
1 Parent(s): 67a34bd

fix: fixes for plotting and attention visualization

backend/controller.py CHANGED
@@ -14,8 +14,47 @@ from explanation import (
14
  )
15
 
16
 
17
- # main interference function that that calls chat functions depending on selections
18
- # is getting called on every chat submit
19
  def interference(
20
  prompt: str,
21
  history: list,
@@ -31,6 +70,7 @@ def interference(
31
  Always answer as helpfully as possible, while being safe.
32
  """
33
 
 
34
  if model_selection.lower() == "mistral":
35
  model = mistral
36
  print("Indentified model as Mistral")
@@ -39,6 +79,7 @@ def interference(
39
  print("Indentified model as GODEL")
40
 
41
  # if an XAI approach is selected, grab the XAI module instance
 
42
  if xai_selection in ("SHAP", "Attention"):
43
  # matching selection
44
  match xai_selection.lower():
@@ -71,7 +112,7 @@ def interference(
71
  )
72
  # if no XAI approach is selected call the vanilla chat function
73
  else:
74
- # call the vanilla chat function
75
  prompt_output, history_output = vanilla_chat(
76
  model=model,
77
  message=prompt,
@@ -91,43 +132,3 @@ def interference(
91
 
92
  # return the outputs
93
  return prompt_output, history_output, xai_interactive, xai_markup, xai_plot
94
-
95
-
96
- # simple chat function that calls the model
97
- # formats prompts, calls for an answer and returns updated conversation history
98
- def vanilla_chat(
99
- model, message: str, history: list, system_prompt: str, knowledge: str = ""
100
- ):
101
- print(f"Running normal chat with {model}.")
102
-
103
- # formatting the prompt using the model's format_prompt function
104
- prompt = model.format_prompt(message, history, system_prompt, knowledge)
105
-
106
- # generating an answer using the model's respond function
107
- answer = model.respond(prompt)
108
-
109
- # updating the chat history with the new answer
110
- history.append((message, answer))
111
- # returning the updated history
112
- return "", history
113
-
114
-
115
- def explained_chat(
116
- model, xai, message: str, history: list, system_prompt: str, knowledge: str = ""
117
- ):
118
- print(f"Running explained chat with {xai} with {model}.")
119
-
120
- # formatting the prompt using the model's format_prompt function
121
- # message, history, system_prompt, knowledge = mdl.prompt_limiter(
122
- # message, history, system_prompt, knowledge
123
- # )
124
- prompt = model.format_prompt(message, history, system_prompt, knowledge)
125
-
126
- # generating an answer using the method's chat function
127
- answer, xai_graphic, xai_markup, xai_plot = xai.chat_explained(model, prompt)
128
-
129
- # updating the chat history with the new answer
130
- history.append((message, answer))
131
-
132
- # returning the updated history, xai graphic and xai plot elements
133
- return "", history, xai_graphic, xai_markup, xai_plot
 
14
  )
15
 
16
 
17
+ # simple chat function that calls the model
18
+ # formats prompts, calls for an answer and returns updated conversation history
19
+ def vanilla_chat(
20
+ model, message: str, history: list, system_prompt: str, knowledge: str = ""
21
+ ):
22
+ print(f"Running normal chat with {model}.")
23
+
24
+ # formatting the prompt using the model's format_prompt function
25
+ prompt = model.format_prompt(message, history, system_prompt, knowledge)
26
+
27
+ # generating an answer using the model's respond function
28
+ answer = model.respond(prompt)
29
+
30
+ # updating the chat history with the new answer
31
+ history.append((message, answer))
32
+ # returning the updated history
33
+ return "", history
34
+
35
+
36
+ def explained_chat(
37
+ model, xai, message: str, history: list, system_prompt: str, knowledge: str = ""
38
+ ):
39
+ print(f"Running explained chat with {xai} with {model}.")
40
+
41
+ # formatting the prompt using the model's format_prompt function
42
+ # message, history, system_prompt, knowledge = mdl.prompt_limiter(
43
+ # message, history, system_prompt, knowledge
44
+ # )
45
+ prompt = model.format_prompt(message, history, system_prompt, knowledge)
46
+
47
+ # generating an answer using the method's chat function
48
+ answer, xai_graphic, xai_markup, xai_plot = xai.chat_explained(model, prompt)
49
+
50
+ # updating the chat history with the new answer
51
+ history.append((message, answer))
52
+
53
+ # returning the updated history, xai graphic and xai plot elements
54
+ return "", history, xai_graphic, xai_markup, xai_plot
55
+
56
+
57
+ # main interference function that calls chat functions depending on selections
58
  def interference(
59
  prompt: str,
60
  history: list,
 
70
  Always answer as helpfully as possible, while being safe.
71
  """
72
 
73
+ # if a model is selected, grab the model instance
74
  if model_selection.lower() == "mistral":
75
  model = mistral
76
  print("Indentified model as Mistral")
 
79
  print("Indentified model as GODEL")
80
 
81
  # if an XAI approach is selected, grab the XAI module instance
82
+ # and call the explained chat function
83
  if xai_selection in ("SHAP", "Attention"):
84
  # matching selection
85
  match xai_selection.lower():
 
112
  )
113
  # if no XAI approach is selected call the vanilla chat function
114
  else:
115
+ # calling the vanilla chat function
116
  prompt_output, history_output = vanilla_chat(
117
  model=model,
118
  message=prompt,
 
132
 
133
  # return the outputs
134
  return prompt_output, history_output, xai_interactive, xai_markup, xai_plot
 
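For context, a minimal sketch of how the refactored controller could be wired from a Gradio interface. Only `interference` and its first two parameters (`prompt`, `history`) come from this file; the component names, the import path, and the order of the remaining inputs are assumptions for illustration.

```python
# Hypothetical wiring sketch (not part of this commit): hooking interference()
# up to a Gradio submit event. Component names and the input order after
# prompt/history are assumed, not taken from the repository.
import gradio as gr

from backend.controller import interference  # import path assumed

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    prompt_box = gr.Textbox(label="Prompt")
    system_prompt = gr.Textbox(label="System Prompt")
    knowledge = gr.Textbox(label="Knowledge")
    model_choice = gr.Radio(["Mistral", "GODEL"], value="GODEL", label="Model")
    xai_choice = gr.Radio(["None", "SHAP", "Attention"], value="None", label="XAI")
    xai_interactive = gr.HTML()
    xai_markup = gr.HighlightedText()
    xai_plot = gr.Plot()

    # interference() returns: cleared prompt, updated history, and the three XAI outputs
    prompt_box.submit(
        interference,
        inputs=[prompt_box, chatbot, system_prompt, knowledge, model_choice, xai_choice],
        outputs=[prompt_box, chatbot, xai_interactive, xai_markup, xai_plot],
    )

demo.launch()
```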
explanation/attention.py CHANGED
@@ -28,14 +28,14 @@ def chat_explained(model, prompt):
28
  # checking if model is mistral
29
  if type(model.MODEL) == type(mistral.MODEL):
30
 
31
- # get attention values for the input vectors
32
  attention_output = model.MODEL(input_ids, output_attentions=True).attentions
33
 
34
  # averaging attention across layers and heads
35
  attention_output = mdl.format_mistral_attention(attention_output)
36
  averaged_attention = fmt.avg_attention(attention_output, model="mistral")
37
 
38
- # attention visualization for godel
39
  else:
40
  # get attention values for the input and output vectors
41
  # using already generated input and output
 
28
  # checking if model is mistral
29
  if type(model.MODEL) == type(mistral.MODEL):
30
 
31
+ # get attention values for the input vectors, specific to mistral
32
  attention_output = model.MODEL(input_ids, output_attentions=True).attentions
33
 
34
  # averaging attention across layers and heads
35
  attention_output = mdl.format_mistral_attention(attention_output)
36
  averaged_attention = fmt.avg_attention(attention_output, model="mistral")
37
 
38
+ # otherwise use attention visualization for godel
39
  else:
40
  # get attention values for the input and output vectors
41
  # using already generated input and output
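For reference, a standalone sketch of the pattern the Mistral branch above relies on: calling the model with `output_attentions=True` yields one tensor per layer of shape (batch, heads, seq_len, seq_len), which can then be averaged across layers and heads. The dummy tensors and the final reduction here are illustrative only; the repository's own reduction lives in `utils/formatting.py` below.

```python
# Standalone sketch with dummy tensors (illustrative, not the repo's exact code):
# averaging per-layer self-attention maps across layers and heads.
import torch

num_layers, num_heads, seq_len = 4, 8, 6

# model(input_ids, output_attentions=True).attentions is a tuple with one
# tensor per layer, each of shape (batch, heads, seq_len, seq_len)
attentions = tuple(torch.rand(1, num_heads, seq_len, seq_len) for _ in range(num_layers))

# squeeze the batch dimension and stack the layers -> (layers, heads, seq, seq)
stacked = torch.stack([layer.squeeze(0) for layer in attentions])

# average over layers and heads -> a single (seq_len, seq_len) attention map
averaged = stacked.mean(dim=(0, 1))
print(averaged.shape)  # torch.Size([6, 6])
```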
explanation/plotting.py CHANGED
@@ -12,7 +12,6 @@ def plot_seq(seq_values: list, method: str = ""):
12
 
13
  # Convert importance values to numpy array for conditional coloring
14
  importance = np.array(importance)
15
- importance = importance.log
16
 
17
  # Determine the colors based on the sign of the importance values
18
  colors = ["#ff0051" if val > 0 else "#008bfb" for val in importance]
@@ -22,9 +21,8 @@ def plot_seq(seq_values: list, method: str = ""):
22
  x_positions = range(len(tokens)) # Positions for the bars
23
 
24
  # Creating vertical bar plot
25
- bar_width = 0.8 # Increase this value to make the bars wider
26
  plt.bar(x_positions, importance, color=colors, align="center", width=bar_width)
27
- plt.yscale("symlog")
28
 
29
  # Annotating each bar with its value
30
  padding = 0.1 # Padding for text annotation
 
12
 
13
  # Convert importance values to numpy array for conditional coloring
14
  importance = np.array(importance)
 
15
 
16
  # Determine the colors based on the sign of the importance values
17
  colors = ["#ff0051" if val > 0 else "#008bfb" for val in importance]
 
21
  x_positions = range(len(tokens)) # Positions for the bars
22
 
23
  # Creating vertical bar plot
24
+ bar_width = 0.8
25
  plt.bar(x_positions, importance, color=colors, align="center", width=bar_width)
 
26
 
27
  # Annotating each bar with its value
28
  padding = 0.1 # Padding for text annotation
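A self-contained version of the plot `plot_seq` produces after this commit drops the `.log` transform and the symlog y-scale; the tokens and importance values below are made up, only the colors and bar settings come from the file.

```python
# Illustrative sketch: vertical bar plot with sign-dependent colors,
# mirroring plot_seq() after the log scaling was removed.
# The token strings and importance values are made-up example data.
import matplotlib.pyplot as plt
import numpy as np

tokens = ["The", "movie", "was", "surprisingly", "good"]
importance = np.array([0.05, 0.4, -0.1, 0.8, 0.6])

# red for positive importance, blue for negative (same hex values as plot_seq)
colors = ["#ff0051" if val > 0 else "#008bfb" for val in importance]
x_positions = range(len(tokens))

bar_width = 0.8
plt.bar(x_positions, importance, color=colors, align="center", width=bar_width)
plt.xticks(x_positions, tokens, rotation=45)
plt.ylabel("importance")
plt.tight_layout()
plt.show()
```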
model/mistral.py CHANGED
@@ -31,7 +31,6 @@ CONFIG = GenerationConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
31
  base_config_dict = {
32
  "temperature": 0.7,
33
  "max_new_tokens": 64,
34
- "max_length": 64,
35
  "top_p": 0.9,
36
  "repetition_penalty": 1.2,
37
  "do_sample": True,
 
31
  base_config_dict = {
32
  "temperature": 0.7,
33
  "max_new_tokens": 64,
 
34
  "top_p": 0.9,
35
  "repetition_penalty": 1.2,
36
  "do_sample": True,
utils/formatting.py CHANGED
@@ -88,11 +88,19 @@ def flatten_attention(values: ndarray, axis: int = 0):
88
 
89
  # function to get averaged decoder attention from attention values
90
  def avg_attention(attention_values, model: str):
 
91
  # check if model is godel
92
  if model == "godel":
93
  # get attention values for the input and output vectors
94
  attention = attention_values.decoder_attentions[0][0].detach().numpy()
95
  return np.mean(attention, axis=0)
 
96
  # extracting attention values for mistral
97
- attention_np = attention_values.to(torch.device("cpu")).detach().numpy()
98
- return np.mean(attention_np, axis=(0, 1, 2))
 
88
 
89
  # function to get averaged decoder attention from attention values
90
  def avg_attention(attention_values, model: str):
91
+
92
  # check if model is godel
93
  if model == "godel":
94
  # get attention values for the input and output vectors
95
  attention = attention_values.decoder_attentions[0][0].detach().numpy()
96
  return np.mean(attention, axis=0)
97
+
98
  # extracting attention values for mistral
99
+ attention = attention_values.to(torch.device("cpu")).detach().numpy()
100
+
101
+ # removing the last dimension and transposing to get the correct shape
102
+ attention = attention[:, :, :, 0]
103
+ attention = attention.transpose()
104
+
105
+ # return the averaged attention values
106
+ return np.mean(attention, axis=1)
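A dummy-tensor trace of the shapes flowing through the rewritten Mistral branch, assuming `format_mistral_attention` has already stacked the per-layer tensors into (layers, heads, seq_len, seq_len); the sizes are arbitrary.

```python
# Shape walk-through with dummy data (sizes are arbitrary, not from the repo).
import numpy as np
import torch

layers, heads, seq_len = 4, 8, 6
stacked = torch.rand(layers, heads, seq_len, seq_len)  # stand-in for format_mistral_attention output

attention = stacked.to(torch.device("cpu")).detach().numpy()  # (layers, heads, seq, seq)
attention = attention[:, :, :, 0]                             # (layers, heads, seq)
attention = attention.transpose()                             # (seq, heads, layers)
averaged = np.mean(attention, axis=1)                         # (seq_len, layers)
print(averaged.shape)  # (6, 4)
```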
utils/modelling.py CHANGED
@@ -107,4 +107,4 @@ def format_mistral_attention(attention_values):
107
  for layer_attention in attention_values:
108
  layer_attention = layer_attention.squeeze(0)
109
  squeezed.append(layer_attention)
110
- return torch.stack(squeezed)
 
107
  for layer_attention in attention_values:
108
  layer_attention = layer_attention.squeeze(0)
109
  squeezed.append(layer_attention)
110
+ return torch.stack(squeezed).to(torch.device("cpu"))
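With the added `.to(torch.device("cpu"))`, the stacked tensor is moved off the GPU at this point, so the later `.numpy()` call in `avg_attention` works regardless of where the model ran. A small dummy-tensor check of the resulting shape; the sizes are arbitrary.

```python
# Dummy-tensor check (arbitrary sizes): squeeze the batch dimension per layer,
# stack the layers, and move the result to CPU as format_mistral_attention now does.
import torch

attention_values = tuple(torch.rand(1, 8, 6, 6) for _ in range(4))  # (1, heads, seq, seq) per layer

squeezed = [layer_attention.squeeze(0) for layer_attention in attention_values]
stacked = torch.stack(squeezed).to(torch.device("cpu"))
print(stacked.shape)  # torch.Size([4, 8, 6, 6]) -> (layers, heads, seq_len, seq_len)
```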