LennardZuendorf committed
Commit c7e16d0
1 Parent(s): b0721f8

fix: fixing attention visualization

explanation/attention.py CHANGED
@@ -3,18 +3,22 @@
 
 # internal imports
 from utils import formatting as fmt
+from model import godel
 from .markup import markup_text
 
+
 # chat function that returns an answer
 # and marked text based on attention
 def chat_explained(model, prompt):
 
+    model.set_config({"return_dict": True})
+
     # get encoded input
     encoder_input_ids = model.TOKENIZER(
         prompt, return_tensors="pt", add_special_tokens=True
     ).input_ids
     # generate output together with attentions of the model
-    decoder_input_ids = model.MODEL.generate(
+    decoder_input_ids = model.MODEL(
         encoder_input_ids, output_attentions=True, generation_config=model.CONFIG
     )
 
@@ -26,16 +30,24 @@ def chat_explained(model, prompt):
         model.TOKENIZER.convert_ids_to_tokens(decoder_input_ids[0])
     )
 
-    # get attention values for the input and output vectors
-    # using already generated input and output
-    attention_output = model.MODEL(
-        input_ids=encoder_input_ids,
-        decoder_input_ids=decoder_input_ids,
-        output_attentions=True,
-    )
+    # getting attention if model is godel
+    if isinstance(model, godel):
+        print("attention.py: Model detected to be GODEL")
+
+        # get attention values for the input and output vectors
+        # using already generated input and output
+        attention_output = model.MODEL(
+            input_ids=encoder_input_ids,
+            decoder_input_ids=decoder_input_ids,
+            output_attentions=True,
+        )
+
+        # averaging attention across layers
+        averaged_attention = fmt.avg_attention(attention_output)
 
-    # averaging attention across layers
-    averaged_attention = fmt.avg_attention(attention_output)
+    # getting attention if model is mistral
+    else:
+        averaged_attention = fmt.avg_attention(decoder_input_ids)
 
     # format response text for clean output
     response_text = fmt.format_output_text(decoder_text)
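
For context on the GODEL branch above: a minimal, self-contained sketch of averaging a seq2seq model's cross-attention across layers and heads. It illustrates the kind of reduction `fmt.avg_attention` is used for here, but the helper's actual implementation lives in the `utils` package and may differ; the prompt and `max_new_tokens` value are illustrative only.

```python
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# assumption: the GODEL checkpoint referenced elsewhere in this commit
tokenizer = AutoTokenizer.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")
model = AutoModelForSeq2SeqLM.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")

# encode a prompt and generate an answer
encoder_input_ids = tokenizer("Does money buy happiness?", return_tensors="pt").input_ids
decoder_input_ids = model.generate(encoder_input_ids, max_new_tokens=20)

# re-run a forward pass with the generated ids to obtain attention tensors
outputs = model(
    input_ids=encoder_input_ids,
    decoder_input_ids=decoder_input_ids,
    output_attentions=True,
)

# cross_attentions: one (batch, heads, target_len, source_len) tensor per layer
stacked = torch.stack(outputs.cross_attentions)  # (layers, batch, heads, tgt, src)
averaged = stacked.mean(dim=(0, 2)).squeeze(0)   # (tgt, src) after averaging layers and heads
print(averaged.shape)
```

Each row of `averaged` then weights one generated token against the input tokens, which is the per-token weighting that the markup step can visualize.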
explanation/interpret_captum.py CHANGED
@@ -45,11 +45,9 @@ def chat_explained(model, prompt):
 
     # getting response text, graphic placeholder and marked text object
     response_text = fmt.format_output_text(attribution_result.output_tokens)
-    graphic = (
-        """<div style='text-align: center; font-family:arial;'><h4>
+    graphic = """<div style='text-align: center; font-family:arial;'><h4>
     Intepretation with Captum doesn't support an interactive graphic.</h4></div>
     """
-    )
     marked_text = markup_text(input_tokens, values, variant="captum")
 
     # return response, graphic and marked_text array
explanation/interpret_shap.py CHANGED
@@ -32,7 +32,7 @@ def wrap_shap(model):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
     # updating the model settings
-    model.set_config()
+    model.set_config({})
 
     # (re)initialize the shap models and masker
     # creating a shap text_generation model
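
The `set_config` helper that now takes a dict is defined in the model wrapper modules and is not shown in this diff. A hypothetical sketch of what such a helper could look like, reusing the `CONFIG` and `base_config_dict` names from `model/godel.py`, might be:

```python
from transformers import GenerationConfig

CONFIG = GenerationConfig.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")
base_config_dict = {"max_new_tokens": 50, "min_length": 8, "top_p": 0.9, "do_sample": True}


# hypothetical helper, not the repository's actual implementation
def set_config(config_dict: dict):
    # re-apply the base settings, then any caller-specific overrides
    CONFIG.update(**base_config_dict)
    if config_dict:
        CONFIG.update(**config_dict)


# callers pass an empty dict for defaults, or overrides such as {"return_dict": True}
set_config({})
```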
main.py CHANGED
@@ -110,11 +110,10 @@ with gr.Blocks(
             label="System Prompt",
             info="Set the models system prompt, dictating how it answers.",
             # default system prompt is set to this in the backend
-            placeholder=("""
+            placeholder="""
             You are a helpful, respectful and honest assistant. Always
             answer as helpfully as possible, while being safe.
-            """
-            ),
+            """,
         )
         # column that takes up 1/4 of the row
         with gr.Column(scale=1):
@@ -122,7 +121,9 @@ with gr.Blocks(
         xai_selection = gr.Radio(
             ["None", "SHAP", "Attention"],
             label="Interpretability Settings",
-            info="Select a Interpretability Approach Implementation to use.",
+            info=(
+                "Select a Interpretability Approach Implementation to use."
+            ),
             value="None",
             interactive=True,
             show_label=True,
@@ -209,10 +210,15 @@ with gr.Blocks(
         gr.Examples(
             label="Example Questions",
             examples=[
-                ["Does money buy happiness?", "Mistral", "SHAP"],
-                ["Does money buy happiness?", "Mistral", "Attention"],
+                ["Does money buy happiness?", "", "Mistral", "SHAP"],
+                ["Does money buy happiness?", "", "Mistral", "Attention"],
+            ],
+            inputs=[
+                user_prompt,
+                knowledge_input,
+                model_selection,
+                xai_selection,
             ],
-            inputs=[user_prompt, model_selection, xai_selection],
         )
         with gr.Accordion("GODEL Model Examples", open=False):
             # examples util component
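
The main.py change wires a fourth component, `knowledge_input`, into the example rows: in Gradio, each row in `examples` must supply one value per component listed in `inputs`, in the same order. A stripped-down sketch of that pattern (layout simplified, component labels and choices assumed):

```python
import gradio as gr

with gr.Blocks() as demo:
    user_prompt = gr.Textbox(label="User Prompt")
    knowledge_input = gr.Textbox(label="Knowledge")
    model_selection = gr.Radio(["GODEL", "Mistral"], label="Model")
    xai_selection = gr.Radio(["None", "SHAP", "Attention"], label="Interpretability Settings")

    # each example row provides one value per input component, in order
    gr.Examples(
        label="Example Questions",
        examples=[
            ["Does money buy happiness?", "", "Mistral", "SHAP"],
            ["Does money buy happiness?", "", "Mistral", "Attention"],
        ],
        inputs=[user_prompt, knowledge_input, model_selection, xai_selection],
    )

if __name__ == "__main__":
    demo.launch()
```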
model/godel.py CHANGED
@@ -13,7 +13,12 @@ MODEL = AutoModelForSeq2SeqLM.from_pretrained("microsoft/GODEL-v1_1-large-seq2se
 
 # model config definition
 CONFIG = GenerationConfig.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")
-base_config_dict = {"max_new_tokens": 50, "min_length": 8, "top_p": 0.9, "do_sample": True}
+base_config_dict = {
+    "max_new_tokens": 50,
+    "min_length": 8,
+    "top_p": 0.9,
+    "do_sample": True,
+}
 CONFIG.update(**base_config_dict)
 
 
@@ -59,11 +64,13 @@ def format_prompt(message: str, history: list, system_prompt: str, knowledge: st
 # CREDIT: Copied from official interference example on Huggingface
 ## see https://huggingface.co/microsoft/GODEL-v1_1-large-seq2seq
 def respond(prompt):
+    set_config({})
+
     # tokenizing input string
     input_ids = TOKENIZER(f"{prompt}", return_tensors="pt").input_ids
 
     # generating using config and decoding output
-    outputs = MODEL.generate(input_ids,generation_config=CONFIG)
+    outputs = MODEL.generate(input_ids, generation_config=CONFIG)
    output = TOKENIZER.decode(outputs[0], skip_special_tokens=True)
 
     # returns the model output string
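
As a quick reference for the config handling above: `GenerationConfig.update(**kwargs)` merges keyword arguments into the config object in place, which is what `CONFIG.update(**base_config_dict)` relies on.

```python
from transformers import GenerationConfig

# same checkpoint as in model/godel.py
config = GenerationConfig.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")
config.update(max_new_tokens=50, min_length=8, top_p=0.9, do_sample=True)
print(config.max_new_tokens, config.do_sample)  # 50 True
```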
model/mistral.py CHANGED
@@ -110,6 +110,7 @@ def format_answer(answer: str):
 
 
 def respond(prompt: str):
+    set_config({})
 
     # tokenizing inputs and configuring model
     input_ids = TOKENIZER(f"{prompt}", return_tensors="pt")["input_ids"].to(device)