Upload folder using huggingface_hub
- classes.py +4 -1
- helpers.py +12 -3
- models.py +20 -1
- routes/attention.py +31 -12
- routes/mask_prediction.py +3 -2
- routes/tokenize.py +3 -2
classes.py
CHANGED
@@ -1,9 +1,10 @@
-from typing import List
+from typing import List, Optional
 from pydantic import BaseModel

 class TokenizeRequest(BaseModel):
     text: str
     model_name: str = "bert-base-uncased"
+    debug: Optional[bool] = False

 class Token(BaseModel):
     text: str
@@ -25,6 +26,7 @@ class MaskPredictionRequest(BaseModel):
     mask_index: int
     model_name: str = "bert-base-uncased"
     top_k: int = 10
+    debug: Optional[bool] = False

 class MaskPredictionResponse(BaseModel):
     predictions: List[WordPrediction]
@@ -33,6 +35,7 @@ class AttentionRequest(BaseModel):
     text: str
     model_name: str = "bert-base-uncased"
     visualization_method: str = "raw"  # Options: "raw", "rollout", "flow"
+    debug: Optional[bool] = False

 class AttentionHead(BaseModel):
     headIndex: int
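All three request models gain the same optional field with a False default, so clients that omit it keep the old behaviour. A minimal sketch of constructing the updated models, assuming classes.py is importable as shown; the text and model values are illustrative:

# Illustrative only: the new debug field defaults to False, so it stays optional.
from classes import TokenizeRequest, AttentionRequest

tok_req = TokenizeRequest(text="the cat sat on the mat", debug=True)
attn_req = AttentionRequest(
    text="the cat sat on the mat",
    model_name="EdwinXhen/TinyBert_6Layer_MLM",
    visualization_method="rollout",
    debug=True,
)
print(tok_req.model_name, attn_req.debug)  # "bert-base-uncased" True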
helpers.py
CHANGED
@@ -202,17 +202,26 @@ def map_bert_tokens_to_words(tokens, original_text):
     return token_to_word_map

 # Helper function to load models on demand
-def get_model_and_tokenizer(model_name):
+def get_model_and_tokenizer(model_name, debug=False):
     if model_name not in MODEL_CONFIGS:
         raise HTTPException(status_code=400, detail=f"Model {model_name} not supported")

     if model_name not in models:
         print(f"Loading {model_name}...")
         config = MODEL_CONFIGS[model_name]
-        models[model_name] = config["model_class"].from_pretrained(model_name)
-        tokenizers[model_name] = config["tokenizer_class"].from_pretrained(model_name)
+
+        # Check if this is a custom model that requires special loading
+        if config["model_class"] == "custom" or model_name == "EdwinXhen/TinyBert_6Layer_MLM":
+            # Use the custom model loading function
+            tokenizers[model_name], models[model_name] = load_model(model_name, debug)
+        else:
+            # Standard model loading
+            models[model_name] = config["model_class"].from_pretrained(model_name)
+            tokenizers[model_name] = config["tokenizer_class"].from_pretrained(model_name)
+
         if torch.cuda.is_available():
             models[model_name] = models[model_name].cuda()
+
         models[model_name].eval()
         print(f"Model {model_name} loaded")

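The loader now branches on the registry entry but returns the cached (model, tokenizer) pair either way, as the call sites in the routes show. A hedged sketch of the two call paths, assuming helpers.py exposes get_model_and_tokenizer as above; the call sites themselves are illustrative, not part of the commit:

# Illustrative call sites for the updated helper.
from helpers import get_model_and_tokenizer

# Standard entry: "model_class" is a transformers class, so from_pretrained() is used.
model, tokenizer = get_model_and_tokenizer("bert-base-uncased")

# Custom entry: "model_class" is the string "custom", so load_model() from models.py
# is used instead; debug=True enables the extra [DEBUG] print during loading.
tiny_model, tiny_tokenizer = get_model_and_tokenizer("EdwinXhen/TinyBert_6Layer_MLM", debug=True)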
models.py
CHANGED
@@ -1,4 +1,4 @@
-from transformers import BertForMaskedLM, RobertaForMaskedLM, AutoTokenizer, BertModel, RobertaModel, DistilBertForMaskedLM, DistilBertModel
+from transformers import BertForMaskedLM, RobertaForMaskedLM, AutoTokenizer, BertModel, RobertaModel, DistilBertForMaskedLM, DistilBertModel, AutoModelForMaskedLM
 import nltk


@@ -30,8 +30,27 @@ MODEL_CONFIGS = {
         "model_class": DistilBertForMaskedLM,
         "tokenizer_class": AutoTokenizer,
         "base_model_class": DistilBertModel
+    },
+    "EdwinXhen/TinyBert_6Layer_MLM": {
+        "name": "TinyBERT 6 Layer",
+        "model_class": "custom",
+        "tokenizer_class": AutoTokenizer,
+        "base_model_class": BertModel
     }
 }

 models = {}
 tokenizers = {}
+
+def load_model(model_type, debug=False):
+    if model_type.lower() == "custom" or model_type == "EdwinXhen/TinyBert_6Layer_MLM":
+        # Load custom model from Hugging Face repository
+        custom_repo = "EdwinXhen/TinyBert_6Layer_MLM"
+        if debug:
+            print(f"[DEBUG] Loading custom model from HuggingFace repository: {custom_repo}")
+        tokenizer = AutoTokenizer.from_pretrained(custom_repo)
+        model = AutoModelForMaskedLM.from_pretrained(custom_repo, output_attentions=True)
+        return tokenizer, model
+    # Handle other models with existing logic
+    # This is a placeholder for the existing model loading logic
+    return None, None
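The new load_model helper can also be exercised on its own; a minimal sketch, assuming the EdwinXhen/TinyBert_6Layer_MLM repository is reachable (the input sentence is arbitrary):

# Illustrative only: direct use of the new custom-model loader.
from models import load_model

tokenizer, model = load_model("EdwinXhen/TinyBert_6Layer_MLM", debug=True)
if model is not None:
    # output_attentions=True was requested at load time, so the forward pass
    # returns per-layer attention tensors alongside the MLM logits.
    inputs = tokenizer("the cat sat on the [MASK].", return_tensors="pt")
    outputs = model(**inputs)
    print(outputs.logits.shape, len(outputs.attentions))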
routes/attention.py
CHANGED
@@ -9,11 +9,12 @@ router = APIRouter()
 async def get_attention_matrices(request: AttentionRequest):
     """Get attention matrices for the input text using the specified model"""
     try:
-        print(f"Processing attention request: text='{request.text}', model={request.model_name}, method={request.visualization_method}")
+        debug = request.debug if hasattr(request, 'debug') else False
+        print(f"Processing attention request: text='{request.text}', model={request.model_name}, method={request.visualization_method}, debug={debug}")

         # First tokenize the text using the same function that the /tokenize endpoint uses
         # to ensure consistency
-        tokenizer_response = await tokenize_text(TokenizeRequest(text=request.text, model_name=request.model_name))
+        tokenizer_response = await tokenize_text(TokenizeRequest(text=request.text, model_name=request.model_name, debug=debug))
         tokens = tokenizer_response["tokens"]
         print(f"Tokenized into {len(tokens)} tokens")

@@ -24,17 +25,35 @@ async def get_attention_matrices(request: AttentionRequest):
             raise HTTPException(status_code=400, detail=f"Model {model_name} not supported")

         config = MODEL_CONFIGS[model_name]
-        base_model_class = config["base_model_class"]

-        # Check if we already have a base model cached
-        base_model_key = f"{model_name}_base"
-        if base_model_key not in models:
-            print(f"Loading base model {model_name}...")
-            models[base_model_key] = base_model_class.from_pretrained(model_name, attn_implementation="eager")
-            if torch.cuda.is_available():
-                models[base_model_key] = models[base_model_key].cuda()
-            models[base_model_key].eval()
-            print(f"Base model {model_name} loaded")
+        # Handle custom model differently if needed
+        if config["model_class"] == "custom":
+            # For custom models, we need special handling
+            base_model_key = f"{model_name}_base"
+            if base_model_key not in models:
+                # For TinyBERT, we use the same model with different configuration
+                _, tokenizer = get_model_and_tokenizer(model_name, debug)
+                custom_repo = "EdwinXhen/TinyBert_6Layer_MLM"
+                print(f"Loading base model from {custom_repo} for attention visualization...")
+                from transformers import AutoModel
+                models[base_model_key] = AutoModel.from_pretrained(custom_repo, attn_implementation="eager", output_attentions=True)
+                if torch.cuda.is_available():
+                    models[base_model_key] = models[base_model_key].cuda()
+                models[base_model_key].eval()
+                print(f"Base model {model_name} loaded")
+        else:
+            # Standard model loading
+            base_model_class = config["base_model_class"]
+
+            # Check if we already have a base model cached
+            base_model_key = f"{model_name}_base"
+            if base_model_key not in models:
+                print(f"Loading base model {model_name}...")
+                models[base_model_key] = base_model_class.from_pretrained(model_name, attn_implementation="eager")
+                if torch.cuda.is_available():
+                    models[base_model_key] = models[base_model_key].cuda()
+                models[base_model_key].eval()
+                print(f"Base model {model_name} loaded")

         model = models[base_model_key]
         tokenizer = tokenizers[request.model_name]
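Outside the route, the same eager-attention base model can be probed directly; a minimal sketch mirroring the loading options above (the sentence is arbitrary, and torch/transformers are assumed to be installed):

# Illustrative only: per-layer attention from the eager-attention base model.
import torch
from transformers import AutoModel, AutoTokenizer

repo = "EdwinXhen/TinyBert_6Layer_MLM"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModel.from_pretrained(repo, attn_implementation="eager", output_attentions=True)
model.eval()

inputs = tokenizer("the cat sat on the mat", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# outputs.attentions is a tuple with one (batch, heads, seq, seq) tensor per layer.
print(len(outputs.attentions), outputs.attentions[0].shape)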
routes/mask_prediction.py
CHANGED
@@ -18,7 +18,8 @@ async def predict_masked_token(request: MaskPredictionRequest, x_token_to_mask:
         print(f"Token to mask header: '{x_token_to_mask}'")
         print(f"Explicit masked text header: '{x_explicit_masked_text}'")

-        model, tokenizer = get_model_and_tokenizer(request.model_name)
+        debug = request.debug if hasattr(request, 'debug') else False
+        model, tokenizer = get_model_and_tokenizer(request.model_name, debug)

         # For RoBERTa, use explicit masked text if provided
         if "roberta" in request.model_name and x_explicit_masked_text:
@@ -78,7 +79,7 @@ async def predict_masked_token(request: MaskPredictionRequest, x_token_to_mask:
             return MaskPredictionResponse(predictions=predictions_list)

         # Get tokens from the original text using the tokenize endpoint for consistency
-        tokenizer_response = await tokenize_text(TokenizeRequest(text=request.text, model_name=request.model_name))
+        tokenizer_response = await tokenize_text(TokenizeRequest(text=request.text, model_name=request.model_name, debug=debug))
         tokens = tokenizer_response["tokens"]

         print(f"Tokenizer response: {len(tokens)} tokens")
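The hasattr() fallback means a request object without the new field still resolves to debug=False; a small sketch, assuming classes.py is importable (the values are illustrative):

# Illustrative only: older payloads without "debug" keep the previous behaviour.
from classes import MaskPredictionRequest

req = MaskPredictionRequest(text="the cat sat on the mat", mask_index=2)
debug = req.debug if hasattr(req, 'debug') else False
print(debug, req.top_k)  # False 10  (both come from the field defaults)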
routes/tokenize.py
CHANGED
@@ -8,7 +8,8 @@ router = APIRouter()
 async def tokenize_text(request: TokenizeRequest):
     """Tokenize input text using the specified model's tokenizer"""
     try:
-        _, tokenizer = get_model_and_tokenizer(request.model_name)
+        debug = request.debug if hasattr(request, 'debug') else False
+        _, tokenizer = get_model_and_tokenizer(request.model_name, debug)

         # The text might include punctuation - let the tokenizer handle it properly
         if "roberta" in request.model_name:
@@ -27,7 +28,7 @@ async def tokenize_text(request: TokenizeRequest):
             # Clean the tokens to remove the leading 'Ġ' character from RoBERTa tokens
             tokens = [clean_roberta_token(token) for token in tokens]
         else:
-            # For BERT and DistilBERT, add special tokens and tokenize
+            # For BERT, DistilBERT, and TinyBERT, add special tokens and tokenize
            text = f"[CLS] {request.text} [SEP]"
            tokens = tokenizer.tokenize(text)

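End to end, the flag simply travels with the request body. An illustrative call, assuming the router is mounted at /tokenize (as the comment in routes/attention.py implies) and the server runs locally; host and port are assumptions:

# Illustrative request; the "tokens" key matches how other routes read this response.
import requests

resp = requests.post(
    "http://localhost:8000/tokenize",
    json={
        "text": "the cat sat on the mat",
        "model_name": "EdwinXhen/TinyBert_6Layer_MLM",
        "debug": True,
    },
)
print(resp.json()["tokens"])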