Upload fine-tuned model, tokenizer, and supporting files for modernbert-imdb-sentiment

Browse files

Files changed (4) hide show

README.md +50 -0
config.yaml +3 -3
inference.py +44 -58
pytorch_model.bin +2 -2

README.md CHANGED Viewed

@@ -9,6 +9,16 @@ datasets:
 metrics:
 - accuracy
 - f1
 ---
 # ModernBERT IMDb Sentiment Analysis Model
@@ -69,3 +79,43 @@ print(f"Predicted label: {predicted_label}")
 ### Model Citation
 - **Citation**: voxmenthe/modernbert-imdb-sentiment

 metrics:
 - accuracy
 - f1
+title: IMDb Sentiment Analyzer
+emoji: 🤗
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: "5.29.0" # Verify this matches your Gradio version in requirements.txt
+app_file: app.py
+pinned: false
+hf_oauth: false
+disable_embedding: false
 ---
 # ModernBERT IMDb Sentiment Analysis Model
 ### Model Citation
 - **Citation**: voxmenthe/modernbert-imdb-sentiment
+## IMDb Sentiment Analyzer - Gradio App
+This repository contains a Gradio application for sentiment analysis of IMDb movie reviews.
+It uses a fine-tuned ModernBERT model hosted on Hugging Face.
+**Space Link:** [voxmenthe/imdb-sentiment-demo](https://huggingface.co/spaces/voxmenthe/imdb-sentiment-demo)
+**Model Link:** [voxmenthe/modernbert-imdb-sentiment](https://huggingface.co/voxmenthe/modernbert-imdb-sentiment)
+## Features
+*   **Text Input**: Analyze custom movie review text.
+*   **Random IMDb Sample**: Load a random review from the IMDb test dataset.
+*   **Sentiment Prediction**: Classifies sentiment as Positive or Negative.
+*   **True Label Display**: Shows the actual IMDb label for loaded samples.
+## Setup & Running Locally
+1.  **Clone the repository (or your Space repository):**
+    ```bash
+    git clone https://huggingface.co/spaces/voxmenthe/imdb-sentiment-demo
+    cd imdb-sentiment-demo
+    ```
+2.  **Install dependencies:**
+    Ensure you have Python 3.11+ installed.
+    ```bash
+    pip install -r requirements.txt
+    ```
+3.  **Run the application:**
+    ```bash
+    python app.py
+    ```
+    The application will be available at `http://127.0.0.1:7860`.
+## Model Information
+The sentiment analysis model is a `ModernBERT` architecture fine-tuned on the IMDb dataset. The weights come from the checkpoint `mean_epoch5_0.9575acc_0.9575f1.pt`, which was then uploaded to `voxmenthe/modernbert-imdb-sentiment`.

config.yaml CHANGED Viewed

@@ -1,12 +1,12 @@
 model:
-  name: "voxmenthe/modernbert-imdb-sentiment"
-  output_dir: "checkpoints"
   max_length: 880 # 256
   dropout: 0.1
   pooling_strategy: "mean" # Current default, change as needed
 inference:
   # Default path, can be overridden
-  model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt"
   # Using the same max_length as training for consistency
   max_length: 880 # 256

 model:
+  name_or_path: "voxmenthe/modernbert-imdb-sentiment"
+  tokenizer_name_or_path: "answerdotai/ModernBERT-base"
   max_length: 880 # 256
   dropout: 0.1
   pooling_strategy: "mean" # Current default, change as needed
 inference:
   # Default path, can be overridden
+  model_path: "voxmenthe/modernbert-imdb-sentiment"
   # Using the same max_length as training for consistency
   max_length: 880 # 256

inference.py CHANGED Viewed

@@ -1,79 +1,65 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-from models import ModernBertForSentiment
-from transformers import ModernBertConfig
 from typing import Dict, Any
 import yaml
-import os
 class SentimentInference:
     def __init__(self, config_path: str = "config.yaml"):
-        """Load configuration and initialize model and tokenizer."""
         with open(config_path, 'r') as f:
-            config = yaml.safe_load(f)
-        model_cfg = config.get('model', {})
-        inference_cfg = config.get('inference', {})
-        # Path to the .pt model weights file
-        model_weights_path = inference_cfg.get('model_path',
-                                             os.path.join(model_cfg.get('output_dir', 'checkpoints'), 'best_model.pt'))
-        # Base model name from config (e.g., 'answerdotai/ModernBERT-base')
-        # This will be used for loading both tokenizer and base BERT config from Hugging Face Hub
-        base_model_name = model_cfg.get('name', 'answerdotai/ModernBERT-base')
-        self.max_length = inference_cfg.get('max_length', model_cfg.get('max_length', 256))
-        # Load tokenizer from the base model name (e.g., from Hugging Face Hub)
-        print(f"Loading tokenizer from: {base_model_name}")
-        self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)
-        # Load base BERT config from the base model name
-        print(f"Loading ModernBertConfig from: {base_model_name}")
-        bert_config = ModernBertConfig.from_pretrained(base_model_name)
-        # --- Apply any necessary overrides from your config to the loaded bert_config ---
-        # For example, if your ModernBertForSentiment expects specific config values beyond the base BERT model.
-        # Your current ModernBertForSentiment takes the entire config object, which might implicitly carry these.
-        # However, explicitly setting them on bert_config loaded from HF is safer if they are architecturally relevant.
-        bert_config.classifier_dropout = model_cfg.get('dropout', bert_config.classifier_dropout) # Example
-        # Ensure num_labels is set if your inference model needs it (usually for HF pipeline, less so for manual predict)
-        # bert_config.num_labels = model_cfg.get('num_labels', 1) # Typically 1 for binary sentiment regression-style output
-        # It's also important that pooling_strategy and num_weighted_layers are set on the config object
-        # that ModernBertForSentiment receives, as it uses these to build its layers.
-        # These are usually fine-tuning specific, not part of the base HF config, so they should come from your model_cfg.
-        bert_config.pooling_strategy = model_cfg.get('pooling_strategy', 'cls')
-        bert_config.num_weighted_layers = model_cfg.get('num_weighted_layers', 4)
-        bert_config.loss_function = model_cfg.get('loss_function', {'name': 'SentimentWeightedLoss', 'params': {}}) # Needed by model init
-        # Ensure num_labels is explicitly set for the model's classifier head
-        bert_config.num_labels = 1 # For sentiment (positive/negative) often treated as 1 logit output
-        print("Instantiating ModernBertForSentiment model structure...")
-        self.model = ModernBertForSentiment(bert_config)
-        print(f"Loading model weights from local checkpoint: {model_weights_path}")
-        # Load the entire checkpoint dictionary first
-        checkpoint = torch.load(model_weights_path, map_location=torch.device('cpu'))
-        # Extract the model_state_dict from the checkpoint
-        # This handles the case where the checkpoint saves more than just the model weights (e.g., optimizer state, epoch)
-        if 'model_state_dict' in checkpoint:
-            model_state_to_load = checkpoint['model_state_dict']
-        else:
-            # If the checkpoint is just the state_dict itself (older format or different saving convention)
-            model_state_to_load = checkpoint
-        self.model.load_state_dict(model_state_to_load)
         self.model.eval()
-        print("Model loaded successfully.")
     def predict(self, text: str) -> Dict[str, Any]:
-        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length)
         with torch.no_grad():
             outputs = self.model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
-        logits = outputs["logits"]
         prob = torch.sigmoid(logits).item()
         return {"sentiment": "positive" if prob > 0.5 else "negative", "confidence": prob}

 import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, ModernBertConfig
+# models.py (containing ModernBertForSentiment) will be loaded from the Hub due to trust_remote_code=True
 from typing import Dict, Any
 import yaml
 class SentimentInference:
     def __init__(self, config_path: str = "config.yaml"):
+        """Load configuration and initialize model and tokenizer from Hugging Face Hub."""
         with open(config_path, 'r') as f:
+            config_data = yaml.safe_load(f)
+        model_yaml_cfg = config_data.get('model', {})
+        inference_yaml_cfg = config_data.get('inference', {})
+        model_hf_repo_id = model_yaml_cfg.get('name_or_path')
+        if not model_hf_repo_id:
+            raise ValueError("model.name_or_path must be specified in config.yaml (e.g., 'username/model_name')")
+        tokenizer_hf_repo_id = model_yaml_cfg.get('tokenizer_name_or_path', model_hf_repo_id)
+        self.max_length = inference_yaml_cfg.get('max_length', model_yaml_cfg.get('max_length', 512))
+        print(f"Loading tokenizer from: {tokenizer_hf_repo_id}")
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_hf_repo_id)
+        print(f"Loading base ModernBertConfig from: {model_hf_repo_id}")
+        # Load the config that was uploaded with the model (config.json in the HF repo)
+        # This config should already have the correct architecture defined by ModernBertConfig.
+        # We then augment it with any custom parameters needed by ModernBertForSentiment's __init__.
+        loaded_config = ModernBertConfig.from_pretrained(model_hf_repo_id)
+        # Augment loaded_config with parameters from model_yaml_cfg needed for ModernBertForSentiment initialization
+        # These should reflect how the model was trained and its specific custom head.
+        loaded_config.pooling_strategy = model_yaml_cfg.get('pooling_strategy', 'mean') # Default to 'mean' as per your models.py change
+        loaded_config.num_weighted_layers = model_yaml_cfg.get('num_weighted_layers', 4)
+        loaded_config.classifier_dropout = model_yaml_cfg.get('dropout') # Allow None if not in yaml
+        # num_labels should ideally be in the config.json uploaded to HF, but can be set here if needed.
+        # For binary sentiment with a single logit output, num_labels is 1.
+        loaded_config.num_labels = model_yaml_cfg.get('num_labels', 1)
+        # The loss_function may not be needed for inference if the model does not use it in its forward pass during eval,
+        # but if ModernBertForSentiment.__init__ requires it, it must be provided.
+        # It is left unset here for simplicity, on the assumption that basic inference does not need it.
+        # loaded_config.loss_function = model_yaml_cfg.get('loss_function', {'name': '...', 'params': {}})
+        print(f"Instantiating and loading model weights for {model_hf_repo_id}...")
+        # trust_remote_code=True allows loading models.py (containing ModernBertForSentiment)
+        # from the Hugging Face model repository.
+        self.model = AutoModelForSequenceClassification.from_pretrained(
+            model_hf_repo_id,
+            config=loaded_config, # Pass the augmented config
+            trust_remote_code=True
+        )
         self.model.eval()
+        print(f"Model {model_hf_repo_id} loaded successfully from Hugging Face Hub.")
     def predict(self, text: str) -> Dict[str, Any]:
+        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length, padding=True)
         with torch.no_grad():
             outputs = self.model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
+        logits = outputs.get("logits") # Use .get for safety
+        if logits is None:
+            raise ValueError("Model output did not contain 'logits'. Check model's forward pass.")
         prob = torch.sigmoid(logits).item()
         return {"sentiment": "positive" if prob > 0.5 else "negative", "confidence": prob}

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c95a2ef6b7a06191e4db8fe7f5975f7c8228ec9754d5222ffb3984b6b48010a
-size 1802582665

 version https://git-lfs.github.com/spec/v1
+oid sha256:106c846a077b9a1c445b0fa4b5d490d5e58a81270399da15064a4ee3a3b7e1ec
+size 600856675