Spaces:

stacklok
/

llm_security_leaderboard

Running

File size: 2,768 Bytes

b535ea0
 
 
c222ee6
b535ea0
 
 
 
 
 
 
 
 
 
c222ee6
be6e576
 
 
 
b535ea0
c222ee6
 
b535ea0
 
 
 
be6e576
b535ea0
 
 
be6e576
 
 
b535ea0
 
be6e576
b535ea0
 
be6e576
 
 
 
 
 
 
 
 
 
 
 
b535ea0
 
 
be6e576
 
 
 
 
 
 
 
 
 
 
 
b535ea0
be6e576
 
b535ea0
 
 
 
 
 
 
 
be6e576
b535ea0
 
be6e576
 
 
 
 
b535ea0

from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    # Safetensors check
    safetensors = Task("safetensors_check", "compliant", "Safetensors")
    # Security prompts evaluation
    secure_coding = Task("secure_coding", "security_score", "Security Score ⬆️")


NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------


# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">Secure-Code Leaderboard</h1>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
This leaderboard evaluates language models based on two key security aspects:
1. **Safetensors Compliance**: Checks if models use the safer safetensors format for weight storage
2. **Secure Coding Evaluation**: Tests models against a series of security-focused prompts to assess their ability to generate secure code and provide security-aware responses
"""
# Which evaluations are you running? how can people reproduce what you have?
LLM_BENCHMARKS_TEXT = """
## How it works

### Safetensors Check
Models are evaluated for their use of the safetensors format, which provides:
- Memory safety
- Faster loading times
- Better security guarantees

### Secure Coding Evaluation
Models are tested against a comprehensive suite of security-focused prompts that assess:
- Secure coding practices
- Security vulnerability awareness
- Input validation handling
- Security best practices knowledge
"""

EVALUATION_QUEUE_TEXT = """
## Requirements for Model Submission

### 1) Safetensors Format
Your model should use the safetensors format. To convert your model:
```python
from transformers import AutoModelForCausalLM
from safetensors.torch import save_file

model = AutoModelForCausalLM.from_pretrained("your-model")
state_dict = model.state_dict()
save_file(state_dict, "model.safetensors")
```

### 2) Model Loading Requirements
Ensure your model can be loaded using standard AutoClasses:
```python
from transformers import AutoConfig, AutoModel, AutoTokenizer
config = AutoConfig.from_pretrained("your model name", revision=revision)
model = AutoModel.from_pretrained("your model name", revision=revision)
tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
```
"""


CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@misc{security-llm-leaderboard,
    title={Secure-Code Leaderboard},
    year={2025},
    note={Online resource for evaluating LLM security aspects}
}
"""