|
import pytest |
|
from unittest.mock import MagicMock |
|
|
|
import markdown |
|
from bs4 import BeautifulSoup |
|
from compliance_checks import ( |
|
ComplianceSuite, |
|
ModelProviderIdentityCheck, ModelProviderIdentityResult, |
|
IntendedPurposeCheck, IntendedPurposeResult, |
|
GeneralLimitationsCheck, GeneralLimitationsResult, |
|
ComputationalRequirementsCheck, ComputationalRequirementsResult, |
|
) |
|
|
|
|
|
# Expected ``requirements`` value for the sample card's "Compute
# infrastructure" section: the section's paragraph, sub-headings and list
# items run together with no separator between them.
#
# Spelled as adjacent string literals instead of backslash-continued lines
# inside a triple-quoted string: a continuation silently stops joining lines
# as soon as any character (even trailing whitespace) follows the backslash.
expected_infrastructure = (
    "Jean Zay Public Supercomputer, provided by the French government."
    "Hardware"
    "384 A100 80GB GPUs (48 nodes)"
    "Software"
    "Megatron-DeepSpeed (Github link)"
)
|
|
|
|
|
class TestComplianceCheck:
    """Run each compliance check against a filled-in and a placeholder-only card."""

    @pytest.fixture
    def provider_identity_model_card(self):
        """Card whose "Developed by" entry names a provider."""
        return """
# Model Card for Sample Model

Some random info...

## Model Details

### Model Description

<!-- Provide a longer summary of what this model is. -->

- **Developed by:** Nima Boscarino
- **Model type:** Yada yada yada
"""

    @pytest.fixture
    def bad_provider_identity_model_card(self):
        """Card whose "Developed by" entry is still the template placeholder."""
        return """
# Model Card for Sample Model

Some random info...

## Model Details

### Model Description

- **Developed by:** [More Information Needed]
- **Model type:** Yada yada yada
"""

    @pytest.fixture
    def intended_purpose_model_card(self):
        """Card with Direct Use and Out-of-Scope Use filled in; Downstream Use is a placeholder."""
        return """
# Model Card for Sample Model

Some random info...

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

Here is some info about direct uses...

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

Here is some info about out-of-scope uses...

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]
"""

    @pytest.fixture
    def bad_intended_purpose_model_card(self):
        """Card whose three "Uses" subsections are all template placeholders."""
        return """
# Model Card for Sample Model

Some random info...

## Uses

<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

### Direct Use

<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

[More Information Needed]

### Downstream Use [optional]

<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->

[More Information Needed]

### Out-of-Scope Use

<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->

[More Information Needed]

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]
"""

    @pytest.fixture
    def general_limitations_model_card(self):
        """Card with text under "Bias, Risks, and Limitations"."""
        return """
# Model Card for Sample Model

## Some Random Header

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

Hello world! These are some risks...

## More Things
"""

    @pytest.fixture
    def bad_general_limitations_model_card(self):
        """Card whose "Bias, Risks, and Limitations" section is a placeholder."""
        return """
# Model Card for Sample Model

## Some Random Header

## Bias, Risks, and Limitations

<!-- This section is meant to convey both technical and sociotechnical limitations. -->

[More Information Needed]

## More Things
"""

    @pytest.fixture
    def computational_requirements_model_card(self):
        """Card with a populated "Compute infrastructure" section."""
        return """
# Model Card for Sample Model

## Some Random Header

## Technical Specifications

### Compute infrastructure
Jean Zay Public Supercomputer, provided by the French government.

#### Hardware

* 384 A100 80GB GPUs (48 nodes)

#### Software

* Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
</details>

## Intended Use

Etc..
"""

    @pytest.fixture
    def bad_computational_requirements_model_card(self):
        """Card whose "Compute infrastructure" section is a placeholder."""
        return """
# Model Card for Sample Model

## Some Random Header

## Technical Specifications

### Compute infrastructure
[More Information Needed]

## Intended Use

Etc..
"""

    @pytest.mark.parametrize(
        "check,card,expected",
        [
            (
                ModelProviderIdentityCheck(),
                "provider_identity_model_card",
                ModelProviderIdentityResult(status=True, provider="Nima Boscarino"),
            ),
            (
                ModelProviderIdentityCheck(),
                "bad_provider_identity_model_card",
                ModelProviderIdentityResult(),
            ),
            (
                IntendedPurposeCheck(),
                "intended_purpose_model_card",
                IntendedPurposeResult(
                    status=True,
                    direct_use="Here is some info about direct uses...",
                    downstream_use=None,
                    out_of_scope_use="Here is some info about out-of-scope uses...",
                ),
            ),
            (
                IntendedPurposeCheck(),
                "bad_intended_purpose_model_card",
                IntendedPurposeResult(),
            ),
            (
                GeneralLimitationsCheck(),
                "general_limitations_model_card",
                GeneralLimitationsResult(
                    status=True,
                    limitations="Hello world! These are some risks...",
                ),
            ),
            (
                GeneralLimitationsCheck(),
                "bad_general_limitations_model_card",
                GeneralLimitationsResult(),
            ),
            (
                ComputationalRequirementsCheck(),
                "computational_requirements_model_card",
                ComputationalRequirementsResult(
                    status=True,
                    requirements=expected_infrastructure,
                ),
            ),
            (
                ComputationalRequirementsCheck(),
                "bad_computational_requirements_model_card",
                ComputationalRequirementsResult(),
            ),
        ],
    )
    def test_run_checks(self, check, card, expected, request):
        """Render the named card fixture to HTML and compare the check's result.

        ``card`` arrives as a fixture *name*; it is resolved through
        ``request`` because parametrize arguments cannot be fixtures directly.
        """
        markdown_source = request.getfixturevalue(card)
        soup = BeautifulSoup(
            markdown.markdown(markdown_source),
            features="html.parser",
        )

        assert check.run_check(soup) == expected
|
|
|
|
|
class TestComplianceSuite:
    """Exercise ComplianceSuite construction and execution with a mocked check."""

    @pytest.fixture
    def mock_compliance_check(self):
        """Stand-in check whose ``run_check`` is a mock that returns ``True``."""
        check_double = MagicMock()
        check_double.run_check = MagicMock(return_value=True)
        return check_double

    @pytest.fixture
    def empty_compliance_suite(self):
        """Suite built with no checks."""
        return ComplianceSuite(checks=[])

    @pytest.fixture
    def compliance_suite(self, mock_compliance_check):
        """Suite built with the single mocked check."""
        return ComplianceSuite(checks=[mock_compliance_check])

    @pytest.fixture
    def empty_compliance_results(self):
        """Results expected from running the empty suite."""
        return []

    @pytest.fixture
    def compliance_results(self):
        """Results expected from running the one-check suite."""
        return [True]

    def test_create_empty_compliance_suite(self, empty_compliance_suite):
        """A suite built without checks exposes an empty ``checks`` collection."""
        assert len(empty_compliance_suite.checks) == 0

    def test_create_compliance_suite(self, compliance_suite):
        """A suite built with one check exposes exactly that one check."""
        assert len(compliance_suite.checks) == 1

    @pytest.mark.parametrize(
        "suite,results",
        [
            ("empty_compliance_suite", "empty_compliance_results"),
            ("compliance_suite", "compliance_results"),
        ],
    )
    def test_run_compliance_suite(self, suite, results, request):
        """Run the named suite on an empty card and verify output and call counts.

        ``suite`` and ``results`` are fixture names resolved through ``request``.
        """
        suite_under_test: ComplianceSuite = request.getfixturevalue(suite)
        expected_results: list = request.getfixturevalue(results)

        assert suite_under_test.run("") == expected_results

        # Every check held by the suite must have been invoked exactly once.
        for registered_check in suite_under_test.checks:
            registered_check.run_check.assert_called_once()
|
|
|
|
|
class TestEndToEnd:
    """Run the real checks together, through ComplianceSuite, on complete cards."""

    @pytest.mark.parametrize("card,fixture", [
        ("""
# Model Card for Sample Model

Some random info...

## Model Details

### Model Description

- **Developed by:** Nima Boscarino
- **Model type:** Yada yada yada

## Uses

### Direct Use

Here is some info about direct uses...

### Downstream Use [optional]

[More Information Needed]

### Out-of-Scope Use

Here is some info about out-of-scope uses...

## Bias, Risks, and Limitations

Hello world! These are some risks...

## Technical Specifications

### Compute infrastructure
Jean Zay Public Supercomputer, provided by the French government.

#### Hardware

* 384 A100 80GB GPUs (48 nodes)

#### Software

* Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed))
</details>

## More Things
""", False),
        ("bloom_card", True),
    ], ids=["inline_card", "bloom_card"])  # explicit ids: the default id would be the whole card text
    def test_end_to_end_compliance_suite(self, card, fixture, request):
        """Every check in the full suite must report a passing status.

        Args:
            card: Either the model card markdown itself, or the name of a
                fixture that provides it.
            fixture: ``True`` when ``card`` is a fixture name to resolve
                through ``request``; ``False`` when it is literal markdown.
            request: pytest's built-in request fixture.

        NOTE(review): the ``bloom_card`` fixture is not defined in this
        module; presumably it comes from a conftest - confirm.
        """
        if fixture:
            card = request.getfixturevalue(card)

        suite = ComplianceSuite(checks=[
            ModelProviderIdentityCheck(),
            IntendedPurposeCheck(),
            GeneralLimitationsCheck(),
            ComputationalRequirementsCheck(),
        ])

        results = suite.run(card)

        # ``all()`` over an empty sequence is vacuously true, so first make
        # sure the run actually produced one result per configured check.
        assert len(results) == len(suite.checks)

        # Name the failing result types so a failure says which check broke.
        failing = [type(result).__name__ for result in results if not result.status]
        assert not failing, f"checks reported a failing status: {failing}"
|
|