import pytest from unittest.mock import MagicMock import markdown from bs4 import BeautifulSoup from compliance_checks import ( ComplianceSuite, ModelProviderIdentityCheck, ModelProviderIdentityResult, IntendedPurposeCheck, IntendedPurposeResult, GeneralLimitationsCheck, GeneralLimitationsResult, ComputationalRequirementsCheck, ComputationalRequirementsResult, ) expected_infrastructure = """\ Jean Zay Public Supercomputer, provided by the French government.\ Hardware\ 384 A100 80GB GPUs (48 nodes)\ Software\ Megatron-DeepSpeed (Github link)\ """ class TestComplianceCheck: @pytest.fixture def provider_identity_model_card(self): return """ # Model Card for Sample Model Some random info... ## Model Details ### Model Description - **Developed by:** Nima Boscarino - **Model type:** Yada yada yada """ @pytest.fixture def bad_provider_identity_model_card(self): return """ # Model Card for Sample Model Some random info... ## Model Details ### Model Description - **Developed by:** [More Information Needed] - **Model type:** Yada yada yada """ @pytest.fixture def intended_purpose_model_card(self): return """ # Model Card for Sample Model Some random info... ## Uses ### Direct Use Here is some info about direct uses... ### Downstream Use [optional] [More Information Needed] ### Out-of-Scope Use Here is some info about out-of-scope uses... ## Bias, Risks, and Limitations [More Information Needed] """ @pytest.fixture def bad_intended_purpose_model_card(self): return """ # Model Card for Sample Model Some random info... ## Uses ### Direct Use [More Information Needed] ### Downstream Use [optional] [More Information Needed] ### Out-of-Scope Use [More Information Needed] ## Bias, Risks, and Limitations [More Information Needed] """ @pytest.fixture def general_limitations_model_card(self): return """ # Model Card for Sample Model ## Some Random Header ## Bias, Risks, and Limitations Hello world! These are some risks... ## More Things """ @pytest.fixture def bad_general_limitations_model_card(self): return """ # Model Card for Sample Model ## Some Random Header ## Bias, Risks, and Limitations [More Information Needed] ## More Things """ @pytest.fixture def computational_requirements_model_card(self): # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md return """ # Model Card for Sample Model ## Some Random Header ## Technical Specifications ### Compute infrastructure Jean Zay Public Supercomputer, provided by the French government. #### Hardware * 384 A100 80GB GPUs (48 nodes) #### Software * Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed)) ## Intended Use Etc.. """ @pytest.fixture def bad_computational_requirements_model_card(self): # Adapted from: https://huggingface.co/bigscience/bloom/blob/main/README.md return """ # Model Card for Sample Model ## Some Random Header ## Technical Specifications ### Compute infrastructure [More Information Needed] ## Intended Use Etc.. """ @pytest.mark.parametrize("check,card,expected", [ (ModelProviderIdentityCheck(), "provider_identity_model_card", ModelProviderIdentityResult( status=True, provider="Nima Boscarino", )), (ModelProviderIdentityCheck(), "bad_provider_identity_model_card", ModelProviderIdentityResult()), (IntendedPurposeCheck(), "intended_purpose_model_card", IntendedPurposeResult( status=True, direct_use="Here is some info about direct uses...", downstream_use=None, out_of_scope_use="Here is some info about out-of-scope uses...", )), (IntendedPurposeCheck(), "bad_intended_purpose_model_card", IntendedPurposeResult()), (GeneralLimitationsCheck(), "general_limitations_model_card", GeneralLimitationsResult( status=True, limitations="Hello world! These are some risks..." )), (GeneralLimitationsCheck(), "bad_general_limitations_model_card", GeneralLimitationsResult()), (ComputationalRequirementsCheck(), "computational_requirements_model_card", ComputationalRequirementsResult( status=True, requirements=expected_infrastructure, )), (ComputationalRequirementsCheck(), "bad_computational_requirements_model_card", ComputationalRequirementsResult()), ]) def test_run_checks(self, check, card, expected, request): card = request.getfixturevalue(card) model_card_html = markdown.markdown(card) card_soup = BeautifulSoup(model_card_html, features="html.parser") results = check.run_check(card_soup) assert results == expected class TestComplianceSuite: @pytest.fixture def mock_compliance_check(self): mockComplianceCheck = MagicMock() mockComplianceCheck.run_check = MagicMock(return_value=True) return mockComplianceCheck @pytest.fixture def empty_compliance_suite(self): return ComplianceSuite( checks=[] ) @pytest.fixture def compliance_suite(self, mock_compliance_check): return ComplianceSuite( checks=[mock_compliance_check] ) @pytest.fixture def empty_compliance_results(self): return [] @pytest.fixture def compliance_results(self): return [True] def test_create_empty_compliance_suite(self, empty_compliance_suite): assert len(empty_compliance_suite.checks) == 0 def test_create_compliance_suite(self, compliance_suite): assert len(compliance_suite.checks) == 1 @pytest.mark.parametrize("suite,results", [ ("empty_compliance_suite", "empty_compliance_results"), ("compliance_suite", "compliance_results") ]) def test_run_compliance_suite(self, suite, results, request): suite: ComplianceSuite = request.getfixturevalue(suite) results: list = request.getfixturevalue(results) assert suite.run("") == results for check in suite.checks: check.run_check.assert_called_once() class TestEndToEnd: @pytest.mark.parametrize("card,fixture", [ (""" # Model Card for Sample Model Some random info... ## Model Details ### Model Description - **Developed by:** Nima Boscarino - **Model type:** Yada yada yada ## Uses ### Direct Use Here is some info about direct uses... ### Downstream Use [optional] [More Information Needed] ### Out-of-Scope Use Here is some info about out-of-scope uses... ## Bias, Risks, and Limitations Hello world! These are some risks... ## Technical Specifications ### Compute infrastructure Jean Zay Public Supercomputer, provided by the French government. #### Hardware * 384 A100 80GB GPUs (48 nodes) #### Software * Megatron-DeepSpeed ([Github link](https://github.com/bigscience-workshop/Megatron-DeepSpeed)) ## More Things """, False), ("bloom_card", True) ]) def test_end_to_end_compliance_suite(self, card, fixture, request): if fixture: card = request.getfixturevalue(card) suite = ComplianceSuite(checks=[ ModelProviderIdentityCheck(), IntendedPurposeCheck(), GeneralLimitationsCheck(), ComputationalRequirementsCheck() ]) results = suite.run(card) assert all([r.status for r in results])