Spaces:

society-ethics
/

model-card-regulatory-check

Running

App Files Files Community

NimaBoscarino committed on Mar 17, 2023

Commit

f8c21da

•

1 Parent(s): 490bc75

WIP: Slight refactor, IntendedPurposeCheck w/ edge cases

Browse files

Files changed (6) hide show

compliance_checks/__init__.py +7 -0
compliance_checks.py → compliance_checks/base.py +4 -53
compliance_checks/intended_purpose.py +57 -0
tests/conftest.py +5 -2
tests/test_compliance_checks.py +2 -4
tests/test_intended_purpose_check.py +46 -50

compliance_checks/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from compliance_checks.base import (
+    ComplianceSuite,
+)
+from compliance_checks.intended_purpose import (
+    IntendedPurposeCheck, IntendedPurposeResult
+)

compliance_checks.py → compliance_checks/base.py RENAMED Viewed

@@ -5,7 +5,7 @@ import markdown
 from bs4 import BeautifulSoup, Comment
-def walk_to_next_heading(card, heading, heading_text):
     stop_at = [heading, f"h{int(heading[1]) - 1}"]
     try:
@@ -22,11 +22,11 @@ def walk_to_next_heading(card, heading, heading_text):
             sibling = next(sibling_gen, None)
         if content.strip() == "[More Information Needed]":
-            return False, None
-        return True, content
     except AttributeError:
-        return False, None
 class ComplianceResult(ABC):
@@ -94,55 +94,6 @@ class ModelProviderIdentityCheck(ComplianceCheck):
             return ModelProviderIdentityResult()
-class IntendedPurposeResult(ComplianceResult):
-    name = "Intended Purpose"
-    def __init__(
-            self,
-            direct_use: str = None,
-            downstream_use: str = None,
-            out_of_scope_use: str = None,
-            *args,
-            **kwargs,
-    ):
-        super().__init__(*args, **kwargs)
-        self.direct_use = direct_use
-        self.downstream_use = downstream_use
-        self.out_of_scope_use = out_of_scope_use
-    def __eq__(self, other):
-        if isinstance(other, IntendedPurposeResult):
-            if super().__eq__(other):
-                try:
-                    assert self.direct_use == other.direct_use
-                    assert self.downstream_use == other.downstream_use
-                    assert self.out_of_scope_use == other.out_of_scope_use
-                    return True
-                except AssertionError:
-                    return False
-        else:
-            return False
-    def to_string(self):
-        return str((self.direct_use, self.direct_use, self.out_of_scope_use))
-class IntendedPurposeCheck(ComplianceCheck):
-    name = "Intended Purpose"
-    def run_check(self, card: BeautifulSoup):
-        direct_use_check, direct_use_content = walk_to_next_heading(card, "h3", "Direct Use")
-        # TODO: Handle [optional], which doesn't exist in BLOOM, e.g.
-        downstream_use_check, downstream_use_content = walk_to_next_heading(card, "h3", "Downstream Use [optional]")
-        out_of_scope_use_check, out_of_scope_use_content = walk_to_next_heading(card, "h3", "Out-of-Scope Use")
-        return IntendedPurposeResult(
-            status=direct_use_check and out_of_scope_use_check,
-            direct_use=direct_use_content,
-            downstream_use=downstream_use_content,
-            out_of_scope_use=out_of_scope_use_content
-        )
 class GeneralLimitationsResult(ComplianceResult):
     name = "General Limitations"

 from bs4 import BeautifulSoup, Comment
+def walk_to_next_heading(card, heading, heading_text) -> bool:
     stop_at = [heading, f"h{int(heading[1]) - 1}"]
     try:
             sibling = next(sibling_gen, None)
         if content.strip() == "[More Information Needed]":
+            return False  # , None
+        return True  # , content
     except AttributeError:
+        return False  # , None
 class ComplianceResult(ABC):
             return ModelProviderIdentityResult()
 class GeneralLimitationsResult(ComplianceResult):
     name = "General Limitations"

compliance_checks/intended_purpose.py ADDED Viewed

	@@ -0,0 +1,57 @@

+from compliance_checks.base import ComplianceResult, ComplianceCheck, walk_to_next_heading
+from bs4 import BeautifulSoup
+class IntendedPurposeResult(ComplianceResult):
+    """Outcome of the "Intended Purpose" check on a model card.
+
+    Stores the optional text captured for each usage section. Any other
+    positional/keyword arguments (e.g. the ``status`` keyword passed by
+    ``IntendedPurposeCheck``) are forwarded untouched to ``ComplianceResult``.
+    """
+    name = "Intended Purpose"
+    def __init__(
+            self,
+            direct_use: str = None,
+            downstream_use: str = None,
+            out_of_scope_use: str = None,
+            *args,
+            **kwargs,
+    ):
+        """Record the per-section text and delegate the rest to the base class.
+
+        Args:
+            direct_use: Text of the direct-use section, or None if absent.
+            downstream_use: Text of the downstream-use section, or None.
+            out_of_scope_use: Text of the out-of-scope-use section, or None.
+            *args: Forwarded to ``ComplianceResult.__init__``.
+            **kwargs: Forwarded to ``ComplianceResult.__init__``.
+        """
+        super().__init__(*args, **kwargs)
+        self.direct_use = direct_use
+        self.downstream_use = downstream_use
+        self.out_of_scope_use = out_of_scope_use
+    def __eq__(self, other):
+        """Return True when ``other`` is an equal ``IntendedPurposeResult``.
+
+        Equality is currently decided by ``ComplianceResult.__eq__`` alone.
+        Every path returns a real bool; previously a failed base comparison
+        fell off the end of the method and returned None.
+        """
+        if not isinstance(other, IntendedPurposeResult):
+            return False
+        # TODO: Either compare direct_use / downstream_use / out_of_scope_use
+        # here as well, or drop them from the equality contract entirely.
+        if super().__eq__(other):
+            return True
+        return False
+    def to_string(self):
+        """Return the three usage fields rendered as a stringified tuple."""
+        # The middle element is downstream_use; it used to repeat direct_use.
+        return str((self.direct_use, self.downstream_use, self.out_of_scope_use))
+class IntendedPurposeCheck(ComplianceCheck):
+    """Compliance check that looks for an intended-use section in a model card."""
+    name = "Intended Purpose"
+    def run_check(self, card: BeautifulSoup):
+        """Probe ``card`` for any of the known intended-use headings.
+
+        Heading level/text pairs are tried in order and probing stops at the
+        first one for which ``walk_to_next_heading`` returns a truthy value.
+
+        Args:
+            card: Parsed model card to inspect.
+
+        Returns:
+            ``IntendedPurposeResult(status=True)`` if a heading matched,
+            otherwise a default-constructed ``IntendedPurposeResult``.
+        """
+        heading_variants = (
+            ("h2", "Intended uses & limitations"),
+            ("h1", "Uses"),
+            ("h2", "Uses"),
+            ("h2", "Model Use"),
+            ("h2", "Intended uses"),
+        )
+        # any() short-circuits, so later variants are not probed after a hit.
+        section_found = any(
+            walk_to_next_heading(card, level, text)
+            for level, text in heading_variants
+        )
+        if section_found:
+            return IntendedPurposeResult(status=True)
+        return IntendedPurposeResult()

tests/conftest.py CHANGED Viewed

@@ -5,6 +5,9 @@ from pathlib import Path
 # TODO: I have the option of maybe making a check for accuracy/metrics?
 # Intended Purpose, General Limitations, Computational Requirements
 expected_check_results = {
     "albert-base-v2": [True, True, False],
@@ -22,7 +25,7 @@ expected_check_results = {
     "gpt2": [True, True, False],
     "Helsinki-NLP___opus-mt-en-es": [False, False, False],
     "jonatasgrosman___wav2vec2-large-xlsr-53-english": [False, False, False],
-    "microsoft___layoutlmv3-base": [True, False, False],
     "openai___clip-vit-base-patch32": [True, True, False],
     "openai___clip-vit-large-patch14": [True, True, False],
     "philschmid___bart-large-cnn-samsum": [False, False, False],
@@ -36,7 +39,7 @@ expected_check_results = {
     "t5-small": [True, False, False],
     "xlm-roberta-base": [True, False, False],
     "xlm-roberta-large": [True, False, False],
-    "yiyanghkust___finbert-tone": [True, False, False],
 }

 # TODO: I have the option of maybe making a check for accuracy/metrics?
+# NOTE: some cards are marked False rather than True because the relevant
+# information sits somewhere non-standard, e.g. it is only described in prose.
 # Intended Purpose, General Limitations, Computational Requirements
 expected_check_results = {
     "albert-base-v2": [True, True, False],
     "gpt2": [True, True, False],
     "Helsinki-NLP___opus-mt-en-es": [False, False, False],
     "jonatasgrosman___wav2vec2-large-xlsr-53-english": [False, False, False],
+    "microsoft___layoutlmv3-base": [False, False, False],
     "openai___clip-vit-base-patch32": [True, True, False],
     "openai___clip-vit-large-patch14": [True, True, False],
     "philschmid___bart-large-cnn-samsum": [False, False, False],
     "t5-small": [True, False, False],
     "xlm-roberta-base": [True, False, False],
     "xlm-roberta-large": [True, False, False],
+    "yiyanghkust___finbert-tone": [False, False, False],
 }

tests/test_compliance_checks.py CHANGED Viewed

@@ -4,8 +4,6 @@ from unittest.mock import MagicMock
 from compliance_checks import (
     ComplianceSuite,
     IntendedPurposeCheck,
-    GeneralLimitationsCheck,
-    ComputationalRequirementsCheck,
 )
@@ -59,8 +57,8 @@ class TestComplianceSuite:
 def test_end_to_end_compliance_suite(real_model_card, expected_check_results):
     suite = ComplianceSuite(checks=[
         IntendedPurposeCheck(),
-        GeneralLimitationsCheck(),
-        ComputationalRequirementsCheck()
     ])
     results = suite.run(real_model_card)

 from compliance_checks import (
     ComplianceSuite,
     IntendedPurposeCheck,
 )
 def test_end_to_end_compliance_suite(real_model_card, expected_check_results):
     suite = ComplianceSuite(checks=[
         IntendedPurposeCheck(),
+        # GeneralLimitationsCheck(),
+        # ComputationalRequirementsCheck()
     ])
     results = suite.run(real_model_card)

tests/test_intended_purpose_check.py CHANGED Viewed

@@ -2,18 +2,12 @@ import pytest
 import markdown
 from bs4 import BeautifulSoup
-from compliance_checks import (
     IntendedPurposeCheck, IntendedPurposeResult,
 )
-@pytest.fixture
-def intended_purpose_model_card():
-    return """
-# Model Card for Sample Model
-Some random info...
 ## Uses
 <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
@@ -33,67 +27,69 @@ Here is some info about direct uses...
 <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
 Here is some info about out-of-scope uses...
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-[More Information Needed]
-    """
-@pytest.fixture
-def bad_intended_purpose_model_card():
-    return """
-# Model Card for Sample Model
-Some random info...
 ## Uses
-<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
-### Direct Use
-<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
-[More Information Needed]
-### Downstream Use [optional]
-<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
-[More Information Needed]
-### Out-of-Scope Use
-<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
-[More Information Needed]
-## Bias, Risks, and Limitations
-<!-- This section is meant to convey both technical and sociotechnical limitations. -->
-[More Information Needed]
-    """
-@pytest.mark.parametrize("check,card,expected", [
-    (IntendedPurposeCheck(), "intended_purpose_model_card", IntendedPurposeResult(
-        status=True,
-        direct_use="Here is some info about direct uses...",
-        downstream_use=None,
-        out_of_scope_use="Here is some info about out-of-scope uses...",
-    )),
-    (IntendedPurposeCheck(), "bad_intended_purpose_model_card", IntendedPurposeResult()),
 ])
-def test_run_checks(check, card, expected, request):
-    card = request.getfixturevalue(card)
     model_card_html = markdown.markdown(card)
     card_soup = BeautifulSoup(model_card_html, features="html.parser")
-    results = check.run_check(card_soup)
-    assert results == expected

 import markdown
 from bs4 import BeautifulSoup
+from compliance_checks.intended_purpose import (
     IntendedPurposeCheck, IntendedPurposeResult,
 )
+model_card_template = """\
 ## Uses
 <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
 <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
 Here is some info about out-of-scope uses...
+"""
+albert_base_v2 = """\
+# ALBERT Base v2
+## Intended uses & limitations
+Here is some info about direct uses...
+"""
+distilbert_base_cased_distilled_squad = """\
+# DistilBERT base cased distilled SQuAD
 ## Uses
+This model can be used for question answering.
+"""
+distilroberta_base = """\
+# Model Card for DistilRoBERTa base
+# Uses
+You can use the raw model for masked language modeling, but it's mostly intended to be fine-tuned on a downstream task.
+"""
+openai_clip_vit_base_patch = """\
+# Model Card: CLIP
+## Model Use
+Stuff.
+### Intended Use
+Stuff.
+#### Primary intended uses
+Stuff.
+### Out-of-Scope Use Cases
+Stuff.
+"""
+sentence_transformers = """\
+# all-MiniLM-L6-v2
+## Intended uses
+Our model is intented to be used as a sentence and short paragraph encoder.
+"""
+success_result = IntendedPurposeResult(
+    status=True
+)
+@pytest.mark.parametrize("card", [
+    model_card_template,
+    albert_base_v2,
+    distilbert_base_cased_distilled_squad,
+    distilroberta_base,
+    openai_clip_vit_base_patch,
+    sentence_transformers,
 ])
+def test_run_checks(card):
     model_card_html = markdown.markdown(card)
     card_soup = BeautifulSoup(model_card_html, features="html.parser")
+    results = IntendedPurposeCheck().run_check(card_soup)
+    assert results == success_result