|
from abc import ABC, abstractmethod |
|
from typing import Optional, List |
|
|
|
import markdown |
|
from bs4 import BeautifulSoup, Comment |
|
|
|
|
|
def walk_to_next_heading(card, heading, heading_text):
    """Collect the text of the section that follows a given heading.

    Finds the first ``heading`` tag in ``card`` whose string equals
    ``heading_text`` and concatenates the stripped text of every following
    sibling until a heading of the same level, or of the level directly
    above it, is reached. HTML comments are skipped.

    Args:
        card: Parsed model card; must offer a BeautifulSoup-style
            ``find(name, string=...)``.
        heading: Tag name of the heading to look for, e.g. ``"h3"``. Its
            second character must be the level digit.
        heading_text: Exact text of the heading.

    Returns:
        ``(True, content)`` when the heading exists and its section is not
        the ``[More Information Needed]`` placeholder, otherwise
        ``(False, None)``. A section without any text yields ``(True, "")``;
        only the literal placeholder is rejected.
    """
    # A section ends at the next heading of the same level or one level up.
    stop_at = (heading, f"h{int(heading[1]) - 1}")

    try:
        heading_node = card.find(heading, string=heading_text)

        parts = []
        # Iterating `next_siblings` directly copes with a heading that has no
        # following siblings (no StopIteration) and does not end early on a
        # falsy node such as an empty string.
        for sibling in heading_node.next_siblings:
            # Text nodes have `name` None, which is never in `stop_at`.
            if sibling.name in stop_at:
                break
            if not isinstance(sibling, Comment):
                parts.append(sibling.text.strip())
        content = "".join(parts)

        if content.strip() == "[More Information Needed]":
            return False, None

        return True, content
    except AttributeError:
        # `find` returned None (heading missing) or a node lacked an
        # attribute used above.
        return False, None
|
|
|
|
|
class ComplianceResult(ABC):
    """Base class for the outcome of a single compliance check.

    Subclasses override ``name`` and implement ``to_string``.
    """

    # Human-readable label of the check; overridden by subclasses.
    name: str = None

    def __init__(self, status: Optional[bool] = False, *args, **kwargs):
        """Store the outcome flag.

        Args:
            status: Outcome of the check; defaults to ``False``.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.
        """
        self.status = status

    def __eq__(self, other):
        """Return True when ``other`` carries an equal ``status``.

        Objects without a ``status`` attribute compare unequal instead of
        raising ``AttributeError``.
        """
        if not hasattr(other, "status"):
            return False
        # Plain comparison rather than `assert`: assertions are stripped
        # under `python -O`, which would make every comparison succeed.
        return bool(self.status == other.status)

    @abstractmethod
    def to_string(self):
        """Return a textual representation of the result."""
        return "Not Implemented"
|
|
|
|
|
class ComplianceCheck(ABC):
    """Interface implemented by every model card compliance check."""

    # Human-readable label of the check; overridden by subclasses.
    name: str = None

    @abstractmethod
    def run_check(self, card: BeautifulSoup) -> ComplianceResult:
        """Evaluate ``card`` and report the outcome.

        Args:
            card: Parsed model card.

        Returns:
            The result of the check.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError()
|
|
|
|
|
class ModelProviderIdentityResult(ComplianceResult):
    """Result of the model provider identity check."""

    name = "Model Provider Identity"

    def __init__(self, provider: str = None, *args, **kwargs):
        """Store the provider and forward the rest to the base class.

        Args:
            provider: Provider text found in the card, if any.
            *args: Forwarded to ``ComplianceResult.__init__``.
            **kwargs: Forwarded to ``ComplianceResult.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.provider = provider

    def __eq__(self, other):
        """Return True for a same-type result with equal status and provider.

        Always returns a bool. Plain comparisons are used instead of
        ``assert``, which is stripped under ``python -O``.
        """
        if not isinstance(other, ModelProviderIdentityResult):
            return False
        if not super().__eq__(other):
            return False
        return bool(self.provider == other.provider)

    def to_string(self):
        """Return the provider converted with ``str``."""
        return str(self.provider)
|
|
|
|
|
class ModelProviderIdentityCheck(ComplianceCheck):
    """Looks for the provider named after the "Developed by:" label."""

    name = "Model Provider Identity"

    def run_check(self, card: BeautifulSoup):
        """Extract the text that follows the bold "Developed by:" label.

        Args:
            card: Parsed model card.

        Returns:
            A ``ModelProviderIdentityResult`` with ``status=True`` and the
            extracted text, or a default result when the label is missing
            or the text is the ``[More Information Needed]`` placeholder.
        """
        try:
            label = card.find("strong", string="Developed by:")
            # Everything after the label inside its parent is the provider.
            provider_text = "".join(map(str, label.next_siblings)).strip()
            if provider_text != "[More Information Needed]":
                return ModelProviderIdentityResult(status=True, provider=provider_text)
        except AttributeError:
            # `find` returned None: the label is not in the card.
            pass
        return ModelProviderIdentityResult()
|
|
|
|
|
class IntendedPurposeResult(ComplianceResult):
    """Result of the intended purpose check."""

    name = "Intended Purpose"

    def __init__(
        self,
        direct_use: str = None,
        downstream_use: str = None,
        out_of_scope_use: str = None,
        *args,
        **kwargs,
    ):
        """Store the three use sections and forward the rest to the base.

        Args:
            direct_use: Text of the direct use section, if any.
            downstream_use: Text of the downstream use section, if any.
            out_of_scope_use: Text of the out-of-scope use section, if any.
            *args: Forwarded to ``ComplianceResult.__init__``.
            **kwargs: Forwarded to ``ComplianceResult.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.direct_use = direct_use
        self.downstream_use = downstream_use
        self.out_of_scope_use = out_of_scope_use

    def __eq__(self, other):
        """Return True for a same-type result with equal status and sections.

        Always returns a bool. Plain comparisons are used instead of
        ``assert``, which is stripped under ``python -O``.
        """
        if not isinstance(other, IntendedPurposeResult):
            return False
        if not super().__eq__(other):
            return False
        return bool(
            self.direct_use == other.direct_use
            and self.downstream_use == other.downstream_use
            and self.out_of_scope_use == other.out_of_scope_use
        )

    def to_string(self):
        """Return the three sections as the ``str`` of a tuple."""
        # Previously `direct_use` was repeated and `downstream_use` omitted.
        return str((self.direct_use, self.downstream_use, self.out_of_scope_use))
|
|
|
|
|
class IntendedPurposeCheck(ComplianceCheck):
    """Checks the direct, downstream and out-of-scope use sections."""

    name = "Intended Purpose"

    def run_check(self, card: BeautifulSoup):
        """Read the three use sections found under ``h3`` headings.

        Args:
            card: Parsed model card.

        Returns:
            An ``IntendedPurposeResult`` whose status requires the direct
            use and out-of-scope use sections; the downstream use section
            is reported but does not affect the status.
        """
        headings = {
            "direct_use": "Direct Use",
            "downstream_use": "Downstream Use [optional]",
            "out_of_scope_use": "Out-of-Scope Use",
        }

        found = {}
        texts = {}
        for field, title in headings.items():
            found[field], texts[field] = walk_to_next_heading(card, "h3", title)

        return IntendedPurposeResult(
            status=found["direct_use"] and found["out_of_scope_use"],
            **texts,
        )
|
|
|
|
|
class GeneralLimitationsResult(ComplianceResult):
    """Result of the general limitations check."""

    name = "General Limitations"

    def __init__(
        self,
        limitations: str = None,
        *args,
        **kwargs,
    ):
        """Store the limitations text and forward the rest to the base.

        Args:
            limitations: Text of the limitations section, if any.
            *args: Forwarded to ``ComplianceResult.__init__``.
            **kwargs: Forwarded to ``ComplianceResult.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.limitations = limitations

    def __eq__(self, other):
        """Return True for a same-type result with equal status and text.

        Always returns a bool. Plain comparisons are used instead of
        ``assert``, which is stripped under ``python -O``.
        """
        if not isinstance(other, GeneralLimitationsResult):
            return False
        if not super().__eq__(other):
            return False
        return bool(self.limitations == other.limitations)

    def to_string(self):
        """Return the stored limitations unchanged (``None`` when unset)."""
        return self.limitations
|
|
|
|
|
class GeneralLimitationsCheck(ComplianceCheck):
    """Checks the "Bias, Risks, and Limitations" section."""

    name = "General Limitations"

    def run_check(self, card: BeautifulSoup):
        """Read the section under the ``h2`` limitations heading.

        Args:
            card: Parsed model card.

        Returns:
            A ``GeneralLimitationsResult`` carrying the flag and text
            reported by ``walk_to_next_heading``.
        """
        outcome = walk_to_next_heading(card, "h2", "Bias, Risks, and Limitations")
        section_found, section_text = outcome
        return GeneralLimitationsResult(status=section_found, limitations=section_text)
|
|
|
|
|
class ComputationalRequirementsResult(ComplianceResult):
    """Result of the computational requirements check."""

    name = "Computational Requirements"

    def __init__(
        self,
        requirements: str = None,
        *args,
        **kwargs,
    ):
        """Store the requirements text and forward the rest to the base.

        Args:
            requirements: Text of the compute section, if any.
            *args: Forwarded to ``ComplianceResult.__init__``.
            **kwargs: Forwarded to ``ComplianceResult.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.requirements = requirements

    def __eq__(self, other):
        """Return True for a same-type result with equal status and text.

        Always returns a bool. Plain comparisons are used instead of
        ``assert``, which is stripped under ``python -O``.
        """
        if not isinstance(other, ComputationalRequirementsResult):
            return False
        if not super().__eq__(other):
            return False
        return bool(self.requirements == other.requirements)

    def to_string(self):
        """Return the stored requirements unchanged (``None`` when unset)."""
        return self.requirements
|
|
|
|
|
class ComputationalRequirementsCheck(ComplianceCheck):
    """Checks the "Compute infrastructure" section."""

    name = "Computational Requirements"

    def run_check(self, card: BeautifulSoup):
        """Read the section under the ``h3`` compute infrastructure heading.

        Args:
            card: Parsed model card.

        Returns:
            A ``ComputationalRequirementsResult`` carrying the flag and text
            reported by ``walk_to_next_heading``.
        """
        outcome = walk_to_next_heading(card, "h3", "Compute infrastructure")
        section_found, section_text = outcome
        return ComputationalRequirementsResult(status=section_found, requirements=section_text)
|
|
|
|
|
class ComplianceSuite:
    """Runs a collection of compliance checks against one model card."""

    def __init__(self, checks):
        """Keep the checks to run.

        Args:
            checks: Iterable of objects exposing ``run_check(card)``.
        """
        self.checks = checks

    def run(self, model_card) -> List[ComplianceResult]:
        """Render the card to HTML, parse it once and run every check on it.

        Args:
            model_card: Card source handed to ``markdown.markdown``.

        Returns:
            One result per check, in the order of ``self.checks``.
        """
        soup = BeautifulSoup(markdown.markdown(model_card), features="html.parser")
        return [check.run_check(soup) for check in self.checks]
|
|