File size: 6,930 Bytes
11bd448
aae10fc
11bd448
 
 
 
 
b4f5e30
25bf2cc
 
b4f5e30
 
11bd448
b4f5e30
11bd448
b4f5e30
 
 
25bf2cc
b4f5e30
 
25bf2cc
11bd448
b4f5e30
 
11bd448
b4f5e30
 
 
11bd448
b4f5e30
aae10fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4f5e30
aae10fc
 
b4f5e30
 
25bf2cc
b4f5e30
 
aae10fc
 
 
 
 
b4f5e30
11bd448
 
aae10fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25bf2cc
aae10fc
 
25bf2cc
aae10fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25bf2cc
 
 
aae10fc
 
25bf2cc
aae10fc
 
 
 
 
 
25bf2cc
 
11bd448
 
 
 
aae10fc
11bd448
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
from abc import ABC, abstractmethod
from typing import Optional, List

import markdown
from bs4 import BeautifulSoup, Comment


def walk_to_next_heading(card, heading, heading_text):
    """Collect the text of the section that follows a given heading.

    Finds the first ``heading`` tag in ``card`` whose string equals
    ``heading_text`` and concatenates the stripped text of its following
    siblings until the section ends: at the next heading of the same or a
    higher level, or when there are no more siblings. HTML comments are
    skipped. Pieces are joined without a separator.

    Args:
        card: Parsed model card; any object exposing a BeautifulSoup-style
            ``find(name, string=...)``.
        heading: Tag name of the heading, of the form ``"h<digit>"``.
        heading_text: Exact text of the heading to look for.

    Returns:
        ``(True, content)`` when the section holds real content, otherwise
        ``(False, None)``: the heading is missing, the section is empty, or it
        only holds the ``[More Information Needed]`` placeholder.
    """
    level = int(heading[1])
    # A section is closed by any heading at the same level or above it.
    stop_at = {f"h{n}" for n in range(1, level + 1)}

    try:
        heading_node = card.find(heading, string=heading_text)

        parts = []
        # Iterating (instead of calling next() up front) copes with a heading
        # that is the very last node of the document.
        for sibling in heading_node.next_siblings:
            if sibling.name in stop_at:
                break
            if not isinstance(sibling, Comment):
                parts.append(sibling.text.strip())
    except AttributeError:
        # ``find`` returned None (no such heading) or a node lacks the
        # attributes used above: report the section as missing.
        return False, None

    content = "".join(parts).strip()

    # An empty section carries no more information than the placeholder.
    if not content or content == "[More Information Needed]":
        return False, None

    return True, content


class ComplianceResult(ABC):
    """Base class for the outcome of a single compliance check.

    Attributes:
        name: Display name of the check; subclasses override it.
        status: Whether the check passed (``False`` by default).
    """

    name: Optional[str] = None

    def __init__(self, status: Optional[bool] = False, *args, **kwargs):
        """Store the pass/fail status; extra arguments are accepted and ignored."""
        self.status = status

    def __eq__(self, other):
        """Return True when ``other`` is a result carrying the same status."""
        # Plain comparisons instead of ``assert``: assertions are removed
        # under ``python -O``, which would make every pair compare equal.
        if not isinstance(other, ComplianceResult):
            return False
        return self.status == other.status

    @abstractmethod
    def to_string(self):
        """Return a textual rendering of the result."""
        return "Not Implemented"


class ComplianceCheck(ABC):
    """Abstract interface for one compliance check over a parsed model card."""

    # Display name of the check; concrete checks override it.
    name: str = None

    @abstractmethod
    def run_check(self, card: BeautifulSoup) -> ComplianceResult:
        """Run the check against ``card`` and return its result.

        Concrete checks must override this; the base implementation raises
        ``NotImplementedError``.
        """
        raise NotImplementedError()


class ModelProviderIdentityResult(ComplianceResult):
    """Result of the "Model Provider Identity" check.

    Attributes:
        provider: The provider string passed in by the check, or ``None``.
    """

    name = "Model Provider Identity"

    def __init__(self, provider: str = None, *args, **kwargs):
        """Store ``provider``; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self.provider = provider

    def __eq__(self, other):
        """Return True when ``other`` has the same type, status and provider."""
        if not isinstance(other, ModelProviderIdentityResult):
            return False
        # Always yield a bool: the previous nested form fell through and
        # returned None when the base comparison failed, and relied on
        # ``assert``, which is stripped under ``python -O``.
        return super().__eq__(other) and self.provider == other.provider

    def to_string(self):
        """Return the provider rendered with ``str``."""
        return str(self.provider)


class ModelProviderIdentityCheck(ComplianceCheck):
    """Checks that the card names who developed the model."""

    name = "Model Provider Identity"

    def run_check(self, card: BeautifulSoup):
        """Extract whatever follows the ``Developed by:`` label.

        Finds the ``<strong>`` tag whose string is ``Developed by:`` and joins
        the string form of its following siblings.

        Returns:
            A passing ``ModelProviderIdentityResult`` carrying the extracted
            provider, or a default (failing) result when the label is missing,
            nothing follows it, or only the ``[More Information Needed]``
            placeholder follows it.
        """
        try:
            developed_by = card.find("strong", string="Developed by:")
            developer = "".join(str(s) for s in developed_by.next_siblings).strip()
        except AttributeError:
            # ``find`` returned None: the label is not present in the card.
            return ModelProviderIdentityResult()

        # A label followed by nothing identifies no provider.
        if not developer or developer == "[More Information Needed]":
            return ModelProviderIdentityResult()

        return ModelProviderIdentityResult(status=True, provider=developer)


class IntendedPurposeResult(ComplianceResult):
    """Result of the "Intended Purpose" check.

    Attributes:
        direct_use: Value passed for the direct-use section, or ``None``.
        downstream_use: Value passed for the downstream-use section, or ``None``.
        out_of_scope_use: Value passed for the out-of-scope-use section, or ``None``.
    """

    name = "Intended Purpose"

    def __init__(
            self,
            direct_use: str = None,
            downstream_use: str = None,
            out_of_scope_use: str = None,
            *args,
            **kwargs,
    ):
        """Store the three section values; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self.direct_use = direct_use
        self.downstream_use = downstream_use
        self.out_of_scope_use = out_of_scope_use

    def __eq__(self, other):
        """Return True when ``other`` has the same type, status and sections."""
        if not isinstance(other, IntendedPurposeResult):
            return False
        # Always yield a bool: the previous nested form returned None when the
        # base comparison failed, and relied on ``assert``, which is stripped
        # under ``python -O``.
        return (
            super().__eq__(other)
            and self.direct_use == other.direct_use
            and self.downstream_use == other.downstream_use
            and self.out_of_scope_use == other.out_of_scope_use
        )

    def to_string(self):
        """Return ``str`` of the (direct, downstream, out-of-scope) tuple."""
        # The second slot is the downstream use; it used to repeat direct_use.
        return str((self.direct_use, self.downstream_use, self.out_of_scope_use))


class IntendedPurposeCheck(ComplianceCheck):
    """Checks the "Direct Use", "Downstream Use" and "Out-of-Scope Use" sections."""

    name = "Intended Purpose"

    def run_check(self, card: BeautifulSoup):
        """Collect the three intended-purpose sections from the card.

        The status depends only on the direct-use and out-of-scope-use
        sections; the downstream-use text is reported but does not affect it.

        Returns:
            An ``IntendedPurposeResult`` holding the text of each section
            (``None`` where a section was not found).
        """
        direct_use_check, direct_use_content = walk_to_next_heading(card, "h3", "Direct Use")

        # Some cards title this section without the "[optional]" suffix, so
        # try the suffixed heading first and fall back to the plain one.
        downstream_use_content = None
        for title in ("Downstream Use [optional]", "Downstream Use"):
            _, downstream_use_content = walk_to_next_heading(card, "h3", title)
            if downstream_use_content is not None:
                break

        out_of_scope_use_check, out_of_scope_use_content = walk_to_next_heading(card, "h3", "Out-of-Scope Use")
        return IntendedPurposeResult(
            status=direct_use_check and out_of_scope_use_check,
            direct_use=direct_use_content,
            downstream_use=downstream_use_content,
            out_of_scope_use=out_of_scope_use_content
        )


class GeneralLimitationsResult(ComplianceResult):
    """Result of the "General Limitations" check.

    Attributes:
        limitations: Value passed for the limitations section, or ``None``.
    """

    name = "General Limitations"

    def __init__(
            self,
            limitations: str = None,
            *args,
            **kwargs,
    ):
        """Store ``limitations``; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self.limitations = limitations

    def __eq__(self, other):
        """Return True when ``other`` has the same type, status and limitations."""
        if not isinstance(other, GeneralLimitationsResult):
            return False
        # Always yield a bool: the previous nested form returned None when the
        # base comparison failed, and relied on ``assert``, which is stripped
        # under ``python -O``.
        return super().__eq__(other) and self.limitations == other.limitations

    def to_string(self):
        """Return the stored limitations value unchanged (may be ``None``)."""
        return self.limitations


class GeneralLimitationsCheck(ComplianceCheck):
    """Checks the card's "Bias, Risks, and Limitations" section."""

    name = "General Limitations"

    def run_check(self, card: BeautifulSoup):
        """Read the h2 "Bias, Risks, and Limitations" section into a result."""
        found, section_text = walk_to_next_heading(
            card,
            "h2",
            "Bias, Risks, and Limitations",
        )
        result = GeneralLimitationsResult(status=found, limitations=section_text)
        return result


class ComputationalRequirementsResult(ComplianceResult):
    """Result of the "Computational Requirements" check.

    Attributes:
        requirements: Value passed for the requirements section, or ``None``.
    """

    name = "Computational Requirements"

    def __init__(
            self,
            requirements: str = None,
            *args,
            **kwargs,
    ):
        """Store ``requirements``; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self.requirements = requirements

    def __eq__(self, other):
        """Return True when ``other`` has the same type, status and requirements."""
        if not isinstance(other, ComputationalRequirementsResult):
            return False
        # Always yield a bool: the previous nested form returned None when the
        # base comparison failed, and relied on ``assert``, which is stripped
        # under ``python -O``.
        return super().__eq__(other) and self.requirements == other.requirements

    def to_string(self):
        """Return the stored requirements value unchanged (may be ``None``)."""
        return self.requirements


class ComputationalRequirementsCheck(ComplianceCheck):
    """Checks the card's "Compute infrastructure" section."""

    name = "Computational Requirements"

    def run_check(self, card: BeautifulSoup):
        """Read the h3 "Compute infrastructure" section into a result."""
        found, section_text = walk_to_next_heading(
            card,
            "h3",
            "Compute infrastructure",
        )
        result = ComputationalRequirementsResult(status=found, requirements=section_text)
        return result


class ComplianceSuite:
    """Runs a collection of compliance checks against one model card."""

    def __init__(self, checks):
        """Keep the iterable of checks to run."""
        self.checks = checks

    def run(self, model_card) -> List[ComplianceResult]:
        """Render the markdown card to HTML, parse it, and run every check.

        Returns the results in the same order as ``self.checks``.
        """
        rendered = markdown.markdown(model_card)
        soup = BeautifulSoup(rendered, features="html.parser")

        results = []
        for check in self.checks:
            results.append(check.run_check(soup))
        return results