NimaBoscarino committed on
Commit
7a3d7a6
1 Parent(s): 998362d
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="1">
8
+ <item index="0" class="java.lang.String" itemvalue="pytest-runner" />
9
+ </list>
10
+ </value>
11
+ </option>
12
+ </inspection_tool>
13
+ <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
14
+ <option name="ignoredErrors">
15
+ <list>
16
+ <option value="N801" />
17
+ <option value="N806" />
18
+ </list>
19
+ </option>
20
+ </inspection_tool>
21
+ <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
22
+ <option name="ignoredIdentifiers">
23
+ <list>
24
+ <option value="dict.labels" />
25
+ <option value="geograpy.places.countries" />
26
+ </list>
27
+ </option>
28
+ </inspection_tool>
29
+ </profile>
30
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (sandbox)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/model-card-regulatory-check.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="Python 3.10 (sandbox)" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/model-card-regulatory-check.iml" filepath="$PROJECT_DIR$/.idea/model-card-regulatory-check.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim-bullseye
2
+
3
+ # Set the working directory to /code
4
+ WORKDIR /code
5
+
6
+ # Copy only requirements.txt into the container at /code (the rest of the project is copied further down)
7
+ COPY ./requirements.txt /code/requirements.txt
8
+
9
+ # Install requirements.txt
10
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
+
12
+ # Set up a new user named "user" with user ID 1000
13
+ RUN useradd -m -u 1000 user
14
+ # Switch to the "user" user
15
+ USER user
16
+ # Set home to the user's home directory
17
+ ENV HOME=/home/user \
18
+ PATH=/home/user/.local/bin:$PATH
19
+
20
+ # Set the working directory to the user's home directory
21
+ WORKDIR $HOME/app
22
+
23
+ # Copy the current directory contents into the container at $HOME/app setting the owner to the user
24
+ COPY --chown=user . $HOME/app
25
+
26
+ EXPOSE 7860
27
+ CMD ["uvicorn", "server:app","--proxy-headers", "--host", "0.0.0.0", "--port", "7860"]
__pycache__/main.cpython-310.pyc ADDED
Binary file (5.1 kB). View file
 
__pycache__/server.cpython-310.pyc ADDED
Binary file (803 Bytes). View file
 
main.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict, Any, Optional, List
3
+ import re
4
+ from abc import ABC, abstractmethod
5
+
6
+ from huggingface_hub import (ModelCard, comment_discussion,
7
+ create_discussion, get_discussion_details,
8
+ get_repo_discussions)
9
+ import markdown
10
+ from bs4 import BeautifulSoup
11
+ from tabulate import tabulate
12
+ from difflib import SequenceMatcher
13
+
14
# Value of the KEY environment variable, or None when it is not set.
KEY = os.getenv("KEY")
15
+
16
+
17
def similar(a, b):
    """Return the difflib similarity ratio of two sequences.

    Args:
        a: First sequence (for example a string).
        b: Second sequence to compare against ``a``.

    Returns:
        The value of ``SequenceMatcher.ratio()`` for the pair: ``1.0`` for
        identical sequences, ``0.0`` when they have nothing in common.
    """
    # No junk filter is supplied, so every element takes part in matching.
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()
20
+
21
+
22
class ComplianceCheck(ABC):
    """Abstract base class for one named compliance check on a model card."""

    def __init__(self, name):
        """Store the human-readable label of the check.

        Args:
            name: Label identifying this check; ``check_compliance`` uses it
                as the key of the corresponding result.
        """
        self.name = name

    @abstractmethod
    def check(self, card: BeautifulSoup) -> bool:
        """Decide whether ``card`` satisfies this check.

        Args:
            card: The parsed model card to inspect.

        Returns:
            ``True`` when the requirement is met, ``False`` otherwise.

        Raises:
            NotImplementedError: Always, in this base implementation;
                subclasses must override the method.
        """
        raise NotImplementedError
29
+
30
+
31
class ModelProviderIdentityCheck(ComplianceCheck):
    """Check that the model card states who developed the model."""

    def __init__(self):
        super().__init__("Identity and Contact Details")

    def check(self, card: BeautifulSoup):
        """Report whether the "Developed by" entry of the card is filled in.

        The first text node containing "Developed by" is located, its
        grandparent element is taken as the entry, and the entry's second
        child is read as the developer text.
        NOTE(review): this presumably targets markup such as
        ``<li><strong>Developed by:</strong> name</li>`` -- confirm against
        the model card template.

        Args:
            card: The parsed model card to inspect.

        Returns:
            ``False`` when no "Developed by" entry exists, when nothing
            follows the label, or when the text is the template placeholder
            ``[More Information Needed]``; ``True`` otherwise.
        """
        matches = card.find_all(string=re.compile("Developed by"))
        if not matches:
            # A card without the entry cannot satisfy the check; previously
            # this case raised IndexError.
            return False

        label_parent = matches[0].parent
        developed_by_li = label_parent.parent if label_parent is not None else None
        if developed_by_li is None:
            return False

        entry_children = list(developed_by_li.children)
        if len(entry_children) < 2:
            # The label is present but no content follows it.
            return False

        developed_by = entry_children[1].text.strip()
        return developed_by != "[More Information Needed]"
43
+
44
+
45
class IntendedPurposeCheck(ComplianceCheck):
    """Placeholder for the "Intended Purpose" compliance check."""

    def __init__(self):
        super().__init__("Intended Purpose")

    def check(self, card: BeautifulSoup):
        """Always report the requirement as unmet.

        Args:
            card: The parsed model card; currently not inspected.

        Returns:
            ``False`` unconditionally, because no inspection logic exists yet.
        """
        # TODO: inspect the card's "Direct Use" heading and treat a
        # "[More Information Needed]" placeholder as missing.
        return False
57
+
58
+
59
# Checks executed against every model card, listed in the order they are run.
compliance_checks = [
    ModelProviderIdentityCheck(),
    IntendedPurposeCheck(),
    # Checks named here have no implementation yet:
    #   "General Limitations"
    #   "Computational and Hardware Requirements"
    #   "Carbon Emissions"
]
66
+
67
+
68
def parse_webhook_post(data: Dict[str, Any]) -> Optional[str]:
    """Extract the model repository name from a decoded webhook payload.

    Args:
        data: The JSON body of the webhook request, already decoded.

    Returns:
        ``data["repo"]["name"]`` when the event scope is ``"repo"`` and the
        repository type is ``"model"``. ``None`` when the event has another
        scope or when the payload lacks the expected ``event`` / ``repo``
        structure (previously a KeyError or TypeError).

    Raises:
        ValueError: If the payload describes a repository whose type is not
            ``"model"``.
    """
    if not isinstance(data, dict):
        return None

    event = data.get("event")
    # NOTE(review): Hub webhooks also appear to emit narrower scopes such as
    # "repo.content"; confirm that matching only the exact "repo" scope is
    # what is wanted here.
    if not isinstance(event, dict) or event.get("scope") != "repo":
        return None

    repo = data.get("repo")
    if not isinstance(repo, dict) or "name" not in repo or "type" not in repo:
        return None

    if repo["type"] != "model":
        raise ValueError("Incorrect repo type.")
    return repo["name"]
78
+
79
+
80
def check_compliance(comp_checks: List[ComplianceCheck], card: BeautifulSoup) -> Dict[str, bool]:
    """Run every check against the card and collect the outcomes by name.

    Args:
        comp_checks: The checks to execute, in order.
        card: The parsed model card handed to each check.

    Returns:
        A dictionary mapping each check's ``name`` to the value returned by
        its ``check`` method. Checks sharing a name overwrite one another.
    """
    outcomes: Dict[str, bool] = {}
    for compliance_check in comp_checks:
        check_name = compliance_check.name
        outcomes[check_name] = compliance_check.check(card)
    return outcomes
82
+
83
+
84
def run_compliance_check(repo_name):
    """Load a repository's model card and run all registered checks on it.

    Args:
        repo_name: Repository id (or path) passed to ``ModelCard.load``.

    Returns:
        The dictionary produced by ``check_compliance`` for the module-level
        ``compliance_checks`` list.
    """
    card_data: ModelCard = ModelCard.load(repo_id_or_path=repo_name)
    # The card body is Markdown; render it to HTML so the checks can walk it
    # as a BeautifulSoup tree.
    rendered_html = markdown.markdown(card_data.content)
    soup = BeautifulSoup(rendered_html, features="html.parser")
    return check_compliance(compliance_checks, soup)
91
+
92
+
93
def create_metadata_breakdown_table(compliance_check_dictionary):
    """Render the compliance results as a GitHub-flavoured Markdown table.

    Args:
        compliance_check_dictionary: Mapping of check name to check result.

    Returns:
        The table text produced by ``tabulate`` with the columns
        "Compliance Check" and "Present", one row per dictionary entry.
    """
    rows = list(compliance_check_dictionary.items())
    column_titles = ("Compliance Check", "Present")
    return tabulate(rows, headers=column_titles, tablefmt="github")
101
+
102
+
103
def create_markdown_report(
    desired_metadata_dictionary, repo_name, update: bool = False
):
    """Build the Markdown text of the compliance report.

    Args:
        desired_metadata_dictionary: Mapping of check name to check result,
            rendered through ``create_metadata_breakdown_table``.
        repo_name: Name of the repository mentioned in the report body.
        update: When true, "(updated)" is appended to the report heading.

    Returns:
        The report as a single Markdown string.
    """
    heading_suffix = "(updated)" if update else ""
    breakdown_table = create_metadata_breakdown_table(desired_metadata_dictionary)
    # The template lines start at column 0 on purpose: any indentation here
    # would become part of the emitted Markdown.
    return f"""# Model Card Regulatory Compliance report card {heading_suffix}
\n
This is an automatically produced model card regulatory compliance report card for {repo_name}.
This report is meant as a POC!
\n
## Breakdown of metadata fields for your model
\n
{breakdown_table}
\n
"""
117
+
118
+
119
def create_or_update_report(compliance_check, repo_name):
    """Post the compliance report to the repository's discussions.

    If an open discussion carrying the report title already exists, the new
    report is added to it as a comment, but only when it differs from the
    most recent comment. Otherwise a new discussion is opened with the report
    as its description.

    Args:
        compliance_check: Mapping of check name to check result, as accepted
            by ``create_markdown_report``.
        repo_name: Id of the model repository to report on.

    Returns:
        ``True`` once the existing thread has been handled or a new
        discussion has been created.
    """
    # One title for both lookup and creation. The lookup used to search for
    # "Metadata Report Card", which never matched the discussions created
    # below, so every call opened a brand-new discussion.
    report_title = "Model Card Regulatory Compliance Report Card"

    report = create_markdown_report(compliance_check, repo_name, update=False)
    repo_discussions = get_repo_discussions(
        repo_name,
        repo_type="model",
    )
    for discussion in repo_discussions:
        if discussion.title != report_title or discussion.status != "open":
            continue

        # An existing open report card thread.
        discussion_details = get_discussion_details(
            repo_name, discussion.num, repo_type="model"
        )
        # Walk backwards to the latest event that actually has text; the last
        # event is not necessarily a comment (presumably status or title
        # changes carry no ``content`` -- confirm against huggingface_hub).
        last_comment = next(
            (
                event.content
                for event in reversed(discussion_details.events)
                if getattr(event, "content", None) is not None
            ),
            "",
        )
        updated_report = create_markdown_report(
            compliance_check,
            repo_name,
            update=True,
        )
        # Compare against both heading variants: the previous comment may
        # itself be an "(updated)" report, and comparing only the plain
        # variant would re-post an unchanged report on every call.
        closest_match = max(
            similar(report, last_comment),
            similar(updated_report, last_comment),
        )
        if closest_match <= 0.999:
            comment_discussion(
                repo_name,
                discussion.num,
                comment=updated_report,
                repo_type="model",
            )
        return True

    create_discussion(
        repo_name,
        report_title,
        description=report,
        repo_type="model",
    )
    return True
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ markdown
4
+ beautifulsoup4
5
+ tabulate
6
+ huggingface_hub
server.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI, Request, Response
3
+ from main import parse_webhook_post, run_compliance_check, create_or_update_report
4
+
5
# Value of the KEY environment variable (None when unset); it is the value the
# webhook handler's X-Webhook-Secret comparison refers to.
KEY = os.getenv("KEY")

# ASGI application object served by uvicorn as "server:app".
app = FastAPI()
8
+
9
+
10
@app.post("/webhook")
async def webhook(request: Request):
    """Handle a webhook POST: run the compliance checks and publish the report.

    Args:
        request: The incoming request; its JSON body is the webhook payload.

    Returns:
        * a 401 response when ``KEY`` is configured and the
          ``X-Webhook-Secret`` header does not match it;
        * a 400 response when the body is not valid JSON or no model
          repository name can be extracted from it;
        * otherwise the result of ``create_or_update_report``.
    """
    # The secret is enforced only when one is configured, so deployments
    # without a KEY keep accepting every request as before.
    if KEY is not None and request.headers.get("X-Webhook-Secret") != KEY:
        return Response("Invalid secret", status_code=401)

    try:
        data = await request.json()
    except ValueError:
        # Covers JSON decoding errors: a malformed body is a client error.
        return Response("Unable to parse webhook data", status_code=400)

    try:
        repo_name = parse_webhook_post(data)
    except (KeyError, TypeError, ValueError):
        # Payloads with missing fields or a non-model repository are rejected
        # with 400 instead of surfacing as a 500.
        repo_name = None
    if not repo_name:
        return Response("Unable to parse webhook data", status_code=400)

    compliance_check = run_compliance_check(repo_name)
    return create_or_update_report(compliance_check, repo_name)