Ark-kun committed on
Commit
560d306
·
1 Parent(s): 1e4a738

WIP - HuggingFace auth

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. README.md +3 -0
  3. huggingface_overlay/start_HuggingFace.py +209 -36
Dockerfile CHANGED
@@ -48,7 +48,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
48
 
49
 
50
  # Installing HuggingFace. Needs to be done after uv sync
51
- RUN uv pip install huggingface_hub[cli]
52
 
53
  # Place executables in the environment at the front of the path
54
  ENV PATH="/app/backend/.venv/bin:$PATH"
 
48
 
49
 
50
  # Installing HuggingFace. Needs to be done after uv sync
51
+ RUN uv pip install huggingface_hub[oauth]
52
 
53
  # Place executables in the environment at the front of the path
54
  ENV PATH="/app/backend/.venv/bin:$PATH"
README.md CHANGED
@@ -7,6 +7,9 @@ sdk: docker
7
  pinned: false
8
  hf_oauth: true
9
  hf_oauth_scopes:
 
 
 
10
  - read-repos
11
  - write-repos
12
  - manage-repos
 
7
  pinned: false
8
  hf_oauth: true
9
  hf_oauth_scopes:
10
+ - contribute-repos
11
+ - jobs
12
+ # Remove this
13
  - read-repos
14
  - write-repos
15
  - manage-repos
huggingface_overlay/start_HuggingFace.py CHANGED
@@ -1,10 +1,14 @@
1
  import logging
2
  import os
3
  import pathlib
 
4
 
5
  import fastapi
 
 
 
 
6
 
7
- # Debug
8
 
9
  # region Paths configuration
10
 
@@ -40,31 +44,10 @@ logs_root_uri = artifacts_root_uri
40
  # endregion
41
 
42
  # region: Launcher configuration
43
- # import docker
44
- # from cloud_pipelines_backend.launchers import local_docker_launchers
45
-
46
- # docker_client = docker.DockerClient.from_env(timeout=5)
47
- # _ = docker_client.version()
48
-
49
- # launcher = local_docker_launchers.DockerContainerLauncher(
50
- # client=docker_client,
51
- # )
52
- launcher = None
53
- try:
54
- from cloud_pipelines_backend.launchers import huggingface_launchers
55
 
56
- launcher = huggingface_launchers.HuggingFaceJobsContainerLauncher()
57
- except Exception as ex:
58
- print(ex)
59
- pass
60
-
61
- try:
62
- import huggingface_hub
63
-
64
- huggingface_hub.list_repo_tree(repo_id="Ark-kun/tangle_data", repo_type="dataset")
65
- except Exception as ex:
66
- print(ex)
67
- pass
68
 
69
  # endregion
70
 
@@ -76,24 +59,194 @@ sleep_seconds_between_queue_sweeps: float = 5.0
76
  # region: Authentication configuration
77
  import fastapi
78
 
79
- ADMIN_USER_NAME = "admin"
80
- default_component_library_owner_username = ADMIN_USER_NAME
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
 
83
  # ! This function is just a placeholder for user authentication and authorization so that every request has a user name and permissions.
84
  # ! This placeholder function authenticates the user as user with name "admin" and read/write/admin permissions.
85
  # ! In a real multi-user deployment, the `get_user_details` function MUST be replaced with real authentication/authorization based on OAuth or another auth system.
86
- def get_user_details(request: fastapi.Request):
87
- return api_router.UserDetails(
88
- name=ADMIN_USER_NAME,
89
- permissions=api_router.Permissions(
90
- read=True,
91
- write=True,
92
- admin=True,
93
- ),
94
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
 
 
 
97
  # endregion
98
 
99
 
@@ -270,6 +423,26 @@ def health_check():
270
  return {}
271
 
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  # Mounting the web app if the files exist
274
  this_dir = pathlib.Path(__file__).parent
275
  web_app_search_dirs = [
 
1
  import logging
2
  import os
3
  import pathlib
4
+ import typing
5
 
6
  import fastapi
7
+ import huggingface_hub
8
+ import huggingface_hub.errors
9
+
10
+ ENABLE_HUGGINGFACE_AUTH = True
11
 
 
12
 
13
  # region Paths configuration
14
 
 
44
  # endregion
45
 
46
  # region: Launcher configuration
47
+ from cloud_pipelines_backend.launchers import huggingface_launchers
 
 
 
 
 
 
 
 
 
 
 
48
 
49
+ # Requires HF_TOKEN
50
+ launcher = huggingface_launchers.HuggingFaceJobsContainerLauncher()
 
 
 
 
 
 
 
 
 
 
51
 
52
  # endregion
53
 
 
59
  # region: Authentication configuration
60
  import fastapi
61
 
62
# Startup diagnostics: environment, Space identity and the identity behind the local HF token.

# Only the variable NAMES are printed. The values may contain secrets
# (e.g. HF_TOKEN, OAUTH_CLIENT_SECRET) and must not end up in the logs.
print(f"{sorted(os.environ)=}")

# Using `.get`: indexing `os.environ[...]` raises KeyError when the variable is not set,
# which would crash the startup when the variable is absent.
print(f'{os.environ.get("PERSISTENT_STORAGE_ENABLED")=}')

# Both values are None when the corresponding environment variables are not set.
hf_space_author_name = os.environ.get("SPACE_AUTHOR_NAME")
hf_space_creator_user_id = os.environ.get("SPACE_CREATOR_USER_ID")
print(f"{hf_space_author_name=}")
print(f"{hf_space_creator_user_id=}")

# Best-effort lookup of the locally configured token. Failure is logged, not fatal.
hf_token: str | None = None
try:
    hf_token = huggingface_hub.get_token()
except Exception:
    logging.error("Error in `huggingface_hub.get_token()`", exc_info=True)

# Only reporting whether the token exists, never the token itself.
print(f"{(hf_token is not None)=}")

# Best-effort lookup of the user behind the local token. Failure is logged, not fatal,
# and leaves both variables set to None.
hf_whoami: dict | None = None
hf_whoami_user_name: str | None = None
try:
    hf_whoami = huggingface_hub.whoami()
    hf_whoami_user_name = hf_whoami.get("name") if hf_whoami else None
except Exception:
    logging.error("Error in `huggingface_hub.whoami()`", exc_info=True)

print(f"{hf_whoami=}")
print(f"{hf_whoami_user_name=}")
89
 
90
 
91
  # ! This function is just a placeholder for user authentication and authorization so that every request has a user name and permissions.
92
  # ! This placeholder function authenticates the user as user with name "admin" and read/write/admin permissions.
93
  # ! In a real multi-user deployment, the `get_user_details` function MUST be replaced with real authentication/authorization based on OAuth or another auth system.
94
+ # ADMIN_USER_NAME = "admin"
95
+
96
# FIX: Set to False by default
# any_user_can_read = os.environ.get("ANY_USER_CAN_READ", "false").lower() == "true"
# Read access for everyone stays enabled unless ANY_USER_CAN_READ is set to
# something other than "true" (compared case-insensitively).
_any_user_can_read_setting = os.environ.get("ANY_USER_CAN_READ", "true")
any_user_can_read = _any_user_can_read_setting.lower() == "true"
print(f"{any_user_can_read=}")

# The presence of the Space author name is used as the "running inside a HuggingFace Space" signal.
IS_HUGGINGFACE_SPACE = hf_space_author_name is not None
print(f"{IS_HUGGINGFACE_SPACE=}")
103
+
104
if IS_HUGGINGFACE_SPACE:
    ADMIN_USER_NAME = hf_space_author_name
    print(f"{ADMIN_USER_NAME=}")

    default_component_library_owner_username = ADMIN_USER_NAME

    # Single-tenant
    # Selecting the tenant. It's the user or org that hosts the space.
    tenant_name = hf_space_author_name

    # We need to be careful and prevent public spaces with HF_TOKEN set from letting anyone exploit the HF_TOKEN user.
    def get_user_details(request: fastapi.Request):
        """Derive the caller's name and permissions from the HuggingFace OAuth session.

        Permission rules:
          * Every caller gets read access when `any_user_can_read` is true.
          * A user whose `preferred_username` equals the Space author name gets
            read, write and admin access.
          * A user that belongs to the org named like the Space author gets
            permissions based on the `roleInOrg` value reported by `whoami`:
            "admin" -> read/write/admin, "write"/"contribute"/"contributor" ->
            read/write, "read" -> read.

        Args:
            request: The incoming request. Its session holds the OAuth info.

        Returns:
            `api_router.UserDetails` for the logged-in user, or details for the
            "anonymous" user (read access only if `any_user_can_read`) when
            there is no OAuth session or when the `whoami` request fails with
            `HfHubHTTPError`. On HTTP 401 the OAuth info is also removed from
            the session.
        """
        user_can_read = any_user_can_read
        user_can_write = False
        user_can_admin = False

        oauth_info = huggingface_hub.parse_huggingface_oauth(request)
        # if "USER_PERMISSIONS_MAP" in os.environ:
        #     ...

        if oauth_info:
            # `oauth_info` itself is deliberately not logged: it carries the user's access token.
            logger.info(f"{oauth_info.user_info=}")
            logger.info(f"{oauth_info.user_info.is_pro=}")
            logger.info(f"{oauth_info.user_info.can_pay=}")
            # TODO: Allow access for users belonging to an allowed org

            user_is_space_author = (
                oauth_info.user_info.preferred_username == hf_space_author_name
            )
            # The two values below are only logged for diagnostics. They do not affect permissions.
            user_is_space_author_by_id = (
                oauth_info.user_info.sub == hf_space_creator_user_id
            )
            # oauth_info.user_info.orgs[0].role_in_org
            user_belongs_to_space_org = any(
                org.preferred_username == hf_space_author_name
                for org in oauth_info.user_info.orgs or []
            )
            logger.info(f"{user_belongs_to_space_org=}")
            logger.info(f"{user_is_space_author=}")
            logger.info(f"{user_is_space_author_by_id=}")

            if user_is_space_author:
                # The author must be able to read too, even when `any_user_can_read` is off.
                user_can_read = True
                user_can_write = True
                user_can_admin = True

            try:
                # Checking user's role in the space org:
                # For some reason, in OAuth_info, orgs are always empty.
                # Getting the info using whoami
                # This leads to extra HF API requests. Find a better way to fix.
                # The request is made once and the result is both logged and used.
                oauth_whoami_user_info = huggingface_hub.whoami(
                    token=oauth_info.access_token
                )
                logger.info(f"{oauth_whoami_user_info=}")
                # `or []` also covers an explicit null value.
                user_orgs = oauth_whoami_user_info.get("orgs") or []
                # Does not work: hf_space_creator_user_id is the creator user ID, not the space org ID
                # if user_org.get("id") == hf_space_creator_user_id
                space_org = next(
                    (
                        user_org
                        for user_org in user_orgs
                        if user_org.get("name") == hf_space_author_name
                    ),
                    None,
                )
                if space_org:
                    logger.info(f"{space_org=}")
                    user_role_in_org = space_org.get("roleInOrg")
                    logger.info(f"{user_role_in_org=}")

                    if user_role_in_org == "admin":
                        user_can_read = True
                        user_can_write = True
                        user_can_admin = True
                    # NOTE(review): "contributor" is believed to be the spelling HuggingFace
                    # reports; "contribute" is kept for backward compatibility. Confirm.
                    elif user_role_in_org in ("write", "contribute", "contributor"):
                        user_can_read = True
                        user_can_write = True
                    elif user_role_in_org == "read":
                        user_can_read = True

                user_details = api_router.UserDetails(
                    name=oauth_info.user_info.preferred_username,
                    permissions=api_router.Permissions(
                        read=user_can_read,
                        write=user_can_write,
                        admin=user_can_admin,
                    ),
                )
                logger.info(f"{user_details=}")
                return user_details
            except huggingface_hub.errors.HfHubHTTPError as ex:
                # Maybe redirect to logout or login API?
                # Does not work. The browser is not redirected
                # logger.error(
                #     f"Error getting authentication info from HuggingFace. Redirecting to login",
                #     exc_info=True,
                # )
                # raise fastapi.HTTPException(
                #     status_code=302,
                #     detail="Authorization error",
                #     # headers={"Location": "/api/oauth/huggingface/logout"},
                #     headers={"Location": "/api/oauth/huggingface/login"},
                # )
                # Comparing with None explicitly: a `requests` response object is
                # falsy for 4xx/5xx statuses, so a plain truthiness check would
                # never let the 401 branch run.
                if ex.response is not None and ex.response.status_code == 401:
                    logger.error(
                        "Error getting authentication info from HuggingFace. Deleting session OAuth info",
                        exc_info=True,
                    )
                    request.session.pop("oauth_info", None)
                else:
                    logger.error(
                        "Error getting authentication info from HuggingFace.",
                        exc_info=True,
                    )

        # No OAuth session, or the HuggingFace lookup failed: treat the caller as anonymous.
        return api_router.UserDetails(
            name="anonymous",
            permissions=api_router.Permissions(
                read=any_user_can_read,
                write=False,
                admin=False,
            ),
        )

else:
    # We're not in space.
    ADMIN_USER_NAME = hf_whoami_user_name or "admin"
    print(f"{ADMIN_USER_NAME=}")

    default_component_library_owner_username = ADMIN_USER_NAME

    # We need to be careful and prevent public spaces with HF_TOKEN set from letting anyone exploit the HF_TOKEN user.
    def get_user_details(request: fastapi.Request):
        """Return the admin user with full permissions for every request.

        This variant is only defined when not running in a HuggingFace Space.
        It performs no authentication: every caller is `ADMIN_USER_NAME` with
        read/write/admin permissions.

        Args:
            request: The incoming request (unused).

        Returns:
            `api_router.UserDetails` for `ADMIN_USER_NAME` with all permissions.
        """
        return api_router.UserDetails(
            name=ADMIN_USER_NAME,
            permissions=api_router.Permissions(
                read=True,
                write=True,
                admin=True,
            ),
        )
246
 
247
 
248
+ # !!! TODO: Use authenticated user's token to run Jobs via launcher.
249
+
250
  # endregion
251
 
252
 
 
423
  return {}
424
 
425
 
426
+ # @app.get("/api/users/me")
427
+ # def get_current_user(
428
+ # user_details: typing.Annotated[
429
+ # api_router.UserDetails | None, fastapi.Depends(get_user_details)
430
+ # ],
431
+ # ) -> api_router.UserDetails | None:
432
+ # return user_details
433
+
434
+
435
+ # Setting up HuggingFace auth.
436
+ # if "HF_TOKEN" in os.environ:
437
+
438
if ENABLE_HUGGINGFACE_AUTH:
    # The missing secret is only reported. The OAuth routes are attached either way.
    if "OAUTH_CLIENT_SECRET" not in os.environ:
        logger.warning(
            "HuggingFace auth is enabled, but OAUTH_CLIENT_SECRET env variable is missing."
        )
    # Registers the HuggingFace OAuth endpoints on the app under the "/api/" prefix.
    huggingface_hub.attach_huggingface_oauth(app, route_prefix="/api/")
444
+
445
+
446
  # Mounting the web app if the files exist
447
  this_dir = pathlib.Path(__file__).parent
448
  web_app_search_dirs = [