ashhadahsan committed on
Commit
5493c06
•
1 Parent(s): df54e0f

Upload 3 files

Browse files
utils/openllmapi/api.py ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from requests import Session
2
+ import requests
3
+ import json
4
+ import os
5
+ import uuid
6
+ import logging
7
+ import re
8
+ import getpass
9
+
10
+ from .exceptions import *
11
+
12
+
13
class ChatBot:
    """Client for the HuggingChat web endpoints under https://huggingface.co/chat.

    The client authenticates with browser cookies, keeps one ``requests``
    session, tracks the conversations it created, and sends prompts to the
    currently selected conversation.
    """

    # Models selectable through switch_llm(); the index is the tuple position.
    _LLM_MODELS = (
        "OpenAssistant/oasst-sft-6-llama-30b-xor",
        "meta-llama/Llama-2-70b-chat-hf",
        "codellama/CodeLlama-34b-Instruct-hf",
    )

    # Cookies for authentication.
    cookies: dict
    # HuggingChat session.
    session: "Session"

    def __init__(self, cookies: dict = None, cookie_path: str = "") -> None:
        """Create a client from cookies given directly or stored in a JSON file.

        Exactly one of ``cookies`` and ``cookie_path`` must be supplied.
        Cookies may be a name -> value dict or a list of
        ``{"name": ..., "value": ...}`` objects; the list form is converted
        to a dict. A first conversation is created immediately.

        Raises:
            ChatBotInitError: neither or both arguments were given, or the
                cookie file does not exist.
            CreateConversationError: the initial conversation could not be
                created.
        """
        if cookies is None and cookie_path == "":
            raise ChatBotInitError(
                "Authentication is required now, but no cookies provided. See tutorial at https://github.com/Soulter/hugging-chat-api"
            )
        if cookies is not None and cookie_path != "":
            raise ChatBotInitError("Both cookies and cookie_path provided")

        if cookies is None:
            # Read cookies from the given path.
            if not os.path.exists(cookie_path):
                raise ChatBotInitError(
                    f"Cookie file {cookie_path} not found. Note: The file must be in JSON format and must contain a list of cookies. See more at https://github.com/Soulter/hugging-chat-api"
                )
            with open(cookie_path, "r", encoding="utf-8") as f:
                cookies = json.load(f)

        # Convert a browser-export style list of cookies to key/value format.
        if isinstance(cookies, list):
            cookies = {cookie["name"]: cookie["value"] for cookie in cookies}

        self.cookies = cookies

        self.hf_base_url = "https://huggingface.co"
        self.json_header = {"Content-Type": "application/json"}
        self.session = self.get_hc_session()
        self.conversation_id_list = []
        self.__not_summarize_cids = []
        self.active_model = "meta-llama/Llama-2-70b-chat-hf"
        # Only when accepted, it can create a new conversation.
        self.accepted_welcome_modal = False
        self.current_conversation = self.new_conversation()

    def get_hc_session(self) -> "Session":
        """Build a session carrying ``self.cookies`` and request the chat page once."""
        session = Session()
        session.cookies.update(self.cookies)
        session.get(self.hf_base_url + "/chat")
        return session

    def get_headers(self, ref=True, ref_cid=None) -> dict:
        """Return browser-like request headers.

        Args:
            ref: when true, add a ``Referer`` pointing at a conversation page.
            ref_cid: conversation id for the ``Referer``; defaults to the
                current conversation.
        """
        _h = {
            "Accept": "*/*",
            "Connection": "keep-alive",
            "Host": "huggingface.co",
            "Origin": "https://huggingface.co",
            "Sec-Fetch-Site": "same-origin",
            "Content-Type": "application/json",
            # Client-hint values are quoted strings; the quotes are part of the value.
            "Sec-Ch-Ua-Platform": '"Windows"',
            "Sec-Ch-Ua": '"Chromium";v="116", "Not)A;Brand";v="24", "Microsoft Edge";v="116"',
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Accept-Encoding": "gzip, deflate, br",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36",
        }
        if ref:
            if ref_cid is None:
                ref_cid = self.current_conversation
            _h["Referer"] = f"https://huggingface.co/chat/conversation/{ref_cid}"
        return _h

    def get_cookies(self) -> dict:
        """Return the session's current cookies as a plain dict."""
        return self.session.cookies.get_dict()

    def get_conversation_list(self) -> list:
        """Return a copy of the list of known conversation ids.

        Mutating the returned list does not affect the client's own list.
        """
        return list(self.conversation_id_list)

    def accept_ethics_modal(self):
        """
        [Deprecated Method]

        Post the ethics-modal acceptance to the settings endpoint.
        Returns True on HTTP 200 and raises Exception otherwise.
        """
        response = self.session.post(
            self.hf_base_url + "/chat/settings",
            headers=self.get_headers(ref=False),
            cookies=self.get_cookies(),
            allow_redirects=True,
            data={
                "ethicsModalAccepted": "true",
                "shareConversationsWithModelAuthors": "true",
                "ethicsModalAcceptedAt": "",
                "activeModel": str(self.active_model),
            },
        )

        if response.status_code != 200:
            raise Exception(
                f"Failed to accept ethics modal with status code {response.status_code}. {response.content.decode()}"
            )

        return True

    def new_conversation(self) -> str:
        """
        Create a new conversation. Return the new conversation id. You should change the conversation by calling change_conversation() after calling this method.

        The request is attempted up to six times.

        Raises:
            CreateConversationError: every attempt failed.
        """
        header = self.get_headers(ref=False)
        header["Referer"] = "https://huggingface.co/chat"

        err_count = 0
        while True:
            try:
                resp = self.session.post(
                    self.hf_base_url + "/chat/conversation",
                    json={"model": self.active_model},
                    headers=header,
                    cookies=self.get_cookies(),
                )
                logging.debug(resp.text)
                cid = json.loads(resp.text)["conversationId"]
                self.__preserve_context(cid=cid, ending="1_1")
            except Exception as e:
                # Exception (not BaseException) so Ctrl-C / SystemExit are not retried.
                err_count += 1
                logging.debug(
                    f" Failed to create new conversation. Retrying... ({err_count})"
                )
                if err_count > 5:
                    raise CreateConversationError(
                        f"Failed to create new conversation. ({err_count})"
                    ) from e
                continue

            # Register the id only once the whole attempt succeeded, so failed
            # attempts leave no stale ids behind.
            self.conversation_id_list.append(cid)
            # For the 1st chat, the conversation needs to be summarized.
            self.__not_summarize_cids.append(cid)
            return cid

    def change_conversation(self, conversation_id: str) -> bool:
        """
        Change the current conversation to another one. Need a valid conversation id.

        Raises:
            InvalidConversationIDError: the id is not in the conversation list.
        """
        if conversation_id not in self.conversation_id_list:
            raise InvalidConversationIDError(
                "Invalid conversation id, not in conversation list."
            )
        self.current_conversation = conversation_id
        return True

    def summarize_conversation(self, conversation_id: str = None) -> str:
        """
        Return a summary (title) of the conversation.

        Defaults to the current conversation. Raises Exception on a non-200
        status or when the response has no ``title`` field.
        """
        if conversation_id is None:
            conversation_id = self.current_conversation

        # The Referer must point at the conversation being summarized.
        headers = self.get_headers(ref=True, ref_cid=conversation_id)
        r = self.session.post(
            f"{self.hf_base_url}/chat/conversation/{conversation_id}/summarize",
            headers=headers,
            cookies=self.get_cookies(),
        )

        if r.status_code != 200:
            raise Exception(
                f"Failed to summarize conversation with status code: {r.status_code}"
            )

        response = r.json()
        if "title" in response:
            return response["title"]

        raise Exception(f"Unknown server response: {response}")

    def share_conversation(self, conversation_id: str = None) -> str:
        """
        Return a share link of the conversation.

        Defaults to the current conversation. Raises Exception on a non-200
        status or when the response has no ``url`` field.
        """
        if conversation_id is None:
            conversation_id = self.current_conversation

        headers = self.get_headers(ref=True, ref_cid=conversation_id)

        r = self.session.post(
            f"{self.hf_base_url}/chat/conversation/{conversation_id}/share",
            headers=headers,
            cookies=self.get_cookies(),
        )

        if r.status_code != 200:
            raise Exception(
                f"Failed to share conversation with status code: {r.status_code}"
            )

        response = r.json()
        if "url" in response:
            return response["url"]

        raise Exception(f"Unknown server response: {response}")

    def delete_conversation(self, conversation_id: str = None) -> bool:
        """
        Delete a HuggingChat conversation by conversation_id.

        On success the id is also dropped from the client's bookkeeping; if it
        was the current conversation, ``current_conversation`` becomes "" so
        the next chat() call creates a fresh one. Returns True.

        Raises:
            DeleteConversationError: no id was given or the server did not
                answer with HTTP 200.
        """
        if conversation_id is None:
            raise DeleteConversationError("conversation_id is required.")

        headers = self.get_headers()

        r = self.session.delete(
            f"{self.hf_base_url}/chat/conversation/{conversation_id}",
            headers=headers,
            cookies=self.get_cookies(),
        )

        if r.status_code != 200:
            raise DeleteConversationError(
                f"Failed to delete conversation with status code: {r.status_code}"
            )

        if conversation_id in self.conversation_id_list:
            self.conversation_id_list.remove(conversation_id)
        if conversation_id in self.__not_summarize_cids:
            self.__not_summarize_cids.remove(conversation_id)
        if self.current_conversation == conversation_id:
            self.current_conversation = ""
        return True

    def get_available_llm_models(self) -> list:
        """
        Return the hard-coded list of model names this client knows about.

        The list is not fetched from the server, so it may lag behind what
        huggingface.co/chat actually offers.
        """
        return list(self._LLM_MODELS)

    def set_share_conversations(self, val: bool = True):
        """Post the "share conversations with model authors" setting.

        The sharing flag is sent only when ``val`` is true. The response is
        not inspected.
        """
        setting = {
            "ethicsModalAcceptedAt": "",
            "searchEnabled": "true",
            # Send the client's active model rather than a fixed name.
            "activeModel": str(self.active_model),
        }
        if val:
            setting["shareConversationsWithModelAuthors"] = "on"

        self.session.post(
            self.hf_base_url + "/chat/settings",
            headers=self.get_headers(ref=True),
            cookies=self.get_cookies(),
            allow_redirects=True,
            data=setting,
        )

    def switch_llm(self, to: int) -> bool:
        """
        Attempts to change current conversation's Large Language Model.
        Requires an index to indicate the model you want to switch.
        For now, 0 is `OpenAssistant/oasst-sft-6-llama-30b-xor`, 1 is `meta-llama/Llama-2-70b-chat-hf`, 2 is 'codellama/CodeLlama-34b-Instruct-hf' :)

        * REMEMBER: For flexibility, the effect of switch just limited to *current conversation*. You can manually switch llm when you change a conversation.

        Returns True when the follow-up check_operation() succeeds, False otherwise.

        Raises:
            ValueError: ``to`` is not a valid model index.
        """
        models = self._LLM_MODELS
        if to not in range(len(models)):
            raise ValueError(
                "Can't switch llm, unexpected index. For now, 0 is `OpenAssistant/oasst-sft-6-llama-30b-xor`, 1 is `meta-llama/Llama-2-70b-chat-hf`, 2 is 'codellama/CodeLlama-34b-Instruct-hf':)"
            )
        mdl = models[int(to)]

        self.session.post(
            self.hf_base_url + "/chat/settings",
            headers=self.get_headers(ref=True),
            cookies=self.get_cookies(),
            allow_redirects=True,
            data={
                "shareConversationsWithModelAuthors": "on",
                "ethicsModalAcceptedAt": "",
                "searchEnabled": "true",
                "activeModel": mdl,
            },
        )

        if self.check_operation():
            return True

        logging.warning(
            f"Switch LLM {mdl} failed. Please submit an issue to https://github.com/Soulter/hugging-chat-api"
        )
        return False

    def check_operation(self) -> bool:
        """Return True when the current conversation's data endpoint answers HTTP 200."""
        r = self.session.post(
            self.hf_base_url
            + f"/chat/conversation/{self.current_conversation}/__data.json?x-sveltekit-invalidated=1_1",
            headers=self.get_headers(ref=True),
            cookies=self.get_cookies(),
        )
        return r.status_code == 200

    def chat(
        self,
        text: str,
        temperature: float = 0.1,
        top_p: float = 0.95,
        repetition_penalty: float = 1.2,
        top_k: int = 50,
        truncate: int = 1000,
        watermark: bool = False,
        max_new_tokens: int = 1024,
        stop: list = None,
        return_full_text: bool = False,
        stream: bool = True,
        use_cache: bool = False,
        is_retry: bool = False,
        retry_count: int = 5,
    ):
        """
        Send a message to the current conversation. Return the response text.

        The generation parameters are forwarded unchanged in the request body.
        ``stop`` defaults to ``["</s>"]``. A non-200 response is retried until
        ``retry_count`` attempts have been used.

        Raises:
            Exception: ``retry_count`` is not positive or ``text`` is empty.
            ModelOverloadedError: the server reports the model is overloaded.
            ChatError: all attempts failed, a streamed line could not be
                parsed, or the stream carried an error object.
        """
        if retry_count <= 0:
            raise Exception("the parameter retry_count must be greater than 0.")
        if text == "":
            raise Exception("the prompt can not be empty.")
        if self.current_conversation == "":
            self.current_conversation = self.new_conversation()
        if stop is None:
            stop = ["</s>"]

        eos = "</s>"
        overloaded_marker = '{"error":"Model is overloaded"'

        req_json = {
            "inputs": text,
            "parameters": {
                "temperature": temperature,
                "top_p": top_p,
                "repetition_penalty": repetition_penalty,
                "top_k": top_k,
                "truncate": truncate,
                "watermark": watermark,
                "max_new_tokens": max_new_tokens,
                "stop": stop,
                "return_full_text": return_full_text,
                "stream": stream,
            },
            "options": {
                "use_cache": use_cache,
                "is_retry": is_retry,
                "id": str(uuid.uuid4()),
            },
            "stream": True,
        }

        headers = {
            "Origin": "https://huggingface.co",
            "Referer": f"https://huggingface.co/chat/conversation/{self.current_conversation}",
            "Content-Type": "application/json",
            "Sec-ch-ua": '"Chromium";v="94", "Microsoft Edge";v="94", ";Not A Brand";v="99"',
            "Sec-ch-ua-mobile": "?0",
            "Sec-ch-ua-platform": '"Windows"',
            "Accept": "*/*",
            "Accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        }

        while retry_count > 0:
            resp = self.session.post(
                self.hf_base_url + f"/chat/conversation/{self.current_conversation}",
                json=req_json,
                stream=True,
                headers=headers,
                cookies=self.session.cookies.get_dict(),
            )

            if resp.status_code != 200:
                # Keep reporting an overloaded model with its dedicated error.
                if overloaded_marker in resp.text:
                    raise ModelOverloadedError(
                        "Model is overloaded, please try again later."
                    )
                retry_count -= 1
                if retry_count <= 0:
                    raise ChatError(f"Failed to chat. ({resp.status_code})")
                # Try again instead of parsing the error body as a token stream.
                continue

            pieces = []
            for line in resp.iter_lines():
                if not line:
                    continue
                res = line.decode("utf-8")
                try:
                    # Each streamed line carries a 5-character prefix before the JSON.
                    obj = json.loads(res[5:])
                except ValueError as e:
                    if overloaded_marker in res:
                        raise ModelOverloadedError(
                            "Model is overloaded, please try again later."
                        ) from e
                    raise ChatError(f"Failed to parse response: {res}") from e
                if "generated_text" in obj:
                    token_text = obj["token"]["text"]
                    if token_text.endswith(eos):
                        # Drop exactly the end-of-sequence marker, nothing more.
                        token_text = token_text[: -len(eos)]
                    pieces.append(token_text)
                elif "error" in obj:
                    raise ChatError(obj["error"])

            # Best effort: summarize the conversation once and preserve the
            # context. A failure here must not lose the generated answer.
            try:
                if self.current_conversation in self.__not_summarize_cids:
                    self.summarize_conversation()
                    self.__not_summarize_cids.remove(self.current_conversation)
                self.__preserve_context(ref_cid=self.current_conversation)
            except Exception as e:
                logging.debug(f"Post-chat housekeeping failed: {e!r}")

            return "".join(pieces).strip()

    def __preserve_context(
        self, cid: str = None, ending: str = "1_", ref_cid: str = ""
    ):
        """Request a conversation's ``__data.json`` with the ``hf-chat`` cookie.

        Args:
            cid: conversation id; defaults to the current conversation.
            ending: value of the ``x-sveltekit-invalidated`` query parameter.
            ref_cid: conversation id for the ``Referer``; the chat root is
                used when empty.

        Returns a small status dict (200 on success, 500 otherwise). Raises
        KeyError when the session has no ``hf-chat`` cookie.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.203",
            "Accept": "*/*",
        }
        if ref_cid == "":
            headers["Referer"] = "https://huggingface.co/chat"
        else:
            headers["Referer"] = f"https://huggingface.co/chat/conversation/{ref_cid}"

        cookie = {
            "hf-chat": self.get_cookies()["hf-chat"],
        }
        if cid is None:
            cid = self.current_conversation
        url = f"https://huggingface.co/chat/conversation/{cid}/__data.json?x-sveltekit-invalidated={ending}"
        response = self.session.get(url, cookies=cookie, headers=headers)

        if response.status_code == 200:
            return {"message": "Context Successfully Preserved", "status": 200}
        return {"message": "Internal Error", "status": 500}
499
+
500
+
501
if __name__ == "__main__":
    # Small manual smoke test. ChatBot cannot be built without cookies, so the
    # cookie file path is taken from the command line.
    import sys

    if len(sys.argv) != 2:
        sys.exit("usage: api.py COOKIE_FILE.json")

    bot = ChatBot(cookie_path=sys.argv[1])
    message_content = bot.chat("Hello", max_new_tokens=10)
    print(message_content)
    summary = bot.summarize_conversation()
    print(summary)
    sharelink = bot.share_conversation()
    print(sharelink)
utils/openllmapi/exceptions.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class HuggingChatError(Exception):
    """
    Base class for every error defined in this module.

    Catch this to handle any of the specific errors below in one clause.
    """


class ModelOverloadedError(HuggingChatError):
    """
    HF Model Overloaded Error

    Raised when hf return response `{"error":"Model is overloaded","error_type":"overloaded"}`
    """


class ChatBotInitError(HuggingChatError):
    """
    ChatBot Init Error

    Raised when chatbot init failed
    """


class CreateConversationError(HuggingChatError):
    """
    Create Conversation Error

    Raised when create conversation failed
    """


class InvalidConversationIDError(HuggingChatError):
    """
    Invalid Conversation ID Error

    Raised when using an invalid conversation id
    """


class DeleteConversationError(HuggingChatError):
    """
    Delete Conversation Error

    Raised when delete conversation failed
    """


class ChatError(HuggingChatError):
    """
    Chat Error

    Raised when chat failed
    """
utils/openllmapi/login.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import os
3
+ import json
4
+ import logging
5
+ import re
6
+
7
+
8
class Login:
    """Sign in to huggingface.co with email and password to obtain chat cookies.

    Cookies received along the way are accumulated in ``self.cookies`` and can
    be saved to, or loaded from, ``<cookie_dir>/<email>.json``.
    """

    def __init__(self, email: str, passwd: str) -> None:
        """Store the credentials and prepare default headers and an empty cookie jar."""
        self.DEFAULT_PATH_DIR = os.path.dirname(os.path.abspath(__file__)) + "/usercookies"
        self.DEFAULT_COOKIE_PATH = self.DEFAULT_PATH_DIR + f"/{email}.json"

        self.email: str = email
        self.passwd: str = passwd
        self.headers = {
            "Referer": "https://huggingface.co/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.64",
        }
        self.cookies = requests.sessions.RequestsCookieJar()

    def requestsGet(self, url: str, params=None, allow_redirects=True) -> "requests.Response":
        """GET ``url`` with the stored headers and cookies; merge cookies from the response."""
        res = requests.get(
            url,
            params=params,
            headers=self.headers,
            cookies=self.cookies,
            allow_redirects=allow_redirects,
        )
        self.refreshCookies(res.cookies)
        return res

    def requestsPost(self, url: str, headers=None, params=None, data=None, stream=False,
                     allow_redirects=True) -> "requests.Response":
        """POST to ``url``; ``self.headers`` is used when ``headers`` is None.

        Cookies set by the response are merged into ``self.cookies``.
        """
        res = requests.post(
            url,
            stream=stream,
            params=params,
            data=data,
            headers=self.headers if headers is None else headers,
            cookies=self.cookies,
            allow_redirects=allow_redirects,
        )
        self.refreshCookies(res.cookies)
        return res

    def refreshCookies(self, cookies: "requests.sessions.RequestsCookieJar"):
        """Copy every name/value pair of ``cookies`` into ``self.cookies``."""
        for name, value in cookies.get_dict().items():
            self.cookies.set(name, value)

    def SigninWithEmail(self):
        """
        Login through your email and password.
        PS: the password is posted as a plain form field; the code adds no
        encryption of its own.

        Raises Exception when the server answers HTTP 400.
        """
        url = "https://huggingface.co/login"
        data = {
            "username": self.email,
            "password": self.passwd,
        }
        res = self.requestsPost(url=url, data=data, allow_redirects=False)
        if res.status_code == 400:
            raise Exception("wrong username or password")

    def getAuthURL(self):
        """Return the authorize URL announced by the chat login endpoint.

        The URL comes from the JSON body on HTTP 200 or from the ``Location``
        header on HTTP 303. Raises Exception when no URL is present or on any
        other status.
        """
        url = "https://huggingface.co/chat/login"
        headers = {
            "Referer": "https://huggingface.co/chat/login",
            "User-Agent": self.headers["User-Agent"],
            "Content-Type": "application/x-www-form-urlencoded",
        }
        res = self.requestsPost(url, headers=headers, allow_redirects=False)
        if res.status_code == 200:
            # .get() so a missing field reaches the explicit error below.
            location = res.json().get("location")
        elif res.status_code == 303:
            location = res.headers.get("Location")
        else:
            raise Exception("Something went wrong!")

        if not location:
            raise Exception("No authorize url found, please check your email or password.")
        return location

    def grantAuth(self, url: str) -> int:
        """Follow the authorize flow at ``url`` until the ``hf-chat`` cookie is issued.

        Returns 1 on success; raises Exception when any step answers with an
        unexpected status, no csrf token is found, or a redirect target is
        missing.
        """
        res = self.requestsGet(url, allow_redirects=False)
        if "location" in res.headers:
            res = self.requestsGet(res.headers["location"], allow_redirects=False)
            if "hf-chat" in res.cookies:
                return 1

        if res.status_code != 200:
            raise Exception("grant auth fatal!")
        csrf = re.findall('/oauth/authorize.*?name="csrf" value="(.*?)"', res.text)
        if not csrf:
            raise Exception("No csrf found!")

        res = self.requestsPost(url, data={"csrf": csrf[0]}, allow_redirects=False)
        if res.status_code != 303:
            raise Exception(f"get hf-chat cookies fatal! - {res.status_code}")

        location = res.headers.get("Location")
        if not location:
            # Fail with a clear message instead of requesting the URL None.
            raise Exception("get hf-chat cookies fatal! - no redirect location")

        res = self.requestsGet(location, allow_redirects=False)
        if res.status_code != 302:
            raise Exception(f"get hf-chat cookie fatal! - {res.status_code}")
        return 1

    def login(self) -> "requests.sessions.RequestsCookieJar":
        """Run the whole sign-in flow and return the collected cookies."""
        self.SigninWithEmail()
        location = self.getAuthURL()
        if self.grantAuth(location):
            return self.cookies
        raise Exception(f"Grant auth fatal, please check your email or password\ncookies gained: \n{self.cookies}")

    def saveCookiesToDir(self, cookie_dir_path: str = None) -> str:
        """
        cookies will be saved into: cookie_dir_path/<email>.json

        The directory is created when missing; ``DEFAULT_PATH_DIR`` is used
        when no path is given. Returns the path of the written file.
        """
        cookie_dir_path = cookie_dir_path or self.DEFAULT_PATH_DIR
        cookie_path = os.path.join(cookie_dir_path, f"{self.email}.json")
        if not os.path.isdir(cookie_dir_path):
            logging.info("Cookie directory not exist, creating...")
        # exist_ok avoids failing if the directory appears between check and creation.
        os.makedirs(cookie_dir_path, exist_ok=True)
        logging.info(f"Cookie store path: {cookie_path}")

        with open(cookie_path, "w", encoding="utf-8") as f:
            json.dump(self.cookies.get_dict(), f)
        return cookie_path

    def _getCookiePath(self, cookie_dir_path) -> str:
        """Return the path of ``<email>.json`` inside ``cookie_dir_path``, or "" when absent."""
        candidate = os.path.join(cookie_dir_path, f"{self.email}.json")
        return candidate if os.path.isfile(candidate) else ""

    def loadCookiesFromDir(self, cookie_dir_path: str = None) -> "requests.sessions.RequestsCookieJar":
        """
        cookie files needs to be named as: cookie_dir_path/<email>.json

        Loads the JSON object of name -> value pairs into ``self.cookies`` and
        returns it. Raises Exception when the file is missing or malformed.
        """
        cookie_dir_path = cookie_dir_path or self.DEFAULT_PATH_DIR
        cookie_path = self._getCookiePath(cookie_dir_path)
        if not cookie_path:
            raise Exception(f"Cookie not found. please check the path given: {cookie_dir_path}.\n" +
                            f"Cookie file must be named like this: 'your_email'+'.json': '{self.email}.json'")

        with open(cookie_path, "r", encoding="utf-8") as f:
            try:
                js = json.load(f)
                for name in js.keys():
                    self.cookies.set(name, js[name])
                    logging.info(f"{name} loaded")
                return self.cookies
            except Exception as e:
                # Same public error as before, but keep the cause and let
                # KeyboardInterrupt/SystemExit propagate.
                raise Exception("load cookies from files fatal. Please check the format") from e
179
+
180
+
181
if __name__ == "__main__":
    # Credentials come from the environment; each is None when its variable is unset.
    EMAIL, PASSWD = os.getenv("EMAIL"), os.getenv("PASSWD")