Spaces:

Niansuh
/

HFLLMAPI

Running

Husnain committed on Apr 20, 2024

Commit

a44e5b9

unverified ·

1 Parent(s): 97134c0

⚡ [Enhance] Quieter openai auth, use cffi to request hf-chat id, and …

Files changed (1) hide show

networks/openai_streamer.py CHANGED Viewed

@@ -171,18 +171,21 @@ class OpenaiStreamer:
     def check_token_limit(self, messages: list[dict]):
         token_limit = TOKEN_LIMIT_MAP[self.model]
-        token_redundancy = int(
-            token_limit - TOKEN_RESERVED - self.count_tokens(messages)
-        )
         if token_redundancy <= 0:
-            raise ValueError(f"Prompt exceeded token limit: {token_limit}")
         return True
-    def chat_response(self, messages: list[dict]):
         self.check_token_limit(messages)
         requester = OpenaiRequester()
         requester.auth()
-        return requester.chat_completions(messages, verbose=False)
     def chat_return_generator(self, stream_response: requests.Response, verbose=False):
         content_offset = 0
@@ -203,6 +206,7 @@ class OpenaiStreamer:
                 is_finished = True
             else:
                 content_type = "Completions"
                 try:
                     data = json.loads(line, strict=False)
                     message_role = data["message"]["author"]["role"]

     def check_token_limit(self, messages: list[dict]):
         token_limit = TOKEN_LIMIT_MAP[self.model]
+        token_count = self.count_tokens(messages)
+        token_redundancy = int(token_limit - TOKEN_RESERVED - token_count)
         if token_redundancy <= 0:
+            raise ValueError(
+                f"Prompt exceeded token limit: {token_count} > {token_limit}"
+            )
         return True
+    def chat_response(self, messages: list[dict], verbose=False):
         self.check_token_limit(messages)
+        logger.enter_quiet(not verbose)
         requester = OpenaiRequester()
         requester.auth()
+        logger.exit_quiet(not verbose)
+        return requester.chat_completions(messages, verbose=verbose)
     def chat_return_generator(self, stream_response: requests.Response, verbose=False):
         content_offset = 0
                 is_finished = True
             else:
                 content_type = "Completions"
+                delta_content = ""
                 try:
                     data = json.loads(line, strict=False)
                     message_role = data["message"]["author"]["role"]