Update llm/oai.py
Browse files
- llm/oai.py +24 -1
llm/oai.py
CHANGED
|
@@ -129,7 +129,30 @@ class TextChatAtOAI(BaseFnCallModel):
|
|
| 129 |
messages = self.convert_messages_to_dicts(messages)
|
| 130 |
|
| 131 |
try:
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
if delta_stream:
|
| 134 |
for chunk in response:
|
| 135 |
if chunk.choices:
|
|
|
|
| 129 |
messages = self.convert_messages_to_dicts(messages)
|
| 130 |
|
| 131 |
try:
|
| 132 |
+
MAX_RETRIES = 5
|
| 133 |
+
INITIAL_DELAY = 2
|
| 134 |
+
response = None
|
| 135 |
+
|
| 136 |
+
for attempt in range(MAX_RETRIES):
|
| 137 |
+
try:
|
| 138 |
+
response = self._chat_complete_create(model=self.model, messages=messages, stream=True, **generate_cfg)
|
| 139 |
+
break
|
| 140 |
+
|
| 141 |
+
except RateLimitError as ex:
|
| 142 |
+
if attempt == MAX_RETRIES - 1:
|
| 143 |
+
logger.error(f"API rate limit error after {MAX_RETRIES} retries. Raising exception.")
|
| 144 |
+
raise ModelServiceError(exception=ex) from ex
|
| 145 |
+
|
| 146 |
+
delay = INITIAL_DELAY * (2 ** attempt) + random.uniform(0, 1)
|
| 147 |
+
logger.warning(
|
| 148 |
+
f"Rate limit exceeded. Retrying in {delay:.2f} seconds... (Attempt {attempt + 1}/{MAX_RETRIES})"
|
| 149 |
+
)
|
| 150 |
+
time.sleep(delay)
|
| 151 |
+
|
| 152 |
+
except OpenAIError as ex:
|
| 153 |
+
logger.error(f"An OpenAI error occurred: {ex}")
|
| 154 |
+
raise ModelServiceError(exception=ex) from ex
|
| 155 |
+
# response = self._chat_complete_create(model=self.model, messages=messages, stream=True, **generate_cfg)
|
| 156 |
if delta_stream:
|
| 157 |
for chunk in response:
|
| 158 |
if chunk.choices:
|