Update main.py
main.py CHANGED
@@ -70,7 +70,7 @@ def limit_tokens(input_string, token_limit=6000):
 def calculate_tokens(msgs):
     return sum(len(encoding.encode(str(m))) for m in msgs)
 
-def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
+async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
     while calculate_tokens(messages) > (8000 - max_output_tokens):
         if len(messages) > max_llm_history:
             messages = [messages[0]] + messages[-max_llm_history:]
@@ -80,7 +80,7 @@ def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, m
             raise ValueError("Unable to reduce message length below token limit")
 
     try:
-        response = or_client.chat.completions.create(
+        response = await or_client.chat.completions.create(
             model=model,
             messages=messages,
             max_tokens=max_output_tokens,
@@ -88,7 +88,7 @@ def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, m
         )
 
         full_response = ""
-        for chunk in response:
+        async for chunk in response:
             if chunk.choices[0].delta.content is not None:
                 content = chunk.choices[0].delta.content
                 full_response += content
@@ -100,6 +100,7 @@ def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, m
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
 
+
 async def verify_api_key(api_key: str = Security(api_key_header)):
     if api_key != API_KEY:
         raise HTTPException(status_code=403, detail="Could not validate credentials")
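For context, the unchanged hunk lines show how the Space keeps requests inside the model's context window: it counts tokens over the stringified messages and, while the total exceeds 8000 minus the output budget, drops everything except the first message (the system prompt) and the last max_llm_history turns. A minimal runnable sketch follows; the tiktoken encoding name is an assumption (main.py loads `encoding` outside these hunks), and the progress guard is an addition, since the committed loop would repeat the same no-op trim forever if the shortened history still exceeded the budget.

import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")  # assumed; defined elsewhere in main.py

def calculate_tokens(msgs):
    # Token count over the stringified messages, as in the diff.
    return sum(len(encoding.encode(str(m))) for m in msgs)

def trim_history(messages, max_llm_history=4, max_output_tokens=2500):
    # Keep messages[0] (the system prompt) plus the newest max_llm_history
    # turns until the input fits 8000 tokens minus the output budget.
    while calculate_tokens(messages) > (8000 - max_output_tokens):
        if len(messages) > max_llm_history + 1:  # +1 guard so each pass actually shrinks the list
            messages = [messages[0]] + messages[-max_llm_history:]
        else:
            raise ValueError("Unable to reduce message length below token limit")
    return messages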
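The substance of the commit is the sync-to-async conversion: the def becomes async def, the client call gains await, and the chunk loop becomes async for. Below is a sketch of that pattern in isolation, assuming or_client is an openai.AsyncOpenAI instance (the `or_` prefix suggests OpenRouter) and that the elided line 87 passes stream=True; neither the client construction nor that line is visible in the diff. The displayed hunks only accumulate full_response, and the elided lines 95-99 presumably yield the chunks to the caller, so this sketch simply returns the accumulated text.

import asyncio
from openai import AsyncOpenAI

or_client = AsyncOpenAI(
    base_url="https://openrouter.ai/api/v1",  # assumed OpenRouter endpoint
    api_key="sk-or-...",                      # placeholder credential
)

async def stream_reply(messages, model="gpt-3.5-turbo", max_output_tokens=2500):
    # On the async client, create() must be awaited; with stream=True it
    # resolves to an async iterator of deltas instead of a full completion.
    response = await or_client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_output_tokens,
        stream=True,
    )
    full_response = ""
    async for chunk in response:
        if chunk.choices[0].delta.content is not None:
            full_response += chunk.choices[0].delta.content
    return full_response

if __name__ == "__main__":
    print(asyncio.run(stream_reply([{"role": "user", "content": "Hello"}])))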
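The tail of the diff is FastAPI's header-based key check, unchanged apart from the blank line added above it. Here is a self-contained sketch of how such a dependency is typically wired up; the header name and the environment-variable source of API_KEY are assumptions, since both api_key_header and API_KEY are defined outside the displayed hunks.

import os
from fastapi import Depends, FastAPI, HTTPException, Security
from fastapi.security import APIKeyHeader

API_KEY = os.environ.get("API_KEY", "")          # assumed to come from the environment
api_key_header = APIKeyHeader(name="X-API-Key")  # header name is an assumption

async def verify_api_key(api_key: str = Security(api_key_header)):
    if api_key != API_KEY:
        raise HTTPException(status_code=403, detail="Could not validate credentials")
    return api_key

app = FastAPI()

@app.get("/health")
async def health(_: str = Depends(verify_api_key)):
    # Any route can require the key by depending on verify_api_key.
    return {"ok": True}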