pvanand committed on
Commit
9fa0975
1 Parent(s): 5c4af3f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +4 -3
main.py CHANGED
@@ -70,7 +70,7 @@ def limit_tokens(input_string, token_limit=6000):
70
  def calculate_tokens(msgs):
71
  return sum(len(encoding.encode(str(m))) for m in msgs)
72
 
73
- def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
74
  while calculate_tokens(messages) > (8000 - max_output_tokens):
75
  if len(messages) > max_llm_history:
76
  messages = [messages[0]] + messages[-max_llm_history:]
@@ -80,7 +80,7 @@ def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, m
80
  raise ValueError("Unable to reduce message length below token limit")
81
 
82
  try:
83
- response = or_client.chat.completions.create(
84
  model=model,
85
  messages=messages,
86
  max_tokens=max_output_tokens,
@@ -88,7 +88,7 @@ def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, m
88
  )
89
 
90
  full_response = ""
91
- for chunk in response:
92
  if chunk.choices[0].delta.content is not None:
93
  content = chunk.choices[0].delta.content
94
  full_response += content
@@ -100,6 +100,7 @@ def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, m
100
  except Exception as e:
101
  raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
102
 
 
103
  async def verify_api_key(api_key: str = Security(api_key_header)):
104
  if api_key != API_KEY:
105
  raise HTTPException(status_code=403, detail="Could not validate credentials")
 
70
  def calculate_tokens(msgs):
71
  return sum(len(encoding.encode(str(m))) for m in msgs)
72
 
73
+ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
74
  while calculate_tokens(messages) > (8000 - max_output_tokens):
75
  if len(messages) > max_llm_history:
76
  messages = [messages[0]] + messages[-max_llm_history:]
 
80
  raise ValueError("Unable to reduce message length below token limit")
81
 
82
  try:
83
+ response = await or_client.chat.completions.create(
84
  model=model,
85
  messages=messages,
86
  max_tokens=max_output_tokens,
 
88
  )
89
 
90
  full_response = ""
91
+ async for chunk in response:
92
  if chunk.choices[0].delta.content is not None:
93
  content = chunk.choices[0].delta.content
94
  full_response += content
 
100
  except Exception as e:
101
  raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
102
 
103
+
104
  async def verify_api_key(api_key: str = Security(api_key_header)):
105
  if api_key != API_KEY:
106
  raise HTTPException(status_code=403, detail="Could not validate credentials")