pvanand committed
Commit bc4a455
Parent: 9fa0975

Update main.py

Files changed (1)
main.py  +2 -8
main.py CHANGED
@@ -77,7 +77,8 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_histor
     else:
         max_llm_history -= 1
         if max_llm_history < 2:
-            raise ValueError("Unable to reduce message length below token limit")
+            error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
+            raise HTTPException(status_code=400, detail=error_message)
 
     try:
         response = await or_client.chat.completions.create(
@@ -96,7 +97,6 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_histor
 
         # After streaming, add the full response to the conversation history
         messages.append({"role": "assistant", "content": full_response})
-        return full_response
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
 
@@ -175,12 +175,6 @@ async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks,
 
     # Limit tokens in the conversation history
     limited_conversation = conversations[query.conversation_id]
-    while calculate_tokens(limited_conversation) > 8000:
-        if len(limited_conversation) > 2:  # Keep at least the system message and the latest user message
-            limited_conversation.pop(1)
-        else:
-            error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
-            raise HTTPException(status_code=400, detail=error_message)
 
     async def process_response():
         full_response = ""
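
Taken together, the hunks replace the generic ValueError with a FastAPI HTTPException, so a request whose history cannot be trimmed under the token budget now fails with a 400 and a readable message rather than an unhandled exception surfacing as a 500. The `return full_response` is dropped, presumably because the function streams its output and keeps the full text only for the conversation history, and the duplicate 8000-token trimming loop in coding_assistant is removed in favor of the guard inside chat_with_llama_stream. A minimal sketch of the new error path, using a hypothetical standalone endpoint rather than the repo's actual route:

# Hypothetical demo endpoint, not from main.py: it only mirrors the guard
# added in this commit. FastAPI converts HTTPException into a structured
# JSON error response with the given status code, whereas an uncaught
# ValueError would surface to the client as a generic 500.
from fastapi import FastAPI, HTTPException

app = FastAPI()

@app.get("/guard-demo")
async def guard_demo(max_llm_history: int = 10):
    # Refuse rather than crash when the history window cannot shrink further.
    if max_llm_history < 2:
        error_message = ("Token limit exceeded. Please shorten your input "
                         "or start a new conversation.")
        raise HTTPException(status_code=400, detail=error_message)
    return {"ok": True, "history_window": max_llm_history}

With this in place, GET /guard-demo?max_llm_history=1 returns status 400 with body {"detail": "Token limit exceeded. Please shorten your input or start a new conversation."}, which is what callers of the updated endpoint should now observe in the token-overflow case.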