pvanand committed
Commit f8ac6db
1 Parent(s): bc4a455

Update main.py

Files changed (1)
1. main.py +6 -7
main.py CHANGED

```diff
@@ -70,7 +70,7 @@ def limit_tokens(input_string, token_limit=6000):
 def calculate_tokens(msgs):
     return sum(len(encoding.encode(str(m))) for m in msgs)
 
-async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
+def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
     while calculate_tokens(messages) > (8000 - max_output_tokens):
         if len(messages) > max_llm_history:
             messages = [messages[0]] + messages[-max_llm_history:]
@@ -78,10 +78,10 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
             max_llm_history -= 1
             if max_llm_history < 2:
                 error_message = "Token limit exceeded. Please shorten your input or start a new conversation."
-                raise HTTPException(status_code=400, detail=error_message)
+                raise HTTPException(status_code=400, detail=error_message)
 
     try:
-        response = await or_client.chat.completions.create(
+        response = or_client.chat.completions.create(
             model=model,
             messages=messages,
             max_tokens=max_output_tokens,
@@ -89,7 +89,7 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
         )
 
         full_response = ""
-        async for chunk in response:
+        for chunk in response:
             if chunk.choices[0].delta.content is not None:
                 content = chunk.choices[0].delta.content
                 full_response += content
@@ -100,7 +100,6 @@ async def chat_with_llama_stream(messages, model="gpt-3.5-turbo", max_llm_history=4, max_output_tokens=2500):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
 
-
 async def verify_api_key(api_key: str = Security(api_key_header)):
     if api_key != API_KEY:
         raise HTTPException(status_code=403, detail="Could not validate credentials")
@@ -176,9 +175,9 @@ async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks,
     # Limit tokens in the conversation history
     limited_conversation = conversations[query.conversation_id]
 
-    async def process_response():
+    def process_response():
         full_response = ""
-        async for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
+        for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
             full_response += content
             yield content
         background_tasks.add_task(update_db, query.user_id, query.conversation_id, query.user_query, full_response)
```
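The net effect of the diff: `chat_with_llama_stream` and the endpoint's inner `process_response` go from `async def` coroutines to plain synchronous generators, so the `await` on the completion call and both `async for` loops are dropped. That only works if `or_client` is a synchronous client. Below is a minimal sketch of how such a client might be built, assuming the `openai` v1 SDK pointed at OpenRouter; the base URL, the env-var name, and the `stream=True` flag (presumably on the elided line 88) are assumptions, not shown in this diff:

```python
# Sketch only -- the diff does not show how or_client is created.
# Assumes the openai>=1.0 SDK; the OpenRouter base URL and the
# OPENROUTER_API_KEY env-var name are guesses, not taken from main.py.
import os
from openai import OpenAI

or_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",   # assumed OpenRouter endpoint
    api_key=os.environ["OPENROUTER_API_KEY"],  # assumed env-var name
)

# With a synchronous client, create(..., stream=True) returns a plain
# iterator of chunks, which is why the new code can use a bare
# `for chunk in response:` instead of `async for`.
```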
 
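Downstream, a synchronous generator can still be streamed from an `async def` FastAPI endpoint, because `StreamingResponse` drives plain generators in a threadpool. A sketch of how `coding_assistant` could return it; the route path, the `Depends(verify_api_key)` wiring, and the `media_type` are assumptions, while the function and variable names come from the diff context:

```python
# Sketch only: relies on app, QueryModel, conversations, update_db,
# chat_with_llama_stream and verify_api_key defined elsewhere in main.py.
from fastapi import BackgroundTasks, Depends
from fastapi.responses import StreamingResponse

@app.post("/coding_assistant")  # assumed route path
async def coding_assistant(query: QueryModel, background_tasks: BackgroundTasks,
                           api_key: str = Depends(verify_api_key)):  # assumed wiring
    ...  # conversation bookkeeping elided, as in the diff
    limited_conversation = conversations[query.conversation_id]

    def process_response():
        full_response = ""
        for content in chat_with_llama_stream(limited_conversation, model=query.model_id):
            full_response += content
            yield content
        # add_task is called once the stream is exhausted; FastAPI then
        # runs the task after the response has finished sending.
        background_tasks.add_task(update_db, query.user_id, query.conversation_id,
                                  query.user_query, full_response)

    # StreamingResponse iterates sync generators via a threadpool, so the
    # endpoint itself can stay `async def`.
    return StreamingResponse(process_response(), media_type="text/plain")  # assumed media type
```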