# mixtral-search/components/stream_handler.py
# File-viewer residue kept for provenance: last commit "removed end token"
# (3fbf93c), shown next to pragneshbarik's avatar; file size 1.3 kB.
import time
import streamlit as st
# USD price applied per 1000 tokens when stream_handler estimates request cost.
# NOTE(review): the derivation of 0.139 / 80 is not documented in this file —
# confirm against the pricing source it was taken from.
COST_PER_1000_TOKENS_USD = 0.139 / 80
def stream_handler(session_state, chat_stream, prompt: str, placeholder) -> str:
    """Stream a chat completion into ``placeholder`` and report usage stats.

    Each chunk's ``token.text`` is appended to the running response and the
    placeholder is re-rendered with a trailing cursor glyph. Streaming stops at
    the first end-of-sequence marker (``</s>`` or ``<|im_end|>``), which is not
    added to the response. Afterwards three Streamlit columns show the
    throughput, the estimated token count and the estimated cost, and the
    figures are recorded in ``session_state``.

    Token counts are estimates: the whitespace-separated word count of the
    prompt plus the response, multiplied by 1.25. Throughput is response words
    per second, floor-divided.

    Args:
        session_state: Mapping-like object; ``"tps"`` is overwritten and
            ``"tokens_used"`` is incremented (treated as 0 when absent).
        chat_stream: Iterable of chunks exposing ``chunk.token.text``.
        prompt: Prompt text, used only for the token estimate.
        placeholder: Object with a ``markdown(text)`` method that receives the
            partial and final response.

    Returns:
        The full response text, for the caller's own token accounting.
    """
    stop_tokens = ("</s>", "<|im_end|>")
    # U+258C LEFT HALF BLOCK used as a typing cursor; written as an escape so
    # the glyph cannot be corrupted by a mis-decoded source file.
    cursor = "\u258c"

    # perf_counter is monotonic, so the interval can never come out negative.
    start_time = time.perf_counter()
    full_response = ""
    for chunk in chat_stream:
        text = chunk.token.text
        if text in stop_tokens:
            break
        full_response += text
        placeholder.markdown(full_response + cursor)
    # Final render without the cursor.
    placeholder.markdown(full_response)
    elapsed_time = time.perf_counter() - start_time

    response_words = len(full_response.split())
    # An empty or instantly exhausted stream can give a zero interval; report
    # zero throughput instead of raising ZeroDivisionError.
    if elapsed_time > 0:
        tokens_per_second = response_words // elapsed_time
    else:
        tokens_per_second = 0.0

    estimated_tokens = (len(prompt.split()) + response_words) * 1.25
    estimated_cost = round(estimated_tokens * COST_PER_1000_TOKENS_USD / 1000, 5)

    col1, col2, col3 = st.columns(3)
    with col1:
        st.write(f"**{tokens_per_second} tokens/second**")
    with col2:
        st.write(f"**{int(estimated_tokens)} tokens generated**")
    with col3:
        st.write(f"**$ {estimated_cost} cost incurred**")

    session_state["tps"] = tokens_per_second
    # Tolerate a session that has not initialised the running total yet.
    try:
        previous_tokens = session_state["tokens_used"]
    except KeyError:
        previous_tokens = 0
    session_state["tokens_used"] = estimated_tokens + previous_tokens
    return full_response