import time

import streamlit as st

# NOTE(review): the derivation of 0.139 / 80 is not documented in this file —
# confirm against the pricing source before changing it.
COST_PER_1000_TOKENS_USD = 0.139 / 80

# Multiplier applied to whitespace-separated word counts to estimate tokens.
_TOKENS_PER_WORD = 1.25


def stream_handler(session_state, chat_stream, prompt: str, placeholder) -> str:
    """Stream a chat response into ``placeholder`` and report usage statistics.

    The response is rendered incrementally (with a trailing cursor glyph)
    while chunks arrive, then rendered once more without the cursor. After
    streaming, three Streamlit columns show throughput, the estimated token
    count and the estimated cost.

    Args:
        session_state: Mapping-like object; ``"tps"`` is overwritten and
            ``"tokens_used"`` is incremented by this call's estimate. A
            missing ``"tokens_used"`` entry is treated as 0.
        chat_stream: Iterable of chunks exposing ``chunk.token.text``.
        prompt: The prompt text; its word count is included in the estimate.
        placeholder: Object with a ``markdown(text)`` method used for output.

    Returns:
        The full concatenated response text.
    """
    # perf_counter is monotonic, so the measurement cannot go negative or
    # jump if the wall clock is adjusted while the response is streaming.
    start_time = time.perf_counter()

    full_response = ""
    for chunk in chat_stream:
        text = chunk.token.text
        # Skip empty (or missing) fragments: nothing new to render.
        if text:
            full_response += text
            placeholder.markdown(full_response + "▌")
    placeholder.markdown(full_response)

    elapsed_time = time.perf_counter() - start_time

    response_words = len(full_response.split())
    # Throughput is based on the response word count, floored to a whole
    # number. An empty or instantaneous stream can give a zero duration;
    # report 0.0 rather than raising ZeroDivisionError.
    if elapsed_time > 0:
        tokens_per_second = response_words // elapsed_time
    else:
        tokens_per_second = 0.0

    # The usage estimate covers prompt and response words together.
    len_response = (len(prompt.split()) + response_words) * _TOKENS_PER_WORD

    col1, col2, col3 = st.columns(3)
    with col1:
        st.write(f"**{tokens_per_second} tokens/second**")
    with col2:
        st.write(f"**{int(len_response)} tokens generated**")
    with col3:
        st.write(
            f"**$ {round(len_response * COST_PER_1000_TOKENS_USD / 1000, 5)} cost incurred**"
        )

    session_state["tps"] = tokens_per_second
    # Only item access is assumed on session_state. Default the running total
    # to 0 so a first call does not fail after the response has streamed.
    try:
        previous_tokens = session_state["tokens_used"]
    except KeyError:
        previous_tokens = 0
    session_state["tokens_used"] = len_response + previous_tokens

    return full_response