File size: 1,299 Bytes
e5b3236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import time
import streamlit as st

# Price in USD per 1,000 tokens; stream_handler multiplies its token estimate
# by this value (divided by 1000) to display the cost of a response.
# NOTE(review): the derivation of 0.139 / 80 is not documented in this file —
# confirm against the provider's pricing.
COST_PER_1000_TOKENS_USD = 0.139 / 80


def stream_handler(session_state, chat_stream, prompt, placeholder):
    """Stream a chat response into ``placeholder`` and report usage metrics.

    Consumes ``chat_stream`` chunk by chunk, appending each chunk's
    ``token.text`` to the running response and re-rendering it (with a
    trailing cursor glyph) on ``placeholder``. Streaming stops at the first
    end-of-sequence marker (``"</s>"`` or ``"<|im_end|>"``) or when the
    stream is exhausted. Afterwards three Streamlit columns are written with
    the throughput, the estimated token count and the estimated cost, and the
    same figures are recorded in ``session_state``.

    Token counts are estimates based on whitespace-separated words: the
    throughput uses the response word count, while the total uses
    (prompt words + response words) * 1.25.

    Args:
        session_state: Mapping-like object supporting item get/set. Receives
            ``"tps"`` and an updated running total under ``"tokens_used"``
            (a missing ``"tokens_used"`` entry is treated as 0).
        chat_stream: Iterable of chunks exposing ``chunk.token.text``.
        prompt: The prompt string, used only for the token estimate.
        placeholder: Object with a ``markdown(text)`` method used to render
            the partial and final response.

    Returns:
        The full response text accumulated from the stream.
    """
    stop_tokens = ("</s>", "<|im_end|>")

    # perf_counter is monotonic, so the measured duration can never be
    # negative because of a wall-clock adjustment.
    start_time = time.perf_counter()
    full_response = ""

    for chunk in chat_stream:
        token_text = chunk.token.text
        if token_text in stop_tokens:
            break
        full_response += token_text
        placeholder.markdown(full_response + "▌")
    # Final render without the cursor glyph.
    placeholder.markdown(full_response)

    elapsed_time = time.perf_counter() - start_time
    response_words = len(full_response.split())
    # Guard against a zero duration (e.g. an empty stream on a coarse clock),
    # which previously raised ZeroDivisionError.
    if elapsed_time > 0:
        tokens_per_second = response_words // elapsed_time
    else:
        tokens_per_second = 0.0
    # Rough words -> tokens conversion covering both prompt and response.
    len_response = (len(prompt.split()) + response_words) * 1.25

    col1, col2, col3 = st.columns(3)

    with col1:
        st.write(f"**{tokens_per_second} tokens/second**")

    with col2:
        st.write(f"**{int(len_response)} tokens generated**")

    with col3:
        st.write(
            f"**$ {round(len_response * COST_PER_1000_TOKENS_USD / 1000, 5)} cost incurred**"
        )

    session_state["tps"] = tokens_per_second
    # Start the running total at 0 if the caller has not initialised it yet.
    try:
        previous_tokens = session_state["tokens_used"]
    except KeyError:
        previous_tokens = 0
    session_state["tokens_used"] = len_response + previous_tokens

    return full_response