Spaces:

LVKinyanjui
/

FastInferenceChat

Sleeping

File size: 1,154 Bytes

e35ef75

# CREDITS
# https://gist.github.com/truevis/f31706b8af60e8c73d62b281bddb988f

import streamlit as st
from groq import Groq

import os
# Module-level Groq client used by generate_response. The API key is taken
# from the GROQ_API_KEY environment variable; None is passed when it is unset.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

def generate_response(user_input):
    """Stream the model's reply to ``user_input`` as text fragments.

    A two-message conversation (a fixed system prompt followed by the user's
    message) is sent through the module-level ``client`` with ``stream=True``.
    Because this is a generator, the request is only issued once iteration
    starts.

    Args:
        user_input: Text placed in the ``content`` of the user-role message.

    Yields:
        Each non-empty ``delta.content`` value taken from the first choice of
        every streamed chunk, in arrival order.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": user_input},
    ]

    # max_tokens is intentionally not passed, so the API's own default applies.
    completion_chunks = client.chat.completions.create(
        model="llama-3.2-3b-preview",  # described as a 128K model by the original author
        messages=conversation,
        temperature=0.1,
        top_p=1,
        stream=True,
        stop=None,
    )

    for piece in completion_chunks:
        fragment = piece.choices[0].delta.content
        if not fragment:
            # Skip chunks whose delta carries no text (None or empty string).
            continue
        yield fragment

# Page layout: a title plus a chat box. Nothing below the input is rendered
# unless the chat box produced a truthy (non-empty) message on this run.
st.title("Groq API Response Streaming")
user_input = st.chat_input("Message to Assistant...", key="prompt_input")

if user_input:
    # The spinner is shown while the block below runs.
    with st.spinner("Generating response..."):
        # Hand the generator to st.write_stream so the streamed text is
        # displayed by Streamlit rather than collected here first.
        reply_fragments = generate_response(user_input)
        st.write_stream(reply_fragments)

        # Echo the submitted message underneath the reply.
        st.markdown(f"Message: {user_input}")

        # "---" is a Markdown horizontal rule, used as a visual separator.
        st.markdown("---")