# FastInferenceChat / st_long_context_basic.py
# (main app file, renamed in commit e35ef75)
# CREDITS
# https://gist.github.com/truevis/f31706b8af60e8c73d62b281bddb988f
import streamlit as st
from groq import Groq
import os
# Groq API client, authenticated via the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def generate_response(
    user_input,
    model="llama-3.2-3b-preview",  # 128K-context model (original hard-coded value)
    system_prompt="You are a helpful assistant",
):
    """Stream the assistant's reply to *user_input* from the Groq API.

    Args:
        user_input: The user's message text.
        model: Groq model id to query. Defaults to the original
            hard-coded 128K-context Llama model, so existing callers
            are unaffected.
        system_prompt: System message framing the assistant's behavior.

    Yields:
        str: Incremental chunks of the assistant's response as they
        arrive from the streaming API.
    """
    stream = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input},
        ],
        temperature=0.1,  # low temperature for near-deterministic replies
        # max_tokens=128000,
        top_p=1,
        stream=True,  # server-side streaming so we can yield per-chunk
        stop=None,
    )
    for chunk in stream:
        content = chunk.choices[0].delta.content
        # delta.content is None on role/metadata-only chunks; skip those.
        if content:
            yield content
# --- Streamlit page layout ---
st.title("Groq API Response Streaming")

# Chat-style input box at the bottom of the page; returns None until
# the user submits a message.
user_input = st.chat_input('Message to Assistant...', key='prompt_input')

if user_input:
    with st.spinner("Generating response..."):
        # Render the assistant's tokens as they stream in from Groq.
        st.write_stream(generate_response(user_input))
        # Echo the submitted prompt beneath the response, then a divider.
        st.markdown("Message: " + user_input)
        st.markdown("---")