# FastInferenceChat / st_long_context_basic.py
# (main app file, renamed in commit e35ef75)
# CREDITS
# https://gist.github.com/truevis/f31706b8af60e8c73d62b281bddb988f
import streamlit as st
from groq import Groq
import os
# Groq API client, authenticated via the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def generate_response(
    user_input,
    model="llama-3.2-3b-preview",  # 128K-context model (original hard-coded value)
    system_prompt="You are a helpful assistant",
):
    """Stream the assistant's reply to *user_input* from the Groq API.

    Args:
        user_input: The user's message text.
        model: Groq model id to query. Defaults to the original
            hard-coded 128K-context Llama model, so existing callers
            are unaffected.
        system_prompt: System message framing the assistant's behavior.

    Yields:
        str: Incremental chunks of the assistant's response as they
        arrive from the streaming API.
    """
    stream = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input},
        ],
        temperature=0.1,  # low temperature for near-deterministic replies
        # max_tokens=128000,
        top_p=1,
        stream=True,  # server-side streaming so we can yield per-chunk
        stop=None,
    )
    for chunk in stream:
        content = chunk.choices[0].delta.content
        # delta.content is None on role/metadata-only chunks; skip those.
        if content:
            yield content
# --- Streamlit page layout ---
st.title("Groq API Response Streaming")

# Chat-style input box at the bottom of the page; returns None until
# the user submits a message.
user_input = st.chat_input('Message to Assistant...', key='prompt_input')

if user_input:
    with st.spinner("Generating response..."):
        # Render the assistant's tokens as they stream in from Groq.
        st.write_stream(generate_response(user_input))
        # Echo the submitted prompt beneath the response, then a divider.
        st.markdown("Message: " + user_input)
        st.markdown("---")