import streamlit as st

from clients import OllamaClient, NvidiaClient, GroqClient

st.set_page_config(
    page_title="QA Inference Streamlit App using Ollama, Nvidia and Groq APIs"
)


# Cache the header of the app to prevent re-rendering on each rerun
@st.cache_resource
def display_app_header():
    """Display the header of the Streamlit app."""
    st.title("QA Inference with Ollama, Nvidia & Groq as LLM providers")
    st.subheader("ChatBot based on the providers' OpenAI-like APIs and clients")


# Display the header of the app
display_app_header()

# UI sidebar ##########################################
st.sidebar.subheader("Models")

# LLM provider selection
llm_providers = {
    "Local Ollama": "ollama",
    "Cloud Nvidia": "nvidia",
    "Cloud Groq": "groq",
}
llm_provider = st.sidebar.radio(
    "Choose your LLM provider", llm_providers.keys(), key="llm_provider"
)

if llm_provider == "Local Ollama":
    ollama_list_models = OllamaClient().list_models()
    if ollama_list_models:
        ollama_models = [x["name"] for x in ollama_list_models["models"]]
        # Retrieve the selection later with st.session_state["ollama_llm"]
        ollama_llm = st.sidebar.radio(
            "Select your Ollama model", ollama_models, key="ollama_llm"
        )
    else:
        st.sidebar.error("Ollama is not running")
elif llm_provider == "Cloud Nvidia":
    if nvidia_api_token := st.sidebar.text_input("Enter your Nvidia API key"):
        st.sidebar.info("Nvidia authentication ok")
        # An API key is not needed to list the available Nvidia models
        nvidia_list_models = NvidiaClient().list_models()
        nvidia_models = [x["id"] for x in nvidia_list_models["data"]]
        nvidia_llm = st.sidebar.radio(
            "Select your Nvidia LLM", nvidia_models, key="nvidia_llm"
        )
    else:
        st.sidebar.warning("You must enter your Nvidia API key")
elif llm_provider == "Cloud Groq":
    if groq_api_token := st.sidebar.text_input("Enter your Groq API key"):
        st.sidebar.info("Groq authentication ok")
        groq_list_models = GroqClient(api_key=groq_api_token).list_models()
        groq_models = [x["id"] for x in groq_list_models["data"]]
        groq_llm = st.sidebar.radio("Choose your Groq LLM", groq_models, key="groq_llm")
    else:
        st.sidebar.warning("You must enter your Groq API key")

# LLM parameters
st.sidebar.subheader("Parameters")
max_tokens = st.sidebar.number_input("Max tokens", value=1024, key="max_tokens")
temperature = st.sidebar.slider(
    "Temperature", min_value=0.0, max_value=1.0, value=0.5, step=0.1, key="temperature"
)
top_p = st.sidebar.slider(
    "Top P", min_value=0.0, max_value=1.0, value=0.7, step=0.1, key="top_p"
)


# LLM response function ########################################
def get_llm_response(provider, prompt):
    """Dispatch the prompt to the selected provider's client."""
    options = dict(
        max_tokens=st.session_state["max_tokens"],
        top_p=st.session_state["top_p"],
        temperature=st.session_state["temperature"],
    )
    if provider == "ollama":
        return OllamaClient(
            api_key="ollama",
            model=st.session_state["ollama_llm"],
        ).api_chat_completion(
            prompt, **options
        )  # or .client_chat_completion(prompt, **options)
    elif provider == "nvidia":
        return NvidiaClient(
            api_key=nvidia_api_token,
            model=st.session_state["nvidia_llm"],
        ).api_chat_completion(
            prompt, **options
        )  # or .client_chat_completion(prompt, **options)
    elif provider == "groq":
        return GroqClient(
            api_key=groq_api_token,
            model=st.session_state["groq_llm"],
        ).api_chat_completion(
            prompt, **options
        )  # or .client_chat_completion(prompt, **options)
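
# The three client classes come from the local `clients` module (not shown
# here). Based only on how this script calls them, they are assumed to share
# roughly this OpenAI-like surface; the sketch below is illustrative, not the
# verified clients.py:
#
#     class OllamaClient:
#         def __init__(self, api_key="ollama", model=None): ...
#         def list_models(self): ...   # -> {"models": [{"name": ...}, ...]}
#         def api_chat_completion(self, prompt, *, max_tokens, top_p, temperature): ...
#                                      # -> str, the assistant's reply
#         def client_chat_completion(self, prompt, **options): ...
#                                      # same call, via the provider's SDK client
#
# NvidiaClient and GroqClient are assumed to mirror this, except that their
# list_models() returns {"data": [{"id": ...}, ...]} as used in the sidebar above.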
# UI main #####################################################
# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if prompt := st.chat_input("What is up?"):
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        response = get_llm_response(
            llm_providers[st.session_state["llm_provider"]], prompt
        )
        st.markdown(response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})
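
# To try the app locally (assuming this file is saved as app.py and that
# streamlit plus the `clients` module are installed):
#
#     streamlit run app.py
#
# Streamlit re-executes this whole script on every interaction; the chat
# history survives those reruns because it lives in st.session_state.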