"""Streamlit app: ask Llama 3.1 405B via the NVIDIA NIM OpenAI-compatible API."""
import os

import streamlit as st
from openai import OpenAI

# NOTE: if this is unset the OpenAI client is still constructed, but every
# request would fail with an opaque auth error — we check it at submit time
# below and show a clear message instead.
api_key = os.getenv("NVIDIANIM_API_KEY")

client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)

model_name = "meta/llama-3.1-405b-instruct"


def get_llama_response(question):
    """Send *question* to the model and return the fully assembled reply.

    Parameters
    ----------
    question : str
        The user's prompt, sent as a single "user"-role message.

    Returns
    -------
    str
        The concatenated streamed completion, stripped of surrounding
        whitespace.
    """
    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": question}],
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
        stream=True,
    )
    # Collect streamed deltas in a list and join once — avoids quadratic
    # string concatenation. The final chunk's delta.content is None.
    parts = []
    for chunk in completion:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            parts.append(delta)
    return "".join(parts).strip()


st.title("Ask Llama 3.1 405B on Nvidia NIM")

user_question = st.text_input("Enter your question:")

if st.button("Submit"):
    if api_key is None:
        # Surface the configuration problem in the UI rather than letting the
        # API call raise a confusing traceback.
        st.error("NVIDIANIM_API_KEY environment variable is not set.")
    elif user_question.strip():
        # .strip() guard: a whitespace-only input previously slipped past the
        # truthiness check and sent a blank prompt to the model.
        llama_response = get_llama_response(user_question)
        st.write("**Llama 3.1 405B Response:**")
        st.write(llama_response)
    else:
        st.warning("Please enter a question.")