import streamlit as st
import requests
import os

# Hugging Face Inference API configuration; token comes from the environment.
api_token = os.environ.get("api_token")
API_URL = "https://api-inference.huggingface.co/models/google/gemma-7b"
headers = {"Authorization": f"Bearer {api_token}"}

st.title("Google Gemma 7B Chat")
st.write("New powerful text generation model by Google AI trained on 7B parameters. Enter your message and get response in few seconds!")

# Outer container holds the chat history area and the input box.
mainc = st.container(height=600)
cont = mainc.container(height=400)
prompt = mainc.chat_input(placeholder="Eg. Why astronaut riding a horse is most popular prompt for testing ai models?")


def query(payload):
    """POST *payload* to the Hugging Face inference endpoint.

    Returns the parsed JSON response on success, or None when the request
    fails, times out, or the endpoint answers with a non-2xx status
    (e.g. model still loading, invalid token). The original code returned
    error bodies as-is, which crashed the caller on output[0].
    """
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        # Surface HTTP errors (401/429/503 ...) instead of handing an
        # error dict to the caller, which expects a list of generations.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a non-JSON body from response.json().
        return None


if prompt:
    # NOTE(review): len() counts characters, not tokens — kept as a cheap
    # client-side guard; the message wording predates this code.
    if len(prompt) > 8000:
        errormsg = st.chat_message("Assistant", avatar="⚠")
        errormsg.markdown(":red[Sorry, prompt can't be longer than 8000 tokens!]")
    else:
        output = query({
            "inputs": prompt,
        })
        # A successful generation is a non-empty list whose first element
        # carries 'generated_text'; anything else is an API failure.
        if isinstance(output, list) and output and "generated_text" in output[0]:
            user_msg = cont.chat_message("User")
            user_msg.write(prompt)
            as_msg = cont.chat_message("Assistant")
            as_msg.write(output[0]['generated_text'])
        else:
            errormsg = cont.chat_message("Assistant", avatar="⚠")
            errormsg.markdown(":red[Sorry, the model could not produce a response right now. Please try again in a moment.]")