# Hugging Face Spaces page header (scrape artifact, not code):
# "Spaces: Sleeping / Sleeping" — Space status at time of capture.
import streamlit as st
import requests
import os

# Hugging Face Inference API configuration. The access token is read from the
# environment so it never appears in source control; if unset, `api_token` is
# None and the Authorization header will carry "Bearer None" (request fails
# server-side with 401 rather than crashing here).
api_token = os.environ.get("api_token")
API_URL = "https://api-inference.huggingface.co/models/google/gemma-7b"
headers = {"Authorization": f"Bearer {api_token}"}

# Page layout: title/blurb, then a fixed-height outer container holding the
# scrolling chat history (`cont`) with the chat input box beneath it.
st.title("Google Gemma 7B Chat")
st.write("New powerful text generation model by Google AI trained on 7B parameters. Enter your message and get response in few seconds!")
mainc = st.container(height=600)
cont = mainc.container(height=400)
prompt = mainc.chat_input(placeholder="Eg. Why astronaut riding a horse is most popular prompt for testing ai models?")
def query(payload):
    """POST *payload* to the Hugging Face Inference API and return the decoded JSON.

    On success the API returns a list like ``[{"generated_text": ...}]``; on
    failure it returns a dict such as ``{"error": ...}`` — callers must be
    prepared for either shape.
    """
    # A timeout is required: without one, requests can block forever and hang
    # the Streamlit script run if the model endpoint stalls (e.g. cold start).
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
if prompt:
    if len(prompt) > 8000:
        # Reject over-long prompts before spending an API call.
        # NOTE(review): this counts characters, not tokens, despite the
        # wording of the user-facing message — confirm intended limit.
        errormsg = st.chat_message("Assistant", avatar="⚠")
        errormsg.markdown(":red[Sorry, prompt can't be longer than 8000 tokens!]")
    else:
        output = query({
            "inputs": prompt,
        })
        # Identity check is the idiomatic None test (was `output != None`).
        if output is not None:
            user_msg = cont.chat_message("User")
            user_msg.write(prompt)
            as_msg = cont.chat_message("Assistant")
            if isinstance(output, list) and output and "generated_text" in output[0]:
                as_msg.write(output[0]['generated_text'])
            else:
                # The Inference API reports failures (loading, rate limit,
                # auth) as a dict, e.g. {"error": ...}. Indexing that with
                # output[0]['generated_text'] used to raise KeyError/TypeError;
                # show the raw payload instead so the user sees what happened.
                as_msg.write(output)