# chatgpt-vision / app_.py
from openai import OpenAI
import streamlit as st
from st_multimodal_chatinput import multimodal_chatinput
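# Dependencies (package names assumed from the imports above):
#   pip install streamlit openai st-multimodal-chatinput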
## Hack to keep the chat input pinned to the bottom of the page.
## Only works if multimodal_chatinput is called inside the first st.container of the page.
##############################################################################
def reconfig_chatinput():
    st.markdown(
        """
    <style>
    div[data-testid="stVerticalBlock"] div[data-testid="stVerticalBlock"]:first-of-type {
        position: fixed;
        bottom: 0;
        background-color: #0E1117;  /* Streamlit's default dark-theme background */
        z-index: 1000;
        /* Other styles as needed */
    }
    </style>
    """,
        unsafe_allow_html=True,
    )


reconfig_chatinput()
##############################################################################
st.title("ChatGPT with Vision")
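# The API key is read from Streamlit secrets, e.g. in .streamlit/secrets.toml:
#   OPENAI_KEY = "sk-..."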
client = OpenAI(api_key=st.secrets["OPENAI_KEY"])
if "openai_model" not in st.session_state:
st.session_state["openai_model"] = "gpt-3.5-turbo"
if "messages" not in st.session_state:
st.session_state.messages = []
chatbox = st.container()
# Re-render the full conversation on every Streamlit rerun.
for message in st.session_state.messages:
    with chatbox.chat_message(message["role"]):
        # Write into the chat bubble itself (st.*), not the outer chatbox container.
        for content in message["content"]:
            if content["type"] == "text":
                st.markdown(content["text"])
            elif content["type"] == "image_url":
                st.image(content["image_url"]["url"])
def push_message(role, content, images=None):
    """Append a message (text plus optional image URLs) to the chat history,
    using the content-parts format expected by the OpenAI vision API."""
    contents = [{"type": "text", "text": content}]
    if images:
        for image in images:
            contents.append(
                {
                    "type": "image_url",
                    "image_url": {"url": image},
                }
            )
    print("pushing message", role, contents)
    message = {"role": role, "content": contents}
    st.session_state.messages.append(message)
    return message
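# For reference, a stored user message ends up shaped like the example below
# (the data-URI value is illustrative):
#   {
#       "role": "user",
#       "content": [
#           {"type": "text", "text": "What is in this image?"},
#           {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
#       ],
#   }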
with st.container():
    multimodal_prompt = multimodal_chatinput()

    if multimodal_prompt:
        prompt = multimodal_prompt["text"]
        # Attached images are assumed to arrive as base64 data-URI strings
        # under the "images" key of the component's return value.
        images = multimodal_prompt.get("images") or []
        push_message("user", prompt, images=images)

        # Show the new user message immediately; the history loop above
        # re-renders it on the next rerun.
        with chatbox.chat_message("user"):
            st.markdown(prompt)

        messages = [
            {"role": m["role"], "content": m["content"]}
            for m in st.session_state.messages
        ]
        print("api call", messages)
        completion = client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=messages,
            # stream=True,
            max_tokens=1024,
        )
        response = completion.choices[0].message.content
        push_message("assistant", response)

        with chatbox.chat_message("assistant"):
            st.markdown(response)
# chat_placeholder = st.empty()
# with chat_placeholder.container():