dipta007 committed on
Commit
8d37eb3
1 Parent(s): 70826e6
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
+ [server]
+ enableXsrfProtection = false
+ enableCORS = false
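+ # Likely disabled so the app can be served inside an iframe (e.g. a Hugging
+ # Face Spaces embed); assumed rationale, as disabling XSRF/CORS otherwise weakens security.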
.streamlit/secrets.toml ADDED
@@ -0,0 +1 @@
+ OPENAI_KEY="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"
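+ # Read at runtime via st.secrets["OPENAI_KEY"]; a key committed to a public
+ # repo is exposed and should be treated as compromised.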
__pycache__/utils.cpython-310.pyc ADDED
Binary file (529 Bytes).
app.py ADDED
@@ -0,0 +1,107 @@
+ from openai import OpenAI
+ import streamlit as st
+ from utils import im_2_b64
+
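+ # Fixed seed forwarded to each API call for best-effort reproducible outputs.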
+ RANDOM_SEED = 42
+
+
+ st.title("ChatGPT with Vision")
+
+ client = OpenAI(api_key=st.secrets["OPENAI_KEY"])
+
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+ if "uploader_key" not in st.session_state:
+     st.session_state["uploader_key"] = 0
+ def clear_uploader():
+     st.session_state["uploader_key"] += 1
+     st.rerun()
+
+ with st.sidebar:
+     if st.button("Clear chat"):
+         st.session_state.messages = []
+         clear_uploader()
+
+     st.header("Configuration")
+     st.subheader("Temperature")
+     temperature = st.slider(label="x", min_value=0.1, max_value=1.0, value=0.5, step=0.1, label_visibility='hidden')
+     st.subheader("Max Tokens")
+     max_tokens = st.slider(label="x", min_value=32, max_value=1024, value=256, step=32, label_visibility='hidden')
+
+     images = st.file_uploader(
+         "Upload image(s)",
+         accept_multiple_files=True,
+         type=["png", "jpg", "jpeg"],
+         key=st.session_state["uploader_key"],
+     )
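+
+ # Images uploaded in the sidebar are attached to the next chat message sent.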
+
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         contents = message["content"]
+
+         for content in contents:
+             if content["type"] == "text":
+                 st.markdown(content["text"])
+
+         number_of_images = sum(1 for c in contents if c["type"] == "image_url")
+         if number_of_images > 0:
+             cols = st.columns(number_of_images)
+             i = 0
+             for content in contents:
+                 if content["type"] == "image_url":
+                     with cols[i]:
+                         st.image(content["image_url"]["url"])
+                     i += 1
+
+
+ def push_message(role, content, images=None):
+     contents = []
+     contents.append({"type": "text", "text": content})
+     if images:
+         for image in images:
+             image_b64 = im_2_b64(image)
+             image_url = f"data:image/jpeg;base64,{image_b64.decode('utf-8')}"
+             obj = {
+                 "type": "image_url",
+                 "image_url": {
+                     "url": image_url,
+                 },
+             }
+             contents.append(obj)
+
+     message = {"role": role, "content": contents}
+     st.session_state.messages.append(message)
+     return message
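+
+ # Each stored message uses the OpenAI multimodal content format, e.g.:
+ # {"role": "user", "content": [
+ #     {"type": "text", "text": "What is in this image?"},
+ #     {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}]}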
+
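+ # (Toggling this flag mid-run has no visible effect: Streamlit reruns the
+ # whole script on each interaction, so chat_input is never actually disabled.)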
+ chat_input_disabled = False
+ if prompt := st.chat_input("Type a message", key="chat_input", disabled=chat_input_disabled):
+     push_message("user", prompt, images)
+     with st.chat_message("user"):
+         st.markdown(prompt)
+         if images:
+             cols = st.columns(len(images))
+             for i, image in enumerate(images):
+                 with cols[i]:
+                     st.image(image)
+
+     with st.chat_message("assistant"):
+         messages = [
+             {"role": m["role"], "content": m["content"]}
+             for m in st.session_state.messages
+         ]
+         # print("api call", messages)
+         chat_input_disabled = True
+         stream = client.chat.completions.create(
+             model="gpt-4-vision-preview",
+             messages=messages,
+             stream=True,
+             seed=RANDOM_SEED,
+             temperature=temperature,
+             max_tokens=max_tokens,
+         )
+         response = st.write_stream(stream)
+         push_message("assistant", response)
+         chat_input_disabled = False
+         clear_uploader()
app_.py ADDED
@@ -0,0 +1,103 @@
+ from openai import OpenAI
+ import streamlit as st
+ from st_multimodal_chatinput import multimodal_chatinput
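+ # Third-party chat-input component; note that it is not listed in requirements.txt.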
+
+ ## Hack to keep the chat input pinned to the bottom of the page.
+ ## It only works if multimodal_chatinput is called inside the first st.container on the page.
+
+ ##############################################################################
+ def reconfig_chatinput():
+     st.markdown(
+         """
+         <style>
+         div[data-testid="stVerticalBlock"] div[data-testid="stVerticalBlock"]:first-of-type {
+             position: fixed;
+             bottom: 0;
+             background-color: #0E1117;  /* Streamlit's default dark background; original "#0E117" was an invalid 5-digit hex */
+             z-index: 1000;
+             /* Other styles as needed */
+         }
+         </style>
+         """,
+         unsafe_allow_html=True,
+     )
+     return
+
+ reconfig_chatinput()
+ ##############################################################################
28
+
29
+
30
+ st.title("ChatGPT with Vision")
31
+
32
+ client = OpenAI(api_key=st.secrets["OPENAI_KEY"])
33
+
34
+ if "openai_model" not in st.session_state:
35
+ st.session_state["openai_model"] = "gpt-3.5-turbo"
36
+
37
+ if "messages" not in st.session_state:
38
+ st.session_state.messages = []
39
+
40
+
41
+ chatbox = st.container()
42
+
43
+ for message in st.session_state.messages:
44
+ with chatbox.chat_message(message["role"]):
45
+ contents = message["content"]
46
+ for content in contents:
47
+ if content["type"] == "text":
48
+ chatbox.markdown(content["text"])
49
+ elif content["type"] == "image_url":
50
+ chatbox.image(content["image_url"]["url"])
+
+
+ def push_message(role, content, images=None):
+     contents = []
+     contents.append({"type": "text", "text": content})
+     if images:
+         for image in images:
+             obj = {
+                 "type": "image_url",
+                 "image_url": {
+                     "url": image,
+                 },
+             }
+             contents.append(obj)
+
+     print("pushing message", role, contents)
+     message = {"role": role, "content": contents}
+     st.session_state.messages.append(message)
+     return message
+
+
+ with st.container():
+     multimodal_prompt = multimodal_chatinput()
+     if multimodal_prompt:
+         prompt = multimodal_prompt["text"]
+         push_message("user", prompt)
+         # with st.chat_message("user"):
+         #     st.markdown(prompt)
+
+         # with st.chat_message("assistant"):
+         messages = [
+             {"role": m["role"], "content": m["content"]}
+             for m in st.session_state.messages
+         ]
+         print("api call", messages)
+         completion = client.chat.completions.create(
+             model="gpt-4-vision-preview",
+             messages=messages,
+             # stream=True,
+             max_tokens=1024,
+         )
+         # response = st.write_stream(completion)
+         # print("api response", completion)
+         response = completion.choices[0].message.content
+         # st.markdown(response)
+         push_message("assistant", response)
+
+
+ # chat_placeholder = st.empty()
+
+ # with chat_placeholder.container():
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ streamlit
+ openai
+ pillow  # imported directly by utils.py (PIL)
utils.py ADDED
@@ -0,0 +1,15 @@
+ from PIL import Image
+ from io import BytesIO
+ import base64
+
+
+ # Convert an image file to a base64-encoded JPEG
+ def im_2_b64(image):
+     image = Image.open(image)
+     # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the equivalent filter
+     image.thumbnail((512, 512), Image.LANCZOS)
+     image = image.convert("RGB")
+     buff = BytesIO()
+     image.save(buff, format="JPEG")
+     img_str = base64.b64encode(buff.getvalue())
+     return img_str
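+
+ # Example usage (mirrors app.py): given an uploaded file `f`,
+ #     data_url = f"data:image/jpeg;base64,{im_2_b64(f).decode('utf-8')}"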