dipta007 committed on
Commit
8d37eb3
1 Parent(s): 70826e6
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
+ [server]
+ enableXsrfProtection = false
+ enableCORS = false
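+ # Likely disabled so the app can be served inside an iframe (e.g. a Hugging
+ # Face Spaces embed); assumed rationale, as disabling XSRF/CORS otherwise weakens security.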
.streamlit/secrets.toml ADDED
@@ -0,0 +1 @@
+ OPENAI_KEY="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"
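+ # Read at runtime via st.secrets["OPENAI_KEY"]; a key committed to a public
+ # repo is exposed and should be treated as compromised.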
__pycache__/utils.cpython-310.pyc ADDED
Binary file (529 Bytes).
app.py ADDED
@@ -0,0 +1,107 @@
+ from openai import OpenAI
+ import streamlit as st
+ from utils import im_2_b64
+
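+ # Fixed seed forwarded to each API call for best-effort reproducible outputs.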
+ RANDOM_SEED = 42
+
+
+ st.title("ChatGPT with Vision")
+
+ client = OpenAI(api_key=st.secrets["OPENAI_KEY"])
+
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+ if "uploader_key" not in st.session_state:
+     st.session_state["uploader_key"] = 0
+ def clear_uploader():
+     st.session_state["uploader_key"] += 1
+     st.rerun()
+
+ with st.sidebar:
+     if st.button("Clear chat"):
+         st.session_state.messages = []
+         clear_uploader()
+
+     st.header("Configuration")
+     st.subheader("Temperature")
+     temperature = st.slider(label="x", min_value=0.1, max_value=1.0, value=0.5, step=0.1, label_visibility='hidden')
+     st.subheader("Max Tokens")
+     max_tokens = st.slider(label="x", min_value=32, max_value=1024, value=256, step=32, label_visibility='hidden')
+
+     images = st.file_uploader(
+         "Upload image(s)",
+         accept_multiple_files=True,
+         type=["png", "jpg", "jpeg"],
+         key=st.session_state["uploader_key"],
+     )
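+
+ # Images uploaded in the sidebar are attached to the next chat message sent.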
+
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         contents = message["content"]
+
+         for content in contents:
+             if content["type"] == "text":
+                 st.markdown(content["text"])
+
+         number_of_images = sum(1 for c in contents if c["type"] == "image_url")
+         if number_of_images > 0:
+             cols = st.columns(number_of_images)
+             i = 0
+             for content in contents:
+                 if content["type"] == "image_url":
+                     with cols[i]:
+                         st.image(content["image_url"]["url"])
+                     i += 1
+
+
+ def push_message(role, content, images=None):
+     contents = []
+     contents.append({"type": "text", "text": content})
+     if images:
+         for image in images:
+             image_b64 = im_2_b64(image)
+             image_url = f"data:image/jpeg;base64,{image_b64.decode('utf-8')}"
+             obj = {
+                 "type": "image_url",
+                 "image_url": {
+                     "url": image_url,
+                 },
+             }
+             contents.append(obj)
+
+     message = {"role": role, "content": contents}
+     st.session_state.messages.append(message)
+     return message
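+
+ # Each stored message uses the OpenAI multimodal content format, e.g.:
+ # {"role": "user", "content": [
+ #     {"type": "text", "text": "What is in this image?"},
+ #     {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}]}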
+
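+ # (Toggling this flag mid-run has no visible effect: Streamlit reruns the
+ # whole script on each interaction, so chat_input is never actually disabled.)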
+ chat_input_disabled = False
+ if prompt := st.chat_input("Type a message", key="chat_input", disabled=chat_input_disabled):
+     push_message("user", prompt, images)
+     with st.chat_message("user"):
+         st.markdown(prompt)
+         if images:
+             cols = st.columns(len(images))
+             for i, image in enumerate(images):
+                 with cols[i]:
+                     st.image(image)
+
+     with st.chat_message("assistant"):
+         messages = [
+             {"role": m["role"], "content": m["content"]}
+             for m in st.session_state.messages
+         ]
+         # print("api call", messages)
+         chat_input_disabled = True
+         stream = client.chat.completions.create(
+             model="gpt-4-vision-preview",
+             messages=messages,
+             stream=True,
+             seed=RANDOM_SEED,
+             temperature=temperature,
+             max_tokens=max_tokens,
+         )
+         response = st.write_stream(stream)
+         push_message("assistant", response)
+         chat_input_disabled = False
+         clear_uploader()
app_.py ADDED
@@ -0,0 +1,103 @@
+ from openai import OpenAI
+ import streamlit as st
+ from st_multimodal_chatinput import multimodal_chatinput
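+ # Third-party chat-input component; note that it is not listed in requirements.txt.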
+
+ ## Hack to keep the chat input pinned to the bottom of the page.
+ ## It only works if multimodal_chatinput is called inside the first st.container on the page.
+
+ ##############################################################################
+ def reconfig_chatinput():
+     st.markdown(
+         """
+         <style>
+         div[data-testid="stVerticalBlock"] div[data-testid="stVerticalBlock"]:first-of-type {
+             position: fixed;
+             bottom: 0;
+             background-color: #0E1117;  /* Streamlit's default dark background; original "#0E117" was an invalid 5-digit hex */
+             z-index: 1000;
+             /* Other styles as needed */
+         }
+         </style>
+         """,
+         unsafe_allow_html=True,
+     )
+     return
+
+ reconfig_chatinput()
+ ##############################################################################
28
+
29
+
30
+ st.title("ChatGPT with Vision")
31
+
32
+ client = OpenAI(api_key=st.secrets["OPENAI_KEY"])
33
+
34
+ if "openai_model" not in st.session_state:
35
+ st.session_state["openai_model"] = "gpt-3.5-turbo"
36
+
37
+ if "messages" not in st.session_state:
38
+ st.session_state.messages = []
39
+
40
+
41
+ chatbox = st.container()
42
+
43
+ for message in st.session_state.messages:
44
+ with chatbox.chat_message(message["role"]):
45
+ contents = message["content"]
46
+ for content in contents:
47
+ if content["type"] == "text":
48
+ chatbox.markdown(content["text"])
49
+ elif content["type"] == "image_url":
50
+ chatbox.image(content["image_url"]["url"])
+
+
+ def push_message(role, content, images=None):
+     contents = []
+     contents.append({"type": "text", "text": content})
+     if images:
+         for image in images:
+             obj = {
+                 "type": "image_url",
+                 "image_url": {
+                     "url": image,
+                 },
+             }
+             contents.append(obj)
+
+     print("pushing message", role, contents)
+     message = {"role": role, "content": contents}
+     st.session_state.messages.append(message)
+     return message
+
+
+ with st.container():
+     multimodal_prompt = multimodal_chatinput()
+     if multimodal_prompt:
+         prompt = multimodal_prompt["text"]
+         push_message("user", prompt)
+         # with st.chat_message("user"):
+         #     st.markdown(prompt)
+
+         # with st.chat_message("assistant"):
+         messages = [
+             {"role": m["role"], "content": m["content"]}
+             for m in st.session_state.messages
+         ]
+         print("api call", messages)
+         completion = client.chat.completions.create(
+             model="gpt-4-vision-preview",
+             messages=messages,
+             # stream=True,
+             max_tokens=1024,
+         )
+         # response = st.write_stream(completion)
+         # print("api response", completion)
+         response = completion.choices[0].message.content
+         # st.markdown(response)
+         push_message("assistant", response)
+
+
+ # chat_placeholder = st.empty()
+
+ # with chat_placeholder.container():
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ streamlit
+ openai
+ pillow  # imported directly by utils.py (PIL)
utils.py ADDED
@@ -0,0 +1,15 @@
+ from PIL import Image
+ from io import BytesIO
+ import base64
+
+
+ # Convert an image file to a base64-encoded JPEG
+ def im_2_b64(image):
+     image = Image.open(image)
+     # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the equivalent filter
+     image.thumbnail((512, 512), Image.LANCZOS)
+     image = image.convert("RGB")
+     buff = BytesIO()
+     image.save(buff, format="JPEG")
+     img_str = base64.b64encode(buff.getvalue())
+     return img_str
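+
+ # Example usage (mirrors app.py): given an uploaded file `f`,
+ #     data_url = f"data:image/jpeg;base64,{im_2_b64(f).decode('utf-8')}"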