"""Streamlit app for asking questions about an uploaded image via the Gemini API."""

import base64
import io

from PIL import Image
import requests
import streamlit as st

GEMINI_API_ROOT = "https://generativelanguage.googleapis.com/v1beta/models"
DEFAULT_MODEL = "gemini-pro-vision"
# Without a timeout a stalled connection would block the Streamlit run forever.
REQUEST_TIMEOUT_SECONDS = 60


class ChatBot:
    """Minimal client that sends a prompt plus one image to ``generateContent``.

    Args:
        image_base64: Base64-encoded JPEG bytes of the image to discuss.
        api_key: API key used to authenticate against the Gemini API.
        model: Name of the model to call; defaults to the model the app
            was originally written for.
    """

    def __init__(self, image_base64, api_key, *, model: str = DEFAULT_MODEL):
        self.image_base64 = image_base64
        self.api_key = api_key
        self.model = model
        # Request history; the first entry carries the instruction and image.
        self.contents = [
            {
                "parts": [
                    {"text": "You are a helpful assistant."},
                    {"inline_data": {"mime_type": "image/jpeg", "data": image_base64}},
                ]
            }
        ]

    def generate_response(self, prompt: str) -> str:
        """Send *prompt* together with the image and return the reply text.

        The prompt is added to the history only after the request succeeds,
        so a failed call does not leave a dangling entry behind.

        Returns:
            The text of the first candidate, or ``""`` if the response
            contains no text.

        Raises:
            requests.RequestException: On connection errors, timeouts, or a
                non-2xx HTTP status.
        """
        turn = {
            "parts": [
                {"text": prompt},
                {"inline_data": {"mime_type": "image/jpeg", "data": self.image_base64}},
            ]
        }
        headers = {
            "Content-Type": "application/json",
            # Sent as a header rather than a query parameter so the key does
            # not end up in URLs embedded in exception messages or logs.
            "x-goog-api-key": self.api_key,
        }
        data = {"contents": [*self.contents, turn]}

        response = requests.post(
            f"{GEMINI_API_ROOT}/{self.model}:generateContent",
            headers=headers,
            json=data,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        response_text = self._extract_text(response.json())

        self.contents.append(turn)
        return response_text

    @staticmethod
    def _extract_text(payload: dict) -> str:
        """Return the concatenated text parts of the first candidate in *payload*."""
        # generateContent replies nest text under candidates[].content.parts[].
        candidates = payload.get("candidates") or []
        if not candidates:
            return ""
        content = candidates[0].get("content") or {}
        parts = content.get("parts") or []
        return "".join(part.get("text", "") for part in parts)

    def get_history(self) -> list:
        """Return the list of content entries sent so far."""
        return self.contents


# Streamlit app
st.title("Image Chatbot with Google's Gemini API")

st.sidebar.title("Upload Image or Take a Picture")
uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.sidebar.image(image, caption='Uploaded Image.', use_column_width=True)

    # JPEG cannot store alpha or palette modes (common for PNG uploads), so
    # normalise to RGB before encoding; the sidebar still shows the original.
    jpeg_source = image if image.mode == "RGB" else image.convert("RGB")
    buffered = io.BytesIO()
    jpeg_source.save(buffered, format="JPEG")
    image_base64 = base64.b64encode(buffered.getvalue()).decode()

    api_key = st.sidebar.text_input("Enter your API key", type="password")

    if api_key:
        chatbot = ChatBot(image_base64, api_key)

        st.header("Chat with the Bot")

        if 'conversation' not in st.session_state:
            st.session_state.conversation = []

        user_input = st.text_input("Ask a question about the image:")

        if user_input:
            try:
                response = chatbot.generate_response(user_input)
            except (requests.RequestException, ValueError) as exc:
                # ValueError covers a non-JSON body on older requests versions.
                st.error(f"Request to the Gemini API failed: {exc}")
            else:
                st.session_state.conversation.append({"user": user_input, "bot": response})

        if st.session_state.conversation:
            for chat in st.session_state.conversation:
                st.write(f"**You:** {chat['user']}")
                st.write(f"**Bot:** {chat['bot']}")
else:
    st.sidebar.text("Please upload an image to start the conversation.")