import os
import base64

import requests
import streamlit as st

# OpenAI API key. Read from the environment so the secret never lives in
# source control; when running locally, export OPENAI_API_KEY first.
api_key = os.getenv("OPENAI_API_KEY")

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# Prompt text for each image-analysis task offered in the dropdown.
ANALYSIS_TASKS = [
    "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
    "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
    "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
    "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
    "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
    "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
    "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
    "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
    "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
    "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases.",
    "Obtaining text data: Extract text from the image, such as names, date, time, etc.",
]


def _build_payload(task: str, base64_image: str, mime_type: str) -> dict:
    """Build the chat-completions request body for one image + task prompt.

    Args:
        task: Natural-language instruction for the model.
        base64_image: Base64-encoded bytes of the uploaded image.
        mime_type: MIME type of the image (e.g. "image/png").

    Returns:
        A dict ready to be sent as the JSON body of the API request.
    """
    return {
        "model": "gpt-4-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": task},
                    {
                        "type": "image_url",
                        "image_url": {
                            # Data-URL form lets us send the image inline
                            # instead of hosting it at a public URL.
                            "url": f"data:{mime_type};base64,{base64_image}"
                        },
                    },
                ],
            }
        ],
        "max_tokens": 2048,
    }


def main():
    """Streamlit app: upload an image, pick an analysis task, query GPT-4 Turbo."""
    st.title("Multimodal using GPT-4 Turbo Model")
    text = """Prof. Louie F. Cervantes, M. Eng. (Information Engineering)
CCS 229 - Intelligent Systems
Department of Computer Science
College of Information and Communications Technology
West Visayas State University
"""
    with st.expander("About"):
        st.text(text)

    # Fail fast with a clear message instead of sending "Bearer None".
    if not api_key:
        st.error("OPENAI_API_KEY is not set. Configure it before using the app.")
        return

    st.write("Upload an image and select the image analysis task.")

    # File upload for the image to analyze.
    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    # Encode the uploaded image to base64 for the inline data URL.
    base64_image = base64.b64encode(uploaded_image.getvalue()).decode("utf-8")

    # Display the uploaded image.
    st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)

    # Task selection dropdown.
    selected_task = st.selectbox("Select an image analysis task:", ANALYSIS_TASKS)

    if not st.button("Generate Response"):
        return

    if not selected_task:  # Check if a task is selected.
        st.error("Please upload an image and select a task.")
        return

    # Use the upload's real MIME type: the previous version hardcoded
    # image/jpeg even when the user uploaded a PNG.
    mime_type = uploaded_image.type or "image/jpeg"
    payload = _build_payload(selected_task, base64_image, mime_type)

    with st.spinner("Processing..."):
        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json=payload,
                # Don't hang the UI forever on a stalled connection.
                timeout=120,
            )
        except requests.RequestException as exc:
            st.error(f"Request failed: {exc}")
            return

    # Check for errors in the response.
    if response.status_code != 200:
        st.error(f"Error: {response.status_code} - {response.text}")
        return

    content = response.json()
    st.success("Response generated!")
    # Extract the assistant's reply from the response JSON.
    contentstring = content["choices"][0]["message"]["content"]
    st.markdown(f"AI Response: {contentstring}")


if __name__ == "__main__":
    main()