# NOTE: Hugging Face Spaces status banner ("Spaces: Sleeping") was captured
# here by the page extraction; it is not part of the program.
# Standard library imports
import os
import base64

# Third-party imports
import requests
import streamlit as st

# OpenAI API key: read from the environment. Set OPENAI_API_KEY locally,
# or configure it as a secret in the deployment environment.
api_key = os.getenv("OPENAI_API_KEY")

# Common HTTP headers for every OpenAI REST call.
# NOTE(review): if OPENAI_API_KEY is unset, this sends "Bearer None" and the
# API returns 401 — the UI surfaces that via the status-code check in main().
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}
def main():
    """Render the Streamlit page: accept an image upload, let the user pick
    an analysis task, and send both to the OpenAI GPT-4 Turbo vision API.

    Side effects only (Streamlit UI + one HTTPS POST); returns None.
    """
    st.title("Multimodal using GPT-4 Turbo Model")
    text = """Prof. Louie F. Cervantes, M. Eng. (Information Engineering)
CCS 229 - Intelligent Systems
Department of Computer Science
College of Information and Communications Technology
West Visayas State University
"""
    with st.expander("About"):
        st.text(text)

    st.write("Upload an image and select the image analysis task.")

    # File upload for image
    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

    if uploaded_image is not None:
        # Encode the uploaded image to base64 for the data-URL payload.
        base64_image = base64.b64encode(uploaded_image.getvalue()).decode("utf-8")
        # FIX: use the file's actual MIME type instead of hard-coding
        # image/jpeg — the uploader also accepts PNG files.
        mime_type = uploaded_image.type or "image/jpeg"

        # Display the uploaded image
        st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)

        # Image analysis tasks; the selected entry doubles as the text prompt.
        analysis_tasks = [
            "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
            "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
            "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
            "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
            "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
            "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
            "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
            "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
            "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
            "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases.",
            "Obtaining text data: Extract text from the image, such as names, date, time, etc.",
        ]

        # Task selection dropdown
        selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)

        # Button to generate response
        if st.button("Generate Response"):
            # FIX: the original also re-checked `uploaded_image is None` here,
            # which is dead code inside this branch; only the task matters.
            if not selected_task:
                st.error("Please upload an image and select a task.")
            else:
                # Multimodal chat-completions payload: one user message
                # carrying the task prompt plus the image as a data URL.
                payload = {
                    "model": "gpt-4-turbo",
                    "messages": [
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": selected_task
                                },
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:{mime_type};base64,{base64_image}"
                                    }
                                }
                            ]
                        }
                    ],
                    "max_tokens": 2048,
                }
                with st.spinner("Processing..."):
                    # FIX: add a timeout and handle network errors so a dead
                    # connection cannot hang the Streamlit UI forever.
                    try:
                        response = requests.post(
                            "https://api.openai.com/v1/chat/completions",
                            headers=headers,
                            json=payload,
                            timeout=120,
                        )
                    except requests.RequestException as exc:
                        st.error(f"Request failed: {exc}")
                        return

                # Check for errors in the response
                if response.status_code != 200:
                    st.error(f"Error: {response.status_code} - {response.text}")
                else:
                    # Parse the JSON body and surface the model's reply.
                    content = response.json()
                    st.success("Response generated!")
                    contentstring = content['choices'][0]['message']['content']
                    st.markdown(f"AI Response: {contentstring}")
# Script entry point: run the Streamlit page when executed directly.
if __name__ == "__main__":
    main()