import os
import base64
import requests
import streamlit as st

# OpenAI API key: read from the OPENAI_API_KEY environment variable.
# When running locally, export this variable before starting the app.
api_key = os.getenv("OPENAI_API_KEY")

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}"
}
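# Note: when deployed on Streamlit Community Cloud, the key could instead be
# read from Streamlit's secrets store (a sketch, assuming the secret is defined
# under the same name in .streamlit/secrets.toml):
#   api_key = st.secrets["OPENAI_API_KEY"]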

def main():
    st.title("Multimodal Image Analysis using the GPT-4 Turbo Model")

    text = """Prof. Louie F. Cervantes, M. Eng. (Information Engineering)
    CCS 229 - Intelligent Systems
    Department of Computer Science
    College of Information and Communications Technology
    West Visayas State University
    """
    with st.expander("About"):
        st.text(text)

    st.write("Upload an image and select the image analysis task.")

    # File upload for image
    uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
    if uploaded_image is not None:
        # Record the MIME type (e.g. image/png) and encode the image to base64
        mime_type = uploaded_image.type or "image/jpeg"
        base64_image = base64.b64encode(uploaded_image.getvalue()).decode('utf-8')
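        # The base64 string is later embedded in a data URL inside the API
        # request, so no publicly hosted image URL is needed.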

        # Display the uploaded image
        st.image(uploaded_image, caption="Uploaded Image", use_container_width=True)

    # List of image analysis tasks
    analysis_tasks = [
        "Scene Analysis: Describe the scene depicted in the image. Identify the objects present, their spatial relationships, and any actions taking place.",
        "Object Detection and Classification: Identify and classify all objects present in the image. Provide detailed descriptions of each object, including its size, shape, color, and texture.",
        "Image Captioning: Generate a concise and accurate caption that describes the content of the image.",
        "Visual Question Answering: Answer specific questions about the image, such as 'What color is the car?' or 'How many people are in the image?'",
        "Image Similarity Search: Given a query image, find similar images from a large dataset based on visual features.",
        "Image Segmentation: Segment the image into different regions corresponding to objects or areas of interest.",
        "Optical Character Recognition (OCR): Extract text from the image, such as printed or handwritten text.",
        "Diagram Understanding: Analyze a diagram (e.g., flowchart, circuit diagram) and extract its structure and meaning.",
        "Art Analysis: Describe the artistic style, subject matter, and emotional impact of an image.",
        "Medical Image Analysis: Analyze medical images (e.g., X-rays, MRIs) to detect abnormalities or diagnose diseases.",
        "Obtaining text data: Extract text from the image, such as names, date, time, etc.",
    ]

    # Task selection dropdown
    selected_task = st.selectbox("Select an image analysis task:", analysis_tasks)

    # Button to generate response
    if st.button("Generate Response"):
        if uploaded_image is None or not selected_task:  # Require both an uploaded image and a selected task
            st.error("Please upload an image and select a task.")
        else:
            # Prepare the multimodal prompt
            payload = {
                "model": "gpt-4-turbo",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": selected_task
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ],
                "max_tokens": 2048,
            }
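            # The Chat Completions request pairs a text part (the selected task)
            # with an image_url part whose URL is a base64 data URL, so the image
            # travels inline with the prompt rather than via a hosted link.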

            with st.spinner("Processing..."):
                # Generate response
                response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
                
                # Check for errors in the response
                if response.status_code != 200:
                    st.error(f"Error: {response.status_code} - {response.text}")
                else:
                    # Parse the JSON response and display the model's answer
                    content = response.json()
                    st.success("Response generated!")
                    content_string = content['choices'][0]['message']['content']
                    st.markdown(f"AI Response: {content_string}")

if __name__ == "__main__":
    main()
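
# A minimal alternative sketch using the official openai Python client (assuming
# the openai package is installed) could replace the manual requests call:
#   from openai import OpenAI
#   client = OpenAI(api_key=api_key)
#   completion = client.chat.completions.create(
#       model="gpt-4-turbo", messages=payload["messages"], max_tokens=2048)
#   content_string = completion.choices[0].message.content
#
# To run the app locally (assuming this file is saved as app.py and the
# streamlit and requests packages are installed):
#   streamlit run app.py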