File size: 5,710 Bytes
d55cd8e
 
 
754c404
 
e1c9862
754c404
46a4b10
d55cd8e
99fcde7
d55cd8e
 
e7eb220
a7709a7
 
 
d55cd8e
754c404
 
 
 
 
99fcde7
e7eb220
15c3c81
754c404
 
d55cd8e
e1c9862
 
 
 
754c404
 
 
 
 
 
 
 
 
 
d55cd8e
754c404
 
 
 
 
 
 
 
 
d55cd8e
754c404
e1c9862
 
99fcde7
754c404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d55cd8e
754c404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d55cd8e
b601e7d
e7eb220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7709a7
3e3a527
55983d8
d94a6a3
a7709a7
424cf41
a7709a7
 
 
 
424cf41
a7709a7
424cf41
b2fddf7
 
 
 
 
a7709a7
424cf41
966c960
3e3a527
 
a7709a7
 
966c960
 
 
d55cd8e
 
b2fddf7
 
1d62955
b2fddf7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# import the relevant packages
import os
import csv
import requests
import time
import base64
import pandas as pd
import numpy as np
import openai
from openai import OpenAI
import gradio as gr
import huggingface_hub
from datasets import load_dataset
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import ast

# Bearer token for the OpenAI HTTP API, read from the API_TOKEN environment variable.
# NOTE(review): os.environ.get returns None when API_TOKEN is unset, in which
# case the string concatenation below raises TypeError at import time.
api_key = os.environ.get("API_TOKEN")
# Headers attached to every chat-completions request made by query_agent().
headers = {
    'Authorization': 'Bearer ' + api_key,
    'Content-Type': 'application/json'
}

# Load the simulated customer profiles from a CSV hosted on the Hugging Face Hub
# (network access happens at import time).
dataset = load_dataset('csv', data_files='https://huggingface.co/datasets/petcoblue/simulation_data/resolve/main/user_agents.csv')
user_agents = dataset['train'].to_pandas()
# Drop the first column (presumably a saved row index — confirm against the CSV).
user_agents = user_agents.iloc[:,1:]
# simulate() only iterates over these first 10 profiles.
user_batch = user_agents[:10]

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode and the base64 bytes are decoded to a
    ``str`` so the result can be embedded in a JSON payload.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def create_description(row):
    """Build the persona text for one simulated customer.

    ``row`` is any mapping-like record (e.g. a pandas Series) providing the
    keys read below. Every field except ``avg_spending`` is truncated to an
    integer before being inserted into the text.
    """
    age = int(row['age'])
    pet_count = int(row['num_pets'])
    spending = row['avg_spending']
    engagement = int(row['engagement_score'])
    income = int(row['income_level'])
    regularity = int(row['purchase_regularity'])
    days_since = int(row['time_since_last_purchase'])

    # One entry per sentence; joined with single spaces into one paragraph.
    sentences = [
        f"Imagine that you are currently {age} years old.",
        f"You have {pet_count} pets and spend an average of ${spending} on Petco purchases.",
        f"Your engagement with Petco marketing has a score of {engagement}.",
        f"You have an income level of {income} and regularly buy items from Petco every {regularity} months.",
        f"It has been {days_since} days since your last purchase with Petco.",
    ]
    return " ".join(sentences)

# Prompt sent together with the two images to every simulated customer.
# The example response demonstrates the "<choice>; Characteristics: <words>"
# layout; simulate() splits each reply on the literal "; Characteristics: ",
# so the wording of the example must stay in sync with that parser.
question = (
    "Here are two images of Petco marketing emails:\n"
    "- Image 0 is shown first.\n"
    "- Image 1 is shown second.\n"
    "Which email are you more likely to click through? Just answer with 0 for the first image or 1 for the second image.\n"
    "Then, provide a list of up to five one-word characteristics of the email you chose that made you want to click through it. Separate each characteristic with a comma.\n\n"
    "Example response:\n"
    "1; Characteristics: Appealing, Sale, Bright, Simple, Exclusive\n"
)

def query_agent(description, question, image0, image1):
    """Ask the vision model which of two emails a simulated customer prefers.

    Args:
        description: Persona text, sent as the system message.
        question: Prompt text, sent in the user message ahead of the images.
        image0: Path of the first image file (base64-encoded before sending).
        image1: Path of the second image file (base64-encoded before sending).

    Returns:
        A ``(preference, top_logprobs)`` tuple. ``preference`` is the message
        content of the first choice and ``top_logprobs`` is the
        ``top_logprobs`` list reported for the first generated token.
        ``(None, None)`` is returned when all attempts fail.
    """
    max_attempts = 3
    # Without a timeout, requests waits indefinitely on a stalled connection.
    request_timeout = 60  # seconds
    retry_delay = 1  # seconds between attempts

    base64_image0 = encode_image(image0)
    base64_image1 = encode_image(image1)

    # NOTE(review): the documented Chat Completions content part for images is
    # {"type": "image_url", "image_url": {"url": ...}}; the shape below is kept
    # as-is — confirm the endpoint still accepts it.
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {"role": "system", "content": description},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    {"type": "image", "image_url": f"data:image/jpeg;base64,{base64_image0}"},
                    {"type": "image", "image_url": f"data:image/jpeg;base64,{base64_image1}"}
                ]
            }
        ],
        "max_tokens": 300,
        "logprobs": True,
        "top_logprobs": 1
    }

    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=request_timeout,
            )
        except requests.exceptions.RequestException as e:
            print(f"Request failed on attempt {attempt}: {e}")
        else:
            if response.status_code == 200:
                try:
                    data = response.json()
                    first_choice = data['choices'][0]
                    preference = first_choice['message']['content']
                    top_logprobs = first_choice['logprobs']['content'][0]['top_logprobs']
                except (ValueError, KeyError, IndexError, TypeError) as e:
                    # A 200 with an unexpected body counts as a failed attempt
                    # instead of escaping the retry loop as an exception.
                    print(f"Unexpected response format on attempt {attempt}: {e}")
                else:
                    return preference, top_logprobs
            else:
                print(f"HTTP Error {response.status_code} on attempt {attempt}")

        # Back off after every kind of failure, but not after the final attempt.
        if attempt < max_attempts:
            time.sleep(retry_delay)

    print(f"Failed to analyze {image0} and {image1} after {max_attempts} attempts.")
    return None, None

def simulate(image0, image1):
    """Poll every simulated customer and summarise which email they prefer.

    Each profile in ``user_batch`` is turned into a persona and asked to pick
    between the two images. The majority choice is returned together with a
    word cloud of the characteristics the agents gave for their choice.

    Args:
        image0: Path of the first email image.
        image1: Path of the second email image.

    Returns:
        ``(preference, plt)`` where ``preference`` is ``0`` or ``1`` (ties go
        to ``1``) and ``plt`` is the pyplot module whose current figure holds
        the word cloud.

    Raises:
        gr.Error: If no simulated customer produced an answer.
    """
    preferences = []
    reasons = []
    for _, user_agent in user_batch.iterrows():
        description = create_description(user_agent)
        preference, _ = query_agent(description, question, image0, image1)
        if preference is None:
            # query_agent already reported the failure; leave this agent out
            # of the vote instead of crashing on the missing response.
            continue

        split = preference.split("; Characteristics: ")
        if len(split) == 2:
            choice, reasoning = split[0], split[1]
        else:
            # Reply did not follow the requested format: log it and keep only
            # the leading part as the choice.
            print(preference)
            choice, reasoning = split[0], ""

        preferences.append(0 if "0" in choice else 1)
        reasons.append(reasoning)

    if not preferences:
        raise gr.Error("None of the simulated customers returned an answer. Please try again.")

    avg_preference = sum(preferences) / len(preferences)
    preference = 0 if avg_preference < 0.5 else 1
    print(preferences, preference)

    all_reasons = ' '.join(word.strip() for item in reasons for word in item.split(','))
    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(all_reasons)
    except ValueError:
        # WordCloud refuses to render when the text contains no usable words
        # (e.g. every reply was malformed); fall back to an empty plot.
        wordcloud = None

    plt.figure(figsize=(10, 5))
    if wordcloud is not None:
        plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    # No plt.show(): the figure is rendered by the caller, and show() would
    # block (and then discard the figure) under an interactive backend.

    return preference, plt

# Login credentials accepted by the app: a single username/password pair taken
# from the "username" and "password" environment variables. Indexing os.environ
# directly raises KeyError at import time if either variable is missing.
user_db = { 
           os.environ["username"]: os.environ["password"],
          }

# Description text shown in the interface.
subtitle = "Upload two images of emails and see which is generally preferred by Petco customers!"

# Gradio UI: two single-file image uploads are passed to simulate(), whose two
# return values are displayed as a text output and a plot.
# NOTE(review): gr.Plot(value=plt) hands the pyplot module to the component as
# its initial value — confirm this is intended rather than an empty gr.Plot().
demo = gr.Interface(fn=simulate, 
                    inputs=[gr.File(label="Upload Email 0", file_types=["image"], file_count="single"), 
                            gr.File(label="Upload Email 1", file_types=["image"], file_count="single")], 
                    outputs=["text",
                            gr.Plot(value=plt)],
                    title="Pairwise Simulation of Petco Email Preference",
                    description=subtitle
                    )

if __name__ == "__main__":
    # Launch behind a login prompt; a login is accepted only when the submitted
    # password equals the value stored in user_db for the submitted username.
    demo.launch(
        auth=lambda u, p: user_db.get(u) == p,
        auth_message="Welcome! Please enable third-party cookies or you will not be able to login."
        )