Commit 8504de8
Parent(s): 7b770eb

adding benchmarks

Files changed:
- app.py +173 -0
- requirements.txt +1 -0
app.py
ADDED
@@ -0,0 +1,173 @@
import gradio as gr
import pandas as pd

# --------------- 100k-Marqo-Ecommerce-Easy -------------------

# Embedding Models for 100k-Marqo-Ecommerce-Easy
embedding_models = [
    '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/titan-multiemb-models.html">Amazon-Titan-MultiModal</a>',
    '<a href="https://huggingface.co/jinaai/jina-clip-v1">Jina-V1-CLIP</a>',
    '<a href="https://cohere.com/blog/introducing-embed-v3">Cohere-embedding-v3</a>',
    '<a href="https://cloud.google.com/vertex-ai">GCP-Vertex</a>',
    '<a href="https://huggingface.co/timm/ViT-SO400M-14-SigLIP">ViT-SO400M-14-SigLip</a>',
    '<a href="https://huggingface.co/timm/ViT-B-16-SigLIP">ViT-B-16-SigLip</a>',
    '<a href="https://huggingface.co/timm/ViT-L-16-SigLIP-384">ViT-L-16-SigLip</a>',
    '<a href="https://huggingface.co/Marqo/marqo-ecommerce-embeddings-B">Marqo-Ecommerce-B</a>',
    '<a href="https://huggingface.co/Marqo/marqo-ecommerce-embeddings-L">Marqo-Ecommerce-L</a>'
]

# GS-Text2Image-1m
gs_text2_image_1m_mAP = [0.694, 0.480, 0.358, 0.740, 0.792, 0.701, 0.754, 0.842, 0.879]
gs_text2_image_1m_Recall = [0.868, 0.638, 0.515, 0.910, 0.935, 0.87, 0.907, 0.961, 0.971]
gs_text2_image_1m_MRR = [0.693, 0.480, 0.358, 0.740, 0.792, 0.701, 0.754, 0.842, 0.879]
gs_text2_image_1m_ndcg = [0.733, 0.511, 0.389, 0.779, 0.825, 0.739, 0.789, 0.871, 0.901]

gs_text2_image_1m_data = {
    "Embedding Model": embedding_models,
    "mAP": gs_text2_image_1m_mAP,
    "R@10": gs_text2_image_1m_Recall,
    "MRR": gs_text2_image_1m_MRR,
    "nDCG@10": gs_text2_image_1m_ndcg
}
gs_text2_image_1m_df = pd.DataFrame(gs_text2_image_1m_data)
gs_text2_image_1m_df = gs_text2_image_1m_df.sort_values(by="mAP", ascending=False).reset_index(drop=True)

# GS-Category2Image-1m
gs_category2_image_1m_mAP = [0.308, 0.175, 0.136, 0.417, 0.423, 0.347, 0.392, 0.479, 0.515]
gs_category2_image_1m_Precision = [0.231, 0.122, 0.110, 0.298, 0.302, 0.252, 0.281, 0.336, 0.358]
gs_category2_image_1m_MRR = [0.558, 0.369, 0.315, 0.636, 0.644, 0.594, 0.627, 0.744, 0.764]
gs_category2_image_1m_ndcg = [0.377, 0.229, 0.178, 0.481, 0.487, 0.414, 0.458, 0.558, 0.590]

gs_category2_image_1m_data = {
    "Embedding Model": embedding_models,
    "mAP": gs_category2_image_1m_mAP,
    "P@10": gs_category2_image_1m_Precision,
    "MRR": gs_category2_image_1m_MRR,
    "nDCG@10": gs_category2_image_1m_ndcg
}
gs_category2_image_1m_df = pd.DataFrame(gs_category2_image_1m_data)
gs_category2_image_1m_df = gs_category2_image_1m_df.sort_values(by="mAP", ascending=False).reset_index(drop=True)

# AP-Text2Image-3m
ap_text2_image_3m_mAP = [0.762, 0.530, 0.433, 0.808, 0.860, 0.797, 0.842, 0.897, 0.928]
ap_text2_image_3m_Recall = [0.889, 0.699, 0.597, 0.933, 0.954, 0.917, 0.940, 0.967, 0.978]
ap_text2_image_3m_MRR = [0.763, 0.530, 0.433, 0.808, 0.860, 0.797, 0.842, 0.897, 0.928]
ap_text2_image_3m_ndcg = [0.791, 0.565, 0.465, 0.837, 0.882, 0.825, 0.865, 0.914, 0.940]

ap_text2_image_3m_data = {
    "Embedding Model": embedding_models,
    "mAP": ap_text2_image_3m_mAP,
    "R@10": ap_text2_image_3m_Recall,
    "MRR": ap_text2_image_3m_MRR,
    "nDCG@10": ap_text2_image_3m_ndcg
}
ap_text2_image_3m_df = pd.DataFrame(ap_text2_image_3m_data)
ap_text2_image_3m_df = ap_text2_image_3m_df.sort_values(by="mAP", ascending=False).reset_index(drop=True)


# --------------- Marqo-Ecommerce-Hard -------------------

# Embedding Models for Marqo-Ecommerce-Hard
hard_embedding_models = [
    '<a href="https://docs.aws.amazon.com/bedrock/latest/userguide/titan-multiemb-models.html">Amazon-Titan-MultiModal</a>',
    '<a href="https://huggingface.co/jinaai/jina-clip-v1">Jina-V1-CLIP</a>',
    '<a href="https://huggingface.co/timm/ViT-SO400M-14-SigLIP">ViT-SO400M-14-SigLip</a>',
    '<a href="https://huggingface.co/timm/ViT-B-16-SigLIP">ViT-B-16-SigLip</a>',
    '<a href="https://huggingface.co/timm/ViT-L-16-SigLIP-384">ViT-L-16-SigLip</a>',
    '<a href="https://huggingface.co/Marqo/marqo-ecommerce-embeddings-B">Marqo-Ecommerce-B</a>',
    '<a href="https://huggingface.co/Marqo/marqo-ecommerce-embeddings-L">Marqo-Ecommerce-L</a>'
]

# GS-Text2Image-1m
hard_gs_text2_image_1m_mAP = [0.475, 0.285, 0.573, 0.476, 0.540, 0.623, 0.682]
hard_gs_text2_image_1m_Recall = [0.648, 0.402, 0.763, 0.660, 0.722, 0.832, 0.878]
hard_gs_text2_image_1m_MRR = [0.475, 0.285, 0.574, 0.477, 0.540, 0.624, 0.683]
hard_gs_text2_image_1m_ndcg = [0.509, 0.306, 0.613, 0.513, 0.577, 0.668, 0.726]

hard_gs_text2_image_1m_data = {
    "Embedding Model": hard_embedding_models,
    "mAP": hard_gs_text2_image_1m_mAP,
    "R@10": hard_gs_text2_image_1m_Recall,
    "MRR": hard_gs_text2_image_1m_MRR,
    "nDCG@10": hard_gs_text2_image_1m_ndcg
}
hard_gs_text2_image_1m_df = pd.DataFrame(hard_gs_text2_image_1m_data)
hard_gs_text2_image_1m_df = hard_gs_text2_image_1m_df.sort_values(by="mAP", ascending=False).reset_index(drop=True)

# GS-Category2Image-1m
hard_gs_category2_image_1m_mAP = [0.246, 0.123, 0.352, 0.277, 0.324, 0.423, 0.463]
hard_gs_category2_image_1m_Precision = [0.429, 0.275, 0.516, 0.458, 0.497, 0.629, 0.652]
hard_gs_category2_image_1m_MRR = [0.642, 0.504, 0.707, 0.660, 0.687, 0.810, 0.822]
hard_gs_category2_image_1m_ndcg = [0.446, 0.294, 0.529, 0.473, 0.509, 0.644, 0.666]

hard_gs_category2_image_1m_data = {
    "Embedding Model": hard_embedding_models,
    "mAP": hard_gs_category2_image_1m_mAP,
    "P@10": hard_gs_category2_image_1m_Precision,
    "MRR": hard_gs_category2_image_1m_MRR,
    "nDCG@10": hard_gs_category2_image_1m_ndcg
}
hard_gs_category2_image_1m_df = pd.DataFrame(hard_gs_category2_image_1m_data)
hard_gs_category2_image_1m_df = hard_gs_category2_image_1m_df.sort_values(by="mAP", ascending=False).reset_index(drop=True)

# AP-Text2Image-3m
hard_ap_text2_image_3m_mAP = [0.456, 0.265, 0.560, 0.480, 0.544, 0.592, 0.658]
hard_ap_text2_image_3m_Recall = [0.627, 0.378, 0.742, 0.650, 0.715, 0.795, 0.854]
hard_ap_text2_image_3m_MRR = [0.457, 0.266, 0.564, 0.484, 0.548, 0.597, 0.663]
hard_ap_text2_image_3m_ndcg = [0.491, 0.285, 0.599, 0.515, 0.580, 0.637, 0.703]

hard_ap_text2_image_3m_data = {
    "Embedding Model": hard_embedding_models,
    "mAP": hard_ap_text2_image_3m_mAP,
    "R@10": hard_ap_text2_image_3m_Recall,
    "MRR": hard_ap_text2_image_3m_MRR,
    "nDCG@10": hard_ap_text2_image_3m_ndcg
}
hard_ap_text2_image_3m_df = pd.DataFrame(hard_ap_text2_image_3m_data)
hard_ap_text2_image_3m_df = hard_ap_text2_image_3m_df.sort_values(by="mAP", ascending=False).reset_index(drop=True)


# Gradio interface with sortable Dataframes
with gr.Blocks(css="""
    .gradio-container {
        display: flex;
        justify-content: center;
        align-items: center;
        min-height: 100vh;
        flex-direction: column;
    }
""") as demo:
    gr.Markdown("# Ecommerce Embedding Model Benchmarks")

    gr.Markdown("This Space contains benchmark results conducted as part of the release of our ecommerce embedding models: [**`Marqo-Ecommerce-L`**](https://huggingface.co/Marqo/marqo-ecommerce-embeddings-L) and [**`Marqo-Ecommerce-B`**](https://huggingface.co/Marqo/marqo-ecommerce-embeddings-B).")
    gr.Markdown('The benchmarks are separated into \'Marqo-Ecommerce-Hard\' and \'100k-Marqo-Ecommerce-Easy\'. The "easy" dataset is about 10-30 times smaller and is designed to accommodate rate-limited models, specifically Cohere-Embeddings-v3 and GCP-Vertex. The "hard" dataset represents the true challenge, since it contains four million ecommerce product listings, which pushes these models to their limits in a real-world ecommerce scenario.')
    gr.Markdown('Within both these scenarios, the models were benchmarked against three different tasks:')
    gr.Markdown('- **Google Shopping Text-to-Image**')
    gr.Markdown('- **Google Shopping Category-to-Image**')
    gr.Markdown('- **Amazon Products Text-to-Image**')
    gr.Markdown('As part of this launch, we also released two evaluation datasets: [`Marqo/google-shopping-general-eval`](https://huggingface.co/datasets/Marqo/google-shopping-general-eval) and [`Marqo/amazon-products-eval`](https://huggingface.co/datasets/Marqo/amazon-products-eval).')
    gr.Markdown('For more information on these models, the benchmark results, and how you can run these evaluations yourself, visit our [blog post](https://www.marqo.ai/blog).')

    # Hard
    gr.Markdown('## Marqo-Ecommerce-Hard')
    gr.Markdown('### Google Shopping Text to Image 1m')
    gr.Dataframe(value=hard_gs_text2_image_1m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])

    gr.Markdown('### Google Shopping Category to Image 1m')
    gr.Dataframe(value=hard_gs_category2_image_1m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])

    gr.Markdown('### Amazon Products Text to Image 3m')
    gr.Dataframe(value=hard_ap_text2_image_3m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])

    # Easy
    gr.Markdown('## 100k-Marqo-Ecommerce-Easy')
    gr.Markdown('### Google Shopping Text to Image')
    gr.Dataframe(value=gs_text2_image_1m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])

    gr.Markdown('### Google Shopping Category to Image')
    gr.Dataframe(value=gs_category2_image_1m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])

    gr.Markdown('### Amazon Products Text to Image')
    gr.Dataframe(value=ap_text2_image_3m_df, headers="keys", interactive=True, datatype=["html", "number", "number", "number", "number"])

demo.launch()
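Note: every benchmark table above is built with the same construct-then-sort pattern. The following is an illustrative sketch only, not part of the commit; the helper name `build_benchmark_df` is hypothetical and assumes each metrics dict contains an "mAP" column to sort by.

import pandas as pd

def build_benchmark_df(models, metrics):
    """Hypothetical helper: build one benchmark table sorted by mAP.

    `models` is the list of model link strings; `metrics` maps a column
    name (e.g. "mAP", "R@10", "MRR", "nDCG@10") to its list of scores.
    """
    df = pd.DataFrame({"Embedding Model": models, **metrics})
    return df.sort_values(by="mAP", ascending=False).reset_index(drop=True)

# Example usage with the GS-Text2Image-1m numbers defined in app.py:
# gs_text2_image_1m_df = build_benchmark_df(
#     embedding_models,
#     {"mAP": gs_text2_image_1m_mAP, "R@10": gs_text2_image_1m_Recall,
#      "MRR": gs_text2_image_1m_MRR, "nDCG@10": gs_text2_image_1m_ndcg})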
requirements.txt
ADDED
@@ -0,0 +1 @@
pandas