ryanj's picture
[fix] remove bad brand urls
d3ae269
raw
history blame
19.1 kB
import subprocess
# Import-time side effect: download the Ipopt solver archive with wget and
# unpack it into the current working directory. Output is discarded and the
# return codes are not checked, so a failed download is not reported here.
subprocess.run(['wget', '-N', '-q', 'https://ampl.com/dl/open/ipopt/ipopt-linux64.zip'], stdout=subprocess.DEVNULL)
subprocess.run(['unzip', '-o', '-q', 'ipopt-linux64'], stdout=subprocess.DEVNULL)
from pyomo.environ import *
import pickle
import pickle
# Maps each style (brand) name to the pickle file holding its annotations.
dict_files = {
'banana republic': 'annotations/banana_annotations.pickle',
'h&m': 'annotations/hnm_annotations.pickle',
'uniqlo': 'annotations/uniqlo_annotations.pickle',
'street': 'annotations/street_annotations.pickle',
'abercrombie women': 'annotations/abercrombie_women.pickle',
'aritzia': 'annotations/aritzia.pickle',
'dynamite': 'annotations/dynamite.pickle',
}
# Gender associated with each style; used to pick the matching article keys.
style_gender = {
'banana republic': 'male',
'h&m': 'male',
'uniqlo': 'male',
'street': 'male',
'abercrombie women': 'female',
'aritzia': 'female',
'dynamite': 'female',
}
# Annotation dict keys (clothing categories) considered for each gender.
gender_article_keys = {
'male': ['shirt', 'pants_or_shorts', 'hoodie_or_jacket', 'hat', 'shoes'],
'female': ['top', 'bottom', 'dress', 'hoodie_or_jacket', 'hat', 'shoes']
}
# Placeholder; rebound further down once the annotation files are loaded.
style_annotations_dict = {}
def get_annotations(dict_file):
    """Load the annotations pickled in *dict_file* and keep the usable ones.

    An annotation is kept when it names both a shirt and pants_or_shorts, or
    when its 'gender' is 'female'. Every annotation that has neither a
    shirt + pants_or_shorts pair, nor a top + bottom pair, nor a dress is
    printed (together with the file path) as a diagnostic.

    Returns the kept annotations as a list, in file order.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(dict_file, 'rb') as handle:
        loaded = pickle.load(handle)

    kept = []
    for entry in loaded:
        shirt_and_pants = bool(entry.get('shirt', '') and entry.get('pants_or_shorts', ''))
        top_and_bottom = bool(entry.get('top', '') and entry.get('bottom', ''))
        has_dress = bool(entry.get('dress', ''))

        if not (shirt_and_pants or top_and_bottom or has_dress):
            print( dict_file, '\n', entry )

        if shirt_and_pants or entry.get('gender', '') == 'female':
            kept.append(entry)
    return kept
# Load and filter the annotations of every style once, at import time.
style_annotations_dict = {
k: get_annotations(file_path) for k, file_path in dict_files.items()
}
import collections
from collections import defaultdict
# Five flags, presumably marking which of the five categories below are
# optional; not referenced anywhere else in the visible source.
optional = [False, False, True, True, False]
# Clothing categories currently in effect. This is a module-level global that
# the UI callbacks rebind whenever the selected gender changes.
article_keys = ['shirt', 'pants_or_shorts', 'hoodie_or_jacket', 'hat', 'shoes']
def annotation_to_tuple(annotation):
    """Return the annotation's values for the active ``article_keys`` as a tuple.

    The tuple follows the order of the module-level ``article_keys`` list.
    Raises KeyError if the annotation lacks one of those keys.
    """
    values = []
    for key in article_keys:
        values.append(annotation[key])
    return tuple(values)
def remove_owned_articles(annotations_tuple, clothes_owned):
    """Return a copy of the outfit tuple with every owned article blanked to ''.

    annotations_tuple: iterable of clothing strings describing one outfit.
    clothes_owned: container supporting ``in`` with the clothes already owned.
    """
    remaining = []
    for article in annotations_tuple:
        remaining.append('' if article in clothes_owned else article)
    return tuple(remaining)
def annotations_value(annotation_tuple_count, clothes_owned):
    """Score each outfit as its vote count divided by its number of missing articles.

    An article is missing when it is neither '' nor in ``clothes_owned``.
    Outfits with nothing missing are left out of the result.

    Returns a dict mapping outfit tuple -> score (float).
    """
    scores = {}
    for outfit, votes in annotation_tuple_count.items():
        missing = [item for item in outfit
                   if item != '' and item not in clothes_owned]
        if missing:
            scores[outfit] = votes / len(missing)
    return scores
def is_outfit_subset(smaller_outfit, larger_outfit):
    """Tell whether ``smaller_outfit`` is a strict sub-outfit of ``larger_outfit``.

    Both arguments are assumed to be tuples of the same fixed size. Every
    non-empty slot of the smaller outfit must equal the same slot of the
    larger one; identical outfits are not considered subsets of each other.
    """
    if smaller_outfit == larger_outfit:
        return False
    for position, larger_article in enumerate(larger_outfit):
        smaller_article = smaller_outfit[position]
        if smaller_article == '':
            continue  # an empty slot is compatible with anything
        if smaller_article != larger_article:
            return False
    return True
def most_outfits_helper(annotations, capacity=5, clothes_owned=None):
    """Greedily pick up to ``capacity`` new clothes that unlock popular outfits.

    annotations: list of annotation dicts; each is reduced to an outfit tuple
        over the active ``article_keys``.
    capacity: number of new clothes that may be added.
    clothes_owned: optional iterable of clothes already owned. It is copied,
        never modified.

    Returns the set of owned plus newly chosen clothes (without '').
    """
    outfit_tuples = [annotation_to_tuple(e) for e in annotations]
    base_count = collections.Counter(outfit_tuples)

    # An outfit also receives the votes of every outfit that extends it. The
    # votes are read from the untouched base counts so the result does not
    # depend on iteration order (accumulating in place would count the votes
    # of nested subsets more than once).
    outfit_count = dict(base_count)
    for small_outfit in base_count:
        for larger_outfit, larger_votes in base_count.items():
            if is_outfit_subset(small_outfit, larger_outfit):
                outfit_count[small_outfit] += larger_votes

    # '' marks an empty slot; treating it as owned keeps it from being counted
    # as a garment. Work on a private set so neither the caller's list nor a
    # shared default is mutated.
    owned = set(clothes_owned) if clothes_owned else set()
    owned.add('')
    best_outfits = most_outfits(outfit_count, capacity, owned)
    best_outfits.discard('')
    return best_outfits
# Consider every image in the dataset.
# Each image has an outfit and casts a vote for that outfit.
# An outfit is a tuple of pants_or_shorts + shirt + (optionally) hoodie_or_jacket.
# Images with shoes or hats cast an additional vote for the shoes and hats.
# Greedily pick the clothes with the highest votes,
# then remove those clothes from all outfits in that dataset and recount.
# This can also be solved as an integer programming problem.
def most_outfits(annotation_tuple_count, capacity, clothes_owned):
    """Greedily grow ``clothes_owned`` with the best-scoring outfits.

    Repeatedly: blank out owned articles in every outfit, merge the counts of
    outfits that become identical, score the outfits with
    ``annotations_value`` and add the articles of the highest-scoring outfit
    that still fits in the remaining capacity.

    annotation_tuple_count: dict mapping outfit tuple -> vote count.
    capacity: number of new clothes that may still be added.
    clothes_owned: iterable of clothes already owned (copied, not modified).

    Returns the set of owned clothes after the greedy selection. The loop
    stops as soon as the capacity is used up or no remaining outfit fits;
    it therefore always terminates.
    """
    clothes_owned = set(clothes_owned)
    while capacity > 0:
        # merge counts based on new keys
        merged = defaultdict(int)
        for outfit, count in annotation_tuple_count.items():
            merged[remove_owned_articles(outfit, clothes_owned)] += count
        annotation_tuple_count = merged

        scores = annotations_value(annotation_tuple_count, clothes_owned)
        ranked = sorted(scores.items(), key=lambda item: -item[1])

        for outfit, _score in ranked:
            # '' is an empty slot, never a garment to acquire.
            new_articles = {a for a in outfit if a != '' and a not in clothes_owned}
            if len(new_articles) <= capacity:
                clothes_owned |= new_articles
                capacity -= len(new_articles)
                break
        else:
            # Nothing affordable is left; further rounds would change nothing.
            break
    return clothes_owned
from pyomo.environ import *
from functools import reduce
def cover_style_ip(annotations, capacity=10, clothes_owned=None, mask=None, solver='ipopt'):
    """Use integer program to find the set of clothes that makes as many outfits as possible.

    annotations: List[ Dict ], contains maps from clothing categories to string types
    capacity: int, number of new clothes to find
    clothes_owned: List[ Str ], iterable of strings of clothing types
    mask: Optional( List[ Str ] ), optional iterable of categories of clothes to consider.
        Uses all clothing types by default.
    solver: Str, the nonlinear optimization solver to use for max bool sat

    Returns the list of newly selected clothes (owned clothes are excluded).
    Categories are read from the module-level ``article_keys``.
    """
    # De-duplicate: every owned garment contributes exactly one fixed literal,
    # so the capacity must be raised by the number of *distinct* owned clothes.
    owned = set(clothes_owned) if clothes_owned else set()

    interested_clothing_types = article_keys if mask is None else mask
    unique_clothes = {a[key] for a in annotations for key in interested_clothing_types}
    unique_clothes.discard('')

    # Without any undecided garment there is nothing to optimise, and the
    # capacity constraint below would collapse to a plain Python bool.
    if not (unique_clothes - owned):
        return []

    model = ConcreteModel()

    # 1. Create the variables we want to optimize
    clothing_literals_dict = {clothing: Var(within=Binary) for clothing in unique_clothes}
    # set literals to true for clothes already owned
    for clothing in owned:
        clothing_literals_dict[clothing] = 1

    # 2. Constrain the number of clothes
    # capacity changes from number of new clothes to total clothes
    capacity += len(owned)
    capacity_constraint = Constraint(expr=sum(list(clothing_literals_dict.values())) <= capacity)

    outfit_clauses = []
    for a in annotations:
        # get the clothing literal if it exists, otherwise it's masked and say it's absent
        outfit_literals = [ clothing_literals_dict.get(a[key], 0) for key in article_keys if a[key] != '' ]
        if not outfit_literals:
            continue  # an annotation without any article forms no outfit clause
        outfit_clauses += [ reduce(lambda x,y: x*y, outfit_literals) ]

    # 3. Maximize the objective function
    objective = Objective( expr=sum( outfit_clauses ), sense=maximize)

    for name, literal in clothing_literals_dict.items():
        setattr(model, name, literal)
    model.capacity_constraint = capacity_constraint
    model.objective = objective

    SolverFactory(solver).solve(model)

    basis_clothes = []
    for name, literal in clothing_literals_dict.items():
        if isinstance(literal, int):
            continue  # fixed literal of an owned garment
        solved_value = literal()
        # NOTE(review): guards against a variable left without a value,
        # e.g. after a failed solve - confirm against the solver in use.
        if solved_value is not None and round(solved_value):
            basis_clothes.append(name)
    return basis_clothes
def annotation_str(annotation):
    """Render an annotation as 'article + article + ...' for display.

    The categories come from ``gender_article_keys`` for the annotation's
    'gender' (default 'male'), in that order; missing or empty categories
    are skipped.

    This single definition replaces two consecutive definitions of the same
    name, of which only the second (gender-aware) one was ever reachable.
    """
    gender = annotation.get('gender', 'male')
    articles = [annotation[k] for k in gender_article_keys[gender] if annotation.get(k)]
    return ' + '.join(articles)
def annotation_to_key(annotation):
    """Build the string key that identifies an outfit, ignoring hat and shoes.

    For 'male' annotations (the default when 'gender' is absent) the key is
    made of shirt, hoodie_or_jacket and pants_or_shorts, all of which must be
    present as keys. For any other gender it is made of dress, top, bottom
    and hoodie_or_jacket, where missing keys are treated as empty.

    Empty (or None) articles are skipped; the rest are joined with ' + '.
    """
    gender = annotation.get('gender', 'male')
    if gender == 'male':
        articles = [annotation[k] for k in ('shirt', 'hoodie_or_jacket', 'pants_or_shorts')]
    else:
        useful_keys = ['dress', 'top', 'bottom', 'hoodie_or_jacket']
        articles = [annotation.get(k, '') for k in useful_keys]
    return ' + '.join(article for article in articles if article)
def get_num_outfits(annotations, articles):
    """List the distinct outfits that can be worn with the given articles.

    An annotation is wearable when, for every key in the active
    ``article_keys``, its value is '' or contained in ``articles``. Wearable
    annotations are reduced with ``annotation_to_key`` (so hats and shoes do
    not distinguish outfits) and returned as a sorted list of unique keys.
    """
    reachable = set()
    for outfit in annotations:
        slot_ok = [outfit[key] == '' or outfit[key] in articles for key in article_keys]
        if all(slot_ok):
            reachable.add(annotation_to_key(outfit))
    return sorted(reachable)
def get_outfit_urls(annotations, outfits):
    """Group the image URLs of the annotations by outfit key.

    annotations: list of annotation dicts; those without a truthy 'url' are
        ignored.
    outfits: iterable of outfit keys (as produced by ``annotation_to_key``)
        to collect URLs for.

    Returns a dict mapping outfit key -> list of URLs, in annotation order.
    """
    wanted = set(outfits)  # built once: O(1) membership per annotation
    outfit_urls = defaultdict(list)
    for e in annotations:
        url = e.get('url')
        if not url:
            continue
        str_form = annotation_to_key(e)
        if str_form in wanted:
            outfit_urls[str_form].append(url)
    return dict(outfit_urls)
def cover_style(annotations, capacity=10, clothes_owned=None):
    """Recommend clothes and report the outfits they make possible.

    annotations: list of annotation dicts describing the chosen style(s).
    capacity: number of new clothes to recommend.
    clothes_owned: optional iterable of clothes already owned.

    Returns ``(clothes, outfit_urls)``:
    * with no owned clothes, or with ``capacity == 0``: the clothes returned
      by the optimiser and the URLs of every reachable outfit;
    * otherwise: only the new clothes and the URLs of the outfits that become
      reachable because of them.
    """
    owned = set(clothes_owned) if clothes_owned else set()
    clothes = list(cover_style_ip(annotations, capacity=capacity, clothes_owned=list(owned)))
    reachable_outfits = get_num_outfits(annotations, set(clothes) | owned)

    # From scratch, or when only asking what the current wardrobe can do,
    # report everything that is reachable.
    if not owned or capacity == 0:
        return clothes, get_outfit_urls(annotations, reachable_outfits)

    # capacity > 0 and clothes are owned: show new clothes and new outfits only
    new_clothes = [c for c in clothes if c not in owned]
    already_reachable = set(get_num_outfits(annotations, owned))
    new_outfits = [o for o in reachable_outfits if o not in already_reachable]
    return new_clothes, get_outfit_urls(annotations, new_outfits)
def str_to_list(input):
    """Split a comma-separated string into stripped, non-empty items."""
    items = []
    for piece in input.split(','):
        cleaned = piece.strip()
        if cleaned != '':
            items.append(cleaned)
    return items
# Markdown headings placed above the recommended clothes and the outfit list.
CLOTHES_HEADER = '## Most Effective Clothes'
# The two emoji (t-shirt, face with sunglasses) had been mis-decoded into
# mojibake; they are written as escapes so the source stays encoding-proof.
OUTFITS_HEADER = '## Possible \U0001F455\U0001F60E Outfits'
def cover_style_helper(styles, capacity=10, clothes_owned=''):
    """Produce the two Markdown strings shown by the recommendation tab.

    styles: sequence of style names (keys of ``style_annotations_dict``).
    capacity: number of clothes to recommend; ``None`` is treated as 0.
    clothes_owned: comma-separated string of clothes already owned.

    Returns ``(clothes_markdown, outfits_markdown)``. With no style selected
    the first string asks the user to pick one and the second is just the
    outfits heading.
    """
    if not styles:
        return f'{CLOTHES_HEADER}\nPlease pick at least one style from the left.' , OUTFITS_HEADER

    # NOTE(review): presumably an emptied number field arrives as None - confirm.
    if capacity is None:
        capacity = 0
    owned = str_to_list(clothes_owned)

    if len(styles) == 1:
        # A single style is used in full.
        annotations = style_annotations_dict[styles[0]]
    else:
        # hack for h&m having wayyyyy more examples than other brands:
        # cap every style at 500 annotations when several are mixed.
        annotations = []
        for style in styles:
            annotations += style_annotations_dict[style][:500]

    clothes, outfit_urls = cover_style(annotations, capacity, owned)

    clothes_str = f'{CLOTHES_HEADER}\n' + ''.join(f'* {c}\n' for c in clothes)

    outfit_lines = [f'{OUTFITS_HEADER} ({len(outfit_urls)})\n']
    for outfit, urls in outfit_urls.items():
        links = ''.join(
            f'<a href="{u}" target="_blank">[{i}]</a>'
            for i, u in enumerate(urls, start=1)
        )
        # Every item is numbered '1.'; Markdown renumbers ordered lists itself.
        outfit_lines.append(f'1. {outfit}: {links}\n')
    return clothes_str, ''.join(outfit_lines)
# article_keys = gender_article_keys['male']
# print(cover_style_helper( ['banana republic'] ))
def cover_style_helper_wrapper(markdown, styles, capacity=10, clothes_owned=''):
    """Entry point for the examples: select the article keys, then recommend.

    ``markdown`` is accepted but not used. When at least one style is given,
    the global ``article_keys`` is rebound to the keys of the first style's
    gender before delegating to ``cover_style_helper``.
    """
    global article_keys
    has_selection = len(styles) > 0
    if has_selection:
        first_style = styles[0]
        article_keys = gender_article_keys[style_gender[first_style]]
    return cover_style_helper(styles, capacity, clothes_owned)
def filter_test(annotation, filter_set):
    """Return True when every filter term occurs in the annotation.

    A term matches when it is a substring of the annotation's value for at
    least one of the active ``article_keys`` or of its 'caption'.
    """
    searchable_keys = article_keys + ['caption']
    for term in filter_set:
        matching_keys = [key for key in searchable_keys if term in annotation[key]]
        if not matching_keys:
            return False
    return True
import gradio as gr
def change_gallery(style_choice, start_from=0, text_filter=''):
    """Return up to 20 image URLs of a style for the gallery.

    style_choice: style name; also rebinds the global ``article_keys`` to the
        keys of that style's gender.
    start_from: position in percent (0-100) of the (filtered) annotations at
        which the page of 20 starts.
    text_filter: comma-separated terms; when non-empty, only annotations
        passing ``filter_test`` for all terms are kept.
    """
    global article_keys
    article_keys = gender_article_keys[style_gender[style_choice]]

    candidates = style_annotations_dict[style_choice]
    if text_filter:
        terms = {piece.strip() for piece in text_filter.split(',')}
        candidates = [entry for entry in candidates if filter_test(entry, terms)]

    first = int(start_from/100*len(candidates))
    page = candidates[first:first+20]
    return [entry['url'] for entry in page]
def update_styles(gender):
    """Refresh the style checkboxes after the gender selection changed.

    Rebinds the global ``article_keys`` to the keys of ``gender`` and returns
    a CheckboxGroup update listing that gender's styles, with
    "banana republic" (male) or "aritzia" (otherwise) pre-selected.
    """
    global article_keys
    article_keys = gender_article_keys[gender]

    if gender == "male":
        default_values = ["banana republic"]
    else:
        default_values = ["aritzia"]

    matching_styles = [name for name in style_annotations_dict.keys() if style_gender[name] == gender]
    return gr.CheckboxGroup.update(choices=matching_styles, value=default_values, label='Styles')
# Start with the male article keys, matching the default gender of the UI.
article_keys = gender_article_keys['male']
# Introductory Markdown displayed in the recommendation tab.
INTRO_MARKDOWN = """**Good clothes are the ones that you can use to make many outfits.**
Finding stuff that works with your style and wardrobe is hard.
This program helps you find the clothes that make as many fashionable outfits as possible,
given your current wardrobe, style, and budget.
"""
# Build the Gradio interface: three tabs (recommend clothes, explore styles,
# how it works), then launch it.
with gr.Blocks() as demo:
with gr.Tabs(selected=0):
# Tab 0: clothes recommendation.
with gr.TabItem('Find the Literal Optimal Clothes', id=0):
with gr.Box():
# Default content of the "Clothes Owned" textbox (comma-separated).
default_clothes = """black dress pants,
blue shirt,
white sweater,
black sweater,
white t shirt,
black t shirt,
black shorts,
white shirt,
grey hoodie,
black t shirt,
gray sweater,
black jacket,
black sneakers,
white sneakers,
blue jeans,
black baseball hat,
black jeans"""
with gr.Row():
with gr.Column():
gr.Markdown(INTRO_MARKDOWN)
with gr.Row():
with gr.Group():
# Gender selector; changing it refreshes the selectable styles via update_styles.
gender = gr.Radio(["male", "female"], value="male", label='gender')
styles = gr.CheckboxGroup([k for k in style_annotations_dict.keys() if style_gender[k] == gender.value], value=["banana republic"], label='Styles')
gender.change(update_styles, inputs=[gender], outputs=[styles])
with gr.Group():
capacity = gr.Number(5, precision=0, label='Number of clothes to recommend')
clothes_owned = gr.Textbox(
label="Clothes Owned",
lines=3,
value=default_clothes, #json.dumps(default_clothes),
)
with gr.Row():
btn = gr.Button("Recommend Clothes")
with gr.Row():
# Run the recommender once while building the UI so both panes start populated.
clothes_markdown, outfits_markdown = cover_style_helper(styles.value, capacity.value, clothes_owned.value)
clothes_recommended = gr.Markdown(clothes_markdown)
possible_outfits = gr.Markdown(outfits_markdown)
# Re-run the recommender with the current widget values on every click.
btn.click(cover_style_helper, inputs=[styles, capacity, clothes_owned], outputs=[clothes_recommended, possible_outfits])
gr.Markdown("## 3 Different Example Uses")
# Hidden textbox bound to the first column (the explanation) of each example.
example_label = gr.Textbox('', label='Explanation', visible=False)
gr.Examples(examples=[ ['Find the central clothes that make a style.', ['street'], 10, ''],
['Find new outfits hidden in your wardrobe.', ['h&m'], 0, 'white t shirt, grey t shirt, black pants, black shorts, black t shirt, blue jeans'],
['Find the clothes that best fit your existing wardrobe.', ['banana republic', 'street'], 5, 'white t shirt, grey t shirt, black pants, black shorts, black t shirt, blue jeans']],
inputs=[example_label, styles, capacity, clothes_owned],
outputs=[clothes_recommended, possible_outfits],
fn=cover_style_helper_wrapper,
cache_examples=False)
# Markdown displayed at the top of the explore tab.
EXPLORE_MARKDOWN = """# Explore Styles
Hint: Click an image to enable arrow key browsing.
You put in clothes you own to find outfit ideas!
"""
# Tab 1: browse example images of one style. Note that `styles` is rebound
# here to a Radio; the handlers registered above keep the CheckboxGroup.
with gr.TabItem('Explore Styles', id=1):
with gr.Box():
gr.Markdown(EXPLORE_MARKDOWN)
with gr.Row():
with gr.Column():
# Styles left out of the choices offered in this tab.
bad_urls = ['dynamite', 'abercrombie women']
styles = gr.Radio([ k for k in style_annotations_dict.keys() if k not in bad_urls ], value="uniqlo", label='Styles')
start_from = gr.Slider(0, 100, label='Start from', value=0, step=10)
with gr.Group():
text_filter = gr.Textbox(value='white t shirt, jeans', label='Outfit includes')
filter_button = gr.Button('Apply Text Filter')
with gr.Column():
# The initial gallery content is computed at build time from the default widget values.
outfits_gallery = gr.Gallery(label='Outfit Examples', value=change_gallery(styles.value, start_from.value, text_filter.value))
# Changing the style or the slider, or clicking the filter button, refreshes the gallery.
styles.change(fn=change_gallery, inputs=[styles, start_from, text_filter], outputs=outfits_gallery)
start_from.change(fn=change_gallery, inputs=[styles, start_from, text_filter], outputs=outfits_gallery)
filter_button.click(fn=change_gallery, inputs=[styles, start_from, text_filter], outputs=outfits_gallery)
# Tab 2: static explanation of the approach.
with gr.TabItem('How it works', id=2):
ABOUT_MARKDOWN = """
# Why
I don't really enjoy shopping, it takes a while a look at all the clothes,
and I'm not sure if I should buy what I picked out.
This program can tell me which clothes are the best for my wardrobe and style,
so I don't have to worry about buying the wrong stuff.
# How
This program poses the problem as a nonlinear integer programming problem.
The problem is to maximize the number of outfits, while contraining the number of clothes to be <= "this max you want to buy".
Let a, b, c etc be binary literals that represent the use of clothing type a, b, c.
These are clothing types like white dress shirt, or black hat.
We maximize the nonlinear expression abc + ade + bec + ... similar to a disjuctive normal form,
where each clause represents an outfit and is the product of clothing literals.
Each outfit clause has value 1 iff every article of clothing is used in the solution.
Pyomo is used to solve this optimization problem.
This objective takes the form of something like,
![objective problem](https://github.com/Ryan-Qiyu-Jiang/writting/blob/master/clothing_optimization_problem.png?raw=true)
Hart, William E., Carl Laird, Jean-Paul Watson, David L. Woodruff, Gabriel A. Hackebeil, Bethany L. Nicholson, and John D. Siirola. Pyomo – Optimization Modeling in Python. Springer, 2017.
# Caveats
This model understands fashion from a macro level.
It understands what a white shirt is, but is entirely blind to micro features like fit, brand, or shape.
It also bins different kinds of clothing and colors together.
So the model could mix up light grey sweatpants and dark grey chinos because they're both grey pants.
There is also error introduced from bad image annotations, approximate solvers, and style dataset size.
Hence it's important to look at the outfits images under possible outfits for a human touch!
"""
with gr.Box():
gr.Markdown(ABOUT_MARKDOWN)
# change_gallery was called while building the UI and rebinds article_keys;
# put the male keys back before serving.
article_keys = gender_article_keys['male']
# Start the Gradio server.
demo.launch(debug=False)