import ast
import json
import os
import spaces
import requests
import gradio as gr
from io import BytesIO
import face_recognition
from openai import OpenAI
from collections import Counter
from transformers import pipeline


OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

# Zero-shot image classifier fine-tuned for fashion products.
pipe = pipeline("zero-shot-image-classification", model="patrickjohncyh/fashion-clip")

color_file_path = 'color_config.json'
attributes_file_path = 'attributes_config.json'

with open(color_file_path, 'r') as file:
    color_data = json.load(file)

with open(attributes_file_path, 'r') as file:
    attributes_data = json.load(file)

COLOURS_DICT = color_data['color_mapping']
ATTRIBUTES_DICT = attributes_data['attribute_mapping']
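
# The two config files are not included here; judging from how COLOURS_DICT and
# ATTRIBUTES_DICT are used below, they are assumed to look roughly like this
# (illustrative values only):
#   color_config.json      -> {"color_mapping": {"red": ["crimson", "maroon", ...], ...}}
#   attributes_config.json -> {"attribute_mapping": {"women-top-shirt": {"colartype": ["round neck", ...], ...}, ...}}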


def shot(input, category):
    # Entry point for the Gradio interface: runs colour, attribute, angle and
    # face-embedding extraction over the image URLs of a single product.
    # The textbox may contain either a Python-style list literal (as in the
    # example below) or a plain comma-separated list of URLs.
    try:
        image_urls = ast.literal_eval(str(input))
    except (ValueError, SyntaxError):
        image_urls = [url.strip() for url in str(input).split(',') if url.strip()]

    subColour, mainColour, score = get_colour(image_urls, category)
    common_result = get_predicted_attributes(image_urls, category)
    openai_parsed_response = get_openAI_tags(image_urls)
    face_embeddings = get_face_embeddings(image_urls)
    return {
        "colors": {
            "main": mainColour,
            "sub": subColour,
            "score": round(score * 100, 2) if score is not None else None
        },
        "attributes": common_result,
        "image_mapping": openai_parsed_response,
        "face_embeddings": face_embeddings
    }
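
# Illustrative shape of the payload returned by shot() (hypothetical values):
# {
#     "colors": {"main": "blue", "sub": "navy", "score": 87.45},
#     "attributes": {"collar": "round neck", "sleeve length": "short"},
#     "image_mapping": {"0": "full-body", "1": "zoomed"},
#     "face_embeddings": {0: [0.12, ...], 1: []}
# }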


@spaces.GPU
def get_colour(image_urls, category):
    # First pass: classify the images against the main colour families.
    colourLabels = [f"{colour} clothing: {category}" for colour in COLOURS_DICT.keys()]

    responses = pipe(image_urls, candidate_labels=colourLabels)
    mainColour = responses[0][0]['label'].split(" clothing:")[0]

    if mainColour not in COLOURS_DICT:
        return None, None, None

    # Second pass: classify against the sub-colours of the detected main colour.
    # Build new label strings instead of editing COLOURS_DICT[mainColour] in
    # place, otherwise the shared config lists get corrupted between requests.
    labels = [f"{label} clothing: {category}" for label in COLOURS_DICT[mainColour]]

    responses = pipe(image_urls, candidate_labels=labels)
    subColour = responses[0][0]['label'].split(" clothing:")[0]

    return subColour, mainColour, responses[0][0]['score']
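
# For reference: given a list of images, the zero-shot pipeline is expected to
# return one ranked list of {'label', 'score'} dicts per image, roughly
# (made-up values) [[{'label': 'red clothing: shirt', 'score': 0.91}, ...], ...],
# which is why responses[0][0] above is the top label for the first image.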


@spaces.GPU
def get_predicted_attributes(image_urls, category):
    # Run zero-shot classification for every attribute configured for this
    # category and keep the most common prediction across the images.
    attributes = list(ATTRIBUTES_DICT.get(category, {}).keys())

    common_result = []
    for attribute in attributes:
        values = ATTRIBUTES_DICT.get(category, {}).get(attribute, [])
        if len(values) == 0:
            continue

        # Map config attribute names to readable prompt text.
        attribute = attribute.replace("colartype", "collar").replace("sleevelength", "sleeve length").replace("fabricstyle", "fabric")
        values = [f"{attribute}: {value}, clothing: {category}" for value in values]

        responses = pipe(image_urls, candidate_labels=values)
        result = [response[0]['label'].split(", clothing:")[0] for response in responses]

        # "details" can have several values, so also keep each image's
        # second-best label and report the two most common overall.
        if attribute == "details":
            result += [response[1]['label'].split(", clothing:")[0] for response in responses]
            common_result.append(Counter(result).most_common(2))
        else:
            common_result.append(Counter(result).most_common(1))

    # Turn the "attribute: value" predictions into a dictionary, joining
    # multiple values for the same attribute with commas.
    result = {}
    for predictions in common_result:
        for label, _count in predictions:
            key, value = label.split(': ', 1)
            result[key] = f"{result[key]}, {value}" if key in result else value

    return result
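
# Example (hypothetical) return value for category "women-top-shirt":
# {"collar": "round neck", "sleeve length": "short", "fabric": "cotton", "details": "lace, ruffle"}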


def get_openAI_tags(image_urls):
    # Ask GPT-4o which camera angle each product image was taken from.
    imageList = []
    for image in image_urls:
        imageList.append({"type": "image_url", "image_url": {"url": image}})

    openai_response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "You're a tagging assistant; you will help label and tag product pictures for my online e-commerce platform. Your task is to return the angle each product image was taken from. Choose from 'full-body', 'half-body', 'side', 'back', or 'zoomed'. Label each image with the option that fits best (ideally, every label should be used at least once, but only if there are 5 or more images), and respond with an unformatted dictionary where the key is the string index of the image URL and the value is the assigned label."
                    }
                ]
            },
            {
                "role": "user",
                "content": imageList
            },
        ],
        temperature=1,
        max_tokens=500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    # The model is instructed to reply with a bare dictionary, so parse it directly.
    response = json.loads(openai_response.choices[0].message.content)
    return response
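
# A successful reply is expected to parse into something like (illustrative):
# {"0": "full-body", "1": "half-body", "2": "back"}
# If the model wraps the dictionary in extra text, json.loads above will raise.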


@spaces.GPU
def get_face_embeddings(image_urls):
    # Download each image and compute a face embedding for the first detected face.
    results = {}

    for index, url in enumerate(image_urls):
        try:
            response = requests.get(url)
            response.raise_for_status()

            image = face_recognition.load_image_file(BytesIO(response.content))
            face_encodings = face_recognition.face_encodings(image)

            # Store an empty list when no face is detected, otherwise the first
            # face's 128-dimensional encoding.
            if not face_encodings:
                results[index] = []
            else:
                results[index] = face_encodings[0].tolist()
        except Exception as e:
            results[index] = f"Error processing image: {str(e)}"

    return results
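
# Illustrative follow-up (not executed here): stored encodings can later be
# compared with face_recognition's distance helper, for example:
#   import numpy as np
#   known = np.array(results[0])        # hypothetical stored embedding
#   candidate = np.array(results[1])
#   distance = face_recognition.face_distance([known], candidate)[0]
# Smaller distances (typically below ~0.6) suggest the same person.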


iface = gr.Interface(
    fn=shot,
    inputs=[
        gr.Textbox(label="Image URLs (starting with http/https), comma separated"),
        gr.Textbox(label="Category")
    ],
    outputs="text",
    examples=[
        [['https://d2q1sfov6ca7my.cloudfront.net/eyJidWNrZXQiOiAiaGljY3VwLWltYWdlLWhvc3RpbmciLCAia2V5IjogIlc4MDAwMDAwMTM0LU9SL1c4MDAwMDAwMTM0LU9SLTEuanBnIiwgImVkaXRzIjogeyJyZXNpemUiOiB7IndpZHRoIjogODAwLCAiaGVpZ2h0IjogMTIwMC4wLCAiZml0IjogIm91dHNpZGUifX19',
          'https://d2q1sfov6ca7my.cloudfront.net/eyJidWNrZXQiOiAiaGljY3VwLWltYWdlLWhvc3RpbmciLCAia2V5IjogIlc4MDAwMDAwMTM0LU9SL1c4MDAwMDAwMTM0LU9SLTIuanBnIiwgImVkaXRzIjogeyJyZXNpemUiOiB7IndpZHRoIjogODAwLCAiaGVpZ2h0IjogMTIwMC4wLCAiZml0IjogIm91dHNpZGUifX19',
          'https://d2q1sfov6ca7my.cloudfront.net/eyJidWNrZXQiOiAiaGljY3VwLWltYWdlLWhvc3RpbmciLCAia2V5IjogIlc4MDAwMDAwMTM0LU9SL1c4MDAwMDAwMTM0LU9SLTMuanBnIiwgImVkaXRzIjogeyJyZXNpemUiOiB7IndpZHRoIjogODAwLCAiaGVpZ2h0IjogMTIwMC4wLCAiZml0IjogIm91dHNpZGUifX19'],
         "women-top-shirt"]],
    description="Provide product image URLs (starting with http/https) separated by commas, plus the product category.",
    title="Full product flow"
)

iface.launch()