koenverhagen committed on
Commit 0b89ddd
1 Parent(s): 79f0df2
Files changed (6)
  1. .gitignore +4 -0
  2. Dockerfile +14 -0
  3. createlookalike.py +135 -0
  4. requirements.txt +36 -0
  5. server.py +18 -0
  6. state.json +1 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
+ venv
+ *.iml
+ *.xml
+ .idea
Dockerfile ADDED
@@ -0,0 +1,14 @@
+ FROM tensorflow/tensorflow:2.10.0-gpu
+
+ WORKDIR /app
+
+ RUN mkdir -p shops
+
+ COPY ./requirements.txt /requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /requirements.txt
+
+ # copy the application code into the image so uvicorn can find server.py
+ COPY . /app
+
+ CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]
createlookalike.py ADDED
@@ -0,0 +1,135 @@
+ import tempfile as tfile
+ from datetime import datetime
+ from urllib.request import urlopen
+
+ import requests
+ from keras.utils import img_to_array
+ from lxml import etree
+ import keras
+
+ from keras.applications.imagenet_utils import preprocess_input
+ from keras.models import Model
+ from PIL import Image
+ from io import BytesIO
+
+ import numpy as np
+
+ from sklearn.decomposition import PCA
+ from scipy.spatial import distance
+ from collections import OrderedDict
+ from remove import remove_files
+
+ from generate_csv_file import generate_csv_files
+ from load_data import load_data, get_shops
+
+
+ def get_ids_from_feed(feed_url):
+     # download the feed into a temporary xml file
+     temp_file = tfile.NamedTemporaryFile(mode="w", suffix=".xml", prefix="feed")
+     f = temp_file.name
+     temp_file.write(urlopen(feed_url).read().decode('utf-8'))
+     temp_file.flush()
+
+     # parse the xml file
+     tree = etree.parse(f)
+     temp_file.close()
+     root = tree.getroot()
+
+     # collect the design ids and the shop base url
+     list_ids = []
+     shop_url = root[0][1].text
+     for item in root.findall(".//g:mpn", root.nsmap):
+         list_ids.append(item.text)
+     return list_ids, shop_url
+
+
+ def get_image(url):
+     res = requests.get(url)
+     im = Image.open(BytesIO(res.content)).convert("RGB").resize((224, 224))
+     img = img_to_array(im)
+     x = np.expand_dims(img, axis=0)
+     x = preprocess_input(x)
+     return img, x
+
+
+ def load_image(url, img_id):
+     print('get image url', img_id)
+     request_url = '{}/flat_thumb/{}/1/224'.format(url, img_id)
+     print('get image', request_url)
+     img, x = get_image(request_url)
+     return img, x
+
+
+ def create_feature_files():
+     # VGG16 up to its fc2 layer serves as the feature extractor
+     model = keras.applications.VGG16(weights='imagenet', include_top=True)
+     feat_extractor = Model(inputs=model.input, outputs=model.get_layer("fc2").output)
+     final_json = []
+     data = get_shops()
+
+     if data:
+         for p in data:
+             final_json.append(calculate_shop(p, feat_extractor))
+
+     load_data(generate_csv_files(final_json))
+     remove_files()
+     return
+
+
+ def calculate_shop(shop, feat_extractor):
+     start = datetime.today()
+     design_json = {}
+     if shop['id'] not in ['']:  # temp
+         print(shop['id'], shop['base_url'])
+         google_xml_feed_url = '{}/google_xml_feed'.format(shop['base_url'])
+         try:
+             list_ids, shop_url = get_ids_from_feed(google_xml_feed_url)
+         except Exception as e:
+             list_ids = []
+             print('could not get images from', shop['id'], e)
+
+         features = []
+         list_of_fitted_designs = []
+         if len(list_ids) > 0:
+             for l in list_ids:
+                 try:
+                     img, x = load_image(shop_url, l)
+                     feat = feat_extractor.predict(x)[0]
+                     features.append(feat)
+                     list_of_fitted_designs.append(l)
+                 except Exception as e:
+                     print(l, 'failed loading feature extraction', e)
+
+         pca_features = []
+         try:
+             features = np.array(features)
+             # cap the number of PCA components at 300
+             components = len(features) if len(features) < 300 else 300
+             pca = PCA(n_components=components)
+             pca.fit(features)
+             pca_features = pca.transform(features)
+         except Exception as e:
+             print('pca too small?', e)
+
+         max_list_per_design = min(len(list_of_fitted_designs), 80)
+
+         try:
+             for im in list_of_fitted_designs:
+                 query_image_idx = list_of_fitted_designs.index(im)
+                 # cosine distance from the query design to every fitted design
+                 similar_idx = [distance.cosine(pca_features[query_image_idx], feat) for feat in pca_features]
+                 filterd_idx = dict()
+                 for i in range(len(similar_idx)):
+                     filterd_idx[i] = {"dist": similar_idx[i], "id": list_of_fitted_designs[i]}
+                 # sort by distance; entry 0 is the query design itself, so skip it
+                 sorted_dict = dict(
+                     OrderedDict(sorted(filterd_idx.items(), key=lambda i: i[1]['dist'])[1:max_list_per_design]))
+                 design_list = []
+                 for k, v in sorted_dict.items():
+                     design_list.append(v)
+                 design_json.update({im: design_list})
+         except Exception as e:
+             print("could not create json with look-a-like for shop:", shop['id'], e)
+
+     end = datetime.today()
+     return {'shop_id': shop['id'], 'start_time': start, 'end_time': end, 'designs': design_json}
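
For context on what this file computes: calculate_shop embeds every design image with VGG16's fc2 layer, reduces the embeddings with PCA, and ranks designs by pairwise cosine distance. Below is a minimal standalone sketch of that ranking step, using synthetic vectors in place of real fc2 features; the array shapes and design ids are illustrative, not part of the commit.

import numpy as np
from scipy.spatial import distance
from sklearn.decomposition import PCA

# synthetic stand-ins for the 4096-dim fc2 embeddings of ten designs
rng = np.random.default_rng(0)
features = rng.normal(size=(10, 4096))
design_ids = ['design-{}'.format(i) for i in range(10)]  # hypothetical ids

# same reduction as calculate_shop: component count capped by sample count
pca = PCA(n_components=min(len(features), 300))
pca_features = pca.fit_transform(features)

# rank every design against the first by cosine distance; drop the first
# entry of the sorted list, which is the query design itself at distance 0
dists = [distance.cosine(pca_features[0], feat) for feat in pca_features]
ranked = sorted(zip(design_ids, dists), key=lambda pair: pair[1])[1:]
print(ranked[:3])  # the three closest look-alikes

The [1:max_list_per_design] slice in calculate_shop serves the same purpose as the [1:] here: a design is always its own nearest neighbour, so the first sorted entry carries no information.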
requirements.txt ADDED
@@ -0,0 +1,36 @@
+ joblib==1.2.0
+ keras==2.10.0
+ lxml==4.9.1
+ numpy==1.23.4
+ scikit-learn==1.1.3
+ scipy==1.9.3
+
+ Pillow==9.2.0
+ pandas==1.5.1
+ requests==2.28.1
+
+ python-multipart==0.0.5
+ matplotlib==3.6.1
+ scikit-image==0.19.3
+ six==1.16.0
+ termcolor==2.0.1
+ tensorboard==2.10.1
+ tensorboard-data-server==0.6.1
+ tensorboard-plugin-wit==1.8.1
+ tensorflow-estimator==2.10.0
+
+ threadpoolctl==3.1.0
+ tifffile==2022.10.10
+ typing_extensions==4.4.0
+
+
+ google-auth==2.13.0
+ google-auth-oauthlib==0.4.6
+ google-pasta==0.2.0
+
+
+
+ fastapi==0.66.0
+ uvicorn==0.14.0
+
+ tensorflow==2.10.0
server.py ADDED
@@ -0,0 +1,18 @@
+ import os
+
+ from fastapi import FastAPI, Response, Request, Header, Form, UploadFile, Body
+ from starlette.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from load_data import get_design_data, get_design_resolutions_for_shop
+
+ app = FastAPI()
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # allows all origins
+     allow_credentials=True,
+     allow_methods=["*"],  # allows all methods
+     allow_headers=["*"],  # allows all headers
+ )
+
+ # prefer the API_TOKEN environment variable over the hard-coded default
+ API_TOKEN = os.environ.get("API_TOKEN", '34dsadfF$$%#$TGREGEFGE%Q*)(*&%')
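
server.py so far only creates the app and opens up CORS; no routes use get_design_data yet. A hedged sketch of exercising it locally, outside Docker, on the same port the Dockerfile CMD uses (the run_local.py filename is an assumption, not part of the commit):

# run_local.py -- hypothetical helper, not part of this commit
import uvicorn

if __name__ == "__main__":
    # serve the FastAPI app defined in server.py on the Dockerfile's port
    uvicorn.run("server:app", host="0.0.0.0", port=7860)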
state.json ADDED
@@ -0,0 +1 @@
+ {"state": 0}