JoJosmin committed on
Commit
6373138
•
1 Parent(s): 1576524

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -15
app.py CHANGED
@@ -130,23 +130,38 @@ def segment_clothing(img, clothes=["Hat", "Upper-clothes", "Skirt", "Pants", "Dr
130
  #
131
  # return structured_results
132
 
133
def find_similar_images(query_embedding, collection, top_k=5):
    """Rank every record in ``collection`` by cosine similarity to a query.

    Args:
        query_embedding: Array-like object exposing ``reshape``; it is
            reshaped to a single row before scoring.
        collection: Object whose ``get(include=[...])`` returns a mapping
            with ``'embeddings'`` and ``'metadatas'`` entries
            (presumably a Chroma collection -- confirm against the caller).
        top_k: Maximum number of results to return.

    Returns:
        A list of ``{'info': metadata, 'similarity': score}`` dicts ordered
        from most to least similar, truncated to ``top_k`` entries.
    """
    # Fetch every stored embedding together with its metadata.
    stored = collection.get(include=['embeddings', 'metadatas'])
    embedding_matrix = np.array(stored['embeddings'])  # embeddings as an array
    metadata_list = stored['metadatas']  # list of metadata records

    # Score the query (as a single row) against all stored embeddings.
    query_embedding = query_embedding.reshape(1, -1)
    scores = cosine_similarity(query_embedding, embedding_matrix).flatten()

    # Pair each score with its metadata and order by descending similarity.
    ranked = sorted(
        zip(scores, metadata_list),
        key=lambda pair: pair[0],
        reverse=True,
    )

    # Emit the best matches in the final result format.
    results = []
    for score, metadata in ranked[:top_k]:
        results.append({'info': metadata, 'similarity': score})
    return results
150
 
151
 
152
 
 
130
  #
131
  # return structured_results
132
 
133
def find_similar_images(query_embedding, collection, top_k=5, batch_size=500):
    """Return the ``top_k`` records most similar to ``query_embedding``.

    The collection is scanned page by page so that at most ``batch_size``
    embeddings are held in memory at once, and only a running top-k of
    scored records is kept between pages.

    Args:
        query_embedding: Array-like object exposing ``reshape``; it is
            reshaped to a single row before scoring.
        collection: Collection exposing ``count()`` and
            ``get(include=..., limit=..., offset=...)``
            (presumably a Chroma collection -- confirm against the caller).
        top_k: Maximum number of results to return. Values <= 0 yield ``[]``.
        batch_size: Number of records fetched per page; must be >= 1.

    Returns:
        A list of ``{'info': metadata, 'similarity': score}`` dicts ordered
        from most to least similar, with at most ``top_k`` entries.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if top_k <= 0:
        return []

    query_embedding = query_embedding.reshape(1, -1)  # query as a single row

    # Ask the collection for its size instead of loading every metadata
    # record just to measure the length.
    data_length = collection.count()

    best = []
    for offset in range(0, data_length, batch_size):
        # Page through the collection with limit/offset; ``get`` does not
        # accept ``start``/``end`` keyword arguments.
        page = collection.get(
            include=['embeddings', 'metadatas'],
            limit=batch_size,
            offset=offset,
        )

        batch_metadatas = page['metadatas']
        if not batch_metadatas:
            # The collection shrank while scanning; nothing left to score.
            break
        batch_embeddings = np.asarray(page['embeddings'])

        # Cosine similarity between the query and this page's embeddings.
        similarities = cosine_similarity(query_embedding, batch_embeddings).flatten()

        # Merge this page into the running top-k. Earlier records are listed
        # first so the stable sort keeps them ahead of later ties, matching
        # a single global sort.
        candidates = best + [
            {'info': metadata, 'similarity': similarity}
            for similarity, metadata in zip(similarities, batch_metadatas)
        ]
        best = sorted(
            candidates,
            key=lambda item: item['similarity'],
            reverse=True,
        )[:top_k]

    return best
164
 
 
 
 
165
 
166
 
167