Upload 10 files

- .gitignore +4 -0
- README.md +32 -0
- app.py +91 -0
- kitpot.categoriesNew.csv +0 -0
- kitpot.searchingdata14.pkl +3 -0
- model.py +76 -0
- requirements.txt +8 -0
- res_filenames.pkl +3 -0
- res_vector_embeddings.pkl +3 -0
- rest_train.py +48 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
+/kitpotproduct
+/uploads
+/__pycache__
+res_vector_embeddings.pkl
README.md
ADDED
@@ -0,0 +1,32 @@
+# Image Retrieval System using ResNet50 and Nearest Neighbors
+
+This repository contains code for an image retrieval system built with ResNet50, a convolutional neural network pre-trained on ImageNet, and a Nearest Neighbors search that finds similar images based on feature embeddings.
+
+## Overview
+
+The system uses ResNet50 to extract a feature vector from each image. These vectors are stored as embeddings in a pickle file, alongside a second pickle file holding the corresponding filenames.
+
+### Files Included
+
+- `app.py`: Flask app exposing the image retrieval and search-term correction endpoints.
+- `res_vector_embeddings.pkl`: Pickle file containing the feature embeddings of the catalog images.
+- `res_filenames.pkl`: Pickle file storing the filenames corresponding to the image embeddings.
+
+## Getting Started
+
+### Prerequisites
+
+- Python 3.10
+- Dependencies: TensorFlow/Keras, NumPy, scikit-learn, Flask (see `requirements.txt`)
+
+### Installation
+
+Install the dependencies via:
+```bash
+pip install -r requirements.txt
+```
+### Run the app
+
+```bash
+python app.py
+```
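Once `python app.py` is running, the two endpoints it exposes can be exercised from any HTTP client. A minimal client sketch, assuming the server is reachable on localhost:5000 and that `query.jpg` is a local image (both names are illustrative, not part of the repository):

```python
# Client sketch for the two endpoints defined in app.py.
import requests

# Spell-correct a search term against the catalog vocabulary.
r = requests.get(
    "http://localhost:5000/get_closest_match",
    params={"search_key": "coffe mug"},  # hypothetical misspelled query
)
print(r.json())

# Upload an image and retrieve filenames of visually similar products.
with open("query.jpg", "rb") as f:  # hypothetical local image
    r = requests.post(
        "http://localhost:5000/similar_images",
        files={"imageSearch": f},
    )
print(r.json())
```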
app.py
ADDED
@@ -0,0 +1,91 @@
+from flask import Flask, request, jsonify
+from model import load_model, find_similar_images, extract_filenames
+import os
+import difflib
+import pandas as pd
+from flask_cors import CORS
+import json
+
+app = Flask(__name__)
+CORS(app)
+catalog = pd.read_csv('kitpot.categoriesNew.csv', encoding='ISO-8859-1')
+titles = catalog['DATA'].tolist()
+
+lowercase_titles = [str(title).lower() if isinstance(title, str) else '' for title in titles]
+vocabulary = list(filter(None, lowercase_titles))  # drop empty entries
+
+def find_closest_match(input_word, data):
+    if input_word in data:
+        return [input_word]
+
+    closest_match = difflib.get_close_matches(input_word, data)
+    return closest_match
+
+def process_search_term(search_term, data):
+    words = search_term.split(' ')
+    processed_words = []
+
+    for word in words:
+
+        if word.lower() in data:
+            processed_words.append(word)
+        else:
+            closest_match = find_closest_match(word.lower(), data)
+            processed_words.append(closest_match[0] if closest_match else word)
+
+    processed_sentence = ' '.join(processed_words)
+    return processed_sentence
+
+@app.route('/get_closest_match', methods=['GET'])
+def get_closest_match():
+    search_term = request.args.get('search_key', default='', type=str)
+
+    if not search_term:
+        response = {
+            'message': 'Search term is empty.',
+        }
+        return jsonify(response)
+
+    processed_sentence = process_search_term(search_term, vocabulary)
+
+    if processed_sentence:
+        response = {
+            'result': processed_sentence,
+        }
+    else:
+        response = {
+            'message': 'No similar words found.',
+        }
+
+    return jsonify(response)
+
+
+@app.route('/similar_images', methods=['POST'])
+def get_similar_images():
+    if 'imageSearch' not in request.files:
+        return jsonify({'error': 'No file part'})
+
+    file = request.files['imageSearch']
+    if file.filename == '':
+        return jsonify({'error': 'No selected file'})
+
+    if file:
+        filename = file.filename
+        upload_folder = 'uploads'
+        if not os.path.exists(upload_folder):
+            os.makedirs(upload_folder)
+        filepath = os.path.join(upload_folder, filename)
+        file.save(filepath)
+
+        similar_images = find_similar_images(filepath)
+        formatted_data = extract_filenames(similar_images)
+        # formatted_data = convert_file_paths(similar_images)
+        os.remove(filepath)
+
+        return jsonify({'result': formatted_data})
+    else:
+        return jsonify({'error': 'File format not supported'})
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=5000, debug=True)
+
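One caveat in `get_similar_images`: the client-supplied `file.filename` is joined directly into the upload path, so a crafted name (e.g. containing `../`) could escape the `uploads` folder. A minimal hardening sketch using Werkzeug's `secure_filename` (Werkzeug ships with Flask; `save_upload` is an illustrative helper, not part of app.py):

```python
# Sketch: sanitize the uploaded filename before saving, assuming the same
# Flask setup as app.py. secure_filename() strips path separators and other
# unsafe characters from a client-supplied name.
import os
from werkzeug.utils import secure_filename

def save_upload(file, upload_folder="uploads"):
    filename = secure_filename(file.filename)
    if not filename:  # name was empty or consisted only of unsafe characters
        raise ValueError("invalid filename")
    os.makedirs(upload_folder, exist_ok=True)
    filepath = os.path.join(upload_folder, filename)
    file.save(filepath)
    return filepath
```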
kitpot.categoriesNew.csv
ADDED
The diff for this file is too large to render.
kitpot.searchingdata14.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4fe84ab190f6bb0a662807ef09d6d9b501c4a3398348e9e402550d90aae4a97
+size 6132294
model.py
ADDED
@@ -0,0 +1,76 @@
+import os
+import pickle
+import sys
+import keras
+import numpy as np
+from keras.preprocessing import image
+from keras.layers import GlobalMaxPooling2D
+from keras.applications.resnet50 import ResNet50, preprocess_input
+from sklearn.neighbors import NearestNeighbors
+from numpy.linalg import norm
+
+model = None
+feature_list = None
+filenames = None
+
+def load_model():
+    global model, feature_list, filenames
+    if model is None:
+        model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
+        model.trainable = False
+        model = keras.Sequential([
+            model,
+            GlobalMaxPooling2D()
+        ])
+
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    embeddings_path = os.path.join(script_dir, 'res_vector_embeddings.pkl')
+    filenames_path = os.path.join(script_dir, 'res_filenames.pkl')
+
+    try:
+        with open(embeddings_path, 'rb') as emb_file, open(filenames_path, 'rb') as name_file:
+            feature_list = pickle.load(emb_file)
+            filenames = pickle.load(name_file)
+    except FileNotFoundError as e:
+        print(f"Error: {e}. Check if the required files exist in the specified path.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Error loading pickle files: {e}")
+        sys.exit(1)
+
+def find_similar_images(image_path):
+    if model is None or feature_list is None or filenames is None:
+        load_model()
+
+    try:
+        query_img = image.load_img(image_path, target_size=(224, 224))
+        query_img_array = image.img_to_array(query_img)
+        expanded_query_img_array = np.expand_dims(query_img_array, axis=0)
+        preprocessed_query_img = preprocess_input(expanded_query_img_array)
+        query_result = model.predict(preprocessed_query_img).flatten()
+        normalized_query_result = query_result / norm(query_result)
+
+        neighbors = NearestNeighbors(n_neighbors=100, algorithm='brute', metric='euclidean')
+        neighbors.fit(feature_list)
+
+        distances, indices = neighbors.kneighbors([normalized_query_result])
+
+        similar_image_paths = [filenames[idx] for idx in indices[0][1:]]  # skip index 0, assumed to be the query itself
+        return similar_image_paths
+    except FileNotFoundError as e:
+        print(f"Error: {e}. Check if the specified image file exists.")
+        return []
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return []
+
+# def convert_file_paths(array_data):
+#     base_path = "uploads/catalog/product/"
+#     transformed_paths = [base_path + path.replace("\\", "/") for path in array_data]
+
+#     # Remove the "kitpotproduct/" prefix from each path
+#     transformed_paths = [path.replace("uploads/catalog/product/kitpotproduct/", "uploads/catalog/product/") for path in transformed_paths]
+
+#     return transformed_paths
+def extract_filenames(paths):
+    return [path.split("\\")[-1] for path in paths]
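A performance note on `find_similar_images`: it re-fits `NearestNeighbors` over the full embedding matrix on every query. Because the embeddings are static, the index could be built once in `load_model` and reused; a sketch of that variant (`build_index` and `query_index` are illustrative names, not part of model.py):

```python
# Sketch: fit the nearest-neighbor index once, then reuse it per query.
# Assumes feature_list is loaded as in model.py.
from sklearn.neighbors import NearestNeighbors

neighbors_index = None

def build_index(feature_list):
    # Call once after the embeddings pickle is loaded.
    global neighbors_index
    neighbors_index = NearestNeighbors(
        n_neighbors=100, algorithm='brute', metric='euclidean'
    )
    neighbors_index.fit(feature_list)

def query_index(normalized_embedding):
    # kneighbors expects a 2D array: one row per query vector.
    distances, indices = neighbors_index.kneighbors([normalized_embedding])
    return indices[0]
```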
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+scikit-image==0.22.0
+scikit-learn==1.3.2
+keras
+Flask
+tensorflow
+flask-cors
+pandas
+tqdm
res_filenames.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed291de01c088dabe36e0117743b160ba631f9fa122adb15d828207c00f9a0cc
+size 1078046
res_vector_embeddings.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9181e1a6f91c09f4fbf34bf612c1a37f1240f3f8ce0d8d7cf17814c212c303ea
+size 115122306
rest_train.py
ADDED
@@ -0,0 +1,48 @@
+import os
+import numpy as np
+import pickle
+import tensorflow as tf
+from tqdm import tqdm
+from keras.preprocessing import image
+from keras.layers import GlobalMaxPooling2D
+from keras.applications.resnet50 import ResNet50, preprocess_input
+from numpy.linalg import norm
+
+model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
+model.trainable = False
+
+model = tf.keras.Sequential([
+    model,
+    GlobalMaxPooling2D()
+])
+
+def extract_features(img_path, model):
+    img = image.load_img(img_path, target_size=(224, 224))
+    img_array = image.img_to_array(img)
+    expanded_img_array = np.expand_dims(img_array, axis=0)
+    preprocessed_img = preprocess_input(expanded_img_array)
+    result = model.predict(preprocessed_img).flatten()
+    normalized_result = result / norm(result)
+    return normalized_result
+
+root_folder = 'kitpotproduct'
+feature_list = []
+filenames = []
+
+for root, dirs, files in os.walk(root_folder):
+    for file in tqdm(files):
+        if file.lower().endswith(('.png', '.jpg', '.jpeg')):  # .lower() already covers uppercase extensions
+            img_path = os.path.join(root, file)
+            try:
+                features = extract_features(img_path, model)  # extract first so the two lists stay aligned if this raises
+                filenames.append(img_path)
+                feature_list.append(features)
+            except Exception as e:  # Exception already covers OSError, IOError and ValueError
+                print(f"Error processing file: {img_path}")
+                print(f"Error message: {str(e)}")
+                continue
+with open('res_vector_embeddings.pkl', 'wb') as f:
+    pickle.dump(feature_list, f)
+
+with open('res_filenames.pkl', 'wb') as f:
+    pickle.dump(filenames, f)
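After a training run completes, a quick way to confirm the two pickles line up is the standalone sketch below, run from the repository root. The expected 2048-dimensional shape follows from ResNet50 with `include_top=False` and global max pooling; the unit norm follows from the normalization in `extract_features`.

```python
# Sketch: verify the generated pickles are consistent with each other.
import pickle
import numpy as np

with open('res_vector_embeddings.pkl', 'rb') as f:
    feature_list = pickle.load(f)
with open('res_filenames.pkl', 'rb') as f:
    filenames = pickle.load(f)

assert len(feature_list) == len(filenames), "embedding/filename count mismatch"
embeddings = np.asarray(feature_list)
print(embeddings.shape)               # expected: (num_images, 2048)
print(np.linalg.norm(embeddings[0]))  # expected: ~1.0, vectors are normalized
```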