sksayril committed
Commit
ee4fe67
1 Parent(s): bf5d690

Upload 10 files

.gitignore ADDED
@@ -0,0 +1,4 @@
+ /kitpotproduct
+ /uploads
+ /__pycache__
+ res_vector_embeddings.pkl
README.md ADDED
@@ -0,0 +1,73 @@
+ # Image Retrieval System using ResNet50 and Nearest Neighbors
+
+ This repository contains code for an image retrieval system built with ResNet50, a pre-trained convolutional neural network, and a Nearest Neighbors search that finds similar images from their feature embeddings.
+
+ ## Overview
+
+ The system leverages ResNet50, a deep learning model pre-trained on ImageNet, to extract feature vectors from images. These vectors are stored as embeddings in a pickle file, alongside a second pickle file of the corresponding filenames; at query time an uploaded image is embedded the same way and matched against the stored vectors.
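+
+ As a rough sketch of the retrieval step (see `model.py` for the full implementation; the arrays below are random stand-ins, not the shipped embeddings):
+
+ ```python
+ import numpy as np
+ from sklearn.neighbors import NearestNeighbors
+
+ # Stand-in for the stored, L2-normalized ResNet50 embeddings (N x 2048).
+ embeddings = np.random.rand(10, 2048)
+ embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+ # Stand-in for the normalized embedding of an uploaded query image.
+ query = embeddings[0]
+
+ nn = NearestNeighbors(n_neighbors=5, algorithm='brute', metric='euclidean')
+ nn.fit(embeddings)
+ distances, indices = nn.kneighbors([query])
+ print(indices[0])  # positions of the most similar stored images
+ ```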
+
+ ### Files Included
+
+ - `app.py`: Flask application exposing the search-suggestion and image-retrieval endpoints.
+ - `model.py`: Feature extraction and nearest-neighbor matching logic.
+ - `rest_train.py`: Script that builds the embeddings from a folder of product images.
+ - `res_vector_embeddings.pkl`: Pickle file containing the feature embeddings of the indexed images.
+ - `res_filenames.pkl`: Pickle file storing the filenames corresponding to those embeddings.
+
+ ## Getting Started
+
+ ### Prerequisites
+
+ - Python 3.10
+ - Dependencies: Flask, Flask-Cors, Keras, TensorFlow, NumPy, pandas, scikit-learn
+
+ You can install the dependencies via:
+
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ ### Run the Application
+
+ ```bash
+ python app.py
+ ```
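+
+ ### Example Requests
+
+ Once the server is listening on port 5000, both endpoints can be exercised from Python. A minimal sketch, assuming the `requests` package is installed (it is not pinned in `requirements.txt`) and a local test image named `query.jpg`:
+
+ ```python
+ import requests
+
+ # Fuzzy correction of a search term via GET /get_closest_match.
+ r = requests.get('http://localhost:5000/get_closest_match',
+                  params={'search_key': 'mugg'})
+ print(r.json())
+
+ # Visual similarity search via POST /similar_images; the image is uploaded
+ # in the 'imageSearch' form field.
+ with open('query.jpg', 'rb') as f:
+     r = requests.post('http://localhost:5000/similar_images',
+                       files={'imageSearch': f})
+ print(r.json())
+ ```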
app.py ADDED
@@ -0,0 +1,73 @@
+ from flask import Flask, request, jsonify
+ from model import load_model, find_similar_images, extract_filenames
+ import os
+ import difflib
+ import pandas as pd
+ from flask_cors import CORS
+
+ app = Flask(__name__)
+ CORS(app)
+
+ # Build the vocabulary of known search terms from the category CSV.
+ df = pd.read_csv('kitpot.categoriesNew.csv', encoding='ISO-8859-1')
+ titles = df['DATA'].tolist()
+
+ # Lowercase every title and drop empty or non-string entries.
+ lowercase_titles = [str(title).lower() if isinstance(title, str) else '' for title in titles]
+ data = list(filter(None, lowercase_titles))
+
+ def find_closest_match(input_word, data):
+     """Return the closest known terms for a word (the word itself if already known)."""
+     if input_word in data:
+         return [input_word]
+     return difflib.get_close_matches(input_word, data)
+
+ def process_search_term(search_term, data):
+     """Replace each word of the search term with its closest known term, if any."""
+     words = search_term.split(' ')
+     processed_words = []
+     for word in words:
+         if word.lower() in data:
+             processed_words.append(word)
+         else:
+             closest_match = find_closest_match(word.lower(), data)
+             processed_words.append(closest_match[0] if closest_match else word)
+     return ' '.join(processed_words)
+
+ @app.route('/get_closest_match', methods=['GET'])
+ def get_closest_match():
+     search_term = request.args.get('search_key', default='', type=str)
+
+     if not search_term:
+         return jsonify({'message': 'Search term is empty.'})
+
+     processed_sentence = process_search_term(search_term, data)
+
+     if processed_sentence:
+         return jsonify({'result': processed_sentence})
+     return jsonify({'message': 'No similar words found.'})
+
+ @app.route('/similar_images', methods=['POST'])
+ def get_similar_images():
+     if 'imageSearch' not in request.files:
+         return jsonify({'error': 'No file part'})
+
+     file = request.files['imageSearch']
+     if file.filename == '':
+         return jsonify({'error': 'No selected file'})
+
+     # Save the upload temporarily, run the similarity search, then clean up.
+     upload_folder = 'uploads'
+     os.makedirs(upload_folder, exist_ok=True)
+     filepath = os.path.join(upload_folder, file.filename)
+     file.save(filepath)
+
+     similar_images = find_similar_images(filepath)
+     formatted_data = extract_filenames(similar_images)
+     os.remove(filepath)
+
+     return jsonify({'result': formatted_data})
+
+ if __name__ == '__main__':
+     load_model()  # warm up the model and embeddings before serving requests
+     app.run(host='0.0.0.0', port=5000, debug=True)
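The fuzzy correction in `process_search_term` rests on `difflib.get_close_matches`, which by default returns up to three candidates whose similarity ratio exceeds 0.6. A standalone illustration with a hypothetical vocabulary (the real list is built from `kitpot.categoriesNew.csv`):

```python
import difflib

# Hypothetical vocabulary; the app builds its list from the category CSV.
vocabulary = ['printed mug', 'notebook', 'keychain']

print(difflib.get_close_matches('noteboook', vocabulary))  # -> ['notebook']
print(difflib.get_close_matches('zzzz', vocabulary))       # -> []
```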
kitpot.categoriesNew.csv ADDED
The diff for this file is too large to render. See raw diff
kitpot.searchingdata14.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b4fe84ab190f6bb0a662807ef09d6d9b501c4a3398348e9e402550d90aae4a97
+ size 6132294
model.py ADDED
@@ -0,0 +1,79 @@
+ import os
+ import pickle
+ import sys
+ import ntpath
+ import keras
+ import numpy as np
+ from keras.preprocessing import image
+ from keras.layers import GlobalMaxPooling2D
+ from keras.applications.resnet50 import ResNet50, preprocess_input
+ from sklearn.neighbors import NearestNeighbors
+ from numpy.linalg import norm
+
+ # Lazily initialized globals: the feature extractor, the stored embeddings,
+ # their filenames, and the fitted nearest-neighbor index.
+ model = None
+ feature_list = None
+ filenames = None
+ neighbors = None
+
+ def load_model():
+     """Build the ResNet50 feature extractor and load the precomputed embeddings."""
+     global model, feature_list, filenames, neighbors
+     if model is not None:
+         return
+
+     base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
+     base.trainable = False
+     model = keras.Sequential([
+         base,
+         GlobalMaxPooling2D()
+     ])
+
+     script_dir = os.path.dirname(os.path.abspath(__file__))
+     embeddings_path = os.path.join(script_dir, 'res_vector_embeddings.pkl')
+     filenames_path = os.path.join(script_dir, 'res_filenames.pkl')
+
+     try:
+         with open(embeddings_path, 'rb') as emb_file, open(filenames_path, 'rb') as name_file:
+             feature_list = pickle.load(emb_file)
+             filenames = pickle.load(name_file)
+     except FileNotFoundError as e:
+         print(f"Error: {e}. Check if the required files exist in the specified path.")
+         sys.exit(1)
+     except Exception as e:
+         print(f"Error loading pickle files: {e}")
+         sys.exit(1)
+
+     # Fit the nearest-neighbor index once at load time instead of on every request.
+     neighbors = NearestNeighbors(n_neighbors=100, algorithm='brute', metric='euclidean')
+     neighbors.fit(feature_list)
+
+ def find_similar_images(image_path):
+     """Embed the query image and return the stored paths of its nearest neighbors."""
+     if model is None or feature_list is None or filenames is None or neighbors is None:
+         load_model()
+
+     try:
+         query_img = image.load_img(image_path, target_size=(224, 224))
+         query_img_array = image.img_to_array(query_img)
+         expanded_query_img_array = np.expand_dims(query_img_array, axis=0)
+         preprocessed_query_img = preprocess_input(expanded_query_img_array)
+         query_result = model.predict(preprocessed_query_img).flatten()
+         normalized_query_result = query_result / norm(query_result)
+
+         distances, indices = neighbors.kneighbors([normalized_query_result])
+
+         # Skip the first hit, which is treated as the query's own closest match.
+         similar_image_paths = [filenames[idx] for idx in indices[0][1:]]
+         return similar_image_paths
+     except FileNotFoundError as e:
+         print(f"Error: {e}. Check if the specified image file exists.")
+         return []
+     except Exception as e:
+         print(f"An error occurred: {e}")
+         return []
+
+ def extract_filenames(paths):
+     """Reduce the stored (Windows-style) paths to bare filenames."""
+     return [ntpath.basename(path) for path in paths]
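A detail worth noting about `find_similar_images`: because both the stored embeddings and the query vector are L2-normalized, ranking by Euclidean distance is equivalent to ranking by cosine similarity, since ‖a − b‖² = 2(1 − a·b) for unit vectors. A quick sanity check of that identity:

```python
import numpy as np

# For unit vectors, squared Euclidean distance equals 2 * (1 - cosine similarity).
rng = np.random.default_rng(0)
a = rng.normal(size=2048)
a /= np.linalg.norm(a)
b = rng.normal(size=2048)
b /= np.linalg.norm(b)

assert np.isclose(np.sum((a - b) ** 2), 2 * (1 - np.dot(a, b)))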
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ scikit-image==0.22.0
+ scikit-learn==1.3.2
+ keras
+ Flask
+ Flask-Cors
+ tensorflow
+ pandas
+ numpy
+ tqdm
res_filenames.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed291de01c088dabe36e0117743b160ba631f9fa122adb15d828207c00f9a0cc
+ size 1078046
res_vector_embeddings.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9181e1a6f91c09f4fbf34bf612c1a37f1240f3f8ce0d8d7cf17814c212c303ea
+ size 115122306
rest_train.py ADDED
@@ -0,0 +1,53 @@
+ import os
+ import numpy as np
+ import pickle
+ import tensorflow as tf
+ from tqdm import tqdm
+ from keras.preprocessing import image
+ from keras.layers import GlobalMaxPooling2D
+ from keras.applications.resnet50 import ResNet50, preprocess_input
+ from numpy.linalg import norm
+
+ # ResNet50 backbone with a global max pool on top, used purely as a feature extractor.
+ model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
+ model.trainable = False
+
+ model = tf.keras.Sequential([
+     model,
+     GlobalMaxPooling2D()
+ ])
+
+ def extract_features(img_path, model):
+     """Return the L2-normalized ResNet50 feature vector for one image."""
+     img = image.load_img(img_path, target_size=(224, 224))
+     img_array = image.img_to_array(img)
+     expanded_img_array = np.expand_dims(img_array, axis=0)
+     preprocessed_img = preprocess_input(expanded_img_array)
+     result = model.predict(preprocessed_img).flatten()
+     normalized_result = result / norm(result)
+     return normalized_result
+
+ root_folder = 'kitpotproduct'
+ feature_list = []
+ filenames = []
+
+ # Walk the product-image tree and embed every PNG/JPEG file.
+ for root, dirs, files in os.walk(root_folder):
+     for file in tqdm(files):
+         if file.lower().endswith(('.png', '.jpg', '.jpeg')):
+             img_path = os.path.join(root, file)
+             try:
+                 # Extract first, then append, so the two lists stay aligned on failure.
+                 features = extract_features(img_path, model)
+                 feature_list.append(features)
+                 filenames.append(img_path)
+             except Exception as e:
+                 print(f"Error processing file: {img_path}")
+                 print(f"Error message: {str(e)}")
+                 continue
+
+ # Persist the embeddings and their filenames for model.py / app.py to load.
+ with open('res_vector_embeddings.pkl', 'wb') as f:
+     pickle.dump(feature_list, f)
+
+ with open('res_filenames.pkl', 'wb') as f:
+     pickle.dump(filenames, f)
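After rebuilding the embeddings it is worth checking that the two pickle files line up before restarting the API. A minimal check, assuming the script above has already been run from the repository root:

```python
import pickle

import numpy as np

# Load both artifacts and confirm they correspond one-to-one.
with open('res_vector_embeddings.pkl', 'rb') as f:
    embeddings = np.array(pickle.load(f))
with open('res_filenames.pkl', 'rb') as f:
    names = pickle.load(f)

assert len(embeddings) == len(names), "embeddings and filenames are misaligned"
print(embeddings.shape)  # expected (num_images, 2048) for ResNet50 + global max pooling
print(names[:3])
```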