Siyun He committed on
Commit
54f0f05
·
1 Parent(s): a1ee05b

update files

Browse files
.gitignore CHANGED
@@ -1 +1,2 @@
1
  .DS_Store
 
 
1
  .DS_Store
2
+ ./__pycache__
__pycache__/classification.cpython-311.pyc ADDED
Binary file (12.5 kB). View file
 
classification.py CHANGED
@@ -9,55 +9,6 @@ def resize_image(image_path, save_path):
9
  img = cv2.resize(img, (128, 128))
10
  cv2.imwrite(save_path, img)
11
 
12
- # read image data from ./grass/ folder
13
- if not os.path.exists('./grass_resized/'):
14
- os.makedirs('./grass_resized/')
15
-
16
- # rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
17
- count = 1
18
- for file in os.listdir('./grass/'):
19
- if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
20
- resize_image('./grass/' + file, './grass_resized/' + str(count) + '.jpg')
21
- count += 1
22
-
23
- print('Done!')
24
-
25
- # save the resized image to ./wood_resized/ folder
26
- if not os.path.exists('./wood_resized/'):
27
- os.makedirs('./wood_resized/')
28
-
29
- # rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
30
- count = 1
31
- for file in os.listdir('./wood/'):
32
- if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
33
- resize_image('./wood/' + file, './wood_resized/' + str(count) + '.jpg')
34
- count += 1
35
-
36
- print('Done!')
37
-
38
- # Divide the data into training and testing data: 70% training, 30% testing
39
- # Merge grass and wood data into training and testing data
40
- # Save the training data to ./train/ folder
41
- # Save the testing data to ./test/ folder
42
- import shutil
43
-
44
- if not os.path.exists('./train/'):
45
- os.makedirs('./train/')
46
- if not os.path.exists('./test/'):
47
- os.makedirs('./test/')
48
-
49
- # Rename files so that they do not overwrite each other
50
- for i in range(1, 36):
51
- shutil.copy('./grass_resized/' + str(i) + '.jpg', './train/' + str(i) + '.jpg')
52
- for i in range(36, 51):
53
- shutil.copy('./grass_resized/' +
54
- str(i) + '.jpg', './test/' + str(i - 35) + '.jpg')
55
- for i in range(1, 36):
56
- shutil.copy('./wood_resized/' + str(i) + '.jpg', './train/' + str(i + 35) + '.jpg')
57
- for i in range(36, 51):
58
- shutil.copy('./wood_resized/' +
59
- str(i) + '.jpg', './test/' + str(i - 20) + '.jpg')
60
-
61
  # Do data augmentation by flipping the images horizontally on train data
62
  # Save the augmented data to the same folders
63
  def augment_image(image_path, save_path):
@@ -69,13 +20,6 @@ def augment_image(image_path, save_path):
69
  if np.random.rand() > 0.5:
70
  img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
71
  cv2.imwrite(save_path, img)
72
-
73
- for i in range(1, 36):
74
- augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
75
- for i in range(36, 51):
76
- augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
77
-
78
-
79
 
80
  # Compute the GLCM for each image.
81
  # Extract features like contrast, correlation, energy, and homogeneity.
@@ -112,27 +56,13 @@ def compute_glcm(image_path, ispath=True):
112
  mean_homogeneity = np.mean(homogeneity_flat)
113
  return [mean_contrast, mean_correlation, mean_energy, mean_homogeneity]
114
 
115
- # Compute the GLCM for each image in the training data
116
- data = []
117
- for i in range(1, 71):
118
- data.append(compute_glcm('./train/' + str(i) + '.jpg'))
119
- df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
120
- df['class'] = ['grass']*35 + ['wood']*35
121
- df.to_csv('train_glcm.csv', index=False)
122
-
123
- # Compute the GLCM for each image in the testing data
124
- data = []
125
- for i in range(1, 31):
126
- data.append(compute_glcm('./test/' + str(i) + '.jpg'))
127
- df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
128
- df['class'] = ['grass']*15 + ['wood']*15
129
- df.to_csv('test_glcm.csv', index=False)
130
-
131
  # Apply the LBP operator to each image.
132
  # Generate histograms of LBP codes to create feature vectors.
133
  # Save the features to a CSV file.
134
  # Label each feature vector with the correct class (grass or wood).
135
  from skimage.feature import local_binary_pattern
 
 
136
 
137
  def compute_lbp(image_path, ispath=True):
138
  if ispath:
@@ -143,78 +73,6 @@ def compute_lbp(image_path, ispath=True):
143
  hist, _ = np.histogram(lbp, bins=np.arange(0, 11), density=True)
144
  return hist
145
 
146
- # Compute the LBP for each image in the training data
147
- data = []
148
- for i in range(1, 71):
149
- data.append(compute_lbp('./train/' + str(i) + '.jpg'))
150
- df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
151
- df['class'] = ['grass']*35 + ['wood']*35
152
- df.to_csv('train_lbp.csv', index=False)
153
-
154
- # Compute the LBP for each image in the testing data
155
- data = []
156
- for i in range(1, 31):
157
- data.append(compute_lbp('./test/' + str(i) + '.jpg'))
158
- df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
159
- df['class'] = ['grass']*15 + ['wood']*15
160
- df.to_csv('test_lbp.csv', index=False)
161
-
162
- # Select Support Vector Machines (SVM) as the classifier.
163
- # Train the classifier using the training data.
164
- # Test the classifier using the testing data.
165
- from sklearn.svm import SVC
166
- from sklearn.metrics import accuracy_score
167
- from sklearn.metrics import precision_score
168
- import pandas as pd
169
-
170
- train_glcm = pd.read_csv('train_glcm.csv')
171
- test_glcm = pd.read_csv('test_glcm.csv')
172
- train_lbp = pd.read_csv('train_lbp.csv')
173
- test_lbp = pd.read_csv('test_lbp.csv')
174
-
175
- X_train_glcm = train_glcm.drop('class', axis=1)
176
- y_train_glcm = train_glcm['class']
177
- X_test_glcm = test_glcm.drop('class', axis=1)
178
- y_test_glcm = test_glcm['class']
179
-
180
- X_train_lbp = train_lbp.drop('class', axis=1)
181
- y_train_lbp = train_lbp['class']
182
- X_test_lbp = test_lbp.drop('class', axis=1)
183
- y_test_lbp = test_lbp['class']
184
-
185
- clf_glcm = SVC()
186
- clf_glcm.fit(X_train_glcm, y_train_glcm)
187
- y_pred_glcm = clf_glcm.predict(X_test_glcm)
188
- print('Accuracy for GLCM features:', accuracy_score(y_test_glcm, y_pred_glcm))
189
- # calculate the precsion
190
- precision = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
191
- print('Precision for GLCM features:', precision)
192
-
193
- clf_lbp = SVC()
194
- clf_lbp.fit(X_train_lbp, y_train_lbp)
195
- y_pred_lbp = clf_lbp.predict(X_test_lbp)
196
- print('Accuracy for LBP features:', accuracy_score(y_test_lbp, y_pred_lbp))
197
- # calculate the precsion
198
- precision = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
199
- print('Precision for LBP features:', precision)
200
-
201
- # Evaluate each classifier on the tesing set.
202
- # Compare the results.
203
- # Save the results to a CSV file.
204
- results = pd.DataFrame({'GLCM': [accuracy_score(y_test_glcm, y_pred_glcm)], 'LBP': [accuracy_score(y_test_lbp, y_pred_lbp)]})
205
- # Add the precision to the results
206
- results['GLCM_precision'] = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
207
- results['LBP_precision'] = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
208
- results.to_csv('results.csv', index=False)
209
-
210
- import pickle
211
- # save clf_glcm and clf_lbp as pickle files
212
- with open('clf_glcm.pkl', 'wb') as f:
213
- pickle.dump(clf_glcm, f)
214
- with open('clf_lbp.pkl', 'wb') as f:
215
- pickle.dump(clf_lbp, f)
216
-
217
- import warnings
218
  def classify_image(image, algorithm):
219
  # Suppress the warning about feature names
220
  warnings.filterwarnings("ignore", message="X does not have valid feature names")
@@ -239,4 +97,145 @@ def classify_image(image, algorithm):
239
  else:
240
  prediction = clf_lbp.predict(features_df)[0]
241
 
242
- return prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  img = cv2.resize(img, (128, 128))
10
  cv2.imwrite(save_path, img)
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Do data augmentation by flipping the images horizontally on train data
13
  # Save the augmented data to the same folders
14
  def augment_image(image_path, save_path):
 
20
  if np.random.rand() > 0.5:
21
  img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
22
  cv2.imwrite(save_path, img)
 
 
 
 
 
 
 
23
 
24
  # Compute the GLCM for each image.
25
  # Extract features like contrast, correlation, energy, and homogeneity.
 
56
  mean_homogeneity = np.mean(homogeneity_flat)
57
  return [mean_contrast, mean_correlation, mean_energy, mean_homogeneity]
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  # Apply the LBP operator to each image.
60
  # Generate histograms of LBP codes to create feature vectors.
61
  # Save the features to a CSV file.
62
  # Label each feature vector with the correct class (grass or wood).
63
  from skimage.feature import local_binary_pattern
64
+ import pickle
65
+ import warnings
66
 
67
  def compute_lbp(image_path, ispath=True):
68
  if ispath:
 
73
  hist, _ = np.histogram(lbp, bins=np.arange(0, 11), density=True)
74
  return hist
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def classify_image(image, algorithm):
77
  # Suppress the warning about feature names
78
  warnings.filterwarnings("ignore", message="X does not have valid feature names")
 
97
  else:
98
  prediction = clf_lbp.predict(features_df)[0]
99
 
100
+ return prediction
101
+
if __name__ == '__main__':
    # Training pipeline, run only when this file is executed as a script:
    #   1. resize the raw ./grass/ and ./wood/ images,
    #   2. split them into ./train/ and ./test/,
    #   3. augment part of the training set,
    #   4. extract LBP and GLCM features into CSV files,
    #   5. train and evaluate one SVM per feature set,
    #   6. save the metrics and the fitted classifiers.
    #
    # Script-only imports are bound here, before their first use. The
    # previous layout called pd.DataFrame ahead of its own
    # `import pandas as pd` statement.
    import shutil

    import pandas as pd
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import precision_score
    from sklearn.svm import SVC

    image_suffixes = ('.jpg', '.jpeg', '.png')

    # Step 1: resize every image of each class into ./<class>_resized/ and
    # rename the outputs to 1.jpg, 2.jpg, 3.jpg, ...
    for label in ('grass', 'wood'):
        source_dir = './' + label + '/'
        resized_dir = './' + label + '_resized/'
        os.makedirs(resized_dir, exist_ok=True)

        count = 1
        # os.listdir returns names in arbitrary order; sorting keeps the
        # numbering (and therefore the train/test split) reproducible.
        for filename in sorted(os.listdir(source_dir)):
            if filename.endswith(image_suffixes):
                resize_image(source_dir + filename,
                             resized_dir + str(count) + '.jpg')
                count += 1

        print('Done!')

    # Step 2: divide the data into 70% training and 30% testing and merge
    # both classes into ./train/ and ./test/.
    # NOTE: the fixed ranges below require at least 50 resized images per
    # class; shutil.copy raises if a numbered file is missing.
    os.makedirs('./train/', exist_ok=True)
    os.makedirs('./test/', exist_ok=True)

    # Offsets keep the two classes from overwriting each other:
    #   train: 1-35 grass, 36-70 wood
    #   test:  1-15 grass, 16-30 wood
    for i in range(1, 36):
        shutil.copy('./grass_resized/' + str(i) + '.jpg',
                    './train/' + str(i) + '.jpg')
        shutil.copy('./wood_resized/' + str(i) + '.jpg',
                    './train/' + str(i + 35) + '.jpg')
    for i in range(36, 51):
        shutil.copy('./grass_resized/' + str(i) + '.jpg',
                    './test/' + str(i - 35) + '.jpg')
        shutil.copy('./wood_resized/' + str(i) + '.jpg',
                    './test/' + str(i - 20) + '.jpg')

    # Step 3: augment training images 1-50, writing them as 71.jpg-120.jpg.
    # NOTE(review): the feature loops below only read 1.jpg-70.jpg, so the
    # augmented copies do not influence training, and wood images 51-70
    # are never augmented. Confirm whether this is intended.
    for i in range(1, 51):
        augment_image('./train/' + str(i) + '.jpg',
                      './train/' + str(i + 70) + '.jpg')

    # Step 4: compute features for every train/test image and label each
    # row with its class (grass or wood). The label lists mirror the file
    # numbering produced in step 2.
    train_labels = ['grass'] * 35 + ['wood'] * 35
    test_labels = ['grass'] * 15 + ['wood'] * 15
    feature_sets = (
        ('lbp', compute_lbp, ['lbp_' + str(i) for i in range(10)]),
        ('glcm', compute_glcm,
         ['contrast', 'correlation', 'energy', 'homogeneity']),
    )
    for feature_name, extractor, columns in feature_sets:
        for split, labels in (('train', train_labels), ('test', test_labels)):
            data = [
                extractor('./' + split + '/' + str(i) + '.jpg')
                for i in range(1, len(labels) + 1)
            ]
            df = pd.DataFrame(data, columns=columns)
            df['class'] = labels
            df.to_csv(split + '_' + feature_name + '.csv', index=False)

    # Step 5: train one Support Vector Machine per feature set on the
    # training CSV and evaluate it on the testing CSV.
    train_glcm = pd.read_csv('train_glcm.csv')
    test_glcm = pd.read_csv('test_glcm.csv')
    train_lbp = pd.read_csv('train_lbp.csv')
    test_lbp = pd.read_csv('test_lbp.csv')

    X_train_glcm = train_glcm.drop('class', axis=1)
    y_train_glcm = train_glcm['class']
    X_test_glcm = test_glcm.drop('class', axis=1)
    y_test_glcm = test_glcm['class']

    X_train_lbp = train_lbp.drop('class', axis=1)
    y_train_lbp = train_lbp['class']
    X_test_lbp = test_lbp.drop('class', axis=1)
    y_test_lbp = test_lbp['class']

    clf_glcm = SVC()
    clf_glcm.fit(X_train_glcm, y_train_glcm)
    y_pred_glcm = clf_glcm.predict(X_test_glcm)
    accuracy_glcm = accuracy_score(y_test_glcm, y_pred_glcm)
    print('Accuracy for GLCM features:', accuracy_glcm)
    precision_glcm = precision_score(y_test_glcm, y_pred_glcm,
                                     average='weighted')
    print('Precision for GLCM features:', precision_glcm)

    clf_lbp = SVC()
    clf_lbp.fit(X_train_lbp, y_train_lbp)
    y_pred_lbp = clf_lbp.predict(X_test_lbp)
    accuracy_lbp = accuracy_score(y_test_lbp, y_pred_lbp)
    print('Accuracy for LBP features:', accuracy_lbp)
    precision_lbp = precision_score(y_test_lbp, y_pred_lbp,
                                    average='weighted')
    print('Precision for LBP features:', precision_lbp)

    # Step 6: save the comparison of both classifiers to a CSV file,
    # reusing the metrics computed above.
    results = pd.DataFrame({'GLCM': [accuracy_glcm], 'LBP': [accuracy_lbp]})
    results['GLCM_precision'] = precision_glcm
    results['LBP_precision'] = precision_lbp
    results.to_csv('results.csv', index=False)

    # Save clf_glcm and clf_lbp as pickle files.
    with open('clf_glcm.pkl', 'wb') as f:
        pickle.dump(clf_glcm, f)
    with open('clf_lbp.pkl', 'wb') as f:
        pickle.dump(clf_lbp, f)