Spaces:
Sleeping
Sleeping
Siyun He
committed on
Commit
·
54f0f05
1
Parent(s):
a1ee05b
update files
Browse files- .gitignore +1 -0
- __pycache__/classification.cpython-311.pyc +0 -0
- classification.py +144 -145
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
.DS_Store
|
|
|
|
1 |
.DS_Store
|
2 |
+
./__pycache__
|
__pycache__/classification.cpython-311.pyc
ADDED
Binary file (12.5 kB). View file
|
|
classification.py
CHANGED
@@ -9,55 +9,6 @@ def resize_image(image_path, save_path):
|
|
9 |
img = cv2.resize(img, (128, 128))
|
10 |
cv2.imwrite(save_path, img)
|
11 |
|
12 |
-
# read image data from ./grass/ folder
|
13 |
-
if not os.path.exists('./grass_resized/'):
|
14 |
-
os.makedirs('./grass_resized/')
|
15 |
-
|
16 |
-
# rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
|
17 |
-
count = 1
|
18 |
-
for file in os.listdir('./grass/'):
|
19 |
-
if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
|
20 |
-
resize_image('./grass/' + file, './grass_resized/' + str(count) + '.jpg')
|
21 |
-
count += 1
|
22 |
-
|
23 |
-
print('Done!')
|
24 |
-
|
25 |
-
# save the resized image to ./wood_resized/ folder
|
26 |
-
if not os.path.exists('./wood_resized/'):
|
27 |
-
os.makedirs('./wood_resized/')
|
28 |
-
|
29 |
-
# rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
|
30 |
-
count = 1
|
31 |
-
for file in os.listdir('./wood/'):
|
32 |
-
if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
|
33 |
-
resize_image('./wood/' + file, './wood_resized/' + str(count) + '.jpg')
|
34 |
-
count += 1
|
35 |
-
|
36 |
-
print('Done!')
|
37 |
-
|
38 |
-
# Divide the data into training and testing data: 70% training, 30% testing
|
39 |
-
# Merge grass and wood data into training and testing data
|
40 |
-
# Save the training data to ./train/ folder
|
41 |
-
# Save the testing data to ./test/ folder
|
42 |
-
import shutil
|
43 |
-
|
44 |
-
if not os.path.exists('./train/'):
|
45 |
-
os.makedirs('./train/')
|
46 |
-
if not os.path.exists('./test/'):
|
47 |
-
os.makedirs('./test/')
|
48 |
-
|
49 |
-
# Rename files so that they do not overwrite each other
|
50 |
-
for i in range(1, 36):
|
51 |
-
shutil.copy('./grass_resized/' + str(i) + '.jpg', './train/' + str(i) + '.jpg')
|
52 |
-
for i in range(36, 51):
|
53 |
-
shutil.copy('./grass_resized/' +
|
54 |
-
str(i) + '.jpg', './test/' + str(i - 35) + '.jpg')
|
55 |
-
for i in range(1, 36):
|
56 |
-
shutil.copy('./wood_resized/' + str(i) + '.jpg', './train/' + str(i + 35) + '.jpg')
|
57 |
-
for i in range(36, 51):
|
58 |
-
shutil.copy('./wood_resized/' +
|
59 |
-
str(i) + '.jpg', './test/' + str(i - 20) + '.jpg')
|
60 |
-
|
61 |
# Do data augmentation by flipping the images horizontally on train data
|
62 |
# Save the augmented data to the same folders
|
63 |
def augment_image(image_path, save_path):
|
@@ -69,13 +20,6 @@ def augment_image(image_path, save_path):
|
|
69 |
if np.random.rand() > 0.5:
|
70 |
img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
|
71 |
cv2.imwrite(save_path, img)
|
72 |
-
|
73 |
-
for i in range(1, 36):
|
74 |
-
augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
|
75 |
-
for i in range(36, 51):
|
76 |
-
augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
|
77 |
-
|
78 |
-
|
79 |
|
80 |
# Compute the GLCM for each image.
|
81 |
# Extract features like contrast, correlation, energy, and homogeneity.
|
@@ -112,27 +56,13 @@ def compute_glcm(image_path, ispath=True):
|
|
112 |
mean_homogeneity = np.mean(homogeneity_flat)
|
113 |
return [mean_contrast, mean_correlation, mean_energy, mean_homogeneity]
|
114 |
|
115 |
-
# Compute the GLCM for each image in the training data
|
116 |
-
data = []
|
117 |
-
for i in range(1, 71):
|
118 |
-
data.append(compute_glcm('./train/' + str(i) + '.jpg'))
|
119 |
-
df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
|
120 |
-
df['class'] = ['grass']*35 + ['wood']*35
|
121 |
-
df.to_csv('train_glcm.csv', index=False)
|
122 |
-
|
123 |
-
# Compute the GLCM for each image in the testing data
|
124 |
-
data = []
|
125 |
-
for i in range(1, 31):
|
126 |
-
data.append(compute_glcm('./test/' + str(i) + '.jpg'))
|
127 |
-
df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
|
128 |
-
df['class'] = ['grass']*15 + ['wood']*15
|
129 |
-
df.to_csv('test_glcm.csv', index=False)
|
130 |
-
|
131 |
# Apply the LBP operator to each image.
|
132 |
# Generate histograms of LBP codes to create feature vectors.
|
133 |
# Save the features to a CSV file.
|
134 |
# Label each feature vector with the correct class (grass or wood).
|
135 |
from skimage.feature import local_binary_pattern
|
|
|
|
|
136 |
|
137 |
def compute_lbp(image_path, ispath=True):
|
138 |
if ispath:
|
@@ -143,78 +73,6 @@ def compute_lbp(image_path, ispath=True):
|
|
143 |
hist, _ = np.histogram(lbp, bins=np.arange(0, 11), density=True)
|
144 |
return hist
|
145 |
|
146 |
-
# Compute the LBP for each image in the training data
|
147 |
-
data = []
|
148 |
-
for i in range(1, 71):
|
149 |
-
data.append(compute_lbp('./train/' + str(i) + '.jpg'))
|
150 |
-
df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
|
151 |
-
df['class'] = ['grass']*35 + ['wood']*35
|
152 |
-
df.to_csv('train_lbp.csv', index=False)
|
153 |
-
|
154 |
-
# Compute the LBP for each image in the testing data
|
155 |
-
data = []
|
156 |
-
for i in range(1, 31):
|
157 |
-
data.append(compute_lbp('./test/' + str(i) + '.jpg'))
|
158 |
-
df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
|
159 |
-
df['class'] = ['grass']*15 + ['wood']*15
|
160 |
-
df.to_csv('test_lbp.csv', index=False)
|
161 |
-
|
162 |
-
# Select Support Vector Machines (SVM) as the classifier.
|
163 |
-
# Train the classifier using the training data.
|
164 |
-
# Test the classifier using the testing data.
|
165 |
-
from sklearn.svm import SVC
|
166 |
-
from sklearn.metrics import accuracy_score
|
167 |
-
from sklearn.metrics import precision_score
|
168 |
-
import pandas as pd
|
169 |
-
|
170 |
-
train_glcm = pd.read_csv('train_glcm.csv')
|
171 |
-
test_glcm = pd.read_csv('test_glcm.csv')
|
172 |
-
train_lbp = pd.read_csv('train_lbp.csv')
|
173 |
-
test_lbp = pd.read_csv('test_lbp.csv')
|
174 |
-
|
175 |
-
X_train_glcm = train_glcm.drop('class', axis=1)
|
176 |
-
y_train_glcm = train_glcm['class']
|
177 |
-
X_test_glcm = test_glcm.drop('class', axis=1)
|
178 |
-
y_test_glcm = test_glcm['class']
|
179 |
-
|
180 |
-
X_train_lbp = train_lbp.drop('class', axis=1)
|
181 |
-
y_train_lbp = train_lbp['class']
|
182 |
-
X_test_lbp = test_lbp.drop('class', axis=1)
|
183 |
-
y_test_lbp = test_lbp['class']
|
184 |
-
|
185 |
-
clf_glcm = SVC()
|
186 |
-
clf_glcm.fit(X_train_glcm, y_train_glcm)
|
187 |
-
y_pred_glcm = clf_glcm.predict(X_test_glcm)
|
188 |
-
print('Accuracy for GLCM features:', accuracy_score(y_test_glcm, y_pred_glcm))
|
189 |
-
# calculate the precision
|
190 |
-
precision = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
|
191 |
-
print('Precision for GLCM features:', precision)
|
192 |
-
|
193 |
-
clf_lbp = SVC()
|
194 |
-
clf_lbp.fit(X_train_lbp, y_train_lbp)
|
195 |
-
y_pred_lbp = clf_lbp.predict(X_test_lbp)
|
196 |
-
print('Accuracy for LBP features:', accuracy_score(y_test_lbp, y_pred_lbp))
|
197 |
-
# calculate the precision
|
198 |
-
precision = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
|
199 |
-
print('Precision for LBP features:', precision)
|
200 |
-
|
201 |
-
# Evaluate each classifier on the testing set.
|
202 |
-
# Compare the results.
|
203 |
-
# Save the results to a CSV file.
|
204 |
-
results = pd.DataFrame({'GLCM': [accuracy_score(y_test_glcm, y_pred_glcm)], 'LBP': [accuracy_score(y_test_lbp, y_pred_lbp)]})
|
205 |
-
# Add the precision to the results
|
206 |
-
results['GLCM_precision'] = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
|
207 |
-
results['LBP_precision'] = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
|
208 |
-
results.to_csv('results.csv', index=False)
|
209 |
-
|
210 |
-
import pickle
|
211 |
-
# save clf_glcm and clf_lbp as pickle files
|
212 |
-
with open('clf_glcm.pkl', 'wb') as f:
|
213 |
-
pickle.dump(clf_glcm, f)
|
214 |
-
with open('clf_lbp.pkl', 'wb') as f:
|
215 |
-
pickle.dump(clf_lbp, f)
|
216 |
-
|
217 |
-
import warnings
|
218 |
def classify_image(image, algorithm):
|
219 |
# Suppress the warning about feature names
|
220 |
warnings.filterwarnings("ignore", message="X does not have valid feature names")
|
@@ -239,4 +97,145 @@ def classify_image(image, algorithm):
|
|
239 |
else:
|
240 |
prediction = clf_lbp.predict(features_df)[0]
|
241 |
|
242 |
-
return prediction
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
img = cv2.resize(img, (128, 128))
|
10 |
cv2.imwrite(save_path, img)
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
# Do data augmentation by flipping the images horizontally on train data
|
13 |
# Save the augmented data to the same folders
|
14 |
def augment_image(image_path, save_path):
|
|
|
20 |
if np.random.rand() > 0.5:
|
21 |
img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
|
22 |
cv2.imwrite(save_path, img)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Compute the GLCM for each image.
|
25 |
# Extract features like contrast, correlation, energy, and homogeneity.
|
|
|
56 |
mean_homogeneity = np.mean(homogeneity_flat)
|
57 |
return [mean_contrast, mean_correlation, mean_energy, mean_homogeneity]
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
# Apply the LBP operator to each image.
|
60 |
# Generate histograms of LBP codes to create feature vectors.
|
61 |
# Save the features to a CSV file.
|
62 |
# Label each feature vector with the correct class (grass or wood).
|
63 |
from skimage.feature import local_binary_pattern
|
64 |
+
import pickle
|
65 |
+
import warnings
|
66 |
|
67 |
def compute_lbp(image_path, ispath=True):
|
68 |
if ispath:
|
|
|
73 |
hist, _ = np.histogram(lbp, bins=np.arange(0, 11), density=True)
|
74 |
return hist
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
def classify_image(image, algorithm):
|
77 |
# Suppress the warning about feature names
|
78 |
warnings.filterwarnings("ignore", message="X does not have valid feature names")
|
|
|
97 |
else:
|
98 |
prediction = clf_lbp.predict(features_df)[0]
|
99 |
|
100 |
+
return prediction
|
101 |
+
|
102 |
+
if __name__ == '__main__':
|
103 |
+
# read image data from ./grass/ folder
|
104 |
+
if not os.path.exists('./grass_resized/'):
|
105 |
+
os.makedirs('./grass_resized/')
|
106 |
+
|
107 |
+
# rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
|
108 |
+
count = 1
|
109 |
+
for file in os.listdir('./grass/'):
|
110 |
+
if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
|
111 |
+
resize_image('./grass/' + file, './grass_resized/' + str(count) + '.jpg')
|
112 |
+
count += 1
|
113 |
+
|
114 |
+
print('Done!')
|
115 |
+
|
116 |
+
# save the resized image to ./wood_resized/ folder
|
117 |
+
if not os.path.exists('./wood_resized/'):
|
118 |
+
os.makedirs('./wood_resized/')
|
119 |
+
|
120 |
+
# rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
|
121 |
+
count = 1
|
122 |
+
for file in os.listdir('./wood/'):
|
123 |
+
if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
|
124 |
+
resize_image('./wood/' + file, './wood_resized/' + str(count) + '.jpg')
|
125 |
+
count += 1
|
126 |
+
|
127 |
+
print('Done!')
|
128 |
+
|
129 |
+
# Divide the data into training and testing data: 70% training, 30% testing
|
130 |
+
# Merge grass and wood data into training and testing data
|
131 |
+
# Save the training data to ./train/ folder
|
132 |
+
# Save the testing data to ./test/ folder
|
133 |
+
import shutil
|
134 |
+
|
135 |
+
if not os.path.exists('./train/'):
|
136 |
+
os.makedirs('./train/')
|
137 |
+
if not os.path.exists('./test/'):
|
138 |
+
os.makedirs('./test/')
|
139 |
+
|
140 |
+
# Rename files so that they do not overwrite each other
|
141 |
+
for i in range(1, 36):
|
142 |
+
shutil.copy('./grass_resized/' + str(i) + '.jpg', './train/' + str(i) + '.jpg')
|
143 |
+
for i in range(36, 51):
|
144 |
+
shutil.copy('./grass_resized/' +
|
145 |
+
str(i) + '.jpg', './test/' + str(i - 35) + '.jpg')
|
146 |
+
for i in range(1, 36):
|
147 |
+
shutil.copy('./wood_resized/' + str(i) + '.jpg', './train/' + str(i + 35) + '.jpg')
|
148 |
+
for i in range(36, 51):
|
149 |
+
shutil.copy('./wood_resized/' +
|
150 |
+
str(i) + '.jpg', './test/' + str(i - 20) + '.jpg')
|
151 |
+
|
152 |
+
|
153 |
+
for i in range(1, 36):
|
154 |
+
augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
|
155 |
+
for i in range(36, 51):
|
156 |
+
augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
|
157 |
+
|
158 |
+
# Compute the LBP for each image in the training data
|
159 |
+
data = []
|
160 |
+
for i in range(1, 71):
|
161 |
+
data.append(compute_lbp('./train/' + str(i) + '.jpg'))
|
162 |
+
df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
|
163 |
+
df['class'] = ['grass']*35 + ['wood']*35
|
164 |
+
df.to_csv('train_lbp.csv', index=False)
|
165 |
+
|
166 |
+
# Compute the LBP for each image in the testing data
|
167 |
+
data = []
|
168 |
+
for i in range(1, 31):
|
169 |
+
data.append(compute_lbp('./test/' + str(i) + '.jpg'))
|
170 |
+
df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
|
171 |
+
df['class'] = ['grass']*15 + ['wood']*15
|
172 |
+
df.to_csv('test_lbp.csv', index=False)
|
173 |
+
|
174 |
+
# Compute the GLCM for each image in the training data
|
175 |
+
data = []
|
176 |
+
for i in range(1, 71):
|
177 |
+
data.append(compute_glcm('./train/' + str(i) + '.jpg'))
|
178 |
+
df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
|
179 |
+
df['class'] = ['grass']*35 + ['wood']*35
|
180 |
+
df.to_csv('train_glcm.csv', index=False)
|
181 |
+
|
182 |
+
# Compute the GLCM for each image in the testing data
|
183 |
+
data = []
|
184 |
+
for i in range(1, 31):
|
185 |
+
data.append(compute_glcm('./test/' + str(i) + '.jpg'))
|
186 |
+
df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
|
187 |
+
df['class'] = ['grass']*15 + ['wood']*15
|
188 |
+
df.to_csv('test_glcm.csv', index=False)
|
189 |
+
|
190 |
+
# Select Support Vector Machines (SVM) as the classifier.
|
191 |
+
# Train the classifier using the training data.
|
192 |
+
# Test the classifier using the testing data.
|
193 |
+
from sklearn.svm import SVC
|
194 |
+
from sklearn.metrics import accuracy_score
|
195 |
+
from sklearn.metrics import precision_score
|
196 |
+
import pandas as pd
|
197 |
+
|
198 |
+
train_glcm = pd.read_csv('train_glcm.csv')
|
199 |
+
test_glcm = pd.read_csv('test_glcm.csv')
|
200 |
+
train_lbp = pd.read_csv('train_lbp.csv')
|
201 |
+
test_lbp = pd.read_csv('test_lbp.csv')
|
202 |
+
|
203 |
+
X_train_glcm = train_glcm.drop('class', axis=1)
|
204 |
+
y_train_glcm = train_glcm['class']
|
205 |
+
X_test_glcm = test_glcm.drop('class', axis=1)
|
206 |
+
y_test_glcm = test_glcm['class']
|
207 |
+
|
208 |
+
X_train_lbp = train_lbp.drop('class', axis=1)
|
209 |
+
y_train_lbp = train_lbp['class']
|
210 |
+
X_test_lbp = test_lbp.drop('class', axis=1)
|
211 |
+
y_test_lbp = test_lbp['class']
|
212 |
+
|
213 |
+
clf_glcm = SVC()
|
214 |
+
clf_glcm.fit(X_train_glcm, y_train_glcm)
|
215 |
+
y_pred_glcm = clf_glcm.predict(X_test_glcm)
|
216 |
+
print('Accuracy for GLCM features:', accuracy_score(y_test_glcm, y_pred_glcm))
|
217 |
+
# calculate the precision
|
218 |
+
precision = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
|
219 |
+
print('Precision for GLCM features:', precision)
|
220 |
+
|
221 |
+
clf_lbp = SVC()
|
222 |
+
clf_lbp.fit(X_train_lbp, y_train_lbp)
|
223 |
+
y_pred_lbp = clf_lbp.predict(X_test_lbp)
|
224 |
+
print('Accuracy for LBP features:', accuracy_score(y_test_lbp, y_pred_lbp))
|
225 |
+
# calculate the precision
|
226 |
+
precision = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
|
227 |
+
print('Precision for LBP features:', precision)
|
228 |
+
|
229 |
+
# Evaluate each classifier on the testing set.
|
230 |
+
# Compare the results.
|
231 |
+
# Save the results to a CSV file.
|
232 |
+
results = pd.DataFrame({'GLCM': [accuracy_score(y_test_glcm, y_pred_glcm)], 'LBP': [accuracy_score(y_test_lbp, y_pred_lbp)]})
|
233 |
+
# Add the precision to the results
|
234 |
+
results['GLCM_precision'] = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
|
235 |
+
results['LBP_precision'] = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
|
236 |
+
results.to_csv('results.csv', index=False)
|
237 |
+
# save clf_glcm and clf_lbp as pickle files
|
238 |
+
with open('clf_glcm.pkl', 'wb') as f:
|
239 |
+
pickle.dump(clf_glcm, f)
|
240 |
+
with open('clf_lbp.pkl', 'wb') as f:
|
241 |
+
pickle.dump(clf_lbp, f)
|