yuanphon committed on
Commit 3d63b15
1 Parent(s): 6f28186

Update app.py

Files changed (1):
  1. app.py +18 -15
app.py CHANGED
@@ -164,9 +164,9 @@ def predict(upload_image):
     # Load the test data
     # Load the image

-    # img2 = cv2.imread(test_image_path)
-    # print("cv2: ", img2)
-    # print("cv2 shape: ", img2.shape)
+    img2 = cv2.imread(test_image_path)
+    print("cv2: ", img2)
+    print("cv2 shape: ", img2.shape)
     # img = upload_image
     # img = cv2.cvtColor((upload_image * 255).astype(np.uint8), cv2.COLOR_RGB2BGR)
     pil_image = upload_image.convert('RGB')
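
Note: the debug lines uncommented above rely on cv2.imread, which fails silently by returning None instead of raising, so the shape print will crash with AttributeError whenever test_image_path is wrong. A minimal guarded sketch (the path is hypothetical, not from this commit):

```python
import cv2

img2 = cv2.imread("test.jpg")  # hypothetical path; cv2.imread returns None on failure
if img2 is None:
    raise FileNotFoundError("cv2.imread could not read the image")
print("cv2 shape:", img2.shape)  # safe to touch .shape only after the None check
```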
@@ -174,8 +174,8 @@ def predict(upload_image):
     # Convert RGB to BGR
     img = open_cv_image[:, :, ::-1].copy()
     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    # print("gradio: ", img)
-    # print("gradio shape: ", img.shape)
+    print("gradio: ", img)
+    print("gradio shape: ", img.shape)


     # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
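
Note: np.array(pil_image) already yields an RGB array, so the `[:, :, ::-1]` BGR flip followed by cv2.COLOR_BGR2RGB in this hunk undoes itself. A standalone sketch showing the roundtrip is a no-op:

```python
import cv2
import numpy as np
from PIL import Image

pil_image = Image.new("RGB", (4, 4), (255, 0, 0))  # hypothetical solid-red test image
rgb = np.array(pil_image)                    # PIL RGB -> NumPy array, shape (H, W, 3)
bgr = rgb[:, :, ::-1].copy()                 # RGB -> BGR
back = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # BGR -> RGB again

assert np.array_equal(rgb, back)  # the two conversions cancel each other out
```

If only RGB is needed, `img = np.array(pil_image)` alone is equivalent and cheaper.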
@@ -195,7 +195,7 @@ def predict(upload_image):

     return label_list[predicted_class_idx] if probabilities.max().item() > 0.90 else '不是校狗'

-def captioning():
+def captioning(upload_image):

     model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
     feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
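
Note: for context, a sketch of the standard nlpconnect/vit-gpt2-image-captioning inference flow that the rest of captioning() presumably follows; the tokenizer and generation settings shown are the model card's defaults, not necessarily this app's:

```python
import torch
from PIL import Image
from transformers import AutoTokenizer, ViTImageProcessor, VisionEncoderDecoderModel

model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def caption(pil_image: Image.Image) -> str:
    # Preprocess one RGB image, generate token ids, decode to a caption string.
    pixel_values = feature_extractor(images=[pil_image.convert("RGB")],
                                     return_tensors="pt").pixel_values
    output_ids = model.generate(pixel_values.to(device), max_length=16, num_beams=4)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
```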
@@ -209,12 +209,18 @@ def captioning():
     gen_kwargs = {"max_length": max_length, "num_beams": num_beams}

     images = []
-    for image_path in [test_image_path]:
-        i_image = Image.open(image_path)
-        if i_image.mode != "RGB":
-            i_image = i_image.convert(mode="RGB")
+    # for image_path in [test_image_path]:
+    #     i_image = Image.open(image_path)
+    #     if i_image.mode != "RGB":
+    #         i_image = i_image.convert(mode="RGB")

-        images.append(i_image)
+    #     images.append(i_image)
+    pil_image = upload_image.convert('RGB')
+    open_cv_image = np.array(pil_image)
+    # Convert RGB to BGR
+    img = open_cv_image[:, :, ::-1].copy()
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    images.append(img)

     pixel_values = feature_extractor(images=images, return_tensors="pt").pixel_values
     pixel_values = pixel_values.to(device)
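
Note: a possible simplification for the block added above: ViTImageProcessor accepts PIL images (and RGB NumPy arrays) directly, so the PIL -> NumPy -> BGR -> RGB roundtrip is unnecessary. Sketch, with a hypothetical file standing in for the Gradio upload:

```python
from PIL import Image
from transformers import ViTImageProcessor

feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
upload_image = Image.open("dog.jpg")  # hypothetical stand-in for the Gradio PIL upload

# The processor handles PIL images directly; no cv2 conversion is required.
pixel_values = feature_extractor(images=[upload_image.convert("RGB")],
                                 return_tensors="pt").pixel_values
```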
@@ -246,11 +252,8 @@ if __name__ == '__main__':
     train_model()
     # output(predict(), captioning())

-
-    # def greet(name):
-    #     return "Hello " + name + "!!"
     def get_result(upload_image):
-        result = output(predict(upload_image), captioning())
+        result = output(predict(upload_image), captioning(upload_image))
         return result

     iface = gr.Interface(fn=get_result, inputs=gr.Image(type="pil"), outputs="text")
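
Note: end-to-end, this hunk wires both models into one Gradio handler. A runnable sketch of the flow under stated assumptions: the predict and captioning stubs below are hypothetical stand-ins for app.py's functions, and output() is assumed to simply join their strings (its real formatting is not shown in this diff):

```python
import gradio as gr

# Hypothetical stubs standing in for app.py's predict() and captioning().
def predict(img):
    return "校狗"

def captioning(img):
    return "a dog lying on the grass"

def output(label: str, caption: str) -> str:
    # Placeholder join; app.py's actual output() formatting is not shown here.
    return f"{label} | {caption}"

def get_result(upload_image):
    # upload_image arrives as a PIL image because of gr.Image(type="pil").
    return output(predict(upload_image), captioning(upload_image))

iface = gr.Interface(fn=get_result, inputs=gr.Image(type="pil"), outputs="text")
iface.launch()
```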
 