manasch committed on
Commit
3ba4276
·
verified ·
1 Parent(s): 1446d6e

separate models and add image_captioning

Browse files
Files changed (4) hide show
  1. .gitignore +5 -0
  2. app.py +13 -49
  3. lib/image_captioning.py +27 -0
  4. lib/pace_model.py +55 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __pycache__
2
+ .vscode
3
+
4
+ *.jpg
5
+ *.png
app.py CHANGED
@@ -1,74 +1,38 @@
1
  from pathlib import Path
2
 
3
  import numpy as np
4
- import tensorflow as tf
5
  import gradio as gr
6
 
7
- import cv2
8
- import keras
9
- from keras import Sequential
10
- from keras.applications.resnet50 import ResNet50
11
- from keras.layers import Flatten, Dense
12
 
13
  pace_model_weights_path = (Path.cwd() / "models" / "pace_model_weights.h5").resolve()
14
  resnet50_tf_model_weights_path = (Path.cwd() / "models" / "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
15
  height, width, channels = (224, 224, 3)
16
 
17
- class PaceModel:
18
- def __init__(self, height, width, channels):
19
- self.resnet_model = Sequential()
20
- self.height = height
21
- self.width = width
22
- self.channels = channels
23
- self.class_names = ["Fast", "Medium", "Slow"]
24
-
25
- self.create_base_model()
26
- self.create_architecture()
27
-
28
- def create_base_model(self):
29
- self.base_model = ResNet50(
30
- include_top=False,
31
- input_shape=(self.height, self.width, self.channels),
32
- pooling="avg",
33
- classes=211,
34
- weights="imagenet"
35
- )
36
- self.base_model.load_weights(resnet50_tf_model_weights_path)
37
-
38
- for layer in self.base_model.layers:
39
- layer.trainable = False
40
 
41
- def create_architecture(self):
42
- self.resnet_model.add(self.base_model)
43
- self.resnet_model.add(Flatten())
44
- self.resnet_model.add(Dense(1024, activation="relu"))
45
- self.resnet_model.add(Dense(256, activation="relu"))
46
- self.resnet_model.add(Dense(3, activation="softmax"))
47
-
48
- self.resnet_model.load_weights(pace_model_weights_path)
49
-
50
- def predict(self, input_image: np.ndarray):
51
- resized_image = cv2.resize(input_image, (self.height, self.width))
52
- image = np.expand_dims(resized_image, axis=0)
53
-
54
- prediction = self.resnet_model.predict(image)
55
- print(prediction, np.argmax(prediction))
56
- return self.class_names[np.argmax(prediction)]
57
 
58
  def main():
59
- model = PaceModel(height, width, channels)
60
 
61
  demo = gr.Interface(
62
- fn=model.predict,
63
  inputs=gr.Image(
64
- type="numpy",
65
  label="Upload an image",
66
  show_label=True,
67
  container=True
68
  ),
69
  outputs=gr.Textbox(
70
  lines=1,
71
- placeholder="Fast | Medium | Slow",
72
  label="Pace of the image",
73
  show_label=True,
74
  container=True,
 
1
  from pathlib import Path
2
 
3
  import numpy as np
 
4
  import gradio as gr
5
 
6
+ from lib.image_captioning import ImageCaptioning
7
+ from lib.pace_model import PaceModel
 
 
 
8
 
9
  pace_model_weights_path = (Path.cwd() / "models" / "pace_model_weights.h5").resolve()
10
  resnet50_tf_model_weights_path = (Path.cwd() / "models" / "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
11
  height, width, channels = (224, 224, 3)
12
 
13
class AudioPalette:
    """Combine the pace classifier with the image-captioning client.

    Construction builds a ``PaceModel`` from the module-level image
    dimensions and weight paths, and an ``ImageCaptioning`` client.
    """

    # Used in place of the caption when the captioning response does not
    # contain a usable "generated_text" entry.
    CAPTION_UNAVAILABLE = "(caption unavailable)"

    def __init__(self):
        self.pace_model = PaceModel(
            height,
            width,
            channels,
            resnet50_tf_model_weights_path,
            pace_model_weights_path,
        )
        self.image_captioning = ImageCaptioning()

    def generate(self, input_image_path):
        """Return ``"<pace> - <caption>"`` for the image at *input_image_path*.

        The caption is taken from the ``"generated_text"`` key of the first
        element of the captioning response. If the response is not a
        non-empty list of dicts, or the key is missing or empty, the
        placeholder ``CAPTION_UNAVAILABLE`` is used instead of raising.
        """
        # Same call order as before: caption request first, then the pace model.
        response = self.image_captioning.query(input_image_path)
        pace = self.pace_model.predict(input_image_path)

        caption = None
        # The response may be a dict (e.g. an error payload) or an empty
        # list; only index into it when it has the expected shape.
        if isinstance(response, list) and response and isinstance(response[0], dict):
            caption = response[0].get("generated_text")

        if not caption:
            caption = self.CAPTION_UNAVAILABLE
        return f"{pace} - {caption}"
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def main():
23
+ model = AudioPalette()
24
 
25
  demo = gr.Interface(
26
+ fn=model.generate,
27
  inputs=gr.Image(
28
+ type="filepath",
29
  label="Upload an image",
30
  show_label=True,
31
  container=True
32
  ),
33
  outputs=gr.Textbox(
34
  lines=1,
35
+ placeholder="Pace of the image and the caption",
36
  label="Pace of the image",
37
  show_label=True,
38
  container=True,
lib/image_captioning.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import cv2
4
+ import requests
5
+
6
class ImageCaptioning:
    """
    Client that performs an API call to BLIP's huggingface inference API.

    The endpoint URL and bearer token are read from the ``blip_api_url`` and
    ``auth_token`` environment variables when the client is constructed.
    """

    # Seconds to wait for the inference API before giving up.
    DEFAULT_TIMEOUT_SECONDS = 60

    def __init__(self, timeout=DEFAULT_TIMEOUT_SECONDS):
        """Read the endpoint and token from the environment.

        Args:
            timeout: Seconds passed to ``requests.post`` as its timeout.

        Raises:
            KeyError: If ``blip_api_url`` or ``auth_token`` is not set.
        """
        self.api_endpoint = os.environ["blip_api_url"]
        self.org_token = os.environ["auth_token"]
        self.headers = {"Authorization": f"Bearer {self.org_token}"}
        self.timeout = timeout

    def read_image(self, image_path):
        """Return the raw bytes of the file at *image_path*."""
        with open(image_path, "rb") as image_file:
            return image_file.read()

    def query(self, image_path: str):
        """POST the image bytes to the endpoint and return the decoded JSON.

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If no response arrives within ``self.timeout``.
        """
        response = requests.post(
            self.api_endpoint,
            headers=self.headers,
            data=self.read_image(image_path),
            # Without a timeout, requests waits indefinitely on a stalled server.
            timeout=self.timeout,
        )
        # Surface HTTP failures explicitly instead of trying to decode
        # whatever body an error response carries.
        response.raise_for_status()
        return response.json()
lib/pace_model.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import tensorflow as tf
3
+
4
+ import cv2
5
+ import keras
6
+ from keras import Sequential
7
+ from keras.applications.resnet50 import ResNet50
8
+ from keras.layers import Flatten, Dense
9
+
10
class PaceModel:
    """
    The pace model which uses ResNet50's architecture as base and builds upon
    it by adding further layers to determine the pace of an image.

    Construction builds the frozen ResNet50 base, stacks the dense head on
    top of it and loads both sets of weights from the given paths.
    """

    def __init__(self, height, width, channels, resnet50_tf_model_weights_path, pace_model_weights_path):
        self.resnet_model = Sequential()
        self.height = height
        self.width = width
        self.channels = channels
        # Index i of the softmax output is reported as class_names[i].
        self.class_names = ["Fast", "Medium", "Slow"]
        self.resnet50_tf_model_weights_path = resnet50_tf_model_weights_path
        self.pace_model_weights_path = pace_model_weights_path

        self.create_base_model()
        self.create_architecture()

    def create_base_model(self):
        """Create the ResNet50 base, load its weights and freeze every layer."""
        # NOTE(review): `classes` is presumably ignored with include_top=False,
        # and the "imagenet" weights are overwritten by load_weights below.
        # Confirm against the Keras docs before simplifying.
        self.base_model = ResNet50(
            include_top=False,
            input_shape=(self.height, self.width, self.channels),
            pooling="avg",
            classes=211,
            weights="imagenet",
        )
        self.base_model.load_weights(self.resnet50_tf_model_weights_path)

        for layer in self.base_model.layers:
            layer.trainable = False

    def create_architecture(self):
        """Stack the dense classification head on the base and load its weights."""
        self.resnet_model.add(self.base_model)
        self.resnet_model.add(Flatten())
        self.resnet_model.add(Dense(1024, activation="relu"))
        self.resnet_model.add(Dense(256, activation="relu"))
        self.resnet_model.add(Dense(3, activation="softmax"))

        self.resnet_model.load_weights(self.pace_model_weights_path)

    def predict(self, input_image_path: str):
        """Return the predicted pace label for the image at *input_image_path*.

        Raises:
            FileNotFoundError: If *input_image_path* is not an existing file.
            ValueError: If the file exists but cannot be decoded as an image.
        """
        # Local import: the surrounding module does not import os.
        import os

        if not os.path.isfile(input_image_path):
            raise FileNotFoundError(f"Image file not found: {input_image_path}")

        # cv2.imread signals failure by returning None rather than raising.
        input_image = cv2.imread(input_image_path)
        if input_image is None:
            raise ValueError(f"Could not decode image: {input_image_path}")

        # NOTE(review): cv2.imread yields BGR channel order. Confirm this
        # matches the channel order the pace weights were trained with.
        # cv2.resize expects dsize as (width, height).
        resized_image = cv2.resize(input_image, (self.width, self.height))
        image = np.expand_dims(resized_image, axis=0)

        prediction = self.resnet_model.predict(image)
        predicted_index = int(np.argmax(prediction))
        print(prediction, predicted_index)
        return self.class_names[predicted_index]