# Image-to-music / shred_model.py
# Bokanovskii's picture
# Upload shred_model.py
# 6be2a43
import tensorflow.keras as keras
import tensorflow as tf
import PIL.Image
import PIL.ImageOps
import numpy as np
# Target size that prepare_image() resizes every image to, and the spatial
# part of the model input shape.  Kept as a list (not a tuple) because the
# model builders compute the input shape with ``IMG_SIZE + [3]``.
IMG_SIZE = [256,256]
def prepare_image(path):
    """Load an image file and return it as a one-image batch array.

    The image is re-oriented according to its EXIF orientation tag (when
    present), converted to RGB, and resized to ``IMG_SIZE``.

    Args:
        path: Anything accepted by ``PIL.Image.open`` (file path or
            file-like object).

    Returns:
        A numpy array with a leading batch axis of length 1, followed by
        the resized image's height, width and 3 colour channels.
    """
    # The context manager closes the underlying file once the pixel data has
    # been copied into the resized image / array.
    with PIL.Image.open(path) as source:
        # EXIF data lives on the freshly opened image, so fix the orientation
        # before any operation that returns a plain (EXIF-less) copy.
        upright, _ = exif_transpose(source)
        # Grayscale, palette and RGBA files would otherwise produce arrays
        # that do not match the 3-channel model input.
        rgb = upright.convert("RGB")
        resized = rgb.resize(tuple(IMG_SIZE))
        return np.expand_dims(np.asarray(resized), axis=0)
# def prepare_model(checkpoint_folder_path):
# base_model = keras.applications.EfficientNetB7(
# weights='imagenet',
# include_top=False,
# input_shape=tuple(IMG_SIZE + [3])
# )
# base_model.trainable = True
# model = keras.Sequential()
# model.add(keras.Input(shape=tuple(IMG_SIZE + [3])))
# model.add(keras.layers.RandomFlip("horizontal"))
# model.add(keras.layers.RandomRotation(0.1))
# model.add(base_model)
# model.add(keras.layers.GlobalMaxPooling2D())
# model.add(keras.layers.Dense(1, activation='sigmoid'))
# model.compile(optimizer=keras.optimizers.Adam(1e-5), # Low learning rate
# loss=keras.losses.BinaryCrossentropy(from_logits=False),
# metrics=[keras.metrics.BinaryAccuracy(), 'Precision', 'Recall',
# tf.keras.metrics.SpecificityAtSensitivity(.9)],)
# model.load_weights(checkpoint_folder_path)
# return model
def prepare_EfficientNet_model(base_trainable=False, fine_tuning=False):
    """Build and compile a binary classifier on top of EfficientNetB7.

    The network is a ``keras.Sequential`` stack: input of shape
    ``IMG_SIZE + [3]``, random horizontal flip, random rotation (factor 0.1),
    an ImageNet-pretrained EfficientNetB7 without its classification top,
    global max pooling, and a single sigmoid unit.  The loss is binary
    cross-entropy on probabilities (``from_logits=False``).

    Args:
        base_trainable: When ``fine_tuning`` is false, whether the
            EfficientNet backbone's weights are trainable.  Defaults to a
            frozen backbone.
        fine_tuning: When true, the backbone is always unfrozen, Adam is
            used with a learning rate of 1e-5, and a
            ``SpecificityAtSensitivity(0.9)`` metric is added.  When false,
            Adam is used with its default settings.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    input_shape = tuple(IMG_SIZE + [3])

    base_model = keras.applications.EfficientNetB7(
        weights="imagenet",
        include_top=False,
        input_shape=input_shape,
    )
    # Fine-tuning always unfreezes the backbone; otherwise honour the caller's
    # choice (previously ``base_trainable=True`` was silently ignored because
    # the backbone had already been frozen unconditionally).
    base_model.trainable = True if fine_tuning else bool(base_trainable)

    model = keras.Sequential()
    model.add(keras.Input(shape=input_shape))
    model.add(keras.layers.RandomFlip("horizontal"))
    model.add(keras.layers.RandomRotation(0.1))
    model.add(base_model)
    model.add(keras.layers.GlobalMaxPooling2D())
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    metrics = [keras.metrics.BinaryAccuracy(), 'Precision', 'Recall']
    if fine_tuning:
        # Low learning rate for updating the pretrained backbone weights.
        optimizer = keras.optimizers.Adam(1e-5)
        metrics.append(tf.keras.metrics.SpecificityAtSensitivity(.9))
    else:
        optimizer = keras.optimizers.Adam()

    model.compile(
        optimizer=optimizer,
        loss=keras.losses.BinaryCrossentropy(from_logits=False),
        metrics=metrics,
    )
    return model
def exif_transpose(img):
    """Re-orient *img* according to its EXIF orientation tag.

    Args:
        img: A PIL image, or any object exposing ``_getexif()``, ``rotate()``
            and ``transpose()``.  Falsy values (e.g. ``None``) and objects
            without usable EXIF data are passed through unchanged.

    Returns:
        A ``(image, has_orientation)`` tuple.  ``has_orientation`` is
        ``True`` whenever an EXIF orientation tag was found -- including the
        value 1 ("already upright") and unrecognised values, for which the
        image is returned unchanged -- and ``False`` otherwise.
    """
    if not img:
        # Keep the two-element return shape so callers can always unpack.
        return img, False

    exif_orientation_tag = 274

    # EXIF data is only present on some files / image classes.
    if not hasattr(img, "_getexif"):
        return img, False

    # Parse the EXIF block once instead of on every check.
    exif_data = img._getexif()
    if not isinstance(exif_data, dict) or exif_orientation_tag not in exif_data:
        return img, False

    orientation = exif_data[exif_orientation_tag]

    if orientation == 2:
        # Mirrored left to right
        img = img.transpose(PIL.Image.FLIP_LEFT_RIGHT)
    elif orientation == 3:
        # Rotated 180 degrees
        img = img.rotate(180)
    elif orientation == 4:
        # Mirrored top to bottom (180-degree turn + horizontal mirror)
        img = img.rotate(180).transpose(PIL.Image.FLIP_LEFT_RIGHT)
    elif orientation == 5:
        # Mirrored along top-left diagonal
        img = img.rotate(-90, expand=True).transpose(PIL.Image.FLIP_LEFT_RIGHT)
    elif orientation == 6:
        # Rotated 90 degrees
        img = img.rotate(-90, expand=True)
    elif orientation == 7:
        # Mirrored along top-right diagonal
        img = img.rotate(90, expand=True).transpose(PIL.Image.FLIP_LEFT_RIGHT)
    elif orientation == 8:
        # Rotated 270 degrees
        img = img.rotate(90, expand=True)
    # orientation == 1 (normal) or any unknown value: nothing to do.

    return img, True