# -*- coding: utf-8 -*-
"""
The resnet50 from `tf.keras.applications.Resnet50` has a problem with the convolutional layers.
It basically add bias terms to such layers followed by batch normalizations, which is not correct
https://github.com/tensorflow/tensorflow/issues/37365
This resnet 50 implementation provides a cleaner version
"""
import tensorflow as tf
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.regularizers import l2
weight_decay = 1e-4  # default L2 regularization factor for all conv kernels


class IdentityBlock(tf.keras.layers.Layer):
def __init__(
self, kernel_size, filters, stage, block, weight_decay=1e-4, name=None, **kwargs
):
"""Block that has no convolutianal layer as skip connection
Parameters
----------
kernel_size:
The kernel size of middle conv layer at main path
filters:
list of integers, the filterss of 3 conv layer at main path
stage:
Current stage label, used for generating layer names
block:
'a','b'..., current block label, used for generating layer names
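
        Example
        -------
        A minimal usage sketch (shapes assumed for illustration; the residual
        add requires the input channel count to equal the last filter size)::

            block = IdentityBlock(3, [64, 64, 256], stage=2, block=2)
            y = block(tf.zeros((1, 56, 56, 256)))  # shape preserved: (1, 56, 56, 256)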
"""
super().__init__(name=name, **kwargs)
filters1, filters2, filters3 = filters
bn_axis = 3
conv_name_1 = "conv" + str(stage) + "_" + str(block) + "_1x1_reduce"
bn_name_1 = "conv" + str(stage) + "_" + str(block) + "_1x1_reduce/bn"
layers = [
Conv2D(
filters1,
(1, 1),
kernel_initializer="orthogonal",
use_bias=False,
kernel_regularizer=l2(weight_decay),
name=conv_name_1,
)
]
layers += [BatchNormalization(axis=bn_axis, name=bn_name_1)]
layers += [Activation("relu")]
conv_name_2 = "conv" + str(stage) + "_" + str(block) + "_3x3"
bn_name_2 = "conv" + str(stage) + "_" + str(block) + "_3x3/bn"
layers += [
Conv2D(
filters2,
kernel_size,
padding="same",
kernel_initializer="orthogonal",
use_bias=False,
kernel_regularizer=l2(weight_decay),
name=conv_name_2,
)
]
layers += [BatchNormalization(axis=bn_axis, name=bn_name_2)]
layers += [Activation("relu")]
conv_name_3 = "conv" + str(stage) + "_" + str(block) + "_1x1_increase"
bn_name_3 = "conv" + str(stage) + "_" + str(block) + "_1x1_increase/bn"
layers += [
Conv2D(
filters3,
(1, 1),
kernel_initializer="orthogonal",
use_bias=False,
kernel_regularizer=l2(weight_decay),
name=conv_name_3,
)
]
layers += [BatchNormalization(axis=bn_axis, name=bn_name_3)]
self.layers = layers

    def call(self, input_tensor, training=None):
        # Main path: the conv/bn/relu stack assembled in __init__.
        x = input_tensor
        for lay in self.layers:
            x = lay(x, training=training)
        # Identity skip connection: add the unmodified input back in.
        x = tf.keras.layers.add([x, input_tensor])
        x = Activation("relu")(x)
        return x


class ConvBlock(tf.keras.layers.Layer):
def __init__(
self,
kernel_size,
filters,
stage,
block,
strides=(2, 2),
weight_decay=1e-4,
name=None,
**kwargs,
):
"""Block that has a conv layer AS shortcut.
Parameters
----------
kernel_size:
The kernel size of middle conv layer at main path
filters:
list of integers, the filterss of 3 conv layer at main path
stage:
Current stage label, used for generating layer names
block:
'a','b'..., current block label, used for generating layer names
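
        Example
        -------
        A minimal usage sketch (shapes assumed for illustration; the projection
        shortcut lets the block change both resolution and channel count)::

            block = ConvBlock(3, [128, 128, 512], stage=3, block=1)
            y = block(tf.zeros((1, 56, 56, 256)))  # downsampled: (1, 28, 28, 512)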
"""
super().__init__(name=name, **kwargs)
filters1, filters2, filters3 = filters
bn_axis = 3
conv_name_1 = "conv" + str(stage) + "_" + str(block) + "_1x1_reduce"
bn_name_1 = "conv" + str(stage) + "_" + str(block) + "_1x1_reduce/bn"
layers = [
Conv2D(
filters1,
(1, 1),
strides=strides,
kernel_initializer="orthogonal",
use_bias=False,
kernel_regularizer=l2(weight_decay),
name=conv_name_1,
)
]
layers += [BatchNormalization(axis=bn_axis, name=bn_name_1)]
layers += [Activation("relu")]
conv_name_2 = "conv" + str(stage) + "_" + str(block) + "_3x3"
bn_name_2 = "conv" + str(stage) + "_" + str(block) + "_3x3/bn"
layers += [
Conv2D(
filters2,
kernel_size,
padding="same",
kernel_initializer="orthogonal",
use_bias=False,
kernel_regularizer=l2(weight_decay),
name=conv_name_2,
)
]
layers += [BatchNormalization(axis=bn_axis, name=bn_name_2)]
layers += [Activation("relu")]
conv_name_3 = "conv" + str(stage) + "_" + str(block) + "_1x1_increase"
bn_name_3 = "conv" + str(stage) + "_" + str(block) + "_1x1_increase/bn"
layers += [
Conv2D(
filters3,
(1, 1),
kernel_initializer="orthogonal",
use_bias=False,
kernel_regularizer=l2(weight_decay),
name=conv_name_3,
)
]
layers += [BatchNormalization(axis=bn_axis, name=bn_name_3)]
conv_name_4 = "conv" + str(stage) + "_" + str(block) + "_1x1_proj"
bn_name_4 = "conv" + str(stage) + "_" + str(block) + "_1x1_proj/bn"
shortcut = [
Conv2D(
filters3,
(1, 1),
strides=strides,
kernel_initializer="orthogonal",
use_bias=False,
kernel_regularizer=l2(weight_decay),
name=conv_name_4,
)
]
shortcut += [BatchNormalization(axis=bn_axis, name=bn_name_4)]
self.layers = layers
self.shortcut = shortcut

    def call(self, input_tensor, training=None):
        # Main path: the conv/bn/relu stack assembled in __init__.
        x = input_tensor
        for lay in self.layers:
            x = lay(x, training=training)
        # Shortcut path: 1x1 projection conv + bn, so the shortcut matches the
        # main path's output shape and the residual add is valid.
        x_s = input_tensor
        for lay in self.shortcut:
            x_s = lay(x_s, training=training)
        x = tf.keras.layers.add([x, x_s])
        x = Activation("relu")(x)
        return x


def resnet50_modified(input_tensor=None, input_shape=None, **kwargs):
    """
    The ResNet-50 from `tf.keras.applications.ResNet50` has a problem with its convolutional
    layers: it adds bias terms to layers that are immediately followed by batch normalization,
    which is redundant.
    https://github.com/tensorflow/tensorflow/issues/37365
    This ResNet-50 implementation provides a cleaner version.
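
    Example
    -------
    A minimal usage sketch::

        model = resnet50_modified(input_shape=(112, 112, 3))
        model.summary()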
"""
    if input_tensor is None:
        input_tensor = tf.keras.layers.InputLayer(input_shape=input_shape)
    elif not isinstance(input_tensor, tf.keras.layers.InputLayer):
        # Wrap a plain tensor into an InputLayer so it can head a Sequential model
        # (tf.keras.Sequential accepts layers, not raw tensors).
        input_tensor = tf.keras.layers.InputLayer(
            input_tensor=input_tensor, input_shape=input_shape
        )
bn_axis = 3
# inputs are of size 224 x 224 x 3
layers = [input_tensor]
layers += [
Conv2D(
64,
(7, 7),
strides=(2, 2),
kernel_initializer="orthogonal",
use_bias=False,
trainable=True,
kernel_regularizer=l2(weight_decay),
padding="same",
name="conv1/7x7_s2",
)
]
# inputs are of size 112 x 112 x 64
layers += [BatchNormalization(axis=bn_axis, name="conv1/7x7_s2/bn")]
layers += [Activation("relu")]
layers += [MaxPooling2D((3, 3), strides=(2, 2))]
# inputs are of size 56 x 56
layers += [ConvBlock(3, [64, 64, 256], stage=2, block=1, strides=(1, 1))]
layers += [IdentityBlock(3, [64, 64, 256], stage=2, block=2)]
layers += [IdentityBlock(3, [64, 64, 256], stage=2, block=3)]
# inputs are of size 28 x 28
layers += [ConvBlock(3, [128, 128, 512], stage=3, block=1)]
layers += [IdentityBlock(3, [128, 128, 512], stage=3, block=2)]
layers += [IdentityBlock(3, [128, 128, 512], stage=3, block=3)]
layers += [IdentityBlock(3, [128, 128, 512], stage=3, block=4)]
# inputs are of size 14 x 14
layers += [ConvBlock(3, [256, 256, 1024], stage=4, block=1)]
layers += [IdentityBlock(3, [256, 256, 1024], stage=4, block=2)]
layers += [IdentityBlock(3, [256, 256, 1024], stage=4, block=3)]
layers += [IdentityBlock(3, [256, 256, 1024], stage=4, block=4)]
layers += [IdentityBlock(3, [256, 256, 1024], stage=4, block=5)]
layers += [IdentityBlock(3, [256, 256, 1024], stage=4, block=6)]
# inputs are of size 7 x 7
layers += [ConvBlock(3, [512, 512, 2048], stage=5, block=1)]
layers += [IdentityBlock(3, [512, 512, 2048], stage=5, block=2)]
layers += [IdentityBlock(3, [512, 512, 2048], stage=5, block=3)]
return tf.keras.Sequential(layers)


def resnet101_modified(input_tensor=None, input_shape=None, **kwargs):
    """
    The ResNet-101 from `tf.keras.applications.ResNet101` has a problem with its convolutional
    layers: it adds bias terms to layers that are immediately followed by batch normalization,
    which is redundant.
    https://github.com/tensorflow/tensorflow/issues/37365
    This ResNet-101 implementation provides a cleaner version.
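
    Example
    -------
    A minimal usage sketch::

        model = resnet101_modified(input_shape=(112, 112, 3))
        model.summary()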
"""
    if input_tensor is None:
        input_tensor = tf.keras.layers.InputLayer(input_shape=input_shape)
    elif not isinstance(input_tensor, tf.keras.layers.InputLayer):
        # Wrap a plain tensor into an InputLayer so it can head a Sequential model
        # (tf.keras.Sequential accepts layers, not raw tensors).
        input_tensor = tf.keras.layers.InputLayer(
            input_tensor=input_tensor, input_shape=input_shape
        )
bn_axis = 3
# inputs are of size 224 x 224 x 3
layers = [input_tensor]
layers += [
Conv2D(
64,
(7, 7),
strides=(2, 2),
kernel_initializer="orthogonal",
use_bias=False,
trainable=True,
kernel_regularizer=l2(weight_decay),
padding="same",
name="conv1/7x7_s2",
)
]
# inputs are of size 112 x 112 x 64
layers += [BatchNormalization(axis=bn_axis, name="conv1/7x7_s2/bn")]
layers += [Activation("relu")]
layers += [MaxPooling2D((3, 3), strides=(2, 2))]
# inputs are of size 56 x 56
layers += [ConvBlock(3, [64, 64, 256], stage=2, block=1, strides=(1, 1))]
layers += [IdentityBlock(3, [64, 64, 256], stage=2, block=2)]
layers += [IdentityBlock(3, [64, 64, 256], stage=2, block=3)]
# inputs are of size 28 x 28
layers += [ConvBlock(3, [128, 128, 512], stage=3, block=1)]
layers += [IdentityBlock(3, [128, 128, 512], stage=3, block=2)]
layers += [IdentityBlock(3, [128, 128, 512], stage=3, block=3)]
layers += [IdentityBlock(3, [128, 128, 512], stage=3, block=4)]
# inputs are of size 14 x 14
    # 23 blocks in this stage (a ConvBlock plus 22 IdentityBlocks, vs. 6 blocks
    # in ResNet-50). This stage is the only difference from ResNet-50.
layers += [ConvBlock(3, [256, 256, 1024], stage=4, block=1)]
for i in range(2, 24):
layers += [IdentityBlock(3, [256, 256, 1024], stage=4, block=i)]
# inputs are of size 7 x 7
layers += [ConvBlock(3, [512, 512, 2048], stage=5, block=1)]
layers += [IdentityBlock(3, [512, 512, 2048], stage=5, block=2)]
layers += [IdentityBlock(3, [512, 512, 2048], stage=5, block=3)]
return tf.keras.Sequential(layers)


if __name__ == "__main__":
    input_tensor = tf.keras.layers.InputLayer(input_shape=(112, 112, 3))
    model = resnet50_modified(input_tensor=input_tensor)
    print(len(model.variables))
    model.summary()
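
    # Illustrative sanity check (relies on Keras's default variable naming):
    # since every convolution sets use_bias=False, no variable in the model
    # should be a bias term.
    assert not any("bias" in v.name for v in model.variables)
    print("no bias terms found, as expected")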