# Source code for ctlearn.core.model

"""
This module defines the ``CTLearnModel`` classes, which hold the basic functionality for creating a Keras model to be used in CTLearn.
"""

from abc import abstractmethod
import keras

from ctapipe.core import Component
from ctapipe.core.traits import Bool, Int, CaselessStrEnum, List, Dict, Unicode, Path
from ctlearn.core.attention import (
    dual_squeeze_excite_block,
    channel_squeeze_excite_block,
    spatial_squeeze_excite_block,
)
from ctlearn.utils import validate_trait_dict

__all__ = [
    "build_fully_connect_head",
    "CTLearnModel",
    "SingleCNN",
    "ResNet",
    "LoadedModel",
]



# [docs]
def build_fully_connect_head(inputs, layers, activation_function, tasks):
    """
    Attach one fully connected head per task to a shared input.

    For every task a chain of ``Dense`` layers is stacked on top of ``inputs``.
    All layers but the last use the task's activation function and are named
    ``fc_<task>_<n>``; the last layer has no activation and is named after the
    task itself. The ``"type"`` task additionally gets a ``Softmax`` layer.

    Parameters
    ----------
    inputs : keras tensor
        Tensor every head is built on (the callers in this module pass the
        backbone output).
    layers : dict
        Maps each task to the sequence of layer widths of its head.
    activation_function : dict
        Maps each task to the activation used in the hidden layers of its head.
    tasks : list
        Tasks for which a head is built.

    Returns
    -------
    logits : dict or keras tensor
        Dictionary mapping each task to the output of its head. When ``tasks``
        consists only of ``"type"``, the output tensor itself is returned
        instead of a dictionary.
    """
    outputs = {}
    for task in tasks:
        widths = layers[task]
        tensor = inputs
        for index, units in enumerate(widths):
            is_output_layer = index == len(widths) - 1
            if is_output_layer:
                # The output layer stays linear and carries the task name.
                tensor = keras.layers.Dense(units=units, name=task)(tensor)
                continue
            tensor = keras.layers.Dense(
                units=units,
                activation=activation_function[task],
                name=f"fc_{task}_{index+1}",
            )(tensor)
        if task == "type":
            tensor = keras.layers.Softmax()(tensor)
        outputs[task] = tensor

    # Temporary workaround until Keras supports class weights for multiple
    # outputs (or a custom loss is written):
    # https://github.com/keras-team/keras/issues/11735
    only_type_task = len(tasks) == 1 and tasks[0] == "type"
    if only_type_task:
        return outputs[tasks[0]]
    return outputs




# [docs]
class CTLearnModel(Component):
    """
    Base component for creating a Keras model to be used in CTLearn.

    This class defines the configuration shared by all CTLearn models (initial
    padding, fully connected head layout and squeeze-and-excitation attention
    settings) and declares the ``_build_backbone`` hook that concrete models
    have to implement.
    """

    init_padding = Int(
        default_value=0,
        allow_none=False,
        min=0,
        help="Initial padding to apply to the input data.",
    ).tag(config=True)

    head_layers = Dict(
        default_value={
            "type": [512, 256, 2],
            "energy": [512, 256, 1],
            "cameradirection": [512, 256, 2],
            "skydirection": [512, 256, 2],
        },
        allow_none=False,
        help=(
            "Dictionary containing the number of neurons in the fully connected head for each "
            "task ('type', 'energy', 'cameradirection', 'skydirection'). Note: The number of neurons in the last layer "
            "must match the number of classes or the number of reconstructed values."
        ),
    ).tag(config=True)

    head_activation_function = Dict(
        default_value={
            "type": "relu",
            "energy": "relu",
            "cameradirection": "tanh",
            "skydirection": "tanh",
        },
        allow_none=False,
        help=(
            "Dictionary containing the activation function for the fully connected head for each "
            "task ('type', 'energy', 'cameradirection', 'skydirection'). Note: The default activation functions "
            "are 'relu' for 'type' and 'energy' tasks, and 'tanh' for 'cameradirection' and 'skydirection' tasks. "
            "The 'type' task uses 'softmax' as the final activation function."
        ),
    ).tag(config=True)

    attention_mechanism = CaselessStrEnum(
        ["Dual-SE", "Channel-SE", "Spatial-SE"],
        default_value="Dual-SE",
        allow_none=True,
        help="Type of squeeze and excitation attention mechanism to use.",
    ).tag(config=True)

    attention_reduction_ratio = Int(
        default_value=16,
        allow_none=True,
        min=1,
        help="Reduction ratio for the squeeze and excitation attention mechanism.",
    ).tag(config=True)

    def __init__(
        self,
        config=None,
        parent=None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        config : traitlets.loader.Config
            Configuration specified by config file or cmdline arguments.
            Used to set traitlet values.
            This is mutually exclusive with passing a ``parent``.
        parent : ctapipe.core.Component or ctapipe.core.Tool
            Parent of this component in the configuration hierarchy,
            this is mutually exclusive with passing ``config``
        """
        super().__init__(config=config, parent=parent, **kwargs)

        # Bundle the squeeze and excitation settings into one dict that the
        # backbone builders consume; ``None`` means "no attention".
        self.attention = None
        if self.attention_mechanism is not None:
            self.attention = {
                "mechanism": self.attention_mechanism,
                "reduction_ratio": self.attention_reduction_ratio,
            }

    # NOTE: this hook must live inside the class body; defined at module level
    # it would not be part of ``CTLearnModel`` at all.
    @abstractmethod
    def _build_backbone(self, input_shape):
        """
        Build the backbone of the CTLearn model.

        Function to build the backbone of the CTLearn model using the specified parameters.

        Parameters
        ----------
        input_shape : tuple
            Shape of the input data. The implementations in this module pass it
            straight to ``keras.Input(shape=...)``, i.e. without the batch
            dimension.

        Returns
        -------
        backbone_model : keras.Model
            Keras model object representing the backbone of the CTLearn model.
        network_input : keras.Input
            Keras input layer object for the backbone model.
        """



# [docs]
class SingleCNN(CTLearnModel):
    """
    ``SingleCNN`` is a simple convolutional neural network model.

    This class extends the functionality of ``CTLearnModel`` by implementing
    methods to build a simple convolutional neural network model: a stack of
    convolutional layers with optional pooling, batch normalization, bottleneck
    and attention, followed by global average pooling and one fully connected
    head per task.
    """

    name = Unicode(
        "SingleCNN",
        help="Name of the model backbone.",
    ).tag(config=True)

    architecture = List(
        trait=Dict(),
        default_value=[
            {"filters": 32, "kernel_size": 3, "number": 1},
            {"filters": 32, "kernel_size": 3, "number": 1},
            {"filters": 64, "kernel_size": 3, "number": 1},
            {"filters": 128, "kernel_size": 3, "number": 1},
        ],
        allow_none=False,
        help=(
            "List of dicts containing the number of filters, kernel sizes and number of repetition. "
            "E.g. ``[{'filters': 12, 'kernel_size': 3, 'number': 1}, ...]``."
        ),
    ).tag(config=True)

    pooling_type = CaselessStrEnum(
        ["max", "average"],
        default_value="max",
        allow_none=True,
        help="Type of pooling to apply to the convolutional layers with ``pooling_parameters``.",
    ).tag(config=True)

    pooling_parameters = Dict(
        default_value={"size": 2, "strides": 2},
        allow_none=True,
        help=(
            "Parameters for the max or average pooling layers. "
            "E.g. ``{'size': 2, 'strides': 2}``."
        ),
    ).tag(config=True)

    batchnorm = Bool(
        default_value=False,
        allow_none=False,
        help="Apply batch normalization to the convolutional layers.",
    ).tag(config=True)

    bottleneck_filters = Int(
        default_value=None,
        allow_none=True,
        help="Number of filters in the bottleneck layer.",
    ).tag(config=True)

    def __init__(
        self,
        input_shape,
        tasks,
        config=None,
        parent=None,
        **kwargs,
    ):
        """
        Build the SingleCNN backbone and the task heads.

        Parameters
        ----------
        input_shape : tuple
            Shape of the input data, passed to ``keras.Input(shape=...)``
            (i.e. without the batch dimension).
        tasks : list
            Tasks for which a fully connected head is built; each task must be
            a key of ``head_layers`` and ``head_activation_function``.
        config : traitlets.loader.Config
            Configuration used to set traitlet values.
            Mutually exclusive with ``parent``.
        parent : ctapipe.core.Component or ctapipe.core.Tool
            Parent of this component in the configuration hierarchy.
            Mutually exclusive with ``config``.
        """
        super().__init__(
            config=config,
            parent=parent,
            **kwargs,
        )

        # Validate the architecture trait
        for layer in self.architecture:
            validate_trait_dict(layer, ["filters", "kernel_size", "number"])
        # The pooling parameters are only read when pooling is enabled, so a
        # model without pooling may leave them unset (``None``).
        if self.pooling_type is not None:
            validate_trait_dict(self.pooling_parameters, ["size", "strides"])

        # Construct the name of the backbone model by appending "_block" to the model name
        self.backbone_name = self.name + "_block"

        # Build the SingleCNN model backbone
        self.backbone_model, self.input_layer = self._build_backbone(input_shape)
        backbone_output = self.backbone_model(self.input_layer)
        # Validate the head traits with the provided tasks
        validate_trait_dict(self.head_layers, tasks)
        validate_trait_dict(self.head_activation_function, tasks)
        # Build the fully connected head depending on the tasks
        self.logits = build_fully_connect_head(
            backbone_output, self.head_layers, self.head_activation_function, tasks
        )

        self.model = keras.Model(self.input_layer, self.logits, name="CTLearn_model")

    def _build_backbone(self, input_shape):
        """
        Build the SingleCNN model backbone.

        Function to build the backbone of the SingleCNN model using the specified parameters.

        Parameters
        ----------
        input_shape : tuple
            Shape of the input data, passed to ``keras.Input(shape=...)``
            (i.e. without the batch dimension).

        Returns
        -------
        backbone_model : keras.Model
            Keras model object representing the backbone of the SingleCNN model.
        network_input : keras.Input
            Keras input layer object for the backbone model.
        """

        # Define the input layer from the input shape
        network_input = keras.Input(shape=input_shape)

        x = network_input
        if self.batchnorm:
            x = keras.layers.BatchNormalization(momentum=0.99)(x)

        # One stage per architecture entry: ``number`` convolutions, then the
        # optional pooling and batch normalization.
        for i, layer in enumerate(self.architecture, start=1):
            for nr in range(1, layer["number"] + 1):
                x = keras.layers.Conv2D(
                    filters=layer["filters"],
                    kernel_size=layer["kernel_size"],
                    padding="same",
                    activation="relu",
                    name=f"{self.backbone_name}_conv_{i}_{nr}",
                )(x)
            if self.pooling_type is not None:
                if self.pooling_type == "max":
                    x = keras.layers.MaxPool2D(
                        pool_size=self.pooling_parameters["size"],
                        strides=self.pooling_parameters["strides"],
                        name=f"{self.backbone_name}_pool_{i}",
                    )(x)
                elif self.pooling_type == "average":
                    x = keras.layers.AveragePooling2D(
                        pool_size=self.pooling_parameters["size"],
                        strides=self.pooling_parameters["strides"],
                        name=f"{self.backbone_name}_pool_{i}",
                    )(x)
            if self.batchnorm:
                x = keras.layers.BatchNormalization(momentum=0.99)(x)

        # Optional 1x1 bottleneck convolution
        if self.bottleneck_filters is not None:
            x = keras.layers.Conv2D(
                filters=self.bottleneck_filters,
                kernel_size=1,
                padding="same",
                activation="relu",
                name=f"{self.backbone_name}_bottleneck",
            )(x)
            if self.batchnorm:
                x = keras.layers.BatchNormalization(momentum=0.99)(x)

        # Attention mechanism. The reduction ratio is stored by the base class
        # under the "reduction_ratio" key of ``self.attention``.
        if self.attention is not None:
            if self.attention["mechanism"] == "Dual-SE":
                x = dual_squeeze_excite_block(
                    x,
                    self.attention["reduction_ratio"],
                    name=f"{self.backbone_name}_dse",
                )
            elif self.attention["mechanism"] == "Channel-SE":
                x = channel_squeeze_excite_block(
                    x,
                    self.attention["reduction_ratio"],
                    name=f"{self.backbone_name}_cse",
                )
            elif self.attention["mechanism"] == "Spatial-SE":
                x = spatial_squeeze_excite_block(x, name=f"{self.backbone_name}_sse")

        # Apply global average pooling as the final layer of the backbone
        network_output = keras.layers.GlobalAveragePooling2D(
            name=self.backbone_name + "_global_avgpool"
        )(x)
        # Create the backbone model
        backbone_model = keras.Model(
            network_input, network_output, name=self.backbone_name
        )
        return backbone_model, network_input




# [docs]
class ResNet(CTLearnModel):
    """
    ``ResNet`` is a residual neural network model.

    This class extends the functionality of ``CTLearnModel`` by implementing
    methods to build a residual neural network model: optional initial
    padding, convolution and max pooling, a stack of residual stages, global
    average pooling and one fully connected head per task.
    """

    name = Unicode(
        "ThinResNet",
        help="Name of the model backbone.",
    ).tag(config=True)

    init_layer = Dict(
        default_value=None,
        allow_none=True,
        help=(
            "Parameters for the first convolutional layer. "
            "E.g. ``{'filters': 64, 'kernel_size': 7, 'strides': 2}``."
        ),
    ).tag(config=True)

    init_max_pool = Dict(
        default_value=None,
        allow_none=True,
        help=(
            "Parameters for the first max pooling layer. "
            "E.g. ``{'size': 3, 'strides': 2}``."
        ),
    ).tag(config=True)

    residual_block_type = CaselessStrEnum(
        ["basic", "bottleneck"],
        default_value="bottleneck",
        allow_none=False,
        help="Type of residual block to use.",
    ).tag(config=True)

    architecture = List(
        trait=Dict(),
        default_value=[
            {"filters": 48, "blocks": 2},
            {"filters": 96, "blocks": 3},
            {"filters": 128, "blocks": 3},
            {"filters": 256, "blocks": 3},
        ],
        allow_none=False,
        help=(
            "List of dicts containing the number of filters and residual blocks. "
            "E.g. ``[{'filters': 12, 'blocks': 2}, ...]``."
        ),
    ).tag(config=True)

    def __init__(
        self,
        input_shape,
        tasks,
        config=None,
        parent=None,
        **kwargs,
    ):
        """
        Build the ResNet backbone and the task heads.

        Parameters
        ----------
        input_shape : tuple
            Shape of the input data, passed to ``keras.Input(shape=...)``
            (i.e. without the batch dimension).
        tasks : list
            Tasks for which a fully connected head is built; each task must be
            a key of ``head_layers`` and ``head_activation_function``.
        config : traitlets.loader.Config
            Configuration used to set traitlet values.
            Mutually exclusive with ``parent``.
        parent : ctapipe.core.Component or ctapipe.core.Tool
            Parent of this component in the configuration hierarchy.
            Mutually exclusive with ``config``.
        """
        super().__init__(
            config=config,
            parent=parent,
            **kwargs,
        )

        # Validate the architecture trait
        for layer in self.architecture:
            validate_trait_dict(layer, ["filters", "blocks"])
        # Validate the initial layers trait
        if self.init_layer is not None:
            validate_trait_dict(self.init_layer, ["filters", "kernel_size", "strides"])
        if self.init_max_pool is not None:
            validate_trait_dict(self.init_max_pool, ["size", "strides"])

        # Construct the name of the backbone model by appending "_block" to the model name
        self.backbone_name = self.name + "_block"

        # Build the ResNet model backbone
        self.backbone_model, self.input_layer = self._build_backbone(input_shape)
        backbone_output = self.backbone_model(self.input_layer)
        # Validate the head traits with the provided tasks
        validate_trait_dict(self.head_layers, tasks)
        validate_trait_dict(self.head_activation_function, tasks)
        # Build the fully connected head depending on the tasks
        self.logits = build_fully_connect_head(
            backbone_output, self.head_layers, self.head_activation_function, tasks
        )

        self.model = keras.Model(self.input_layer, self.logits, name="CTLearn_model")

    def _build_backbone(self, input_shape):
        """
        Build the ResNet model backbone.

        Function to build the backbone of the ResNet model using the specified parameters.
        The optional initial padding, convolution and max pooling layers are part of the
        returned backbone model.

        Parameters
        ----------
        input_shape : tuple
            Shape of the input data, passed to ``keras.Input(shape=...)``
            (i.e. without the batch dimension).

        Returns
        -------
        backbone_model : keras.Model
            Keras model object representing the ResNet backbone.
        network_input : keras.Input
            Keras input layer object for the backbone model.
        """
        # Define the input layer from the input shape
        network_input = keras.Input(shape=input_shape)
        # Keep ``network_input`` pointing at the real input layer and thread the
        # intermediate tensors through ``x``; otherwise the initial layers would
        # be cut out of the backbone model and the model input would be an
        # intermediate tensor.
        x = network_input
        # Apply initial padding if specified. ``ZeroPadding2D`` only takes the
        # padding itself; it has no kernel size or strides.
        if self.init_padding > 0:
            x = keras.layers.ZeroPadding2D(
                padding=self.init_padding,
                name=self.backbone_name + "_padding",
            )(x)
        # Apply initial convolutional layer if specified
        if self.init_layer is not None:
            x = keras.layers.Conv2D(
                filters=self.init_layer["filters"],
                kernel_size=self.init_layer["kernel_size"],
                strides=self.init_layer["strides"],
                name=self.backbone_name + "_conv1_conv",
            )(x)
        # Apply max pooling if specified
        if self.init_max_pool is not None:
            x = keras.layers.MaxPool2D(
                pool_size=self.init_max_pool["size"],
                strides=self.init_max_pool["strides"],
                name=self.backbone_name + "_pool1_pool",
            )(x)
        # Build the residual blocks
        engine_output = self._stacked_res_blocks(
            x,
            architecture=self.architecture,
            residual_block_type=self.residual_block_type,
            attention=self.attention,
            name=self.backbone_name,
        )
        # Apply global average pooling as the final layer of the backbone
        network_output = keras.layers.GlobalAveragePooling2D(
            name=self.backbone_name + "_global_avgpool"
        )(engine_output)
        # Create the backbone model
        backbone_model = keras.Model(
            network_input, network_output, name=self.backbone_name
        )
        return backbone_model, network_input

    def _stacked_res_blocks(
        self, inputs, architecture, residual_block_type, attention, name=None
    ):
        """
        Build a stack of residual blocks for the CTLearn model.

        One stage of residual blocks is built per entry of ``architecture``. The first
        stage uses stride 1, every following stage uses the default stride of ``_stack_fn``.
        Stages are named ``<name>_conv2``, ``<name>_conv3``, ...

        Parameters
        ----------
        inputs : keras tensor
            Input tensor to the residual blocks.
        architecture : list of dict
            List of dictionaries containing the architecture of the ResNet model, which includes:
            - Number of filters for the convolutional layers in the residual blocks (``"filters"``).
            - Number of residual blocks to stack (``"blocks"``).
        residual_block_type : str
            Type of residual block to use. Options are 'basic' or 'bottleneck'.
        attention : dict
            Dictionary containing the configuration parameters for the attention mechanism.
        name : str, optional
            Label for the model, used as prefix of the layer names.

        Returns
        -------
        x : keras tensor
            Output tensor after passing through the stack of residual blocks.
        """

        # Get hyperparameters for the model architecture
        filters_list = [layer["filters"] for layer in architecture]
        blocks_list = [layer["blocks"] for layer in architecture]
        # First stage keeps the spatial resolution (stride 1)
        x = self._stack_fn(
            inputs,
            filters_list[0],
            blocks_list[0],
            residual_block_type,
            stride=1,
            attention=attention,
            name=name + "_conv2",
        )
        # Remaining stages use the default stride of ``_stack_fn``
        for i, (filters, blocks) in enumerate(zip(filters_list[1:], blocks_list[1:])):
            x = self._stack_fn(
                x,
                filters,
                blocks,
                residual_block_type,
                attention=attention,
                name=name + "_conv" + str(i + 3),
            )
        return x

    def _stack_fn(
        self,
        inputs,
        filters,
        blocks,
        residual_block_type,
        stride=2,
        attention=None,
        name=None,
    ):
        """
        Stack residual blocks for the CTLearn model.

        The first block of the stack uses a convolutional shortcut and the given ``stride``;
        the remaining ``blocks - 1`` blocks use an identity shortcut and the block's default stride.

        Parameters
        ----------
        inputs : keras tensor
            Input tensor to the residual blocks.
        filters : int
            Number of filters passed to each residual block.
        blocks : int
            Number of residual blocks to stack.
        residual_block_type : str
            Type of residual block ('basic' or 'bottleneck').
        stride : int, optional
            Stride for the first layer in the first block. Default is 2.
        attention : dict, optional
            Configuration parameters for the attention mechanism. Default is None.
        name : str, optional
            Label for the stack, used as prefix of the layer names. Default is None.

        Returns
        -------
        keras tensor
            Output tensor for the stacked blocks.
        """

        res_blocks = {
            "basic": self._basic_residual_block,
            "bottleneck": self._bottleneck_residual_block,
        }
        build_block = res_blocks[residual_block_type]

        x = build_block(
            inputs,
            filters,
            stride=stride,
            attention=attention,
            name=name + "_block1",
        )
        for i in range(2, blocks + 1):
            x = build_block(
                x,
                filters,
                conv_shortcut=False,
                attention=attention,
                name=name + "_block" + str(i),
            )

        return x

    @staticmethod
    def _apply_attention(x, attention, name):
        """Apply the configured squeeze and excitation block to ``x``, if any."""
        if attention is None:
            return x
        mechanism = attention["mechanism"]
        if mechanism == "Dual-SE":
            return dual_squeeze_excite_block(
                x, attention["reduction_ratio"], name=name + "_dse"
            )
        if mechanism == "Channel-SE":
            return channel_squeeze_excite_block(
                x, attention["reduction_ratio"], name=name + "_cse"
            )
        if mechanism == "Spatial-SE":
            return spatial_squeeze_excite_block(x, name=name + "_sse")
        return x

    def _basic_residual_block(
        self,
        inputs,
        filters,
        kernel_size=3,
        stride=1,
        conv_shortcut=True,
        attention=None,
        name=None,
    ):
        """
        Build a basic residual block for the CTLearn model.

        The block consists of two convolutional layers with an optional
        convolutional shortcut, an optional attention mechanism on the residual
        branch, and a final addition followed by a ReLU.

        Parameters
        ----------
        inputs : keras tensor
            Input tensor to the residual block.
        filters : int
            Number of filters for the convolutional layers.
        kernel_size : int, optional
            Size of the convolutional kernel. Default is 3.
        stride : int, optional
            Stride for the first convolution and the convolutional shortcut. Default is 1.
        conv_shortcut : bool, optional
            Whether to use a convolutional layer for the shortcut connection. Default is True.
        attention : dict, optional
            Configuration parameters for the attention mechanism. Default is None.
        name : str, optional
            Name for the residual block, used as prefix of the layer names. Default is None.

        Returns
        -------
        keras tensor
            Output tensor after applying the residual block.
        """

        if conv_shortcut:
            shortcut = keras.layers.Conv2D(
                filters=filters, kernel_size=1, strides=stride, name=name + "_0_conv"
            )(inputs)
        else:
            shortcut = inputs

        x = keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            strides=stride,
            padding="same",
            activation="relu",
            name=name + "_1_conv",
        )(inputs)
        x = keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            padding="same",
            activation="relu",
            name=name + "_2_conv",
        )(x)

        # Attention mechanism on the residual branch
        x = self._apply_attention(x, attention, name)

        x = keras.layers.Add(name=name + "_add")([shortcut, x])
        x = keras.layers.ReLU(name=name + "_out")(x)

        return x

    def _bottleneck_residual_block(
        self,
        inputs,
        filters,
        kernel_size=3,
        stride=1,
        conv_shortcut=True,
        attention=None,
        name=None,
    ):
        """
        Build a bottleneck residual block for the CTLearn model.

        The block consists of three convolutional layers: a 1x1 convolution with ``filters``
        filters, a ``kernel_size`` convolution with ``filters`` filters, and a 1x1 convolution
        with ``4 * filters`` filters. It also includes an optional convolutional shortcut
        (``4 * filters`` filters), an optional attention mechanism on the residual branch,
        and a final addition followed by a ReLU.

        Parameters
        ----------
        inputs : keras tensor
            Input tensor to the residual block.
        filters : int
            Number of filters for the first two convolutional layers; the last
            convolution and the convolutional shortcut use ``4 * filters``.
        kernel_size : int, optional
            Size of the kernel of the middle convolution. Default is 3.
        stride : int, optional
            Stride for the first convolution and the convolutional shortcut. Default is 1.
        conv_shortcut : bool, optional
            Whether to use a convolutional layer for the shortcut connection. Default is True.
        attention : dict, optional
            Configuration parameters for the attention mechanism. Default is None.
        name : str, optional
            Name for the residual block, used as prefix of the layer names. Default is None.

        Returns
        -------
        output : keras tensor
            Output tensor of the residual block.
        """

        if conv_shortcut:
            shortcut = keras.layers.Conv2D(
                filters=4 * filters,
                kernel_size=1,
                strides=stride,
                name=name + "_0_conv",
            )(inputs)
        else:
            shortcut = inputs

        x = keras.layers.Conv2D(
            filters=filters,
            kernel_size=1,
            strides=stride,
            activation="relu",
            name=name + "_1_conv",
        )(inputs)
        x = keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            padding="same",
            activation="relu",
            name=name + "_2_conv",
        )(x)
        x = keras.layers.Conv2D(
            filters=4 * filters, kernel_size=1, name=name + "_3_conv"
        )(x)

        # Attention mechanism on the residual branch
        x = self._apply_attention(x, attention, name)

        x = keras.layers.Add(name=name + "_add")([shortcut, x])
        x = keras.layers.ReLU(name=name + "_out")(x)

        return x




# [docs]
class LoadedModel(CTLearnModel):
    """
    ``LoadedModel`` is a pre-trained Keras model.

    This class extends the functionality of ``CTLearnModel`` by implementing
    methods to load a pre-trained Keras model. The model can be used as a backbone
    for the CTLearn model.
    """

    load_model_from = Path(
        default_value=None,
        help="Path to a Keras model file (Keras3) or directory Keras2)",
        allow_none=True,
        exists=True,
        directory_ok=True,
        file_ok=True,
    ).tag(config=True)

    overwrite_head = Bool(
        default_value=False,
        allow_none=False,
        help="Set to overwrite the fully connected head from the loaded model.",
    ).tag(config=True)

    trainable_backbone = Bool(
        default_value=True,
        allow_none=False,
        help="Set to set the backbone model to be trainable.",
    ).tag(config=True)

    def __init__(
        self,
        input_shape,
        tasks,
        config=None,
        parent=None,
        **kwargs,
    ):
        """
        Load the pre-trained model and optionally replace its head.

        Parameters
        ----------
        input_shape : tuple
            Shape of the input data, passed to ``keras.Input(shape=...)``
            (i.e. without the batch dimension).
        tasks : list
            Tasks for which a new fully connected head is built. Only used
            when ``overwrite_head`` is set.
        config : traitlets.loader.Config
            Configuration used to set traitlet values.
            Mutually exclusive with ``parent``.
        parent : ctapipe.core.Component or ctapipe.core.Tool
            Parent of this component in the configuration hierarchy.
            Mutually exclusive with ``config``.
        """
        super().__init__(
            config=config,
            parent=parent,
            **kwargs,
        )

        # Load the model from the specified path
        self.model = keras.saving.load_model(self.load_model_from)
        # Extract the backbone from the loaded model
        self.backbone_model, self.input_layer = self._build_backbone(input_shape)
        # Keep the head of the loaded model or build a new one
        if self.overwrite_head:
            backbone_output = self.backbone_model(self.input_layer)
            # Validate the head traits with the provided tasks
            validate_trait_dict(self.head_layers, tasks)
            validate_trait_dict(self.head_activation_function, tasks)
            # Build the fully connected head depending on the tasks
            self.logits = build_fully_connect_head(
                backbone_output, self.head_layers, self.head_activation_function, tasks
            )
            self.model = keras.Model(
                self.input_layer, self.logits, name="CTLearn_model"
            )

    def _build_backbone(self, input_shape):
        """
        Build the LoadedModel backbone.

        The backbone is the layer of the loaded model whose name ends with ``"_block"``
        (the last one, if several match). Its ``trainable`` flag is set from
        ``trainable_backbone`` and it is wrapped in a new model on a fresh input layer.

        Parameters
        ----------
        input_shape : tuple
            Shape of the input data, passed to ``keras.Input(shape=...)``
            (i.e. without the batch dimension).

        Returns
        -------
        backbone_model : keras.Model
            Keras model object representing the LoadedModel backbone.
        network_input : keras.Input
            Keras input layer object for the backbone model.

        Raises
        ------
        ValueError
            If the loaded model contains no layer whose name ends with ``"_block"``.
        """

        # Define the input layer from the input shape
        network_input = keras.Input(shape=input_shape)
        # Locate the backbone layer(s) and set them to be trainable or not
        backbone_layer = None
        for layer in self.model.layers:
            if layer.name.endswith("_block"):
                backbone_layer = layer
                backbone_layer.trainable = self.trainable_backbone
        if backbone_layer is None:
            raise ValueError(
                f"No backbone layer with a name ending in '_block' found in the "
                f"model loaded from '{self.load_model_from}'."
            )
        self.backbone_name = backbone_layer.name
        network_output = backbone_layer(network_input)
        # Create the backbone model
        backbone_model = keras.Model(
            network_input, network_output, name=self.backbone_name
        )
        return backbone_model, network_input