import torch
from torch import nn, Tensor
from typing import Sequence, Callable, Optional, Tuple, Union
__all__ = [
'LambdaLayer', 'ModifyByLambda', 'ScalingLayer', 'BiasLayer', 'ResidualLinearBlock',
'LinearlyAugmentedFF', 'HighwayLayer', 'SquareUnitLinear',
'QuadraticPolynomialLayer', 'MLP'
]


class LambdaLayer(nn.Module):
    """
    Wrap a callable as a layer.
    Useful for stateless layers (i.e. without parameters).
    """
def __init__(self, fn: Callable[[Tensor], Tensor]):
super(LambdaLayer, self).__init__()
self.fn = fn

    def forward(self, input: Tensor) -> Tensor:
return self.fn(input)
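
# Usage sketch (illustrative only; the callable and shapes are arbitrary assumptions):
#   act = LambdaLayer(torch.tanh)
#   y = act(torch.randn(4, 8))  # elementwise tanh, output shape (4, 8)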


class ModifyByLambda(nn.Module):
    """
    Apply a function to the output of a wrapped module.
    """
def __init__(self, module: nn.Module, fn: Callable[[Tensor], Tensor]):
super().__init__()
self.module = module
self.fn = fn

    def forward(self, input: Tensor) -> Tensor:
return self.fn(self.module(input))
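
# Usage sketch (illustrative only; the wrapped module and scale are arbitrary assumptions):
#   layer = ModifyByLambda(nn.Linear(8, 8), lambda t: t * 0.5)
#   y = layer(torch.randn(4, 8))  # linear output scaled by 0.5, shape (4, 8)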


class ScalingLayer(LambdaLayer):
    """
    Scale the input by a constant factor, e.g. to stabilize residual branches.

    References:
Christian Szegedy et al. "Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning."
https://arxiv.org/pdf/1602.07261.pdf
"""
def __init__(self, scale: float=0.1):
super(ScalingLayer, self).__init__(lambda inp: inp * scale)
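
# Usage sketch (illustrative only; sizes and the 0.1 factor are assumptions): damp a residual branch
# as in Inception-ResNet.
#   branch = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), ScalingLayer(scale=0.1))
#   x = torch.randn(4, 16)
#   y = x + branch(x)  # shape (4, 16)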


class BiasLayer(nn.Module):
"""
Add a trainable bias vector to input:
y = x + bias
"""
def __init__(self, shape: Tuple[int, ...], init: float=0.0):
super().__init__()
self.bias = nn.Parameter(torch.zeros(shape) + init, requires_grad=True)

    def forward(self, input: Tensor) -> Tensor:
        return input + self.bias[None, :]
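
# Usage sketch (illustrative only; shapes are assumptions): add a learnable per-feature offset.
#   bias = BiasLayer(shape=(8,), init=0.0)
#   y = bias(torch.randn(4, 8))  # shape (4, 8); the bias broadcasts over the batch dimension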


class ResidualLinearBlock(nn.Module):
    """
    A two-layer linear block with a residual connection:
        y = f(W_2 f(W_1 x + b_1) + b_2) + x
"""
def __init__(
self, in_features: int, activation: Callable[..., nn.Module]=nn.ReLU,
bias: bool=True, use_dropout: bool=False, drop_rate: float=0.5
):
super(ResidualLinearBlock, self).__init__()
        self.main = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=in_features, bias=bias),
            activation(),
            nn.Dropout(drop_rate) if use_dropout else nn.Identity(),
            nn.Linear(in_features=in_features, out_features=in_features, bias=bias),
            activation()
        )

    def forward(self, input: Tensor) -> Tensor:
        return input + self.main(input)
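
# Usage sketch (illustrative only; width and dropout rate are assumptions):
#   block = ResidualLinearBlock(in_features=32, activation=nn.ReLU, use_dropout=True, drop_rate=0.2)
#   y = block(torch.randn(4, 32))  # shape preserved: (4, 32)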


class LinearlyAugmentedFF(nn.Module):
    """
    Linear layer augmented with the sum of its input features:
        y = f(W x + b + sum_i x_i)
    Based on https://link.springer.com/chapter/10.1007/978-3-642-35289-8_13
"""
def __init__(self, in_features: int, out_features: int, activation: Callable[..., nn.Module]=nn.Identity):
super(LinearlyAugmentedFF, self).__init__()
self._fc = nn.Linear(in_features, out_features)
self._a = activation()

    def forward(self, x: Tensor) -> Tensor:
op = self._fc(x) + torch.sum(x, dim=-1, keepdim=True)
op = self._a(op)
return op
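
# Usage sketch (illustrative only; sizes and activation are assumptions):
#   layer = LinearlyAugmentedFF(in_features=8, out_features=4, activation=nn.Tanh)
#   y = layer(torch.randn(4, 8))  # shape (4, 4); sum(x) is added to every output unit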


class HighwayLayer(nn.Module):
"""
Highway layer:
y = T(x) * H(x) + (1 - T(x)) * x
Reference:
https://arxiv.org/pdf/1505.00387.pdf
"""
def __init__(self, in_features: int, main: nn.Module, gate: Optional[nn.Module]=None):
"""
:param in_features: Number of features of each input
        :param main: The main network H(x). Takes an input with in_features features and returns an output with in_features features.
        :param gate: The gating network T(x), with the same input/output sizes as H(x). If None, defaults to a linear layer followed by a sigmoid.
"""
super(HighwayLayer, self).__init__()
if gate is None:
self._gate = nn.Sequential(
nn.Linear(in_features=in_features, out_features=in_features),
nn.Sigmoid()
)
else:
self._gate = gate
self._main = main

    def forward(self, input: Tensor) -> Tensor:
"""
:param input: (batch_size, in_features)
:return: output: (batch_size, in_features)
"""
gate = self._gate(input)
return gate * self._main(input) + (1 - gate) * input
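
# Usage sketch (illustrative only; the main network and sizes are assumptions), using the default sigmoid gate:
#   main = nn.Sequential(nn.Linear(16, 16), nn.ReLU())
#   layer = HighwayLayer(in_features=16, main=main)
#   y = layer(torch.randn(4, 16))  # shape (4, 16); the gate interpolates between main(x) and x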


class SquareUnitLinear(nn.Linear):
    """
    Augment the input with square units:
        g(x) = W concat([x, x^2]) + b
    Reference:
        Flake, Gary. "Square Unit Augmented, Radially Extended, Multilayer Perceptrons." Neural Networks: Tricks of the Trade.
"""
def __init__(self, in_features, out_features, bias: bool=True):
super(SquareUnitLinear, self).__init__(in_features=in_features * 2, out_features=out_features, bias=bias)

    def forward(self, input: Tensor) -> Tensor:
input = torch.cat([input, input * input], dim=-1)
return super(SquareUnitLinear, self).forward(input)
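
# Usage sketch (illustrative only; sizes are assumptions). Note the underlying weight matrix
# has shape (out_features, 2 * in_features) because the input is concatenated with its square.
#   layer = SquareUnitLinear(in_features=8, out_features=4)
#   y = layer(torch.randn(4, 8))  # shape (4, 4)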


class QuadraticPolynomialLayer(nn.Module):
    """
    Low-rank quadratic layer:
        h(x) = sigma(sum_k (A_k x)^2 + b x + c)
References:
Bergstra et al. "Quadratic Polynomials Learn Better Image Features."
http://www.iro.umontreal.ca/~lisa/publications2/index.php/attachments/single/205 (dead link, use web archive)
Joseph Turian, James Bergstra and Yoshua Bengio. "Quadratic Features and Deep Architectures for Chunking."
https://www.aclweb.org/anthology/N09-2062
"""
def __init__(
self, in_features: int, out_features: int, rank: int, sqrt: bool=False, bias: bool=False, eps: float=1e-6
):
super(QuadraticPolynomialLayer, self).__init__()
self.linear = nn.Linear(in_features=in_features, out_features=out_features, bias=bias)
self.quadratic = nn.Linear(in_features=in_features, out_features=out_features * rank, bias=False)
self.out_features = out_features
self.rank = rank
self.sqrt = sqrt
self.eps = eps

    def forward(self, input: Tensor) -> Tensor:
linear_features = self.linear(input)
quadratic_features = self.quadratic(input).pow(2)
quadratic_features = quadratic_features.view(-1, self.rank, self.out_features).sum(-2)
if self.sqrt:
quadratic_features = torch.sqrt(quadratic_features + self.eps)
return quadratic_features + linear_features
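
# Usage sketch (illustrative only; sizes and rank are assumptions):
#   layer = QuadraticPolynomialLayer(in_features=8, out_features=4, rank=4, bias=True)
#   y = layer(torch.randn(16, 8))  # shape (16, 4)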


class MLP(nn.Sequential):
    """
    A generic multilayer perceptron with optional batch normalization and dropout.
"""
def __init__(
self, in_features: int, out_features: int, hidden_layer_sizes: Sequence[int]=(512,),
        activation: Callable[..., nn.Module]=nn.ReLU, bn_final: bool=False,
        drop_ps: Union[float, Sequence[float]]=(0.5, 0.5), use_batch_norm: bool=True
):
layers = []
if isinstance(drop_ps, float):
drop_ps = [drop_ps for _ in range(len(hidden_layer_sizes) + 1)]
        for i in range(len(hidden_layer_sizes)):
            if i > 0:
                in_features = hidden_layer_sizes[i - 1]
if use_batch_norm:
layers.append(nn.BatchNorm1d(num_features=in_features))
drop_p = drop_ps[i]
if drop_p != 0:
layers.append(nn.Dropout(p=drop_p))
layers.append(nn.Linear(
in_features=in_features,
out_features=hidden_layer_sizes[i]
))
layers.append(activation())
if bn_final and use_batch_norm:
layers.append(nn.BatchNorm1d(num_features=hidden_layer_sizes[-1], momentum=0.001)) #follows fast ai
if drop_ps[-1] != 0:
layers.append(nn.Dropout(p=drop_ps[-1]))
layers.append(nn.Linear(in_features=hidden_layer_sizes[-1], out_features=out_features))
super(MLP, self).__init__(*layers)
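
# Usage sketch (illustrative only; sizes and dropout rates are assumptions). drop_ps takes one value
# per hidden layer plus one for the final block.
#   model = MLP(in_features=64, out_features=10, hidden_layer_sizes=(128, 64), drop_ps=(0.1, 0.1, 0.2))
#   logits = model(torch.randn(32, 64))  # shape (32, 10)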