import torch
from torch import nn, Tensor
from typing import Sequence, Callable, Optional, Tuple, Union
__all__ = [
'LambdaLayer', 'ModifyByLambda', 'ScalingLayer', 'BiasLayer', 'ResidualLinearBlock',
'LinearlyAugmentedFF', 'HighwayLayer', 'SquareUnitLinear',
'QuadraticPolynomialLayer', 'MLP'
]


class LambdaLayer(nn.Module):
    """
    Wrap a callable as a layer.
    Useful for stateless layers (i.e. without parameters).
    """
def __init__(self, fn: Callable[[Tensor], Tensor]):
super(LambdaLayer, self).__init__()
self.fn = fn

    def forward(self, input: Tensor) -> Tensor:
return self.fn(input)
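
# Usage sketch (illustrative only; the callable and shapes are arbitrary assumptions):
#   act = LambdaLayer(torch.tanh)
#   y = act(torch.randn(4, 8))  # elementwise tanh, output shape (4, 8)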


class ModifyByLambda(nn.Module):
    """
    Apply a function to the output of a wrapped module.
    """
def __init__(self, module: nn.Module, fn: Callable[[Tensor], Tensor]):
super().__init__()
self.module = module
self.fn = fn

    def forward(self, input: Tensor) -> Tensor:
return self.fn(self.module(input))
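
# Usage sketch (illustrative only; the wrapped module and scale are arbitrary assumptions):
#   layer = ModifyByLambda(nn.Linear(8, 8), lambda t: t * 0.5)
#   y = layer(torch.randn(4, 8))  # linear output scaled by 0.5, shape (4, 8)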


class ScalingLayer(LambdaLayer):
    """
    Scale the input by a constant factor, e.g. to stabilize residual branches.

    References:
Christian Szegedy et al. "Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning."
https://arxiv.org/pdf/1602.07261.pdf
"""
def __init__(self, scale: float=0.1):
super(ScalingLayer, self).__init__(lambda inp: inp * scale)
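
# Usage sketch (illustrative only; sizes and the 0.1 factor are assumptions): damp a residual branch
# as in Inception-ResNet.
#   branch = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), ScalingLayer(scale=0.1))
#   x = torch.randn(4, 16)
#   y = x + branch(x)  # shape (4, 16)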


class BiasLayer(nn.Module):
"""
Add a trainable bias vector to input:
y = x + bias
"""
def __init__(self, shape: Tuple[int, ...], init: float=0.0):
super().__init__()
self.bias = nn.Parameter(torch.zeros(shape) + init, requires_grad=True)

    def forward(self, input: Tensor) -> Tensor:
        return input + self.bias[None, :]
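
# Usage sketch (illustrative only; shapes are assumptions): add a learnable per-feature offset.
#   bias = BiasLayer(shape=(8,), init=0.0)
#   y = bias(torch.randn(4, 8))  # shape (4, 8); the bias broadcasts over the batch dimension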


class ResidualLinearBlock(nn.Module):
    """
    A two-layer linear block with a residual connection:
        y = f(W_2 f(W_1 x + b_1) + b_2) + x
"""
def __init__(
self, in_features: int, activation: Callable[..., nn.Module]=nn.ReLU,
bias: bool=True, use_dropout: bool=False, drop_rate: float=0.5
):
super(ResidualLinearBlock, self).__init__()
        self.main = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=in_features, bias=bias),
            activation(),
            nn.Dropout(drop_rate) if use_dropout else nn.Identity(),
            nn.Linear(in_features=in_features, out_features=in_features, bias=bias),
            activation()
        )

    def forward(self, input: Tensor) -> Tensor:
        return input + self.main(input)
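
# Usage sketch (illustrative only; width and dropout rate are assumptions):
#   block = ResidualLinearBlock(in_features=32, activation=nn.ReLU, use_dropout=True, drop_rate=0.2)
#   y = block(torch.randn(4, 32))  # shape preserved: (4, 32)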


class LinearlyAugmentedFF(nn.Module):
    """
    Linear layer augmented with the sum of its input features:
        y = f(W x + b + sum_i x_i)
    Based on https://link.springer.com/chapter/10.1007/978-3-642-35289-8_13
"""
def __init__(self, in_features: int, out_features: int, activation: Callable[..., nn.Module]=nn.Identity):
super(LinearlyAugmentedFF, self).__init__()
self._fc = nn.Linear(in_features, out_features)
self._a = activation()

    def forward(self, x: Tensor) -> Tensor:
op = self._fc(x) + torch.sum(x, dim=-1, keepdim=True)
op = self._a(op)
return op
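
# Usage sketch (illustrative only; sizes and activation are assumptions):
#   layer = LinearlyAugmentedFF(in_features=8, out_features=4, activation=nn.Tanh)
#   y = layer(torch.randn(4, 8))  # shape (4, 4); sum(x) is added to every output unit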


class HighwayLayer(nn.Module):
"""
Highway layer:
y = T(x) * H(x) + (1 - T(x)) * x
Reference:
https://arxiv.org/pdf/1505.00387.pdf
"""
def __init__(self, in_features: int, main: nn.Module, gate: Optional[nn.Module]=None):
"""
:param in_features: Number of features of each input
        :param main: The main network H(x). Takes an input with in_features features and returns an output with in_features features.
        :param gate: The gating network T(x), with the same input/output sizes as H(x). If None, defaults to a linear layer followed by a sigmoid.
"""
super(HighwayLayer, self).__init__()
if gate is None:
self._gate = nn.Sequential(
nn.Linear(in_features=in_features, out_features=in_features),
nn.Sigmoid()
)
else:
self._gate = gate
self._main = main

    def forward(self, input: Tensor) -> Tensor:
"""
:param input: (batch_size, in_features)
:return: output: (batch_size, in_features)
"""
gate = self._gate(input)
return gate * self._main(input) + (1 - gate) * input
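
# Usage sketch (illustrative only; the main network and sizes are assumptions), using the default sigmoid gate:
#   main = nn.Sequential(nn.Linear(16, 16), nn.ReLU())
#   layer = HighwayLayer(in_features=16, main=main)
#   y = layer(torch.randn(4, 16))  # shape (4, 16); the gate interpolates between main(x) and x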


class SquareUnitLinear(nn.Linear):
    """
    Augment the input with square units:
        g(x) = W concat([x, x^2]) + b
    Reference:
        Flake, Gary. "Square Unit Augmented, Radially Extended, Multilayer Perceptrons." Neural Networks: Tricks of the Trade.
"""
def __init__(self, in_features, out_features, bias: bool=True):
super(SquareUnitLinear, self).__init__(in_features=in_features * 2, out_features=out_features, bias=bias)

    def forward(self, input: Tensor) -> Tensor:
input = torch.cat([input, input * input], dim=-1)
return super(SquareUnitLinear, self).forward(input)
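
# Usage sketch (illustrative only; sizes are assumptions). Note the underlying weight matrix
# has shape (out_features, 2 * in_features) because the input is concatenated with its square.
#   layer = SquareUnitLinear(in_features=8, out_features=4)
#   y = layer(torch.randn(4, 8))  # shape (4, 4)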


class QuadraticPolynomialLayer(nn.Module):
    """
    Low-rank quadratic layer:
        h(x) = sigma(sum_k (A_k x)^2 + b x + c)
References:
Bergstra et al. "Quadratic Polynomials Learn Better Image Features."
http://www.iro.umontreal.ca/~lisa/publications2/index.php/attachments/single/205 (dead link, use web archive)
Joseph Turian, James Bergstra and Yoshua Bengio. "Quadratic Features and Deep Architectures for Chunking."
https://www.aclweb.org/anthology/N09-2062
"""
def __init__(
self, in_features: int, out_features: int, rank: int, sqrt: bool=False, bias: bool=False, eps: float=1e-6
):
super(QuadraticPolynomialLayer, self).__init__()
self.linear = nn.Linear(in_features=in_features, out_features=out_features, bias=bias)
self.quadratic = nn.Linear(in_features=in_features, out_features=out_features * rank, bias=False)
self.out_features = out_features
self.rank = rank
self.sqrt = sqrt
self.eps = eps

    def forward(self, input: Tensor) -> Tensor:
linear_features = self.linear(input)
quadratic_features = self.quadratic(input).pow(2)
quadratic_features = quadratic_features.view(-1, self.rank, self.out_features).sum(-2)
if self.sqrt:
quadratic_features = torch.sqrt(quadratic_features + self.eps)
return quadratic_features + linear_features
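
# Usage sketch (illustrative only; sizes and rank are assumptions):
#   layer = QuadraticPolynomialLayer(in_features=8, out_features=4, rank=4, bias=True)
#   y = layer(torch.randn(16, 8))  # shape (16, 4)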


class MLP(nn.Sequential):
    """
    A generic multilayer perceptron with optional batch normalization and dropout.
"""
def __init__(
self, in_features: int, out_features: int, hidden_layer_sizes: Sequence[int]=(512,),
        activation: Callable[..., nn.Module]=nn.ReLU, bn_final: bool=False,
        drop_ps: Union[float, Sequence[float]]=(0.5, 0.5), use_batch_norm: bool=True
):
layers = []
if isinstance(drop_ps, float):
drop_ps = [drop_ps for _ in range(len(hidden_layer_sizes) + 1)]
        for i in range(len(hidden_layer_sizes)):
            if i > 0:
                in_features = hidden_layer_sizes[i - 1]
if use_batch_norm:
layers.append(nn.BatchNorm1d(num_features=in_features))
drop_p = drop_ps[i]
if drop_p != 0:
layers.append(nn.Dropout(p=drop_p))
layers.append(nn.Linear(
in_features=in_features,
out_features=hidden_layer_sizes[i]
))
layers.append(activation())
if bn_final and use_batch_norm:
layers.append(nn.BatchNorm1d(num_features=hidden_layer_sizes[-1], momentum=0.001)) #follows fast ai
if drop_ps[-1] != 0:
layers.append(nn.Dropout(p=drop_ps[-1]))
layers.append(nn.Linear(in_features=hidden_layer_sizes[-1], out_features=out_features))
super(MLP, self).__init__(*layers)
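
# Usage sketch (illustrative only; sizes and dropout rates are assumptions). drop_ps takes one value
# per hidden layer plus one for the final block.
#   model = MLP(in_features=64, out_features=10, hidden_layer_sizes=(128, 64), drop_ps=(0.1, 0.1, 0.2))
#   logits = model(torch.randn(32, 64))  # shape (32, 10)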