import torchbearer
from torchbearer import cite
from torchbearer.callbacks import Callback
import torch.nn.init as init
__kaiming__ = """
@inproceedings{he2015delving,
title={Delving deep into rectifiers: Surpassing human-level performance on imagenet classification},
author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
booktitle={Proceedings of the IEEE international conference on computer vision},
pages={1026--1034},
year={2015}
}"""
__xavier__ = """
@inproceedings{glorot2010understanding,
title={Understanding the difficulty of training deep feedforward neural networks},
author={Glorot, Xavier and Bengio, Yoshua},
booktitle={Proceedings of the thirteenth international conference on artificial intelligence and statistics},
pages={249--256},
year={2010}
}
"""
__lsuv__ = """
@article{mishkin2015all,
title={All you need is a good init},
author={Mishkin, Dmytro and Matas, Jiri},
journal={arXiv preprint arXiv:1511.06422},
year={2015}
}
"""
class WeightInit(Callback):
"""Base class for weight initialisations. Performs the provided function for each module when on_init is
called.
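
    Example (a minimal sketch of a custom initialiser; the normal-distribution init shown is illustrative, any
    function which modifies a module in place will do): ::

        >>> import torch
        >>> import torch.nn as nn
        >>> import torch.nn.init as init
        >>> from torchbearer import Trial
        >>> from torchbearer.callbacks.init import WeightInit

        # Custom init filling weights from N(0, 0.01), targeting only Conv layers
        >>> initialiser = WeightInit(lambda module: init.normal_(module.weight.data, std=0.01), targets=['Conv'])

        # 100 random data points
        >>> data = torch.rand(100, 3, 5, 5)
        >>> model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU())
        >>> trial = Trial(model, callbacks=[initialiser]).with_train_data(data, data+5)
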
Args:
        initialiser (lambda): a function which initialises an nn.Module **in place**
modules (Iterable[nn.Module] or nn.Module, optional): an iterable of nn.Modules or a
single nn.Module that will have weights initialised, otherwise this is retrieved from the model
        targets (list[str]): A list of lookup strings used to match the class names of modules to initialise
State Requirements:
- :attr:`torchbearer.state.MODEL`: Model should have the `modules` method if modules is None
"""
def __init__(self, initialiser=lambda module: module, modules=None, targets=['Conv', 'Linear', 'Bilinear']):
self.initialiser = initialiser
self.modules = modules
self.targets = targets
def on_init(self, state):
        if self.modules is None:
            self.modules = list(state[torchbearer.MODEL].modules())  # materialise so repeated calls don't exhaust the generator
for m in self.modules:
            if any(target in m.__class__.__name__ for target in self.targets):
self.initialiser(m)
@cite(__lsuv__)
class LsuvInit(Callback):
"""Layer-sequential unit-variance (LSUV) initialization as described in
`All you need is a good init <https://arxiv.org/abs/1511.06422>`_ and
modified from the code by `ducha-aiki <https://github.com/ducha-aiki/LSUV-pytorch>`__.
    To be consistent with the paper, LsuvInit should be preceded by a ZeroBias init on the Linear and Conv layers.
Example: ::
>>> import torch
>>> import torch.nn as nn
>>> from torchbearer import Trial
        >>> from torchbearer.callbacks.init import LsuvInit, ZeroBias
# 100 random data points
>>> data = torch.rand(100, 3, 5, 5)
>>> example_batch = data[:3]
>>> lsuv = LsuvInit(example_batch)
        # Model and trial using LSUV init (preceded by ZeroBias, as recommended above) for some random data
        >>> model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU())
        >>> trial = Trial(model, callbacks=[ZeroBias(), lsuv]).with_train_data(data, data+5)
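
    A ``weight_lambda`` can be supplied when a module's weights are not stored in ``module.weight`` (a minimal
    sketch; the lambda shown simply mirrors the default behaviour): ::

        >>> lsuv = LsuvInit(example_batch, weight_lambda=lambda module: module.weight)
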
Args:
data_item (torch.Tensor): A representative data item to put through the model
        weight_lambda (lambda): A function that takes a module and returns the weight attribute. If ``None``,
            defaults to ``module.weight``
        needed_std (float): See `paper <https://arxiv.org/abs/1511.06422>`__, where needed_std is always 1.0
        std_tol (float): See `paper <https://arxiv.org/abs/1511.06422>`__, Tol_{var}
        max_attempts (int): See `paper <https://arxiv.org/abs/1511.06422>`__, T_{max}
        do_orthonorm (bool): See `paper <https://arxiv.org/abs/1511.06422>`__, first pre-initialise with orthonormal matrices
State Requirements:
        - :attr:`torchbearer.state.MODEL`: The PyTorch model to be initialised
"""
def __init__(self, data_item, weight_lambda=None, needed_std=1.0, std_tol=0.1, max_attempts=10, do_orthonorm=True):
from torchbearer.callbacks.lsuv import LSUV
self.lsuv_init = LSUV
self.data = data_item
self.needed_std = needed_std
self.std_tol = std_tol
self.max_attempts = max_attempts
        self.do_orthonorm = do_orthonorm
self.weight_lambda = weight_lambda
def on_init(self, state):
lsuv = self.lsuv_init()
        state[torchbearer.MODEL] = lsuv.init_model(state[torchbearer.MODEL], self.data, self.weight_lambda,
                                                   self.needed_std, self.std_tol, self.max_attempts, self.do_orthonorm)
@cite(__kaiming__)
class KaimingNormal(WeightInit):
"""Kaiming Normal weight initialisation. Uses `torch.nn.init.kaiming_normal_` on the ``weight`` attribute of the
filtered modules.
Example: ::
>>> import torch
>>> import torch.nn as nn
>>> from torchbearer import Trial
>>> from torchbearer.callbacks.init import KaimingNormal
# 100 random data points
>>> data = torch.rand(100, 3, 5, 5)
>>> example_batch = data[:3]
>>> initialiser = KaimingNormal()
        # Model and trial using Kaiming init for some random data
>>> model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU())
>>> trial = Trial(model, callbacks=[initialiser]).with_train_data(data, data+5)
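
    The mode and nonlinearity can be matched to the network (a minimal sketch; the ``fan_out``/``relu`` values
    follow common practice for ReLU networks rather than the library defaults), with biases zeroed separately: ::

        >>> from torchbearer.callbacks.init import ZeroBias
        >>> initialiser = KaimingNormal(mode='fan_out', nonlinearity='relu')
        >>> trial = Trial(model, callbacks=[ZeroBias(), initialiser]).with_train_data(data, data+5)
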
Args:
        a (float): See `PyTorch kaiming_normal_ <https://pytorch.org/docs/stable/nn.html#torch.nn.init.kaiming_normal_>`_
mode (str): See `PyTorch kaiming_normal_`_
nonlinearity (str): See `PyTorch kaiming_normal_`_
modules (Iterable[nn.Module] or nn.Module, optional): an iterable of nn.Modules or a
single nn.Module that will have weights initialised, otherwise this is retrieved from the model
        targets (list[str]): A list of lookup strings used to match the class names of modules to initialise
See:
`PyTorch kaiming_normal_`_
"""
def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu', modules=None,
targets=['Conv', 'Linear', 'Bilinear']):
def initialiser(module):
init.kaiming_normal_(module.weight.data, a=a, mode=mode, nonlinearity=nonlinearity)
super(KaimingNormal, self).__init__(initialiser, modules=modules, targets=targets)
@cite(__xavier__)
class XavierNormal(WeightInit):
"""Xavier Normal weight initialisation. Uses ``torch.nn.init.xavier_normal_`` on the ``weight`` attribute of the
filtered modules.
Example: ::
>>> import torch
>>> import torch.nn as nn
>>> from torchbearer import Trial
>>> from torchbearer.callbacks.init import XavierNormal
# 100 random data points
>>> data = torch.rand(100, 3, 5, 5)
>>> example_batch = data[:3]
>>> initialiser = XavierNormal()
        # Model and trial using Xavier init for some random data
>>> model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU())
>>> trial = Trial(model, callbacks=[initialiser]).with_train_data(data, data+5)
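
    The ``gain`` can be matched to the network's nonlinearity (a minimal sketch using
    ``torch.nn.init.calculate_gain``, which PyTorch provides for this purpose): ::

        >>> import torch.nn.init as init
        >>> initialiser = XavierNormal(gain=init.calculate_gain('relu'))
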
Args:
        gain (float): See `PyTorch xavier_normal_ <https://pytorch.org/docs/stable/nn.html#torch.nn.init.xavier_normal_>`_
modules (Iterable[nn.Module] or nn.Module, optional): an iterable of nn.Modules or a
single nn.Module that will have weights initialised, otherwise this is retrieved from the model
        targets (list[str]): A list of lookup strings used to match the class names of modules to initialise
See:
`PyTorch xavier_normal_`_
"""
def __init__(self, gain=1, modules=None, targets=['Conv', 'Linear', 'Bilinear']):
def initialiser(module):
init.xavier_normal_(module.weight.data, gain=gain)
super(XavierNormal, self).__init__(initialiser, modules=modules, targets=targets)
class ZeroBias(WeightInit):
"""Zero initialisation for the ``bias`` attributes of filtered modules. This is recommended for use in conjunction
with weight initialisation schemes.
Example: ::
>>> import torch
>>> import torch.nn as nn
>>> from torchbearer import Trial
>>> from torchbearer.callbacks.init import ZeroBias
# 100 random data points
>>> data = torch.rand(100, 3, 5, 5)
>>> example_batch = data[:3]
>>> initialiser = ZeroBias()
        # Model and trial using zero bias init for some random data
>>> model = nn.Sequential(nn.Conv2d(3, 1, 3), nn.ReLU())
>>> trial = Trial(model, callbacks=[initialiser]).with_train_data(data, data+5)
Args:
modules (Iterable[nn.Module] or nn.Module, optional): an iterable of nn.Modules or a
single nn.Module that will have weights initialised, otherwise this is retrieved from the model
        targets (list[str]): A list of lookup strings used to match the class names of modules to initialise
"""
def __init__(self, modules=None, targets=['Conv', 'Linear', 'Bilinear']):
        def initialiser(module):
            if module.bias is not None:  # modules constructed with bias=False have no bias to zero
                module.bias.data.zero_()
super(ZeroBias, self).__init__(initialiser, modules=modules, targets=targets)