# Source code for torchbearer.callbacks.tensor_board

import copy
import os

import torch
import torch.nn.functional as F
import torchvision.utils as utils

import torchbearer
from torchbearer.callbacks import Callback

# Registry of live writers, keyed by log directory. Each entry is a dict holding
# the writer itself under 'writer' (tensorboard) and / or 'writer_visdom'
# (visdom), plus a 'references' set of the objects currently using that
# directory. Maintained by `get_writer` and `close_writer` below.
__writers__ = dict()


class VisdomParams:
    """
    Holder for the keyword arguments passed to the visdom client. Override the member variables (on the class or
    on an instance) before initialising tensorboard callbacks to use custom arguments.
    See: `visdom <https://github.com/facebookresearch/visdom#visdom-arguments-python-only>`_
    """
    SERVER = 'http://localhost'
    ENDPOINT = 'events'
    PORT = 8097
    IPV6 = True
    HTTP_PROXY_HOST = None
    HTTP_PROXY_PORT = None
    ENV = 'main'
    SEND = True
    RAISE_EXCEPTIONS = None
    USE_INCOMING_SOCKET = True
    LOG_TO_FILENAME = None

    def __to_dict__(self):
        """
        Collect the parameters into a dictionary with lower-cased keys.

        Values defined on the `VisdomParams` class are taken first and are then overridden by any values set on
        this instance. Names containing a double underscore are left out.

        :return: dict mapping lower-cased parameter names to their values
        """
        params = {}
        # Class level defaults first, instance level overrides second
        for namespace in (VisdomParams.__dict__, self.__dict__):
            for key, value in namespace.items():
                if '__' in key:
                    continue
                params[key.lower()] = value
        return params
def get_writer(log_dir, logger, visdom=False, visdom_params=None):
    """
    Get the writer assigned to the given log directory. If the writer doesn't exist it will be created, and a
    reference to the logger added.

    A log directory can hold a tensorboard writer and a visdom writer at the same time; both share one set of
    references.

    :param log_dir: the log directory
    :param logger: the object requesting the writer. That object should call `close_writer` when its finished
    :param visdom: if true VisdomWriter is returned instead of tensorboard SummaryWriter
    :param visdom_params: Visdom parameter settings object, uses default if None
    :type visdom_params: VisdomParams
    :return: the `SummaryWriter` or `VisdomWriter` object
    """
    # Imported lazily so that tensorboardX is only required once a writer is actually requested
    import tensorboardX
    from tensorboardX import SummaryWriter

    writer_key = 'writer_visdom' if visdom else 'writer'

    entry = __writers__.get(log_dir)
    if entry is None or writer_key not in entry:
        if visdom:
            w = tensorboardX.torchvis.VisdomWriter()
            from visdom import Visdom
            os.makedirs(log_dir, exist_ok=True)
            if visdom_params is None:
                # Default parameters additionally log to a file inside the log directory
                visdom_params = VisdomParams()
                visdom_params.LOG_TO_FILENAME = os.path.join(log_dir, 'log.log')
            w.vis = Visdom(**visdom_params.__to_dict__())
        else:
            w = SummaryWriter(log_dir=log_dir)

        if entry is None:
            entry = {'references': set()}
            __writers__[log_dir] = entry
        # Add to the existing entry rather than replacing it: replacing would orphan a writer of the other kind
        # (it could never be closed) and forget the loggers that already hold a reference.
        entry[writer_key] = w

    entry['references'].add(logger)
    return entry[writer_key]
def close_writer(log_dir, logger):
    """
    Decrement the reference count for a writer belonging to a specific log directory. If the reference count gets
    to zero, the writer will be closed and removed. Directories without a registered writer are ignored.

    :param log_dir: the log directory
    :param logger: the object releasing the writer
    """
    entry = __writers__.get(log_dir)
    if entry is None:
        return

    references = entry['references']
    references.discard(logger)

    # Emptiness test instead of `len(...) is 0`: identity comparison with an int literal only works by accident of
    # the interpreter's small integer cache.
    if not references:
        # A directory may hold a tensorboard writer, a visdom writer, or both
        for writer_key in ('writer', 'writer_visdom'):
            if writer_key in entry:
                entry[writer_key].close()
        del __writers__[log_dir]
class AbstractTensorBoard(Callback):
    """TensorBoard callback which writes metrics to the given log directory. Requires the TensorboardX library for
    python. This base class only manages the writer: it is obtained in `on_start` and released in `on_end`.

    :param log_dir: The tensorboard log path for output
    :type log_dir: str
    :param comment: Descriptive comment to append to path
    :type comment: str
    :param visdom: If true, log to visdom instead of tensorboard
    :type visdom: bool
    :param visdom_params: Visdom parameter settings object, uses default if None
    :type visdom_params: VisdomParams
    """

    def __init__(self, log_dir='./logs', comment='torchbearer', visdom=False, visdom_params=None):
        super(AbstractTensorBoard, self).__init__()

        # `raw_log_dir` keeps the directory exactly as given; `log_dir` is replaced in `on_start` by the
        # model specific sub directory.
        self.raw_log_dir = log_dir
        self.log_dir = log_dir
        self.comment = comment
        self.writer = None
        self.visdom = visdom
        self.visdom_params = visdom_params

    def get_writer(self, log_dir=None, visdom=False, visdom_params=None):
        """
        Get a SummaryWriter for the given directory (or the default writer if the directory is not given). If you
        are getting a `SummaryWriter` for a custom directory, it is your responsibility to close it using
        `close_writer`.

        :param log_dir: the (optional) directory
        :type log_dir: str
        :param visdom: If true, return VisdomWriter, if false return tensorboard SummaryWriter
        :type visdom: bool
        :param visdom_params: Visdom parameter settings object, uses default if None
        :type visdom_params: VisdomParams
        :return: the `SummaryWriter` or `VisdomWriter`
        """
        if log_dir is None:
            # The default writer is also remembered on the callback
            self.writer = get_writer(self.log_dir, self, visdom=visdom, visdom_params=visdom_params)
            return self.writer
        return get_writer(log_dir, self, visdom=visdom, visdom_params=visdom_params)

    def close_writer(self, log_dir=None):
        """
        Decrement the reference count for a writer belonging to the given log directory (or the default writer if
        the directory is not given). If the reference count gets to zero, the writer will be closed and removed.

        :param log_dir: the (optional) directory
        :type log_dir: str
        """
        close_writer(self.log_dir if log_dir is None else log_dir, self)

    def on_start(self, state):
        """
        Point `log_dir` at the sub directory `<model class name>_<comment>` and obtain the default writer.

        :param state: the current state; the model is read from `state[torchbearer.MODEL]`
        """
        run_name = state[torchbearer.MODEL].__class__.__name__ + '_' + self.comment
        # Always start from the directory given at construction. Joining onto `self.log_dir` would nest one more
        # level every time the same callback instance is started again.
        self.log_dir = os.path.join(self.raw_log_dir, run_name)
        self.writer = self.get_writer(visdom=self.visdom, visdom_params=self.visdom_params)

    def on_end(self, state):
        """
        Release the default writer.

        :param state: the current state (unused)
        """
        self.close_writer()
class TensorBoard(AbstractTensorBoard):
    """TensorBoard callback which writes metrics to the given log directory. Requires the TensorboardX library for
    python.

    :param log_dir: The tensorboard log path for output
    :type log_dir: str
    :param write_graph: If True, the model graph will be written using the TensorboardX library
    :type write_graph: bool
    :param write_batch_metrics: If True, batch metrics will be written
    :type write_batch_metrics: bool
    :param batch_step_size: The step size to use when writing batch metrics, make this larger to reduce latency
    :type batch_step_size: int
    :param write_epoch_metrics: If True, metrics from the end of the epoch will be written
    :type write_epoch_metrics: True
    :param comment: Descriptive comment to append to path
    :type comment: str
    :param visdom: If true, log to visdom instead of tensorboard
    :type visdom: bool
    :param visdom_params: Visdom parameter settings object, uses default if None
    :type visdom_params: VisdomParams
    """

    def __init__(self, log_dir='./logs', write_graph=True, write_batch_metrics=False, batch_step_size=10,
                 write_epoch_metrics=True, comment='torchbearer', visdom=False, visdom_params=None):
        # The parent constructor stores `visdom` and `visdom_params` on the instance
        super(TensorBoard, self).__init__(log_dir, comment, visdom, visdom_params)

        self.write_graph = write_graph
        self.write_batch_metrics = write_batch_metrics
        self.batch_step_size = batch_step_size
        self.write_epoch_metrics = write_epoch_metrics

        if self.write_graph and not visdom:
            def handle_graph(state):
                # Trace a CPU copy of the model with a random input shaped like the current batch
                dummy = torch.rand(state[torchbearer.X].size(), requires_grad=False)
                model = copy.deepcopy(state[torchbearer.MODEL]).to('cpu')
                self.writer.add_graph(model, (dummy,))
                # The graph only needs writing once, so the handler replaces itself with a no-op
                self._handle_graph = lambda _: ...
            self._handle_graph = handle_graph
        else:
            self._handle_graph = lambda _: ...

        self.batch_log_dir = None
        self.batch_writer = None

    def on_start_epoch(self, state):
        """
        Obtain the writer for batch metrics: a single 'epoch/' directory for visdom, one 'epoch-<n>' directory
        per epoch for tensorboard.

        :param state: the current state; the epoch is read from `state[torchbearer.EPOCH]`
        """
        if self.write_batch_metrics:
            if self.visdom:
                self.batch_log_dir = os.path.join(self.log_dir, 'epoch/')
            else:
                self.batch_log_dir = os.path.join(self.log_dir, 'epoch-' + str(state[torchbearer.EPOCH]))
            # Hand over the user's visdom settings as well, otherwise the batch writer would connect with the
            # default parameters regardless of what was configured.
            self.batch_writer = self.get_writer(self.batch_log_dir, visdom=self.visdom,
                                                visdom_params=self.visdom_params)

    def on_sample(self, state):
        """
        Write the model graph (first call only, and only if enabled).

        :param state: the current state
        """
        self._handle_graph(state)

    def _write_batch_metrics(self, state):
        """Write every metric in the state to the batch writer, on every `batch_step_size`-th batch."""
        if not self.write_batch_metrics or state[torchbearer.BATCH] % self.batch_step_size != 0:
            return

        metrics = state[torchbearer.METRICS]
        for metric in metrics:
            if self.visdom:
                # Visdom uses a single writer for all epochs, so the step is counted across epochs
                step = state[torchbearer.EPOCH] * state[torchbearer.TRAIN_STEPS] + state[torchbearer.BATCH]
                self.batch_writer.add_scalar(metric, metrics[metric], step, main_tag='batch')
            else:
                self.batch_writer.add_scalar('batch/' + metric, metrics[metric], state[torchbearer.BATCH])

    def on_step_training(self, state):
        """
        Write batch metrics after a training step.

        :param state: the current state
        """
        self._write_batch_metrics(state)

    def on_step_validation(self, state):
        """
        Write batch metrics after a validation step.

        :param state: the current state
        """
        self._write_batch_metrics(state)

    def on_end_epoch(self, state):
        """
        Release the per-epoch tensorboard batch writer and write the epoch metrics.

        :param state: the current state
        """
        if self.write_batch_metrics and not self.visdom:
            self.close_writer(self.batch_log_dir)

        if self.write_epoch_metrics:
            metrics = state[torchbearer.METRICS]
            for metric in metrics:
                if self.visdom:
                    self.writer.add_scalar(metric, metrics[metric], state[torchbearer.EPOCH], main_tag='epoch')
                else:
                    self.writer.add_scalar('epoch/' + metric, metrics[metric], state[torchbearer.EPOCH])

    def on_end(self, state):
        """
        Release the default writer and, for visdom, the batch writer that was kept open across epochs.

        :param state: the current state
        """
        super().on_end(state)
        if self.write_batch_metrics and self.visdom:
            self.close_writer(self.batch_log_dir)
class TensorBoardText(AbstractTensorBoard):
    """TensorBoard callback which writes metrics as text to the given log directory. Requires the TensorboardX
    library for python.

    :param log_dir: The tensorboard log path for output
    :type log_dir: str
    :param write_epoch_metrics: If True, metrics from the end of the epoch will be written
    :type write_epoch_metrics: True
    :param write_batch_metrics: If True, batch metrics will be written
    :type write_batch_metrics: bool
    :param log_trial_summary: If True logs a string summary of the Trial
    :type log_trial_summary: bool
    :param batch_step_size: The step size to use when writing batch metrics, make this larger to reduce latency
    :type batch_step_size: int
    :param comment: Descriptive comment to append to path
    :type comment: str
    :param visdom: If true, log to visdom instead of tensorboard
    :type visdom: bool
    :param visdom_params: Visdom parameter settings object, uses default if None
    :type visdom_params: VisdomParams
    """

    def __init__(self, log_dir='./logs', write_epoch_metrics=True, write_batch_metrics=False,
                 log_trial_summary=True, batch_step_size=100, comment='torchbearer', visdom=False,
                 visdom_params=None):
        # The parent constructor stores `visdom` and `visdom_params` on the instance
        super(TensorBoardText, self).__init__(log_dir, comment, visdom, visdom_params)

        self.write_epoch_metrics = write_epoch_metrics
        self.write_batch_metrics = write_batch_metrics
        self.log_trial_summary = log_trial_summary
        self.batch_step_size = batch_step_size
        self.batch_log_dir = None
        self.batch_writer = None
        self.logged_summary = False

    @staticmethod
    def table_formatter(string):
        """
        Turn the string form of a metrics dictionary into an HTML table with one row per metric.

        Braces and single quotes are stripped, the remainder is split on commas into `name: value` pairs. A
        fragment without a colon (e.g. the tail of a value that itself contains a comma) is appended to the value
        of the preceding row; an empty dictionary yields a table with only the header.

        :param string: the string form of the metrics, e.g. `str(state[torchbearer.METRICS])`
        :type string: str
        :return: the HTML table
        """
        table = '<table><th>Metric</th><th>Value</th>'
        string = string.replace('{', '').replace('}', '').replace("'", "")  # TODO: Replace this with single pass regex

        def cell(content):
            return '<td>' + content + '</td>'

        def row(content):
            return '<tr>' + content + '</tr>'

        rows = []
        for fragment in string.split(','):
            # Split on the first colon only, so that values containing colons are kept whole
            name, separator, value = fragment.partition(':')
            if separator:
                rows.append([name, value])
            elif rows:
                # No name in this fragment: it is the continuation of a value that contained a comma
                rows[-1][1] += ',' + fragment

        return table + ''.join(row(cell(name) + cell(value)) for name, value in rows) + '</table>'

    def on_start(self, state):
        """
        Obtain the default writer and, once per callback, log the string form of the trial.

        :param state: the current state; the trial is read from `state[torchbearer.SELF]`
        """
        super().on_start(state)
        if self.log_trial_summary and not self.logged_summary:
            self.logged_summary = True
            self.writer.add_text('trial', str(state[torchbearer.SELF]).replace('\n', '\n \n'), 1)

    def on_start_epoch(self, state):
        """
        Obtain the writer for batch metrics: a single 'epoch/' directory (in a '-batch' visdom environment) for
        visdom, one 'epoch-<n>' directory per epoch for tensorboard.

        :param state: the current state; the epoch is read from `state[torchbearer.EPOCH]`
        """
        if not self.write_batch_metrics:
            return

        if self.visdom:
            self.batch_log_dir = os.path.join(self.log_dir, 'epoch/')
            # Work on a copy: renaming the environment on the user's own object would add another '-batch'
            # suffix every epoch and leak into every other writer created from the same parameters.
            if self.visdom_params is None:
                batch_params = VisdomParams()
            else:
                batch_params = copy.copy(self.visdom_params)
            batch_params.ENV = batch_params.ENV + '-batch'
            self.batch_writer = self.get_writer(self.batch_log_dir, visdom=self.visdom, visdom_params=batch_params)
        else:
            self.batch_log_dir = os.path.join(self.log_dir, 'epoch-' + str(state[torchbearer.EPOCH]))
            self.batch_writer = self.get_writer(self.batch_log_dir)

    def on_step_training(self, state):
        """
        Write the metrics as a table on every `batch_step_size`-th training batch.

        :param state: the current state
        """
        if self.write_batch_metrics and state[torchbearer.BATCH] % self.batch_step_size == 0:
            table = self.table_formatter(str(state[torchbearer.METRICS]))
            if self.visdom:
                heading = '<h3>Epoch {} - Batch {}</h3>'.format(state[torchbearer.EPOCH], state[torchbearer.BATCH])
                self.batch_writer.add_text('batch', heading + table, 1)
            else:
                self.batch_writer.add_text('batch', table, state[torchbearer.BATCH])

    def on_end_epoch(self, state):
        """
        Release the per-epoch tensorboard batch writer and write the epoch metrics as a table.

        :param state: the current state
        """
        # A new tensorboard batch writer is requested every epoch, so the finished one has to be released here;
        # the visdom batch writer is shared between epochs and released in `on_end`.
        if self.write_batch_metrics and not self.visdom:
            self.close_writer(self.batch_log_dir)

        if self.write_epoch_metrics:
            table = self.table_formatter(str(state[torchbearer.METRICS]))
            if self.visdom:
                heading = '<h4>Epoch {}</h4>'.format(state[torchbearer.EPOCH])
                self.writer.add_text('epoch', heading + table, 1)
            else:
                self.writer.add_text('epoch', table, state[torchbearer.EPOCH])

    def on_end(self, state):
        """
        Release the default writer and, for visdom, the batch writer that was kept open across epochs.

        :param state: the current state
        """
        super().on_end(state)
        if self.write_batch_metrics and self.visdom:
            self.close_writer(self.batch_log_dir)
class TensorBoardImages(AbstractTensorBoard):
    """The TensorBoardImages callback will write a selection of images from the validation pass to tensorboard using
    the TensorboardX library and torchvision.utils.make_grid. Images are selected from the given key and saved to
    the given path. Full name of image sub directory will be model name + _ + comment.

    :param log_dir: The tensorboard log path for output
    :type log_dir: str
    :param comment: Descriptive comment to append to path
    :type comment: str
    :param name: The name of the image
    :type name: str
    :param key: The key in state containing image data (tensor of size [c, w, h] or [b, c, w, h])
    :type key: str
    :param write_each_epoch: If True, write data on every epoch, else write only for the first epoch.
    :type write_each_epoch: bool
    :param num_images: The number of images to write
    :type num_images: int
    :param nrow: See `torchvision.utils.make_grid https://pytorch.org/docs/stable/torchvision/utils.html#torchvision.utils.make_grid`
    :param padding: See `torchvision.utils.make_grid https://pytorch.org/docs/stable/torchvision/utils.html#torchvision.utils.make_grid`
    :param normalize: See `torchvision.utils.make_grid https://pytorch.org/docs/stable/torchvision/utils.html#torchvision.utils.make_grid`
    :param norm_range: See `torchvision.utils.make_grid https://pytorch.org/docs/stable/torchvision/utils.html#torchvision.utils.make_grid`
    :param scale_each: See `torchvision.utils.make_grid https://pytorch.org/docs/stable/torchvision/utils.html#torchvision.utils.make_grid`
    :param pad_value: See `torchvision.utils.make_grid https://pytorch.org/docs/stable/torchvision/utils.html#torchvision.utils.make_grid`
    :param visdom: If true, log to visdom instead of tensorboard
    :type visdom: bool
    :param visdom_params: Visdom parameter settings object, uses default if None
    :type visdom_params: VisdomParams
    """

    def __init__(self, log_dir='./logs', comment='torchbearer', name='Image', key=torchbearer.Y_PRED, write_each_epoch=True, num_images=16, nrow=8, padding=2, normalize=False, norm_range=None, scale_each=False, pad_value=0, visdom=False, visdom_params=None):
        super(TensorBoardImages, self).__init__(log_dir, comment, visdom=visdom, visdom_params=visdom_params)

        # What to collect and how often
        self.name = name
        self.key = key
        self.write_each_epoch = write_each_epoch
        self.num_images = num_images

        # Arguments forwarded to make_grid
        self.nrow = nrow
        self.padding = padding
        self.normalize = normalize
        self.norm_range = norm_range
        self.scale_each = scale_each
        self.pad_value = pad_value

        # Images gathered so far, and whether the grid has already been written
        self._data = None
        self.done = False

    def on_step_validation(self, state):
        """
        Gather images from `state[self.key]` until `num_images` are available, then write them as a single grid.

        :param state: the current state
        """
        if self.done:
            return

        batch = state[self.key].clone()
        if len(batch.size()) == 3:
            batch = batch.unsqueeze(1)

        # Take only as many images from this batch as are still missing
        collected = 0 if self._data is None else self._data.size(0)
        take = min(self.num_images - collected, batch.size(0))
        fresh = batch[:take].to('cpu')
        self._data = fresh if self._data is None else torch.cat((self._data, fresh), dim=0)

        if self._data.size(0) < self.num_images:
            return

        grid = utils.make_grid(
            self._data,
            nrow=self.nrow,
            padding=self.padding,
            normalize=self.normalize,
            range=self.norm_range,
            scale_each=self.scale_each,
            pad_value=self.pad_value
        )
        # For visdom the epoch number is made part of the image name
        tag = (self.name + str(state[torchbearer.EPOCH])) if self.visdom else self.name
        self.writer.add_image(tag, grid, state[torchbearer.EPOCH])

        self.done = True
        self._data = None

    def on_end_epoch(self, state):
        """
        Re-arm the callback for the next epoch when images are to be written on every epoch.

        :param state: the current state (unused)
        """
        if self.write_each_epoch:
            self.done = False
class TensorBoardProjector(AbstractTensorBoard):
    """The TensorBoardProjector callback is used to write images from the validation pass to Tensorboard using the
    TensorboardX library. Images are written to the given directory and, if required, so are associated features.

    :param log_dir: The tensorboard log path for output
    :type log_dir: str
    :param comment: Descriptive comment to append to path
    :type comment: str
    :param num_images: The number of images to write
    :type num_images: int
    :param avg_pool_size: Size of the average pool to perform on the image. This is recommended to reduce the
        overall image sizes and improve latency
    :type avg_pool_size: int
    :param avg_data_channels: If True, the image data will be averaged in the channel dimension
    :type avg_data_channels: bool
    :param write_data: If True, the raw data will be written as an embedding
    :type write_data: bool
    :param write_features: If True, the image features will be written as an embedding
    :type write_features: bool
    :param features_key: The key in state to use for the embedding. Typically model output but can be used to show
        features from any layer of the model.
    :type features_key: str
    """

    def __init__(self, log_dir='./logs', comment='torchbearer', num_images=100, avg_pool_size=1, avg_data_channels=True, write_data=True, write_features=True, features_key=torchbearer.Y_PRED):
        super(TensorBoardProjector, self).__init__(log_dir, comment)

        self.num_images = num_images
        self.avg_pool_size = avg_pool_size
        self.avg_data_channels = avg_data_channels
        self.write_data = write_data
        self.write_features = write_features
        self.features_key = features_key

        # Whether the embeddings for the current epoch have been written, and the tensors gathered so far
        self.done = False
        self._images = None
        self._labels = None
        self._features = None
        self._data = None

    def on_step_validation(self, state):
        """
        Gather images, labels and (optionally) raw data and features until `num_images` are available, then write
        them as embeddings. The raw data embedding is only produced during epoch 0.

        :param state: the current state
        """
        if self.done:
            return

        x = state[torchbearer.X].data.clone()
        if len(x.size()) == 3:
            x = x.unsqueeze(1)
        x = F.avg_pool2d(x, self.avg_pool_size).data

        # Raw data, flattened to one row per image
        data = None
        if state[torchbearer.EPOCH] == 0 and self.write_data:
            data = torch.mean(x, 1) if self.avg_data_channels else x
            data = data.view(data.size(0), -1)

        # Features, flattened to one row per image
        feature = None
        if self.write_features:
            feature = state[self.features_key].data.clone()
            feature = feature.view(feature.size(0), -1)

        label = state[torchbearer.Y_TRUE].data.clone()

        # The first batch starts a fresh collection, later batches extend it; either way only as many
        # samples are taken as are still missing.
        first_batch = state[torchbearer.BATCH] == 0
        collected = 0 if first_batch else self._labels.size(0)
        take = min(self.num_images - collected, label.size(0))

        def merge(previous, fresh):
            fresh = fresh[:take].to('cpu')
            if first_batch:
                return fresh
            return torch.cat((previous, fresh), dim=0)

        self._images = merge(self._images, x)
        self._labels = merge(self._labels, label)
        if data is not None:
            self._data = merge(self._data, data)
        if feature is not None:
            self._features = merge(self._features, feature)

        if self._labels.size(0) < self.num_images:
            return

        if state[torchbearer.EPOCH] == 0 and self.write_data:
            self.writer.add_embedding(self._data, metadata=self._labels, label_img=self._images, tag='data',
                                      global_step=-1)
        if self.write_features:
            self.writer.add_embedding(self._features, metadata=self._labels, label_img=self._images,
                                      tag='features', global_step=state[torchbearer.EPOCH])
        self.done = True

    def on_end_epoch(self, state):
        """
        Re-arm the callback for the next epoch when features are being written.

        :param state: the current state (unused)
        """
        if self.write_features:
            self.done = False