跳转至

TensorBoardX

TensorBoard 是用于提供机器学习工作流程期间所需的测量和可视化的工具。它使您能够跟踪实验指标,例如损失和准确性,可视化模型图,将嵌入物投影到较低维度的空间等等。

安装TensorBoardX

这里演示为Pytorch框架使用 TensorBoardX 可视化,创建一个 Pytorch 框架的实例,然后进行如下操作。

安装tensorboardX

~# pip install tensorboardX

可以选择安装crc32c以加快速度

~# pip install crc32c

从tensorboardX 2.1开始,需要为add_audio()函数安装soundfile

~# pip install soundfile

#安装soundfile所需要的依赖
~# apt-get update -y && apt-get install libsndfile1 -y

上传代码

这里通过tensorboardX的项目提供的代码来运行,大家在训练的过程中需要使用自己的代码并上传到实例中。

~# git clone https://github.com/lanpa/tensorboardX.git
#查看tensorboardX的项目提供代码的示例,主要查看如何调用TensorBoardX进行展示
~# cat tensorboardX/examples/demo.py
import torch
import torchvision.utils as vutils
import numpy as np
import torchvision.models as models
from torchvision import datasets
from tensorboardX import SummaryWriter
import datetime

try:
    import soundfile
    skip_audio = False
except ImportError:
    skip_audio = True

resnet18 = models.resnet18(False)
writer = SummaryWriter()
sample_rate = 44100
freqs = [262, 294, 330, 349, 392, 440, 440, 440, 440, 440, 440]

true_positive_counts = [75, 64, 21, 5, 0]
false_positive_counts = [150, 105, 18, 0, 0]
true_negative_counts = [0, 45, 132, 150, 150]
false_negative_counts = [0, 11, 54, 70, 75]
precision = [0.3333333, 0.3786982, 0.5384616, 1.0, 0.0]
recall = [1.0, 0.8533334, 0.28, 0.0666667, 0.0]


for n_iter in range(100):
    s1 = torch.rand(1)  # value to keep
    s2 = torch.rand(1)
    # data grouping by `slash`
    writer.add_scalar('data/scalar_systemtime', s1[0], n_iter, summary_description="# markdown is supported!")
    # data grouping by `slash`
    writer.add_scalar('data/scalar_customtime', s1[0], n_iter, walltime=n_iter, display_name="dudubird")
    writer.add_scalars('data/scalar_group', {"xsinx": n_iter * np.sin(n_iter),
                                             "xcosx": n_iter * np.cos(n_iter),
                                             "arctanx": np.arctan(n_iter)}, n_iter)
    x = torch.rand(32, 3, 64, 64)  # output from network
    if n_iter % 10 == 0:
        x = vutils.make_grid(x, normalize=True, scale_each=True)
        writer.add_image('Image', x, n_iter)  # Tensor
        writer.add_image_with_boxes('imagebox_label', torch.ones(3, 240, 240) * 0.5,
             torch.Tensor([[10, 10, 100, 100], [101, 101, 200, 200]]),
             n_iter,
             labels=['abcde' + str(n_iter), 'fgh' + str(n_iter)])
        if not skip_audio:
            x = torch.zeros(sample_rate * 2)
            for i in range(x.size(0)):
                # sound amplitude should in [-1, 1]
                x[i] = np.cos(freqs[n_iter // 10] * np.pi *
                            float(i) / float(sample_rate))
            writer.add_audio('myAudio', x, n_iter)
        writer.add_text('Text', 'text logged at step:' + str(n_iter), n_iter)
        writer.add_text('markdown Text', '''a|b\n-|-\nc|d''', n_iter)
        for name, param in resnet18.named_parameters():
            if 'bn' not in name:
                writer.add_histogram(name, param, n_iter)
        writer.add_pr_curve('xoxo', np.random.randint(2, size=100), np.random.rand(
            100), n_iter)  # needs tensorboard 0.4RC or later
        writer.add_pr_curve_raw('prcurve with raw data', true_positive_counts,
                                false_positive_counts,
                                true_negative_counts,
                                false_negative_counts,
                                precision,
                                recall, n_iter)
# export scalar data to JSON for external processing
writer.export_scalars_to_json("./all_scalars.json")

dataset = datasets.MNIST('mnist', train=False, download=True)
images = dataset.data[:100].float()
label = dataset.targets[:100]
features = images.view(100, 784)
writer.add_embedding(features, metadata=label, label_img=images.unsqueeze(1))
writer.add_embedding(features, global_step=1, tag='noMetadata')
images_train = dataset.data[100:200].float()
labels_train = dataset.targets[100:200]
features_train = images_train.view(100, 784)

all_features = torch.cat((features, features_train))
all_labels = torch.cat((label, labels_train))
all_images = torch.cat((images, images_train))
dataset_label = ['test'] * 100 + ['train'] * 100
all_labels = list(zip(all_labels, dataset_label))

writer.add_embedding(all_features, metadata=all_labels, label_img=all_images.unsqueeze(1),
                     metadata_header=['digit', 'dataset'], global_step=2)

# VIDEO
vid_images = dataset.data[:16 * 48]
vid = vid_images.view(16, 48, 1, 28, 28)  # BxTxCxHxW

writer.add_video('video', vid_tensor=vid)
writer.add_video('video_1_fps', vid_tensor=vid, fps=1)

writer.close()

writer.add_scalar('implicit reopen writer', 100, 0)

运行程序

下面示例中通过 tmux 程序来托管程序运行。

#创建一个demo的tmux窗口
~# tmux new -s demo

#运行程序
~# python tensorboardX/examples/demo.py
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist/MNIST/raw/train-images-idx3-ubyte.gz

运行TensorBoardX

上个步骤通过tmux运行了python项目,这里需要重新打开一个ssh终端窗口。

#启动TensorboardX前,需要先关闭官方镜像中安装的tensorboard
~# supervisord ctl stop tensorboard

#启动TensorBoardX也通过tmux程序托管运行
~# tmux new -s tensorboard
~# tensorboard --logdir runs --host 0.0.0.0

访问TensorBoardX

打开 恒源云控制台,然后找到当前运行实例的Tensorboard进行访问。