adding grad histograms

parent 849777f5
@@ -138,7 +138,6 @@ class ELBO(CriterionContainer):
         full_loss = full_loss - logdet_error
         logdets = logdets + (float(logdet_error),)
-        pdb.set_trace()
         return full_loss, (rec_errors, reg_errors, *logdets)
......
@@ -63,7 +63,11 @@ class TensorboardHandler(object):
     def add_model(self, model, epoch):
         # self.writer.add_graph(model, verbose=True)
         for n, p in model.named_parameters():
-            self.writer.add_histogram('model/'+n, p.detach().cpu().numpy(), epoch)
+            self.writer.add_histogram('model/params/'+n, p.detach().cpu().numpy(), epoch)
+
+    def add_model_grads(self, model, epoch):
+        for n, p in model.named_parameters():
+            self.writer.add_histogram('model/grads/'+n, p.grad.detach().cpu().numpy(), epoch)

     def add_loss(self, losses, epoch):
         for loss in losses:
@@ -81,6 +85,7 @@ class TensorboardHandler(object):
         self.writer.add_audio(name, audio, sample_rate=sr)
+

 class Monitor(object):
     def __init__(self, model, dataset, loss, labels, plots={}, synth={}, partitions=['train', 'test'],
                  output_folder=None, tasks=None, use_tensorboard=None, **kwargs):
@@ -119,6 +124,9 @@ class Monitor(object):
         if self.writer:
             self.writer.add_image(name, image, epoch)

+    def plot_grads(self, model, epoch=None):
+        if self.writer:
+            self.writer.add_model_grads(model, epoch=epoch)
     def plot(self, out=None, epoch=None, loader=None, trainer=None, **kwargs):
         # plot reconstructions
......
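For reference, here is a minimal standalone sketch of the histogram-logging pattern this commit introduces, using torch.utils.tensorboard directly rather than this repo's TensorboardHandler. The guard for p.grad is None is an extra safety check not present in the committed code, which assumes every parameter received a gradient:

import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/grad_histograms')   # hypothetical log directory
model = nn.Linear(8, 2)

def add_model_grads(writer, model, epoch):
    # Mirrors the new TensorboardHandler.add_model_grads, plus a None guard:
    # parameters that never received a gradient still have p.grad == None.
    for n, p in model.named_parameters():
        if p.grad is None:
            continue
        writer.add_histogram('model/grads/' + n, p.grad.detach().cpu().numpy(), epoch)

x, target = torch.randn(4, 8), torch.randn(4, 2)
loss = nn.functional.mse_loss(model(x), target)
loss.backward()                                  # populates p.grad
add_model_grads(writer, model, epoch=0)
writer.close()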
@@ -20,6 +20,7 @@ def run(self, loader, preprocessing=None, epoch=None, optimize=True, schedule=False, ...
     train_losses = {'main_losses': []}
     if self.reinforcers:
         train_losses['reinforcement_losses'] = []
+    self.plot_grads = True
     for x, y in loader:
         # forward
         # pdb.set_trace()
@@ -30,7 +31,6 @@ def run(self, loader, preprocessing=None, epoch=None, optimize=True, schedule=False, ...
             x = sample_normalize(x)
         self.logger('data preprocessed')
         try:
-            print(torch.cuda.memory_allocated(), torch.cuda.memory_cached())
             out = self.models.forward(x, y=y, epoch=epoch)
             if self.reinforcers:
                 out = self.reinforcers.forward(out, target=x, optimize=False)
@@ -42,12 +42,12 @@ def run(self, loader, preprocessing=None, epoch=None, optimize=True, schedule=False, ...
         except NaNError:
             pdb.set_trace()

         # learn
         self.logger('loss computed')
         if optimize:
             batch_loss.backward(retain_graph=False)
             self.optimize(self.models, batch_loss, epoch=epoch, batch=batch)
+            self.plot_grads = False

         if self.reinforcers:
             _, reinforcement_losses = self.reinforcers(out, target=x, epoch=epoch, optimize=optimize)
......
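The flag pattern above logs gradient histograms once per epoch: run() raises plot_grads before the batch loop, optimize() consumes it on the first optimized batch, and run() lowers it right after optimize() returns. Crucially, the logging happens after backward() and before the step clears or overwrites the gradients, while p.grad is still populated. A runnable mini-version of this gating, with illustrative names rather than the repo's API:

import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

model = nn.Linear(4, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
writer = SummaryWriter('runs/flag_demo')         # hypothetical log directory

for epoch in range(2):
    plot_grads = True                            # run() sets self.plot_grads = True
    for step in range(5):
        x = torch.randn(8, 4)
        loss = model(x).pow(2).mean()
        loss.backward()                          # gradients exist from here on
        if plot_grads:                           # optimize() checks the flag...
            for n, p in model.named_parameters():
                writer.add_histogram('grads/' + n, p.grad, epoch)
            plot_grads = False                   # ...and run() lowers it after the first batch
        opt.step()
        opt.zero_grad()                          # clears p.grad, so log before this
writer.close()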
@@ -63,6 +63,7 @@ class SimpleTrainer(Trainer):
         self.preprocessing = kwargs.get('preprocessing', None)
         self.dataloader_class = kwargs.get('dataloader', self.dataloader_class)
         self.optim_balance = kwargs.get('optim_balance')
+        self.plot_grads = True
         # additional args
         self.trace_mode = kwargs.get('trace_mode', 'epoch')
         self.device = kwargs.get('device')
@@ -90,10 +91,15 @@ class SimpleTrainer(Trainer):
     def get_time(self):
         return process_time() - self.start_time

+    def monitor_grads(self, models, loss, epoch=None):
+        self.monitor.plot_grads(models, epoch=epoch)
+
     def optimize(self, models, loss, epoch=None, batch=None):
         # pdb.set_trace()
         update_model = True if self.optim_balance is None else batch % self.optim_balance[0] == 0
         update_loss = True if self.optim_balance is None else batch % self.optim_balance[1] == 0
+        if self.plot_grads:
+            self.monitor_grads(models, loss, epoch=epoch)
         if update_model:
             apply_method(self.models, 'step', loss)
         if update_loss:
......
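For clarity, the optim_balance gating in optimize(): with optim_balance = (m, l), model parameters are stepped on batches divisible by m and the loss module's parameters on batches divisible by l; when optim_balance is unset, both update on every batch. A small illustrative sketch (the names below are not the repo's):

def update_flags(batch, optim_balance=None):
    # None means no balancing: both updates fire on every batch.
    if optim_balance is None:
        return True, True
    update_model = batch % optim_balance[0] == 0
    update_loss = batch % optim_balance[1] == 0
    return update_model, update_loss

# optim_balance = (1, 2): model steps every batch, loss every other batch.
for batch in range(4):
    print(batch, update_flags(batch, (1, 2)))
# -> 0 (True, True), 1 (True, False), 2 (True, True), 3 (True, False)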