Issue
I added two VAEs to the original model, so I need to add optimizer and loss. However, the following errors are reported. How can I modify them?
Traceback (most recent call last):
File "train.py", line 320, in <module>
main()
File "train.py", line 315, in main
ImgCla.TrainingData()
File "train.py", line 201, in TrainingData
lossv1 = self.loss_function(recon_audio, audio1, mean1, logstd1)
File "train.py", line 135, in loss_function
BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
File "/home/user1/.conda/envs/tyz/lib/python3.6/site-packages/torch/nn/functional.py", line 2762, in binary_cross_entropy
return torch._C._nn.binary_cross_entropy(input, target, weight, reduction_enum)
RuntimeError: the derivative for 'target' is not implemented
The train.py is as follows:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import dataloader
import pandas
import os
import imp
import model
import math
import time
import matplotlib.pyplot as plt
import seaborn as sn
from tqdm import tqdm
from sklearn.metrics import classification_report,accuracy_score
import training_plot
from sklearn.metrics import confusion_matrix
import torch.nn.functional as F
from model import VAE1,VAE2
config = imp.load_source("config","config/Resnet50.py").config
device_ids = config["device_ids"]
data_train_opt = config['data_train_opt']
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
print("======================================")
print("Device: {}".format(device_ids))
def fix_bn(m):
classname = m.__class__.__name__
if classname.find('BatchNorm') != -1:
m.eval()
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, name, fmt=':f'):
self.name = name
self.fmt = fmt
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def __str__(self):
fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
return fmtstr.format(**self.__dict__)
class ProgressMeter(object):
def __init__(self, num_batches, meters, prefix=""):
self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
self.meters = meters
self.prefix = prefix
def display(self, batch):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
print('\t'.join(entries))
def _get_batch_fmtstr(self, num_batches):
num_digits = len(str(num_batches // 1))
fmt = '{:' + str(num_digits) + 'd}'
return '[' + fmt + '/' + fmt.format(num_batches) + ']'
def adjust_learning_rate(optimizer, epoch, args):
"""Decay the learning rate based on schedule"""
lr = args.lr
if args.cos: # cosine lr schedule
lr *= 0.5 * (1. + math.cos(math.pi * epoch / args.epochs))
else: # stepwise lr schedule
for milestone in args.schedule:
lr *= 0.1 if epoch >= milestone else 1.
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k]
correct_k = torch.sum(correct_k).float()
res.append(correct_k.mul_(100.0 / batch_size))
return res
class ImageClassify(object):
def __init__(self):
self.name_list = []
self.model = model.Mixed_model(data_train_opt["dim"])
self.model = torch.nn.DataParallel(self.model, device_ids=device_ids)
self.model = self.model.cuda(device=device_ids[0])
self.save = data_train_opt["final_model_file"]
self.training_save = data_train_opt["feat_training_file"]
self.training_log = data_train_opt["training_log"]
self.loss = 9999
self.best = 0
self.train_dataset = dataloader.Load_Data(config["data_dir"],"train")
self.trainloader = DataLoader(self.train_dataset, batch_size=data_train_opt['batch_size']*len(device_ids),num_workers=8,shuffle=True,drop_last=False)
self.valid_dataset = dataloader.Load_Data(config["data_dir"],"val")
self.validloader = DataLoader(self.valid_dataset,batch_size=data_train_opt['batch_size']*len(device_ids),num_workers=8,shuffle=True)
self.LossFun()
print("Trainloader: {}".format(len(self.trainloader)))
print("Validloader: {}".format(len(self.validloader)))
self.vae1 = VAE1().cuda()
self.vae2 = VAE2().cuda()
def loss_function(self,recon_x, x, mean, std):
BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
var = torch.pow(torch.exp(std), 2)
KLD = -0.5 * torch.sum(1 + torch.log(var) - torch.pow(mean, 2) - var)
return BCE+KLD
def loss_function2(self,recon_x, x, mean, std):
BCE = F.binary_cross_entropy(recon_x, x, reduction='sum')
var = torch.pow(torch.exp(std), 2)
KLD = -0.5 * torch.sum(1 + torch.log(var) - torch.pow(mean, 2) - var)
return BCE + KLD
def LossFun(self):
print("lossing...")
self.criterion = nn.CrossEntropyLoss()
self.optimizer = optim.Adam(self.model.parameters(), lr=data_train_opt['lr'])
VAE needs to introduce reconstruction error, which is added to my previous model, so I first updated the parameters of the model and trained the previous model. Update the parameters of VAE and train VAE. When training VAE, I want to fix the parameters of other parts, so I add this part:
for name,param in model.Mixed_model().named_parameters():
if 'video' in name:
param.requires_grad=False
if 'audio_net' in name:
param.requires_grad=False
if 'classifier' in name:
param.requires_grad=False
self.optimizer2 = optim.Adam(filter(lambda param:param.requires_grad,model.Mixed_model().parameters()), lr=data_train_opt['lr'])
def TrainingData(self):
self.model.train()
log = []
for epoch in range(data_train_opt['epoch']):
if (epoch+1) % data_train_opt["decay_epoch"] == 0 :
for param_group in self.optimizer.param_groups:
param_group['lr'] = param_group['lr']*data_train_opt["decay_rate"]
batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
top1 = AverageMeter('Acc@1', ':6.2f')
progress = ProgressMeter(
len(self.trainloader),
[batch_time, data_time, losses,top1],
prefix="Epoch: [{}]".format(epoch+1))
# switch to train mode
self.model.train()
end = time.time()
for i, (img,audio, class_id) in enumerate(self.trainloader):
# measure data loading time
data_time.update(time.time() - end)
img,audio,class_id = img.cuda(device=device_ids[0]),audio.cuda(device=device_ids[0]),class_id.cuda(device=device_ids[0])
predict,audio1,img1= self.model(img,audio)
loss = self.criterion(predict, class_id)
# acc1/acc5 are (K+1)-way contrast classifier accuracy
# measure accuracy and record loss
acc1= accuracy(predict, class_id, topk=(1,))
losses.update(loss.item(), img.size(0))
top1.update(acc1[0], img.size(0))
self.optimizer.zero_grad()
loss.backward(retain_graph=True)
self.optimizer.step()
z1, logstd1, mean1, eps1,recon_audio = self.vae1(audio1)
z2, logstd2, mean2, eps2,recon_img = self.vae2(img1)
lossv1 = self.loss_function(recon_audio, audio1, mean1, logstd1)
lossv2 = self.loss_function2(recon_img, img1, mean2, logstd2)
lossv = lossv2 + lossv1
lossv.backward()
self.optimizer2.zero_grad()
lossv.backward()
self.optimizer2.step()
# measure elapsed time
batch_time.update(time.time() - end)
end = time.time()
if (i+1) % data_train_opt["log_step"] == 0:
loss_avg = losses.avg
acc_avg = top1.avg
log.append([epoch, i + 1, loss.item(), acc1[0], loss_avg, acc_avg])
progress.display(i+1)
if (epoch+1) % data_train_opt["save_epoch"] == 0:
acc, a = self.ValidingData(epoch+1)
if losses.avg <self.loss:
self.loss = losses.avg
a = 1
np.save(data_train_opt["training_log"], log)
if a == 1:
self.save_checkpoint({
'epoch': epoch + 1,
'state_dict': self.model.state_dict(),
'optimizer' : self.optimizer.state_dict(),
'acc':acc
}, filename=os.path.join(data_train_opt["feat_training_file"],'Epoch_{}_acc_{}_loss_{}.pth'.format(epoch+1,acc,losses.avg)))
# }, filename=os.path.join(data_train_opt["feat_training_file"],'checkpoint_{:04d}.pth'.format(epoch+1)))
# }, filename=os.path.join(data_train_opt["feat_training_file"],'best.pth'))
def save_checkpoint(self,state,filename='checkpoint.pth.tar'):
torch.save(state, filename)
def ValidingData(self,epoch):
self.model.eval()
a = 0
with torch.no_grad():
y_pre = []
y_true = []
with tqdm(total=len(self.validloader), desc='Example', leave=True, ncols=100, unit='batch', unit_scale=True) as pbar:
for i, (img,audio,class_id) in enumerate(self.validloader):
img,audio, class_id = img.cuda(device=device_ids[0]),audio.cuda(device=device_ids[0]), class_id.cuda(device=device_ids[0])
predict = self.model(img, audio)
_, pre = torch.max(predict,dim=1)
y_pre.append(pre.cpu())
y_true.append(class_id.cpu())
pbar.update(1)
y_pre = torch.cat(y_pre).cpu().detach().numpy()
y_true = torch.cat(y_true).cpu().detach().numpy()
cm = confusion_matrix(y_true, y_pre)
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
cm.diagonal()
keys = ['airport',
'bus',
'metro',
'metro_station',
'park',
'public_square',
'shopping_mall',
'street_pedestrian',
'street_traffic',
'tram']
values = [np.round(i, decimals=3) for i in list(cm.diagonal())]
df_cm = pandas.DataFrame(cm, index=[i for i in keys], columns=[i for i in keys])
plt.figure(figsize=(15, 12))
sn.heatmap(df_cm, annot=True)
plt.savefig('confusion.png')
report = classification_report(y_true, y_pre, target_names=
['airport',
'bus',
'metro',
'metro_station',
'park',
'public_square',
'shopping_mall',
'street_pedestrian',
'street_traffic',
'tram'], digits=4)
acc = accuracy_score(y_true, y_pre)
if acc>self.best:
a = 1
self.best=acc
print(report)
print("==================")
with open(data_train_opt["txt"],"a") as f:
f.write("========= {} =======\n".format(epoch))
f.write("classification_report".format(epoch))
f.write(report)
f.write("\n")
self.model.train()
if a ==1:
with open(data_train_opt["best"], "a") as f:
f.write("========= {} =======\n".format(epoch))
f.write("classification_report".format(epoch))
f.write(report)
f.write("================\n")
return acc,a
def main():
ImgCla = ImageClassify()
ImgCla.TrainingData()
training_plot.draw(data_train_opt["training_log"])
acc, a = ImgCla.ValidingData(epoch=0)
if __name__ == '__main__':
main()
Solution
The error message refers to the fact you are requiring gradient computation on the target tensor which is not supported by nn.functional.binary_cross_entropy
. In other words you need to detach the target before computing the loss term:
BCE = F.binary_cross_entropy(recon_x, x.detach(), reduction='sum')
In both loss_function
and loss_function2
.
Answered By - Ivan
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.