Issue
I want to predict an 8x8 matrix from the original 8x8 matrix, but the weights DO NOT update during training.
I use one simple conv layer to map the input matrix from 1x8x8 to 2x8x8, then another conv layer to map it back from 2x8x8 to 1x8x8. The inputs and outputs in the data folder are generated randomly. The PyTorch code is shown below.
I have already checked some posts about weights not updating. I think something must be wrong with "requires_grad = True" on the data or with loss.backward(). Any suggestions about the code would be appreciated. Thanks in advance.
The input files are data/CM10_1/CM_1.txt, data/CM10_1/CM_2.txt, data/CM10_1/CM_3.txt, and data/CM10_1/CM_4.txt.
The output files are data/CM10_2/CM_1.txt, data/CM10_2/CM_2.txt, data/CM10_2/CM_3.txt, and data/CM10_2/CM_4.txt.
Each CM_i.txt looks like this:
207 244 107 173 70 111 180 244
230 246 233 193 11 97 192 86
32 40 202 189 24 195 70 149
232 247 244 100 209 202 173 57
161 244 167 167 177 47 167 191
24 123 9 43 80 124 41 65
71 204 216 180 242 113 30 129
139 36 238 8 8 164 127 178
data/CM_info_tr.csv looks like this:
CMname,
CM_1.txt,
CM_2.txt,
CM_3.txt,
CM_4.txt,
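For completeness, I generated the random matrices and the CSV index with a short script along these lines (a sketch; the exact random values do not matter):

import os
import numpy as np

# create the input/output folders and fill them with random 8x8 int matrices
os.makedirs("data/CM10_1", exist_ok=True)
os.makedirs("data/CM10_2", exist_ok=True)

with open("data/CM_info_tr.csv", "w") as f:
    f.write("CMname,\n")
    for i in range(1, 5):
        name = "CM_%d.txt" % i
        f.write(name + ",\n")
        np.savetxt(os.path.join("data/CM10_1", name),
                   np.random.randint(0, 256, (8, 8)), fmt="%d")
        np.savetxt(os.path.join("data/CM10_2", name),
                   np.random.randint(0, 256, (8, 8)), fmt="%d")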
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# outline###############################################################
#
# CM10_1/CM_i.txt to predict CM10_2/CM_i.txt
#
# data pair example
# CM10_1/CM_1.txt -> CM10_2/CM_1.txt
#
# CM10_1/CM_1.txt is an 8x8 matrix of random ints
# CM10_2/CM_1.txt is an 8x8 matrix of random ints
#
# The model uses two conv layers
# layer 01 : 1x8x8 -> 2x8x8
# layer 02 : 2x8x8 -> 1x8x8
#
# The loss is the difference between
# CM10_2/CM_1.txt (predicted) and CM10_2/CM_1.txt (target)
#
# main ###############################################################
from __future__ import print_function, division
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch.autograd import Variable

torch.cuda.empty_cache()

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
# test CM parameters
n_Ca = 8
batch_size = 4
#device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"
# define class dataset CMDataset ###################################################
class CMDataset(Dataset):
    """CM dataset"""

    def __init__(self, csv_CM, CM_beg_dir, CM_end_dir, n_Ca=n_Ca):
        """
        Args:
            csv_CM (string): Path to the csv file with the CM file names.
            CM_beg_dir (string): Directory with all the CM begin data.
            CM_end_dir (string): Directory with all the CM end data.
        """
        self.CM_info = pd.read_csv(csv_CM)
        self.CM_beg_dir = CM_beg_dir
        self.CM_end_dir = CM_end_dir

    def __len__(self):
        return len(self.CM_info)  # the number of samples

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        # load and convert CM begin data ---------------------------------------
        CM_beg_path = os.path.join(self.CM_beg_dir, self.CM_info.iloc[idx, 0])
        CM_beg_data = np.loadtxt(CM_beg_path)
        CM_beg_data = CM_beg_data.reshape(1, n_Ca, n_Ca)
        CM_beg_data = CM_beg_data.astype(np.float32)
        CM_beg_data = torch.from_numpy(CM_beg_data)
        CM_beg_data = CM_beg_data.to(device)
        # load and convert CM end data -----------------------------------------
        CM_end_path = os.path.join(self.CM_end_dir, self.CM_info.iloc[idx, 0])
        CM_end_data = np.loadtxt(CM_end_path)
        CM_end_data = CM_end_data.reshape(1, n_Ca, n_Ca)
        CM_end_data = CM_end_data.astype(np.float32)
        CM_end_data = torch.from_numpy(CM_end_data)
        CM_end_data = CM_end_data.to(device)
        return CM_beg_data, CM_end_data
# define class model CMNet ###################################################
class CMNet(nn.Module):
    def __init__(self):
        super(CMNet, self).__init__()
        self.lay_CM_01 = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=1, stride=1, bias=True)
        self.lay_CM_02 = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=1, stride=1, bias=True)

    def forward(self, CM_data):
        [n_in_batch, n_in_chan, n_in_hei, n_in_wid] = CM_data.shape
        n_Ca = n_in_hei
        out1_1 = self.lay_CM_01(CM_data)
        out1_2 = out1_1
        out1_3 = self.lay_CM_02(out1_2)
        out = out1_3
        return out
# load data for training and validation
CM_dataset_train = CMDataset(csv_CM='data/CM_info_tr.csv',
                             CM_beg_dir='data/CM10_1/',
                             CM_end_dir='data/CM10_2/',
                             n_Ca=n_Ca)
train_dataloader = DataLoader(CM_dataset_train,
                              batch_size=batch_size,
                              shuffle=True)
# training parameter
learning_rate = 2
epochs = 5
model = CMNet()
model = model.to(device)
# Initialize the loss function
loss_fn = nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# define train loop ###############################################################
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X1, Y) in enumerate(dataloader):
        X1 = X1.to(torch.float32)
        Y = Y.to(torch.float32)
        # Compute prediction and loss
        X1 = torch.autograd.Variable(X1)
        pred = model(X1)
        pred = torch.autograd.Variable(pred)
        # compute loss
        loss = loss_fn(pred, Y)
        loss = Variable(loss, requires_grad = True)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss, current = loss.item(), batch * len(X1)
        print(f" loss:{loss:>15f}, [{current:>5d}/{size:>5d}]")
# Train ###############################################################
for t in range(epochs):
    print(f"Epoch {t+1}\n----------------------------------------------")
    # print(list(model.parameters()))
    train_loop(train_dataloader, model, loss_fn, optimizer)
# print("Train and Valid Done!")
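To confirm the symptom, I also compare a snapshot of one weight tensor before and after a training pass (a quick check I added for debugging; lay_CM_01 is the first conv layer defined above):

# quick check: does any weight change after one training pass?
w_before = model.lay_CM_01.weight.detach().clone()
train_loop(train_dataloader, model, loss_fn, optimizer)
print("weights changed:", not torch.equal(w_before, model.lay_CM_01.weight))
# for me this prints: weights changed: False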
Solution
What PyTorch version are you using? Variable has been deprecated for about five years now. Remove the lines loss = Variable(loss, requires_grad = True) and pred = torch.autograd.Variable(pred); that should do the trick. Wrapping the loss in a fresh Variable creates a new leaf tensor that is detached from the computation graph, so loss.backward() never reaches the model parameters and optimizer.step() has nothing to update. Read the current documentation and don't rely on archaic tutorials.
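For reference, the loop body with those lines removed might look like this (a minimal sketch based on the code in the question; the redundant Variable wrap around X1 is dropped too, since modern tensors carry autograd state themselves):

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X1, Y) in enumerate(dataloader):
        X1 = X1.to(torch.float32)
        Y = Y.to(torch.float32)
        pred = model(X1)          # forward pass keeps the autograd graph intact
        loss = loss_fn(pred, Y)   # loss stays connected to the model parameters
        optimizer.zero_grad()
        loss.backward()           # gradients now reach the conv weights
        optimizer.step()
        print(f" loss:{loss.item():>15f}, [{batch * len(X1):>5d}/{size:>5d}]")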
Answered By - KonstantinosKokos