Issue
I used a 3DUnet with resblock to segment a CT image with input torch size of [1, 1, 96, 176, 176], but it throws the following error:
RuntimeError: Sizes of tensors must match except in dimension 2. Got 55 and 54 (The offending index is 0)
I traced the error back and found that it comes from this line:
outputs = self.decoder_stage2(torch.cat([short_range6, long_range3], dim=1)) + short_range6
The short_range6 has torch.Size([1, 64, 24, 55, 40]) while the long_range3 has torch.Size([1, 128, 24, 54, 40]). I think this happens because one of the dimensions is not a power of 2, but I cannot find what to modify.
Below is the complete structure of the network. Thanks a lot for any help!
class ResUNet(nn.Module):
    """3D U-Net style segmentation network with residual additions.

    The encoder halves the spatial size three times (``down_conv1`` to
    ``down_conv3``, each a kernel-2 / stride-2 convolution) and the decoder
    doubles it three times with transposed convolutions.  Because the
    stride-2 convolutions floor odd sizes while the transposed convolutions
    exactly double, the skip connections only line up when every spatial
    dimension is a multiple of 8.  ``forward`` therefore pads the input up
    to the next multiple of 8 and crops the outputs back, so arbitrary
    spatial sizes are accepted.

    Args:
        in_channel: Number of channels of the input volume.
        out_channel: Number of output classes (channels of each softmax map).
        training: Initial value stored in ``self.training``; it controls
            dropout and whether ``forward`` returns all four maps.
    """

    # Three stride-2 downsampling steps => sizes must be multiples of 2 ** 3.
    _SIZE_MULTIPLE = 8

    def __init__(self, in_channel=1, out_channel=2 ,training=True):
        super().__init__()
        self.training = training
        # NOTE: attribute name kept as-is ("dorp" is a typo for "drop") so
        # that external code reading it keeps working.
        self.dorp_rate = 0.2

        # ---- encoder stages (spatial size preserved inside each stage) ----
        self.encoder_stage1 = nn.Sequential(
            nn.Conv3d(in_channel, 16, 3, 1, padding=1),
            nn.PReLU(16),
            nn.Conv3d(16, 16, 3, 1, padding=1),
            nn.PReLU(16),
        )
        self.encoder_stage2 = nn.Sequential(
            nn.Conv3d(32, 32, 3, 1, padding=1),
            nn.PReLU(32),
            nn.Conv3d(32, 32, 3, 1, padding=1),
            nn.PReLU(32),
            nn.Conv3d(32, 32, 3, 1, padding=1),
            nn.PReLU(32),
        )
        # Dilated convolutions; padding == dilation keeps the size unchanged
        # for a 3x3x3 kernel.
        self.encoder_stage3 = nn.Sequential(
            nn.Conv3d(64, 64, 3, 1, padding=1),
            nn.PReLU(64),
            nn.Conv3d(64, 64, 3, 1, padding=2, dilation=2),
            nn.PReLU(64),
            nn.Conv3d(64, 64, 3, 1, padding=4, dilation=4),
            nn.PReLU(64),
        )
        self.encoder_stage4 = nn.Sequential(
            nn.Conv3d(128, 128, 3, 1, padding=3, dilation=3),
            nn.PReLU(128),
            nn.Conv3d(128, 128, 3, 1, padding=4, dilation=4),
            nn.PReLU(128),
            nn.Conv3d(128, 128, 3, 1, padding=5, dilation=5),
            nn.PReLU(128),
        )

        # ---- decoder stages ----
        self.decoder_stage1 = nn.Sequential(
            nn.Conv3d(128, 256, 3, 1, padding=1),
            nn.PReLU(256),
            nn.Conv3d(256, 256, 3, 1, padding=1),
            nn.PReLU(256),
            nn.Conv3d(256, 256, 3, 1, padding=1),
            nn.PReLU(256),
        )
        # Input channels = upsampled features + skip-connection features.
        self.decoder_stage2 = nn.Sequential(
            nn.Conv3d(128 + 64, 128, 3, 1, padding=1),
            nn.PReLU(128),
            nn.Conv3d(128, 128, 3, 1, padding=1),
            nn.PReLU(128),
            nn.Conv3d(128, 128, 3, 1, padding=1),
            nn.PReLU(128),
        )
        self.decoder_stage3 = nn.Sequential(
            nn.Conv3d(64 + 32, 64, 3, 1, padding=1),
            nn.PReLU(64),
            nn.Conv3d(64, 64, 3, 1, padding=1),
            nn.PReLU(64),
            nn.Conv3d(64, 64, 3, 1, padding=1),
            nn.PReLU(64),
        )
        self.decoder_stage4 = nn.Sequential(
            nn.Conv3d(32 + 16, 32, 3, 1, padding=1),
            nn.PReLU(32),
            nn.Conv3d(32, 32, 3, 1, padding=1),
            nn.PReLU(32),
        )

        # ---- downsampling: kernel 2, stride 2 halves each spatial dim ----
        self.down_conv1 = nn.Sequential(
            nn.Conv3d(16, 32, 2, 2),
            nn.PReLU(32)
        )
        self.down_conv2 = nn.Sequential(
            nn.Conv3d(32, 64, 2, 2),
            nn.PReLU(64)
        )
        self.down_conv3 = nn.Sequential(
            nn.Conv3d(64, 128, 2, 2),
            nn.PReLU(128)
        )
        # Stride 1: only changes the channel count (128 -> 256), not the size.
        self.down_conv4 = nn.Sequential(
            nn.Conv3d(128, 256, 3, 1, padding=1),
            nn.PReLU(256)
        )

        # ---- upsampling: kernel 2, stride 2 doubles each spatial dim ----
        self.up_conv2 = nn.Sequential(
            nn.ConvTranspose3d(256, 128, 2, 2),
            nn.PReLU(128)
        )
        self.up_conv3 = nn.Sequential(
            nn.ConvTranspose3d(128, 64, 2, 2),
            nn.PReLU(64)
        )
        self.up_conv4 = nn.Sequential(
            nn.ConvTranspose3d(64, 32, 2, 2),
            nn.PReLU(32)
        )

        # ---- output heads: 1x1x1 conv -> upsample to input size -> softmax ----
        # Full-resolution features (scale factor 1).
        self.map4 = nn.Sequential(
            nn.Conv3d(32, out_channel, 1, 1),
            nn.Upsample(scale_factor=(1, 1, 1), mode='trilinear', align_corners=False),
            nn.Softmax(dim=1)
        )
        # Features at 1/2 resolution.
        self.map3 = nn.Sequential(
            nn.Conv3d(64, out_channel, 1, 1),
            nn.Upsample(scale_factor=(2, 2, 2), mode='trilinear', align_corners=False),
            nn.Softmax(dim=1)
        )
        # Features at 1/4 resolution.
        self.map2 = nn.Sequential(
            nn.Conv3d(128, out_channel, 1, 1),
            nn.Upsample(scale_factor=(4, 4, 4), mode='trilinear', align_corners=False),
            nn.Softmax(dim=1)
        )
        # Features at 1/8 resolution.
        self.map1 = nn.Sequential(
            nn.Conv3d(256, out_channel, 1, 1),
            nn.Upsample(scale_factor=(8, 8, 8), mode='trilinear', align_corners=False),
            nn.Softmax(dim=1)
        )

    def forward(self, inputs):
        """Run the network on a batch of volumes.

        Args:
            inputs: Tensor whose last three dimensions are the spatial
                dimensions (the layers expect ``(N, in_channel, D, H, W)``).
                Spatial sizes need not be multiples of 8; the input is
                replicate-padded internally and the result cropped back.

        Returns:
            When ``self.training is True``: a tuple
            ``(output1, output2, output3, output4)`` of softmax maps, from
            the coarsest decoder stage to the finest.  Otherwise only
            ``output4``.  Every map has the spatial size of ``inputs``.
        """
        depth, height, width = inputs.shape[-3:]
        multiple = self._SIZE_MULTIPLE
        # Amount needed to reach the next multiple (0 if already aligned).
        pad_d = -depth % multiple
        pad_h = -height % multiple
        pad_w = -width % multiple
        padded = bool(pad_d or pad_h or pad_w)
        if padded:
            # F.pad lists the last dimension first: (W_l, W_r, H_l, H_r, D_l, D_r).
            inputs = F.pad(inputs, (0, pad_w, 0, pad_h, 0, pad_d), mode='replicate')

        # ---- encoder ----
        # The residual add relies on broadcasting the input channels against
        # the 16 feature channels, so it works for in_channel == 1 (or 16).
        long_range1 = self.encoder_stage1(inputs) + inputs
        short_range1 = self.down_conv1(long_range1)

        long_range2 = self.encoder_stage2(short_range1) + short_range1
        long_range2 = F.dropout(long_range2, self.dorp_rate, self.training)
        short_range2 = self.down_conv2(long_range2)

        long_range3 = self.encoder_stage3(short_range2) + short_range2
        long_range3 = F.dropout(long_range3, self.dorp_rate, self.training)
        short_range3 = self.down_conv3(long_range3)

        long_range4 = self.encoder_stage4(short_range3) + short_range3
        long_range4 = F.dropout(long_range4, self.dorp_rate, self.training)
        short_range4 = self.down_conv4(long_range4)

        # ---- decoder ----
        outputs = self.decoder_stage1(long_range4) + short_range4
        outputs = F.dropout(outputs, self.dorp_rate, self.training)
        output1 = self.map1(outputs)

        short_range6 = self.up_conv2(outputs)
        outputs = self.decoder_stage2(torch.cat([short_range6, long_range3], dim=1)) + short_range6
        outputs = F.dropout(outputs, self.dorp_rate, self.training)
        output2 = self.map2(outputs)

        short_range7 = self.up_conv3(outputs)
        outputs = self.decoder_stage3(torch.cat([short_range7, long_range2], dim=1)) + short_range7
        outputs = F.dropout(outputs, self.dorp_rate, self.training)
        output3 = self.map3(outputs)

        short_range8 = self.up_conv4(outputs)
        outputs = self.decoder_stage4(torch.cat([short_range8, long_range1], dim=1)) + short_range8
        output4 = self.map4(outputs)

        if padded:
            # Drop the voxels that only exist because of the alignment padding.
            output1, output2, output3, output4 = (
                out[..., :depth, :height, :width]
                for out in (output1, output2, output3, output4)
            )

        if self.training is True:
            return output1, output2, output3, output4
        else:
            return output4
Solution
You can pad your image's dimensions so that they are multiples of 32. By doing this, you won't have to change the 3DUnet's parameters.
Here is a simple snippet that shows how to do it.
# I assume that you named your input image as img and that it is a 5-D
# tensor laid out as (N, C, D, H, W).
# Number of 32-voxel blocks needed to cover each dimension. Rounding up
# (instead of floor + 1) avoids adding a whole extra block when the size
# is already a multiple of 32.
padding1_mult = math.ceil(img.shape[3] / 32)
padding2_mult = math.ceil(img.shape[4] / 32)
pad1 = (32 * padding1_mult) - img.shape[3]
pad2 = (32 * padding2_mult) - img.shape[4]
# A 5-D input with a 6-value padding spec is the documented use of
# ReplicationPad3d: (left, right, top, bottom, front, back).
# Only H (dim 3) and W (dim 4) are padded; the depth (dim 2) is left as is.
padding = nn.ReplicationPad3d((0, pad2, pad1, 0, 0, 0))
img = padding(img)
After this operation, your image shape should be torch.Size([1, 1, 96, 192, 192]).
Answered By - hus
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.