Issue
I am trying to convert a notebook for a CNN-LSTM model from Keras to PyTorch. I am struggling with the dimensions/shapes in the model definition.
def build_model():
    """Build and compile the Keras CNN + bidirectional-LSTM OCR model.

    The network takes a (200, 50, 1) float image plus a label sequence, runs
    two conv/max-pool blocks, a dense projection, two bidirectional LSTMs and
    a 20-way softmax, then feeds labels and predictions to a ``CTCLayer``.

    Returns:
        The compiled ``keras.models.Model`` named ``"ocr_cnn_lstm_model"``.
    """
    # Model inputs: the image and its (variable-length) label sequence.
    image_in = layers.Input(shape=(200, 50, 1), name="image", dtype="float32")
    label_in = layers.Input(name="label", shape=(None,), dtype="float32")

    # Two conv blocks that differ only in filter count and layer names.
    x = image_in
    for filters, conv_name, pool_name in ((32, "Conv1", "pool1"), (64, "Conv2", "pool2")):
        x = layers.Conv2D(
            filters,
            (3, 3),
            activation="relu",
            kernel_initializer="he_normal",
            padding="same",
            name=conv_name,
        )(x)
        x = layers.MaxPooling2D((2, 2), name=pool_name)(x)

    # Both max-pools use pool size 2, so the feature maps are 4x smaller per
    # spatial axis and the last conv has 64 filters. Reshape to a sequence of
    # 50 steps with 768 features each before handing over to the RNN part.
    x = layers.Reshape(target_shape=(50, 768), name="reshape")(x)
    x = layers.Dense(64, activation="relu", name="dense1")(x)
    x = layers.Dropout(0.2)(x)

    # Recurrent part: two stacked bidirectional LSTMs that keep the sequence.
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)

    # Per-step class distribution: 20 = 19 characters + UKN.
    x = layers.Dense(20, activation="softmax", name="dense2")(x)

    # The CTC layer computes the CTC loss at each step from labels and predictions.
    ctc_out = CTCLayer(name="ctc_loss")(label_in, x)

    # Assemble, compile and return the model.
    model = keras.models.Model(
        inputs=[image_in, label_in],
        outputs=ctc_out,
        name="ocr_cnn_lstm_model",
    )
    model.compile(optimizer=keras.optimizers.Adam())
    return model
Currently I only have the first two convolutional layers, and they are already not working:
# X_train Shape: (832, 1, 50, 200)
# First PyTorch attempt from the question, kept exactly as posted: calling it
# on X_train fails inside self.out with the shape error shown in the traceback below.
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# Defining a 2D convolution layer
self.conv = nn.Conv2d(1, 32, kernel_size=3, padding = 'same')
# Defining another 2D convolution layer
# NOTE: conv2 is created here but forward() never applies it.
self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding ='same')
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
self.relu = nn.ReLU(inplace=True)
# in_features is 64 * 7 * 7 = 3136, but for the (832, 1, 50, 200) input
# forward() flattens a (32, 25, 100) map, i.e. 80000 values per sample;
# this mismatch is the "832x80000 and 3136x10" error.
self.out = nn.Linear(64 * 7 * 7, 10)
# Defining the forward pass
def forward(self, x):
x = self.conv(x)
x = self.relu(x)
x = self.pool(x)
# Debug output of the shape after conv -> relu -> pool.
print(x.shape)
# Flatten everything except the batch dimension.
x = x.view(x.size(0),-1)
# NOTE: the result is bound to capital `X`, so the `return x` below would
# return the flattened features rather than the linear layer's output.
X = self.out(x)
return x
It would be appreciated if someone could help me out with the input shapes (especially in nn.Linear but I doubt the rest corresponds to the initial notebook either).
When I try to run the model I get:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_9064/4102025856.py in <module>
----> 1 out = model(torch.Tensor(X_train))
~/env/neural/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/tmp/ipykernel_9064/3113669837.py in forward(self, x)
25 x = x.view(x.size(0),-1)
26
---> 27 X = self.out(x)
28 return x
~/env/neural/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
~/env/neural/lib/python3.7/site-packages/torch/nn/modules/linear.py in forward(self, input)
94
95 def forward(self, input: Tensor) -> Tensor:
---> 96 return F.linear(input, self.weight, self.bias)
97
98 def extra_repr(self) -> str:
~/env/neural/lib/python3.7/site-packages/torch/nn/functional.py in linear(input, weight, bias)
1845 if has_torch_function_variadic(input, weight):
1846 return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1847 return torch._C._nn.linear(input, weight, bias)
1848
1849
RuntimeError: mat1 and mat2 shapes cannot be multiplied (832x80000 and 3136x10)
Thanks in advance.
Solution
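This works. You were not using the correct input size for the linear layer: after the convolution and the pooling step the tensor has shape (batch, 32, 26, 101), so `nn.Linear` must take 32 * 26 * 101 input features.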
class Net(nn.Module):
    """One conv block followed by a linear classifier for (N, 1, 50, 200) inputs.

    The layer sizes are hard-coded for single-channel 50x200 images: ``conv``
    (3x3 kernel, padding 2) yields 32 feature maps of 52x202, and the 2x2
    max-pool halves that to 26x101. That is where the ``32 * 26 * 101`` input
    size of ``out`` comes from.
    """

    def __init__(self):
        super().__init__()
        # First 2D convolution layer: 1 input channel -> 32 feature maps.
        self.conv = nn.Conv2d(1, 32, kernel_size=3, padding=2)
        # Second 2D convolution layer. It is registered but not applied in
        # forward(); wiring it in would change the flattened size used below.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU(inplace=True)
        # 32 channels * 26 rows * 101 columns after conv + pool.
        self.out = nn.Linear(32 * 26 * 101, 10)

    def forward(self, x):
        """Return scores of shape (N, 10) for a batch ``x`` of shape (N, 1, 50, 200)."""
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)
        # x is now (N, 32, 26, 101), e.g. torch.Size([832, 32, 26, 101]).
        x = x.view(x.size(0), -1)
        # Return the linear layer's output. The original bound it to an unused
        # `X` and returned the flattened features instead.
        return self.out(x)
if __name__ == "__main__":
    # Smoke test: run one random batch with the X_train shape
    # (832 samples, 1 channel, 50x200) through a freshly built network.
    x = torch.randn(832, 1, 50, 200)
    net = Net()
    out = net(x)
Answered By - Chopin
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.