Issue
For TensorFlow 2.6, Python 3.9 & CIFAR-10 dataset, I am trying to train a simple Conv Neural Network model defined as follows:
def conv6_cnn():
    """
    Build a Conv-6 CNN for CIFAR-10 (32x32 RGB inputs).

    Architecture:
        64, 64, pool    -- convolutional block
        128, 128, pool  -- convolutional block
        256, 256, pool  -- convolutional block
        256, 256, 10    -- fully connected layers

    Output: Returns the designed (uncompiled) Keras Sequential model.
    """
    model = Sequential()

    # Three conv blocks: two 3x3 same-padded ReLU convolutions each,
    # followed by a 2x2 max-pool. A fresh GlorotNormal initializer is
    # created per layer, matching the per-layer initializers above.
    needs_input_shape = True
    for n_filters in (64, 128, 256):
        for _ in range(2):
            layer_kwargs = dict(
                filters=n_filters, kernel_size=(3, 3),
                activation='relu',
                kernel_initializer=tf.keras.initializers.GlorotNormal(),
                strides=(1, 1), padding='same',
            )
            # Only the very first layer declares the input shape.
            if needs_input_shape:
                layer_kwargs['input_shape'] = (32, 32, 3)
                needs_input_shape = False
            model.add(Conv2D(**layer_kwargs))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

    # Classifier head: flatten, two 256-unit ReLU layers, then a
    # 10-way softmax for the CIFAR-10 classes.
    model.add(Flatten())
    for _ in range(2):
        model.add(
            Dense(
                units=256, activation='relu',
                kernel_initializer=tf.keras.initializers.GlorotNormal(),
            )
        )
    model.add(Dense(units=10, activation='softmax'))
    return model
# Initialize a Conv-6 CNN object-
model = conv6_cnn()
# Define data Augmentation using ImageDataGenerator:
# Initialize and define the image data generator-
# NOTE(review): no validation_split is set here; that matters for any
# later call that passes subset= to datagen.flow().
datagen = ImageDataGenerator(
# featurewise_center=True,
# featurewise_std_normalization=True,
rotation_range = 90,
width_shift_range = 0.1,
height_shift_range = 0.1,
horizontal_flip = True
)
# Compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
# X_train is defined elsewhere (CIFAR-10 training images).
datagen.fit(X_train)
# Compile defined model-
# optimizer and loss_fn are defined elsewhere in the original script.
model.compile(
optimizer = optimizer,
loss = loss_fn,
metrics = ['accuracy']
)
# Define early stopping criterion-
# Stops training when val_loss fails to improve by at least 0.001 for
# 4 consecutive epochs, restoring the best weights seen so far.
early_stopping = tf.keras.callbacks.EarlyStopping(
monitor = 'val_loss', min_delta = 0.001,
patience = 4, verbose = 0,
mode = 'auto', baseline = None,
restore_best_weights = True
)
When I train this CNN model without any data augmentation, using the following code, there is no problem:
# Train model without any data augmentation-
# Fits directly on the in-memory numpy arrays; (X_test, y_test) serve as
# the validation set monitored by the early-stopping callback.
history = model.fit(
x = X_train, y = y_train,
batch_size = batch_size, epochs = num_epochs,
callbacks = [early_stopping],
validation_data = (X_test, y_test)
)
However, when using data (image) augmentation:
# Train model on batches with real-time data augmentation-
# NOTE(review): subset = 'training' is passed here although the
# ImageDataGenerator above was created WITHOUT validation_split —
# that mismatch is what raises the ValueError shown below.
training_history = model.fit(
datagen.flow(
X_train, y_train,
batch_size = batch_size, subset = 'training'
),
validation_data = (X_test, y_test),
# NOTE(review): '/' yields a float; steps_per_epoch is conventionally
# an integer (the accepted answer below uses '//').
steps_per_epoch = len(X_train) / batch_size,
epochs = num_epochs,
callbacks = [early_stopping]
)
it gives the error:
ValueError: Training and validation subsets have different number of classes after the split. If your numpy arrays are sorted by the label, you might want to shuffle them.
Solution
You just have to remove the parameter subset='training', since you did not set a validation_split in the ImageDataGenerator. Both parameters must be set together for the split to work; otherwise, do not use either of them:
# Load CIFAR-10 and one-hot encode the integer labels so they match
# the CategoricalCrossentropy loss used below.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)
# Augmentation generator created WITHOUT validation_split, so no
# 'subset' argument is passed to flow() below.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
# featurewise_center=True,
# featurewise_std_normalization=True,
rotation_range = 90,
width_shift_range = 0.1,
height_shift_range = 0.1,
horizontal_flip = True
)
# Compute featurewise statistics (only needed if the commented-out
# featurewise options above are enabled).
datagen.fit(x_train)
# Compile defined model-
model.compile(
optimizer = tf.keras.optimizers.Adam(),
loss = tf.keras.losses.CategoricalCrossentropy(),
metrics = ['accuracy']
)
# Define early stopping criterion-
early_stopping = tf.keras.callbacks.EarlyStopping(
monitor = 'val_loss', min_delta = 0.001,
patience = 4, verbose = 0,
mode = 'auto', baseline = None,
restore_best_weights = True
)
batch_size = 32
# Train on augmented batches; note the integer division for
# steps_per_epoch and the absence of subset='training'.
training_history = model.fit(
datagen.flow(
x_train, y_train,
batch_size = batch_size
),
steps_per_epoch = len(x_train) // batch_size,
epochs = 2,
callbacks = [early_stopping])
Check the docs for more information.
Answered By - AloneTogether
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.