Issue
I have built a custom Peephole LSTM Layer for TensorFlow:
import tensorflow as tf
class PeepholeLSTM(tf.keras.layers.Layer):
    """Custom Keras layer: an LSTM recurrence with peephole terms.

    The forget gate adds ``c_peephole * c_tm1`` and the output gate adds
    ``o_peephole * c`` to their pre-activations; the input gate has no
    peephole term.

    Args:
        units: Width of the hidden state and cell state.
        activation: Passed through ``tf.keras.activations.get``; applied to
            the candidate cell value and to the cell state when producing
            the hidden state.
        return_sequences: If true, ``call`` returns the hidden state of every
            time step stacked along axis 1; otherwise only the last one.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, units, activation='tanh', return_sequences=False, **kwargs):
        super(PeepholeLSTM, self).__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create the weights; the last entry of ``input_shape`` is the feature size."""
        input_dim = input_shape[-1]
        # Create weights for the LSTM cell: for each of the forget (f),
        # input (i), candidate (c) and output (o) parts there is an input
        # kernel W, a recurrent kernel U and a bias b.
        self.Wf = self.add_weight(name='Wf', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uf = self.add_weight(name='Uf', shape=(self.units, self.units), initializer='orthogonal')
        self.bf = self.add_weight(name='bf', shape=(self.units,), initializer='zeros')
        self.Wi = self.add_weight(name='Wi', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Ui = self.add_weight(name='Ui', shape=(self.units, self.units), initializer='orthogonal')
        self.bi = self.add_weight(name='bi', shape=(self.units,), initializer='zeros')
        self.Wc = self.add_weight(name='Wc', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uc = self.add_weight(name='Uc', shape=(self.units, self.units), initializer='orthogonal')
        self.bc = self.add_weight(name='bc', shape=(self.units,), initializer='zeros')
        self.Wo = self.add_weight(name='Wo', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uo = self.add_weight(name='Uo', shape=(self.units, self.units), initializer='orthogonal')
        self.bo = self.add_weight(name='bo', shape=(self.units,), initializer='zeros')
        # Element-wise peephole weights (one scalar per unit).
        self.c_peephole = self.add_weight(name='c_peephole', shape=(self.units,), initializer='zeros')
        self.o_peephole = self.add_weight(name='o_peephole', shape=(self.units,), initializer='zeros')
        self.built = True

    def call(self, inputs):
        """Run the recurrence over axis 1 of ``inputs`` (indexed as [batch, time, feature])."""
        # Initialize states
        # NOTE: `inputs.shape` is the *static* shape. When the layer is called
        # on a symbolic Keras input whose batch dimension is unspecified,
        # `batch_size` is None and the `tf.zeros` calls below raise
        # "TypeError: Expected int32, but got None of type 'NoneType'".
        batch_size, seq_length, _ = inputs.shape
        h_tm1 = tf.zeros(shape=(batch_size, self.units))
        c_tm1 = tf.zeros(shape=(batch_size, self.units))
        outputs = []
        for t in range(seq_length):
            x_t = inputs[:, t, :]
            # Forget gate, with a peephole on the previous cell state.
            f = tf.sigmoid(tf.matmul(x_t, self.Wf) + tf.matmul(h_tm1, self.Uf) + self.bf + self.c_peephole * c_tm1)
            # Input gate (no peephole term).
            i = tf.sigmoid(tf.matmul(x_t, self.Wi) + tf.matmul(h_tm1, self.Ui) + self.bi)
            # New cell state: gated old state plus gated candidate.
            c = f * c_tm1 + i * self.activation(tf.matmul(x_t, self.Wc) + tf.matmul(h_tm1, self.Uc) + self.bc)
            # Output gate, with a peephole on the *new* cell state.
            o = tf.sigmoid(tf.matmul(x_t, self.Wo) + tf.matmul(h_tm1, self.Uo) + self.bo + self.o_peephole * c)
            h = o * self.activation(c)
            outputs.append(h)
            # Carry the states over to the next time step.
            h_tm1 = h
            c_tm1 = c
        if self.return_sequences:
            # Stack per-step hidden states along a new time axis (axis 1).
            return tf.stack(outputs, axis=1)
        else:
            return h
I build and compile the model with the following function:
def LSTM(inpshape, oupshape, NHLayer=7, LR=0.002):
    """Build and compile a stack of PeepholeLSTM layers followed by a Dense head.

    Args:
        inpshape: Shape handed to ``keras.Input``; ``inpshape[1]`` is also
            used as the unit count of the first recurrent layer.
        oupshape: Only ``oupshape[1]`` is read; it is the last layer width in
            the interpolation and the width of the final Dense layer.
        NHLayer: Number of layer widths interpolated between ``inpshape[1]``
            and ``oupshape[1]``. One extra PeepholeLSTM precedes these, so
            the model has ``NHLayer + 1`` recurrent layers.
        LR: Learning rate passed to ``keras.optimizers.RMSprop``.

    Returns:
        The compiled ``keras.Model`` (MSE loss, MAE metric).

    NOTE(review): relies on ``keras`` and ``np`` being imported elsewhere in
    the file; those imports are not shown here.
    """
    Input = keras.Input(shape=inpshape)
    # Layer widths, linearly interpolated and rounded to integers.
    Nnode = [int(round(o)) for o in np.linspace(inpshape[1], oupshape[1], NHLayer)]
    rnn = PeepholeLSTM(inpshape[1], activation=tf.nn.elu, return_sequences=True)(Input)
    for o, n in enumerate(Nnode):
        rnn = PeepholeLSTM(n, activation=tf.nn.elu, return_sequences=True)(rnn)
        # Dropout after every third layer of the loop (indices 0, 3, 6, ...),
        # except when that layer has index NHLayer - 1.
        if o % 3 == 0 and o != NHLayer - 1:
            rnn = keras.layers.Dropout(rate=0.2)(rnn)
    Output = keras.layers.Dense(oupshape[1], activation=keras.activations.linear)(rnn)
    model = keras.Model(inputs=Input, outputs=Output)
    optimizer = keras.optimizers.RMSprop(LR)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model
When I run this code I get the following error:
Cell In[3], line 84, in LSTM(inpdata, oupdata, NHLayer, LR)
82 Input = keras.Input(shape=inpshape)
83 Nnode = [int(round(o)) for o in np.linspace(inpshape[1], oupshape[1], NHLayer)]
---> 84 rnn = PeepholeLSTM(inpshape[1], activation=tf.nn.elu, return_sequences=True)(Input)
85 for o, n in enumerate(Nnode):
86 rnn = PeepholeLSTM(max([n, Nnode[0]]), activation=tf.nn.elu, return_sequences=True)(rnn)
File ~/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File /tmp/__autograph_generated_filelmzr4_60.py:12, in outer_factory.<locals>.inner_factory.<locals>.tf__call(self, inputs)
10 ag__.ld(print)(ag__.ld(self).units, ag__.ld(inputs).shape)
11 batch_size, seq_length, _ = ag__.ld(inputs).shape
---> 12 h_tm1 = ag__.converted_call(ag__.ld(tf).zeros, (), dict(shape=(ag__.ld(batch_size), ag__.ld(self).units)), fscope)
13 c_tm1 = ag__.converted_call(ag__.ld(tf).zeros, (), dict(shape=(ag__.ld(batch_size), ag__.ld(self).units)), fscope)
14 outputs = []
TypeError: Exception encountered when calling layer "peephole_lstm" (type PeepholeLSTM).
in user code:
File "/home/hgc-swin/Thesan1/peephole.py", line 39, in call *
h_tm1 = tf.zeros(shape=(batch_size, self.units))
TypeError: Expected int32, but got None of type 'NoneType'.
Call arguments received by layer "peephole_lstm" (type PeepholeLSTM):
• inputs=tf.Tensor(shape=(None, 81, 6), dtype=float32)
I realise this comes from batch_size being None, because the batch dimension is left undefined when the model is built. But isn't it meant to be undefined, so that the model can be applied to a dataset of any size? How would I modify this?
Using TensorFlow version 2.13.0
Solution
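Managed to fix this myself. There were two issues. First, the dimensions have to be read with tf.shape(inputs), which returns the dynamic shape at run time, rather than from inputs.shape, whose batch dimension is None while the model is being built. Second, because the loop now runs over a tensor-valued sequence length, the per-step outputs have to be accumulated in a tf.TensorArray instead of a Python list. Working model is as follows: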
import tensorflow as tf
class PeepholeLSTM(tf.keras.layers.Layer):
    """LSTM layer with peephole terms on the forget and output gates.

    For every time step ``t`` the layer computes::

        f = sigmoid(x_t @ Wf + h_prev @ Uf + bf + c_peephole * c_prev)
        i = sigmoid(x_t @ Wi + h_prev @ Ui + bi)
        c = f * c_prev + i * activation(x_t @ Wc + h_prev @ Uc + bc)
        o = sigmoid(x_t @ Wo + h_prev @ Uo + bo + o_peephole * c)
        h = o * activation(c)

    with ``h_prev`` and ``c_prev`` starting as zeros.

    NOTE(review): the input gate ``i`` has no peephole term; confirm that
    this is intended before relying on the layer as a full peephole LSTM.

    Args:
        units: Width of the hidden state and cell state.
        activation: Anything accepted by ``tf.keras.activations.get``;
            applied to the candidate cell value and to the cell state.
        return_sequences: If true, return the hidden state of every time
            step; otherwise only the final hidden state.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.

    Input is indexed as ``[batch, time, feature]``. The output is
    ``(batch, time, units)`` when ``return_sequences`` is true and
    ``(batch, units)`` otherwise.
    """

    def __init__(self, units, activation='tanh', return_sequences=False, **kwargs):
        super(PeepholeLSTM, self).__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create the weights; the last entry of ``input_shape`` is the feature size."""
        input_dim = input_shape[-1]
        # Weight names and creation order are kept stable so that previously
        # saved weights still load.
        # Forget gate.
        self.Wf = self.add_weight(name='Wf', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uf = self.add_weight(name='Uf', shape=(self.units, self.units), initializer='orthogonal')
        self.bf = self.add_weight(name='bf', shape=(self.units,), initializer='zeros')
        # Input gate.
        self.Wi = self.add_weight(name='Wi', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Ui = self.add_weight(name='Ui', shape=(self.units, self.units), initializer='orthogonal')
        self.bi = self.add_weight(name='bi', shape=(self.units,), initializer='zeros')
        # Candidate cell value.
        self.Wc = self.add_weight(name='Wc', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uc = self.add_weight(name='Uc', shape=(self.units, self.units), initializer='orthogonal')
        self.bc = self.add_weight(name='bc', shape=(self.units,), initializer='zeros')
        # Output gate.
        self.Wo = self.add_weight(name='Wo', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uo = self.add_weight(name='Uo', shape=(self.units, self.units), initializer='orthogonal')
        self.bo = self.add_weight(name='bo', shape=(self.units,), initializer='zeros')
        # Element-wise peephole weights (one scalar per unit).
        self.c_peephole = self.add_weight(name='c_peephole', shape=(self.units,), initializer='zeros')
        self.o_peephole = self.add_weight(name='o_peephole', shape=(self.units,), initializer='zeros')
        self.built = True

    def call(self, inputs):
        """Run the recurrence over the time axis (axis 1) of ``inputs``.

        Returns the batch-major sequence of hidden states when
        ``return_sequences`` is true, otherwise the final hidden state
        (zeros if the sequence is empty).
        """
        # Dynamic sizes: the static batch dimension is None while a Keras
        # model is being built, so it cannot be used to create the states.
        dynamic_shape = tf.shape(inputs)
        batch_size = dynamic_shape[0]
        seq_length = dynamic_shape[1]

        # States and the output buffer follow the input dtype so that they
        # match what the matmuls below produce.
        state_dtype = inputs.dtype
        h_tm1 = tf.zeros(shape=(batch_size, self.units), dtype=state_dtype)
        c_tm1 = tf.zeros(shape=(batch_size, self.units), dtype=state_dtype)
        outputs = tf.TensorArray(state_dtype, size=seq_length)

        for t in tf.range(seq_length):
            x_t = inputs[:, t, :]
            f = tf.sigmoid(tf.matmul(x_t, self.Wf) + tf.matmul(h_tm1, self.Uf) + self.bf + self.c_peephole * c_tm1)
            i = tf.sigmoid(tf.matmul(x_t, self.Wi) + tf.matmul(h_tm1, self.Ui) + self.bi)
            c = f * c_tm1 + i * self.activation(tf.matmul(x_t, self.Wc) + tf.matmul(h_tm1, self.Uc) + self.bc)
            o = tf.sigmoid(tf.matmul(x_t, self.Wo) + tf.matmul(h_tm1, self.Uo) + self.bo + self.o_peephole * c)
            h = o * self.activation(c)
            outputs = outputs.write(t, h)
            h_tm1 = h
            c_tm1 = c

        if self.return_sequences:
            # TensorArray.stack() is time-major (time, batch, units); swap the
            # first two axes so the result is batch-major like the input.
            sequence = tf.transpose(outputs.stack(), perm=[1, 0, 2])
            # Restore the static shape information lost by the dynamic loop.
            sequence.set_shape([inputs.shape[0], inputs.shape[1], self.units])
            return sequence

        # After the loop h_tm1 holds the last hidden state. Returning the
        # loop-carried state (rather than the loop-local `h`) keeps the value
        # defined even when the loop body never runs.
        h_tm1.set_shape([inputs.shape[0], self.units])
        return h_tm1
Answered By - Harry Chittenden
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.