Issue
I am trying to train some machine learning models to predict the price action for 4 chosen stocks from a list of NASDAQ-100 stocks.
I am very new to Python, so I've run into a few issues I have not been able to fix. The first has been while trying to use the LSTM model. I get the following error upon executing my code:
ValueError: Unexpected result of predict_function (Empty batch_outputs). Please use Model.compile(..., run_eagerly=True), or tf.config.run_functions_eagerly(True) for more information of where went wrong, or file a issue/bug to tf.keras.
The model seems to work upon passing in a Dataframe but not a Series with one stock. Why is this? I only want to run the model using one stock (eg. CTSH), so how can I do this?
run_eagerly=True
did not provide any additional information as to what was going wrong.
This is my code:
# Imports
import os
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
from PIL import Image
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
# Chosen stocks from NASDAQ-100
chosen_stocks = ['CTSH', 'BKNG', 'REGN', 'MSFT']
def get_data():
# Get list of tickers
tickers = open("dataset/nasdaq_100_tickers.txt", "r")
data = tickers.read().splitlines()
# Check if the data has already been downloaded, drop NaN values
if os.path.exists('dataframe.csv'):
dataframe = pd.read_csv('dataframe.csv', index_col="Date", parse_dates=True).dropna()
else:
# Download Close data from Yahoo Finance
data = yf.download(tickers=data, period='1y', interval='1d')['Close']
data.to_csv('dataframe.csv')
# Convert array to Pandas dataframe, drop NaN values
complete_data = data.dropna()
dataframe = pd.DataFrame(complete_data)
dataframe.drop(['GEHC'], axis=1, inplace=True) # Dropping GEHC because it contains NULL values
return dataframe
def get_ctsh_data(dataframe):
get_ctsh_data = dataframe.iloc[:, 30]
return get_ctsh_data
# LSTM stock predictions on Close
def lstm_prediction(dataframe):
keras = tf.keras
Sequential = keras.models.Sequential
Dense = keras.layers.Dense
LSTM = keras.layers.LSTM
# Scale down data
scaler = MinMaxScaler()
dataframe = scaler.fit_transform(np.array(dataframe).reshape(-1, 1))
dataframe.shape
# Use 65% of data for training & 35% for testing
train_size = int(len(dataframe) * 0.65)
test_size = len(dataframe) - train_size
train_data, test_data = dataframe[0:train_size, :], dataframe[train_size:len(dataframe), :1]
# Create a data matrix
def create_dataset(dataset, time_step = 1):
input_data, output_data = [], []
for i in range(len(dataset)-time_step-1):
a = dataset[i:(i+time_step), 0]
input_data.append(a)
output_data.append(dataset[i + time_step, 0])
return np.array(input_data), np.array(output_data)
# Calling the create dataset function to split the data into input output datasets with time
# step 100
time_step = 100
input_train, output_train = create_dataset(train_data, time_step)
input_test, output_test = create_dataset(test_data, time_step)
# checking values
print("Checking values:")
print(input_train.shape)
print(input_train)
print(input_test.shape)
print(output_test.shape)
# Create and fit LSTM model - 4 layers (1 input, 2 hidden, 1 Dense output) & 50 neurons
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(input_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam', run_eagerly=True) # Adam optimizer - mean squared error
model.summary()
model.fit(input_train, output_train, validation_data=(input_test, output_test), epochs=10, batch_size=64, verbose=1)
train_predict = model.predict(input_train)
test_predict = model.predict(input_test)
# Transform to original form
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
print("Mean Squared Errors:")
print(math.sqrt(mean_squared_error(output_train, train_predict)))
print(math.sqrt(mean_squared_error(output_test, test_predict)))
# If difference is less than 50 - model is good
look_back = 100 # Takes the number of values behind the current value
train_predict_plot = np.empty_like(dataframe)
train_predict_plot[:, :] = np.nan
train_predict_plot[look_back: len(train_predict) + look_back, :] = train_predict
test_predict_plot = np.empty_like(dataframe)
test_predict_plot[:, :] = np.nan
test_predict_plot[len(train_predict) + (look_back) * 2 + 1: len(dataframe) - 1, :] = test_predict
plt.plot(scaler.inverse_transform(dataframe))
plt.plot(train_predict_plot)
plt.plot(test_predict_plot)
plt.show()
dataframe = get_data()
# Make Series with CTSH Close data
ctsh_data = get_ctsh_data(dataframe)
lstm_prediction(ctsh_data)
My dataframe looks like this:
AAPL ABNB ... ZM ZS
Date ...
2022-12-15 136.500000 90.610001 ... 70.199997 117.169998
2022-12-16 134.509995 89.570000 ... 69.860001 114.209999
2022-12-19 132.369995 85.930000 ... 69.089996 112.269997
2022-12-20 132.300003 87.620003 ... 68.559998 113.540001
2022-12-21 135.449997 87.070000 ... 69.930000 112.769997
... ... ... ... ... ...
2023-11-28 190.399994 127.559998 ... 67.529999 193.850006
2023-11-29 189.369995 126.480003 ... 67.949997 199.839996
2023-11-30 189.949997 126.339996 ... 67.830002 197.529999
2023-12-01 191.240005 135.020004 ... 70.290001 198.029999
2023-12-04 188.669998 134.539993 ... 67.720001 197.919998
The full traceback is:
C:\Users\xxx\source\repos\Project\venv\Scripts\python.exe C:/Users/xxx/source/repos/Project/main.py
Checking values:
(56, 100)
[[0.08179096 0.03462697 0. ... 0.39761184 0.39701493 0.45253732]
[0.03462697 0. 0.00358217 ... 0.39701493 0.45253732 0.4119403 ]
[0. 0.00358217 0.03880605 ... 0.45253732 0.4119403 0.3988059 ]
...
[0.45552235 0.38447744 0.39104464 ... 0.76776083 0.76298506 0.67522381]
[0.38447744 0.39104464 0.35402979 ... 0.76298506 0.67522381 0.6435821 ]
[0.39104464 0.35402979 0.32238808 ... 0.67522381 0.6435821 0.6597013 ]]
(0,)
(0,)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (None, 100, 50) 10400
lstm_1 (LSTM) (None, 100, 50) 20200
lstm_2 (LSTM) (None, 50) 20200
dense (Dense) (None, 1) 51
=================================================================
Total params: 50851 (198.64 KB)
Trainable params: 50851 (198.64 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
1/1 [==============================] - 1s 814ms/step - loss: 0.2619
Epoch 2/10
1/1 [==============================] - 1s 671ms/step - loss: 0.1607
Epoch 3/10
1/1 [==============================] - 1s 670ms/step - loss: 0.0753
Epoch 4/10
1/1 [==============================] - 1s 868ms/step - loss: 0.0173
Epoch 5/10
1/1 [==============================] - 1s 613ms/step - loss: 0.0157
Epoch 6/10
1/1 [==============================] - 1s 615ms/step - loss: 0.0539
Epoch 7/10
1/1 [==============================] - 1s 611ms/step - loss: 0.0489
Epoch 8/10
1/1 [==============================] - 1s 616ms/step - loss: 0.0251
Epoch 9/10
1/1 [==============================] - 1s 596ms/step - loss: 0.0098
Epoch 10/10
1/1 [==============================] - 1s 611ms/step - loss: 0.0079
2/2 [==============================] - 0s 225ms/step
Traceback (most recent call last):
File "C:/Users/xxx/source/repos/Project/main.py", line 395, in <module>
lstm_prediction(ctsh_data)
File "C:/Users/xxx/source/repos/Project/main.py", line 313, in lstm_prediction
test_predict = model.predict(input_test)
File "C:\Users\xxx\source\repos\Project\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\xxx\source\repos\Project\venv\lib\site-packages\keras\src\engine\training.py", line 2579, in predict
raise ValueError(
ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.
Process finished with exit code 1
Any help is appreciated.
Solution
I discovered that the cause of the error was due to setting time_step
to 100 - a number far too high. Setting it to a lower number (eg 10) fixed the issue.
I also had to change look_back
to a lower number as well as a result of this, so I also chose 10.
Thank you everybody for your help!
Answered By - custardbun
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.