IBM stock price prediction#
Predicting the next day’s stock price based on historical prices using a cleaned-up version of IBM’s historical stock data sourced from the Nasdaq website.
# Stop execution here by raising SystemExit.
# NOTE(review): presumably a guard against unintentionally running the whole
# notebook top to bottom -- confirm before removing.
raise SystemExit("Stop right there!")
An exception has occurred, use %tb to see the full traceback.
SystemExit: Stop right there!
Importing libraries and packages#
1# System
2import os
3
4# Mathematical operations and data manipulation
5import numpy as np
6import pandas as pd
7import math
8
9# Modelling
10from sklearn.preprocessing import MinMaxScaler
11import tensorflow as tf
12import keras
13from tensorflow.keras import layers
14
15# Plotting
16import matplotlib.pyplot as plt
17from IPython.display import display, HTML
18
19%matplotlib inline
20display(HTML("<style>.container {width:80% !important;}</style>"))
21
22print("Tensorflow version:", tf.__version__)
23print("Keras version:", keras.__version__)
Tensorflow version: 2.4.1
Keras version: 2.4.3
# Level "2" asks TensorFlow's native logging to hide INFO and WARNING messages.
# NOTE(review): this variable is normally read when TensorFlow initialises its
# logging, and tensorflow is already imported by the cell above -- confirm it
# still takes effect here, or move this assignment before the import.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
Set paths#
# Directory the input CSV files are read from.
data_path = "./datasets"
# Directory for saving results to.
assets_path = "./assets"
Loading dataset#
# Load the IBM price history from the datasets directory.
dataset = pd.read_csv(f"{data_path}/IBM.csv")

# Preview the first rows of the table.
dataset.head()
Date | Close | Volume | Open | High | Low | |
---|---|---|---|---|---|---|
0 | 1/24/2020 | 140.56 | 5580189 | 143.39 | 143.9200 | 140.46 |
1 | 1/23/2020 | 142.87 | 5657790 | 144.20 | 144.4097 | 142.15 |
2 | 1/22/2020 | 143.89 | 16470430 | 143.32 | 145.7900 | 142.55 |
3 | 1/21/2020 | 139.17 | 7244079 | 137.81 | 139.3500 | 137.60 |
4 | 1/17/2020 | 138.31 | 5623336 | 136.54 | 138.3300 | 136.16 |
# Preview the last rows of the table.
dataset.tail()
Date | Close | Volume | Open | High | Low | |
---|---|---|---|---|---|---|
2513 | 1/29/2010 | 122.39 | 11571890 | 124.32 | 125.000 | 121.90 |
2514 | 1/28/2010 | 123.75 | 9616132 | 127.03 | 127.040 | 123.05 |
2515 | 1/27/2010 | 126.33 | 8719147 | 125.82 | 126.960 | 125.04 |
2516 | 1/26/2010 | 125.75 | 7135190 | 125.92 | 127.750 | 125.41 |
2517 | 1/25/2010 | 126.12 | 5738455 | 126.33 | 126.895 | 125.71 |
Exploring dataset#
# First look at the closing price against the Date column, in file order.
dataset.plot("Date", "Close")
plt.show()

# The file lists the newest day first (row 0 is 1/24/2020, the last row is
# 1/25/2010), so reverse the row order to make the series run oldest -> newest.
dataset = dataset.sort_index(ascending=False)

# Closing prices as a single-column NumPy array of shape (n_days, 1).
ts_data = dataset.Close.values.reshape(-1, 1)

# Plot the closing-price series in chronological order.
plt.figure(figsize=(14, 5))
plt.plot(ts_data)
plt.show()
Preparing the data#
# Chronological split: the first 75% of the days form the training set and
# the remaining days form the test set.
train_recs = int(len(ts_data) * 0.75)

train_data = ts_data[:train_recs]
test_data = ts_data[train_recs:]

len(train_data), len(test_data)
(1888, 630)
# Scaling: the scaler is fitted on the training split only and then reused
# unchanged on the test split, so the test data does not influence the fit.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data)
test_scaled = scaler.transform(test_data)
def get_lookback(inp, look_back):
    """Build lagged-feature inputs and targets from a time series.

    Column ``i`` of the returned feature array holds the series shifted down
    by ``i + 1`` rows, i.e. the value ``i + 1`` steps before each target row.
    Missing values -- in particular the leading rows that have no earlier
    value -- are filled with 0, so the output keeps one row per input row.

    Args:
        inp: Data accepted by ``pd.DataFrame`` (e.g. an ``(n, 1)`` array).
        look_back: Number of lagged steps used as features; must be >= 1.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the lagged features and the
        unshifted values of ``inp``.

    Raises:
        ValueError: If ``look_back`` is smaller than 1.
    """
    # Without this check pd.concat fails on an empty list with an unrelated
    # "No objects to concatenate" message.
    if look_back < 1:
        raise ValueError(f"look_back must be at least 1, got {look_back}")

    y = pd.DataFrame(inp)
    lagged = pd.concat([y.shift(i) for i in range(1, look_back + 1)], axis=1)
    return lagged.fillna(0).values, y.values
# Use the 10 preceding (scaled) closing prices as the features for each day.
look_back = 10
trainX, trainY = get_lookback(train_scaled, look_back=look_back)
testX, testY = get_lookback(test_scaled, look_back=look_back)

trainX.shape, testX.shape
((1888, 10), (630, 10))
Hybrid model#
# Hybrid (1D conv + RNN) model: the look-back window is reshaped to
# (look_back, 1), passed through a Conv1D layer and a SimpleRNN, with dropout
# applied before the single linear output unit.
model_hybrid = tf.keras.Sequential(
    [
        layers.Reshape((look_back, 1), input_shape=(look_back,)),
        layers.Conv1D(5, 3, activation="relu"),
        layers.SimpleRNN(32),
        layers.Dropout(0.25),
        layers.Dense(1),
        layers.Activation("linear"),
    ]
)

# Mean squared error loss, optimised with Adam.
model_hybrid.compile(loss="mean_squared_error", optimizer="adam")

model_hybrid.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
reshape (Reshape) (None, 10, 1) 0
_________________________________________________________________
conv1d (Conv1D) (None, 8, 5) 20
_________________________________________________________________
simple_rnn (SimpleRNN) (None, 32) 1216
_________________________________________________________________
dropout (Dropout) (None, 32) 0
_________________________________________________________________
dense (Dense) (None, 1) 33
_________________________________________________________________
activation (Activation) (None, 1) 0
=================================================================
Total params: 1,269
Trainable params: 1,269
Non-trainable params: 0
_________________________________________________________________
# Train for 3 epochs with one sample per gradient step (batch_size=1);
# validation_split=0.1 sets aside 10% of the training samples for validation.
model_hybrid.fit(
    trainX, trainY, epochs=3, batch_size=1, verbose=2, validation_split=0.1
)
Epoch 1/3
1699/1699 - 18s - loss: 0.0089 - val_loss: 0.0012
Epoch 2/3
1699/1699 - 20s - loss: 0.0049 - val_loss: 9.7144e-04
Epoch 3/3
1699/1699 - 18s - loss: 0.0037 - val_loss: 7.5165e-04
<tensorflow.python.keras.callbacks.History at 0x7f4c82a63970>
def calculate_performance(model_obj):
    """Print the train and test RMSE of a fitted model.

    Evaluates ``model_obj`` on the notebook-level ``trainX``/``trainY`` and
    ``testX``/``testY`` arrays and prints the square root of each score.
    The square root is an RMSE only when ``evaluate`` returns a single mean
    squared error value, as it does for a model compiled with
    ``loss="mean_squared_error"`` and no extra metrics. The result is in the
    units of the targets passed in (here the min-max-scaled prices), not in
    currency.

    Args:
        model_obj: Object exposing ``evaluate(x, y, verbose=0)`` that returns
            a scalar loss.
    """
    splits = (("Train", trainX, trainY), ("Test", testX, testY))
    for split_name, features, targets in splits:
        score = model_obj.evaluate(features, targets, verbose=0)
        print(f"{split_name} RMSE: {math.sqrt(score):.2f} RMSE")


calculate_performance(model_hybrid)
Train RMSE: 0.03 RMSE
Test RMSE: 0.04 RMSE
def plot_prediction(model_obj):
    """Overlay the model's test-period predictions on the actual price series.

    Reads the notebook-level ``ts_data``, ``testX``, ``scaler``,
    ``train_recs`` and ``look_back``. Draws on the current matplotlib figure
    and does not call ``plt.show()``.

    Args:
        model_obj: Object exposing ``predict(x)`` that returns predictions in
            the scaled units the scaler was fitted to produce.
    """
    # Predictions come out in scaled units; map them back to prices.
    test_predict = scaler.inverse_transform(model_obj.predict(testX))

    # Float buffer of NaNs the same shape as the series: NaN points are not
    # drawn, so only the predicted stretch appears. Building it with np.full
    # (rather than copying ts_data) keeps this working for integer-typed data.
    pred_test_plot = np.full(ts_data.shape, np.nan)

    # The first `look_back` test rows were built from zero-filled history in
    # get_lookback, so their predictions are left out of the plot.
    first_plotted = train_recs + look_back
    pred_test_plot[first_plotted:, :] = test_predict[look_back:]

    plt.plot(ts_data)
    plt.plot(pred_test_plot, "--")


plt.figure(figsize=(10, 5))
plot_prediction(model_hybrid)