Data preparation and feature engineering

Importing libraries and packages

1# Mathematical operations and data manipulation
2import pandas as pd
3
4# Warnings
5import warnings
6
7warnings.filterwarnings("ignore")
8
9%matplotlib inline

Set paths

# Path to the datasets directory (input CSVs are read from here and the
# engineered CSV is written back to it)
data_path = "./datasets"
# Path to the assets directory (for saving results to)
assets_path = "./assets"

Loading dataset

# Load the cleaned energy dataset from the datasets directory
dataset = pd.read_csv(f"{data_path}/cleaned_energydata.csv")
# Preview the first five rows, transposed so every column is shown as a row
dataset.head().T
0 1 2 3 4
date_time 2016-01-11 17:00:00 2016-01-11 17:10:00 2016-01-11 17:20:00 2016-01-11 17:30:00 2016-01-11 17:40:00
a_energy 60 60 50 50 60
kitchen_temp 19.89 19.89 19.89 19.89 19.89
kitchen_hum 47.596667 46.693333 46.3 46.066667 46.333333
liv_temp 19.2 19.2 19.2 19.2 19.2
liv_hum 44.79 44.7225 44.626667 44.59 44.53
laun_temp 19.79 19.79 19.79 19.79 19.79
laun_hum 44.73 44.79 44.933333 45.0 45.0
off_temp 19.0 19.0 18.926667 18.89 18.89
off_hum 45.566667 45.9925 45.89 45.723333 45.53
bath_temp 17.166667 17.166667 17.166667 17.166667 17.2
bath_hum 55.2 55.2 55.09 55.09 55.09
out_b_temp 7.026667 6.833333 6.56 6.433333 6.366667
out_b_hum 84.256667 84.063333 83.156667 83.423333 84.893333
iron_temp 17.2 17.2 17.2 17.133333 17.2
iron_hum 41.626667 41.56 41.433333 41.29 41.23
teen_temp 18.2 18.2 18.2 18.1 18.1
teen_hum 48.9 48.863333 48.73 48.59 48.59
par_temp 17.033333 17.066667 17.0 17.0 17.0
par_hum 45.53 45.56 45.5 45.4 45.4
out_temp 6.6 6.483333 6.366667 6.25 6.133333
out_press 733.5 733.6 733.7 733.8 733.9
out_hum 92.0 92.0 92.0 92.0 92.0
wind 7.0 6.666667 6.333333 6.0 5.666667
visibility 63.0 59.166667 55.333333 51.5 47.666667
dew_point 5.3 5.2 5.1 5.0 4.9
rv1 13.275433 18.606195 28.642668 45.410389 10.084097
rv2 13.275433 18.606195 28.642668 45.410389 10.084097

Data preparation and feature engineering

# Convert the `date_time` column to pandas datetimes so the `.dt` accessor
# can be used for feature extraction. The format is given explicitly, so
# the values are parsed against that exact pattern instead of being inferred.
dataset["date_time"] = pd.to_datetime(
    dataset.date_time, format="%Y-%m-%d %H:%M:%S"
)
# Preview the first five rows, transposed so every column is shown as a row
dataset.head().T
0 1 2 3 4
date_time 2016-01-11 17:00:00 2016-01-11 17:10:00 2016-01-11 17:20:00 2016-01-11 17:30:00 2016-01-11 17:40:00
a_energy 60 60 50 50 60
kitchen_temp 19.89 19.89 19.89 19.89 19.89
kitchen_hum 47.596667 46.693333 46.3 46.066667 46.333333
liv_temp 19.2 19.2 19.2 19.2 19.2
liv_hum 44.79 44.7225 44.626667 44.59 44.53
laun_temp 19.79 19.79 19.79 19.79 19.79
laun_hum 44.73 44.79 44.933333 45.0 45.0
off_temp 19.0 19.0 18.926667 18.89 18.89
off_hum 45.566667 45.9925 45.89 45.723333 45.53
bath_temp 17.166667 17.166667 17.166667 17.166667 17.2
bath_hum 55.2 55.2 55.09 55.09 55.09
out_b_temp 7.026667 6.833333 6.56 6.433333 6.366667
out_b_hum 84.256667 84.063333 83.156667 83.423333 84.893333
iron_temp 17.2 17.2 17.2 17.133333 17.2
iron_hum 41.626667 41.56 41.433333 41.29 41.23
teen_temp 18.2 18.2 18.2 18.1 18.1
teen_hum 48.9 48.863333 48.73 48.59 48.59
par_temp 17.033333 17.066667 17.0 17.0 17.0
par_hum 45.53 45.56 45.5 45.4 45.4
out_temp 6.6 6.483333 6.366667 6.25 6.133333
out_press 733.5 733.6 733.7 733.8 733.9
out_hum 92.0 92.0 92.0 92.0 92.0
wind 7.0 6.666667 6.333333 6.0 5.666667
visibility 63.0 59.166667 55.333333 51.5 47.666667
dew_point 5.3 5.2 5.1 5.0 4.9
rv1 13.275433 18.606195 28.642668 45.410389 10.084097
rv2 13.275433 18.606195 28.642668 45.410389 10.084097
# Calendar features derived from the timestamp:
#   month - calendar month number, 1-12
#   day   - day of the WEEK (not of the month): `dt.dayofweek` is 0 for
#           Monday, so adding 1 gives Monday=1 ... Sunday=7
calendar_features = {
    "month": dataset.date_time.dt.month,
    "day": dataset.date_time.dt.dayofweek + 1,
}

# Place the features right after `date_time` (positions 1 and 2).
# `DataFrame.insert` raises ValueError when the column already exists, which
# made this cell fail on a second run; refresh the values in place instead.
for position, (name, values) in enumerate(calendar_features.items(), start=1):
    if name in dataset.columns:
        dataset[name] = values
    else:
        dataset.insert(loc=position, column=name, value=values)

# Preview the first five rows, transposed so every column is shown as a row
dataset.head().T
0 1 2 3 4
date_time 2016-01-11 17:00:00 2016-01-11 17:10:00 2016-01-11 17:20:00 2016-01-11 17:30:00 2016-01-11 17:40:00
month 1 1 1 1 1
day 1 1 1 1 1
a_energy 60 60 50 50 60
kitchen_temp 19.89 19.89 19.89 19.89 19.89
kitchen_hum 47.596667 46.693333 46.3 46.066667 46.333333
liv_temp 19.2 19.2 19.2 19.2 19.2
liv_hum 44.79 44.7225 44.626667 44.59 44.53
laun_temp 19.79 19.79 19.79 19.79 19.79
laun_hum 44.73 44.79 44.933333 45.0 45.0
off_temp 19.0 19.0 18.926667 18.89 18.89
off_hum 45.566667 45.9925 45.89 45.723333 45.53
bath_temp 17.166667 17.166667 17.166667 17.166667 17.2
bath_hum 55.2 55.2 55.09 55.09 55.09
out_b_temp 7.026667 6.833333 6.56 6.433333 6.366667
out_b_hum 84.256667 84.063333 83.156667 83.423333 84.893333
iron_temp 17.2 17.2 17.2 17.133333 17.2
iron_hum 41.626667 41.56 41.433333 41.29 41.23
teen_temp 18.2 18.2 18.2 18.1 18.1
teen_hum 48.9 48.863333 48.73 48.59 48.59
par_temp 17.033333 17.066667 17.0 17.0 17.0
par_hum 45.53 45.56 45.5 45.4 45.4
out_temp 6.6 6.483333 6.366667 6.25 6.133333
out_press 733.5 733.6 733.7 733.8 733.9
out_hum 92.0 92.0 92.0 92.0 92.0
wind 7.0 6.666667 6.333333 6.0 5.666667
visibility 63.0 59.166667 55.333333 51.5 47.666667
dew_point 5.3 5.2 5.1 5.0 4.9
rv1 13.275433 18.606195 28.642668 45.410389 10.084097
rv2 13.275433 18.606195 28.642668 45.410389 10.084097
# Save the engineered dataset next to the input data; index=False keeps the
# DataFrame's row index from being written as an extra CSV column.
dataset.to_csv(f"{data_path}/engineered_energydata.csv", index=False)