Outliers

Importing libraries and packages

 1# Mathematical operations and data manipulation
 2import pandas as pd
 3
 4# Visualisation
 5import seaborn as sns
 6import matplotlib.pyplot as plt
 7
 8# Warnings
 9import warnings
10
11warnings.filterwarnings("ignore")
12
13%matplotlib inline

Set paths

1# Path to the datasets directory (input CSV files are read from here)
2data_path = "./datasets"
3# Path to the assets directory (results are saved here)
4assets_path = "./assets"

Loading dataset

1# load data
2dataset = pd.read_csv(f"{data_path}/preprocessed_heart.csv")
3dataset.head().T
0 1 2 3 4
age 63.0 37.0 41.0 56.0 57.0
sex 1.0 1.0 0.0 1.0 0.0
chest_pain 3.0 2.0 1.0 1.0 0.0
rest_bp 145.0 130.0 130.0 120.0 120.0
chol 233.0 250.0 204.0 236.0 354.0
fast_bld_sugar 1.0 0.0 0.0 0.0 0.0
rest_ecg 0.0 1.0 0.0 1.0 1.0
max_hr 150.0 187.0 172.0 178.0 163.0
ex_angina 0.0 0.0 0.0 0.0 1.0
st_depr 2.3 3.5 1.4 0.8 0.6
slope 0.0 0.0 2.0 2.0 2.0
colored_vessels 0.0 0.0 0.0 0.0 0.0
thalassemia 1.0 2.0 2.0 2.0 2.0
target 1.0 1.0 1.0 1.0 1.0

Checking for outliers

 1sns.set(
 2    palette="pastel",
 3    rc={
 4        "figure.figsize": (12, 8),
 5        "axes.titlesize": 18,
 6        "axes.labelsize": 16,
 7        "xtick.labelsize": 16,
 8        "ytick.labelsize": 16,
 9    },
10)
11
12chol = sns.boxplot(dataset["chol"])
13plt.show()
../../_images/b9031078a84c192f54942bce3f4808c4e730d75ad8c6184641275c77a56bfbbf.png
1sd = sns.boxplot(dataset["st_depr"])
2plt.show()
../../_images/042ad636cc49a8060229f31ee151101dadf9b37742856f677de5eb924065875a.png
1cv = sns.boxplot(dataset["colored_vessels"])
2plt.show()
../../_images/ab6a8f60278dc1bcf12deb89ac40bc0858e985b0c77d6e614cdf92eea600c979.png
1t = sns.boxplot(dataset["thalassemia"])
2plt.show()
../../_images/16bd513b1ec5c27f8cb5f478f67e0ed361a942ddc8220d9f1d3366f10a7720ab.png