AnAge

Visualize the correlation between various animal attributes using scatter plots and marginal histograms.

Importing libraries and packages

 1# Mathematical operations and data manipulation
 2import pandas as pd
 3import numpy as np
 4
 5# Plotting
 6import matplotlib.pyplot as plt
 7
 8# Warnings
 9import warnings
10
11warnings.filterwarnings("ignore")
12
13%matplotlib inline

Set paths

# Path to the datasets directory (input CSV files are read from here)
data_path = "./datasets"
# Path to the assets directory (for saving results to)
assets_path = "./assets"

Loading dataset

# Read the AnAge CSV from the datasets directory into a DataFrame
dataset = pd.read_csv(f"{data_path}/anage_data.csv")

Exploring dataset

# Shape of the dataset as (rows, columns)
print("Shape of the dataset: ", dataset.shape)
# Display the DataFrame itself as the cell result (this is the whole frame,
# not just dataset.head()).
dataset
Shape of the dataset:  (4218, 30)
Unnamed: 0 HAGRID Kingdom Phylum Class Order Family Genus Species Common name ... Growth rate (1/days) Maximum longevity (yrs) Specimen origin Sample size Data quality IMR (per yr) MRDT (yrs) Metabolic rate (W) Body mass (g) Temperature (K)
0 0 3 Animalia Arthropoda Branchiopoda Diplostraca Daphniidae Daphnia pulicaria Daphnia ... NaN 0.19 unknown medium acceptable NaN NaN NaN NaN NaN
1 1 5 Animalia Arthropoda Insecta Diptera Drosophilidae Drosophila melanogaster Fruit fly ... NaN 0.30 captivity large acceptable 0.05 0.04 NaN NaN NaN
2 2 6 Animalia Arthropoda Insecta Hymenoptera Apidae Apis mellifera Honey bee ... NaN 8.00 unknown medium acceptable NaN NaN NaN NaN NaN
3 3 8 Animalia Arthropoda Insecta Hymenoptera Formicidae Cardiocondyla obscurior Cardiocondyla obscurior ... NaN 0.50 captivity medium acceptable NaN NaN NaN NaN NaN
4 4 9 Animalia Arthropoda Insecta Hymenoptera Formicidae Lasius niger Black garden ant ... NaN 28.00 unknown medium acceptable NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4213 4214 4239 Animalia Porifera Hexactinellida Lyssacinosida Rossellidae Scolymastra joubini Hexactinellid sponge ... NaN 15000.00 wild medium questionable NaN NaN NaN NaN NaN
4214 4215 4241 Plantae Pinophyta Pinopsida Pinales Pinaceae Pinus longaeva Great Basin bristlecone pine ... NaN 5062.00 wild medium acceptable NaN 999.00 NaN NaN NaN
4215 4216 4242 Fungi Ascomycota Saccharomycetes Saccharomycetales Saccharomycetaceae Saccharomyces cerevisiae Baker's yeast ... NaN 0.04 captivity large acceptable NaN NaN NaN NaN NaN
4216 4217 4243 Fungi Ascomycota Schizosaccharomycetes Schizosaccharomycetales Schizosaccharomycetaceae Schizosaccharomyces pombe Fission yeast ... NaN NaN unknown small low NaN NaN NaN NaN NaN
4217 4218 4244 Fungi Ascomycota Sordariomycetes Sordariales Lasiosphaeriaceae Podospora anserina Filamentous fungus ... NaN NaN unknown small low NaN NaN NaN NaN NaN

4218 rows × 30 columns

Preprocessing

# Keep only the samples that have both a finite maximum longevity and a finite
# body mass, then select the samples of the class Aves with a body mass
# smaller than 20,000 g.
longevity = "Maximum longevity (yrs)"
mass = "Body mass (g)"
dataset = dataset[np.isfinite(dataset[longevity]) & np.isfinite(dataset[mass])]
# Filter by class: this selects the Aves rows, it does not sort anything
aves = dataset[dataset["Class"] == "Aves"]
# Drop the heaviest birds (20,000 g and above)
aves = aves[aves[mass] < 20000]

Visualisation

 1# Create figure
 2fig = plt.figure(figsize=(8, 8), dpi=150, constrained_layout=True)
 3# Create gridspec
 4gs = fig.add_gridspec(4, 4)
 5# Specify subplots
 6histx_ax = fig.add_subplot(gs[0, :-1])
 7histy_ax = fig.add_subplot(gs[1:, -1])
 8scatter_ax = fig.add_subplot(gs[1:, :-1])
 9# Create plots
10scatter_ax.scatter(aves[mass], aves[longevity])
11histx_ax.hist(aves[mass], bins=20, density=True)
12histx_ax.set_xticks([])
13histy_ax.hist(aves[longevity], bins=20, density=True, orientation="horizontal")
14histy_ax.set_yticks([])
15# Add labels and title
16plt.xlabel("Body mass in grams")
17plt.ylabel("Maximum longevity in years")
18fig.suptitle("Scatter plot with marginal histograms")
19# Show plot
20plt.show()
../../_images/8bfee1d8f6f3abdf2410080e95d2732c64ccd7216f1876b3a4b2a129f0d78a71.png