Animals

Visualize the correlation between two animal attributes, maximum longevity (in years) and body mass (in grams), for amphibians, birds, mammals and reptiles.

Importing libraries and packages

 1# Mathematical operations and data manipulation
 2import pandas as pd
 3import numpy as np
 4
 5# Plotting
 6import matplotlib.pyplot as plt
 7
 8# Warnings
 9import warnings
10
11warnings.filterwarnings("ignore")
12
13%matplotlib inline

Set paths

# Path to datasets directory (input CSV files are read from here)
data_path = "./datasets"
# Path to assets directory (for saving results to)
assets_path = "./assets"

Loading dataset

# Read the AnAge table from the datasets directory into a DataFrame.
dataset = pd.read_csv(f"{data_path}/anage_data.csv")

Exploring dataset

# Shape of the dataset as (rows, columns)
print("Shape of the dataset: ", dataset.shape)
# Display the whole frame; the notebook renders a truncated preview
# showing the first and last rows.
dataset
Shape of the dataset:  (4218, 30)
Unnamed: 0 HAGRID Kingdom Phylum Class Order Family Genus Species Common name ... Growth rate (1/days) Maximum longevity (yrs) Specimen origin Sample size Data quality IMR (per yr) MRDT (yrs) Metabolic rate (W) Body mass (g) Temperature (K)
0 0 3 Animalia Arthropoda Branchiopoda Diplostraca Daphniidae Daphnia pulicaria Daphnia ... NaN 0.19 unknown medium acceptable NaN NaN NaN NaN NaN
1 1 5 Animalia Arthropoda Insecta Diptera Drosophilidae Drosophila melanogaster Fruit fly ... NaN 0.30 captivity large acceptable 0.05 0.04 NaN NaN NaN
2 2 6 Animalia Arthropoda Insecta Hymenoptera Apidae Apis mellifera Honey bee ... NaN 8.00 unknown medium acceptable NaN NaN NaN NaN NaN
3 3 8 Animalia Arthropoda Insecta Hymenoptera Formicidae Cardiocondyla obscurior Cardiocondyla obscurior ... NaN 0.50 captivity medium acceptable NaN NaN NaN NaN NaN
4 4 9 Animalia Arthropoda Insecta Hymenoptera Formicidae Lasius niger Black garden ant ... NaN 28.00 unknown medium acceptable NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4213 4214 4239 Animalia Porifera Hexactinellida Lyssacinosida Rossellidae Scolymastra joubini Hexactinellid sponge ... NaN 15000.00 wild medium questionable NaN NaN NaN NaN NaN
4214 4215 4241 Plantae Pinophyta Pinopsida Pinales Pinaceae Pinus longaeva Great Basin bristlecone pine ... NaN 5062.00 wild medium acceptable NaN 999.00 NaN NaN NaN
4215 4216 4242 Fungi Ascomycota Saccharomycetes Saccharomycetales Saccharomycetaceae Saccharomyces cerevisiae Baker's yeast ... NaN 0.04 captivity large acceptable NaN NaN NaN NaN NaN
4216 4217 4243 Fungi Ascomycota Schizosaccharomycetes Schizosaccharomycetales Schizosaccharomycetaceae Schizosaccharomyces pombe Fission yeast ... NaN NaN unknown small low NaN NaN NaN NaN NaN
4217 4218 4244 Fungi Ascomycota Sordariomycetes Sordariales Lasiosphaeriaceae Podospora anserina Filamentous fungus ... NaN NaN unknown small low NaN NaN NaN NaN NaN

4218 rows × 30 columns

Cleaning dataset

# Remove the leftover CSV index column. It is dropped by name rather than
# by position so that re-running this cell is a no-op instead of silently
# removing the next real column (HAGRID).
dataset = dataset.drop(columns="Unnamed: 0", errors="ignore")
dataset.head()
HAGRID Kingdom Phylum Class Order Family Genus Species Common name Female maturity (days) ... Growth rate (1/days) Maximum longevity (yrs) Specimen origin Sample size Data quality IMR (per yr) MRDT (yrs) Metabolic rate (W) Body mass (g) Temperature (K)
0 3 Animalia Arthropoda Branchiopoda Diplostraca Daphniidae Daphnia pulicaria Daphnia NaN ... NaN 0.19 unknown medium acceptable NaN NaN NaN NaN NaN
1 5 Animalia Arthropoda Insecta Diptera Drosophilidae Drosophila melanogaster Fruit fly 7.0 ... NaN 0.30 captivity large acceptable 0.05 0.04 NaN NaN NaN
2 6 Animalia Arthropoda Insecta Hymenoptera Apidae Apis mellifera Honey bee NaN ... NaN 8.00 unknown medium acceptable NaN NaN NaN NaN NaN
3 8 Animalia Arthropoda Insecta Hymenoptera Formicidae Cardiocondyla obscurior Cardiocondyla obscurior NaN ... NaN 0.50 captivity medium acceptable NaN NaN NaN NaN NaN
4 9 Animalia Arthropoda Insecta Hymenoptera Formicidae Lasius niger Black garden ant NaN ... NaN 28.00 unknown medium acceptable NaN NaN NaN NaN NaN

5 rows × 29 columns

Preprocessing

# Keep only the samples that have both a body mass and a maximum longevity.
longevity = "Maximum longevity (yrs)"
mass = "Body mass (g)"
# np.isfinite is False for NaN (and for +/-inf), so rows missing either
# value are filtered out.
data = dataset[np.isfinite(dataset[longevity]) & np.isfinite(dataset[mass])]
# Split the samples into one subset per animal class.
amphibia = data[data["Class"] == "Amphibia"]
aves = data[data["Class"] == "Aves"]
mammalia = data[data["Class"] == "Mammalia"]
reptilia = data[data["Class"] == "Reptilia"]

Visualisation

 1# Create figure
 2plt.figure(figsize=(10, 6), dpi=300)
 3# Create scatter plot
 4plt.scatter(amphibia[mass], amphibia[longevity], label="Amphibia")
 5plt.scatter(aves[mass], aves[longevity], label="Aves")
 6plt.scatter(mammalia[mass], mammalia[longevity], label="Mammalia")
 7plt.scatter(reptilia[mass], reptilia[longevity], label="Reptilia")
 8# Add legend
 9plt.legend()
10# Log scale
11ax = plt.gca()
12ax.set_xscale("log")
13ax.set_yscale("log")
14# Add labels
15plt.xlabel("Body mass in grams")
16plt.ylabel("Maximum longevity in years")
17# Show plot
18plt.show()
../../_images/a3714f272d21d35a83544d96b4921e4b2649bd98267bfc4aadaab15f864daf95.png