World population#
Population of different countries over the years.
Importing libraries and packages#
1# Warnings
2import warnings
3
4# Mathematical operations and data manipulation
5import pandas as pd
6
7# Visualisation
8from bokeh.plotting import figure, show
9from bokeh.layouts import gridplot
10
11# Output
12from bokeh.io import output_notebook
13
14output_notebook()
15warnings.filterwarnings("ignore")
Set paths#
# Directory the input datasets are read from
data_path = "./datasets"
# Directory that results/assets are saved to
assets_path = "./assets"
Loading dataset#
# Read the world population CSV; the first column of the file becomes the row index
csv_file = f"{data_path}/world_population.csv"
dataset = pd.read_csv(csv_file, index_col=0)
Exploring dataset#
# Shape of the dataset as a (rows, columns) tuple.
# A single f-string avoids the double space that print() produced when its
# separator followed a label already ending in a space.
print(f"Shape of the dataset: {dataset.shape}")
Shape of the dataset: (264, 60)
1# View
2dataset
Country Code | Indicator Name | Indicator Code | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | ... | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Country Name | |||||||||||||||||||||
Aruba | ABW | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 307.972222 | 312.366667 | 314.983333 | 316.827778 | 318.666667 | 320.622222 | ... | 562.322222 | 563.011111 | 563.422222 | 564.427778 | 566.311111 | 568.850000 | 571.783333 | 574.672222 | 577.161111 | NaN |
Andorra | AND | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 30.587234 | 32.714894 | 34.914894 | 37.170213 | 39.470213 | 41.800000 | ... | 180.591489 | 182.161702 | 181.859574 | 179.614894 | 175.161702 | 168.757447 | 161.493617 | 154.863830 | 149.942553 | NaN |
Afghanistan | AFG | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 14.038148 | 14.312061 | 14.599692 | 14.901579 | 15.218206 | 15.545203 | ... | 39.637202 | 40.634655 | 41.674005 | 42.830327 | 44.127634 | 45.533197 | 46.997059 | 48.444546 | 49.821649 | NaN |
Angola | AGO | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 4.305195 | 4.384299 | 4.464433 | 4.544558 | 4.624228 | 4.703271 | ... | 15.387749 | 15.915819 | 16.459536 | 17.020898 | 17.600302 | 18.196544 | 18.808215 | 19.433323 | 20.070565 | NaN |
Albania | ALB | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 60.576642 | 62.456898 | 64.329234 | 66.209307 | 68.058066 | 69.874927 | ... | 108.394781 | 107.566204 | 106.843759 | 106.314635 | 106.013869 | 105.848431 | 105.717226 | 105.607810 | 105.444051 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
Yemen, Rep. | YEM | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 9.946897 | 10.112857 | 10.283730 | 10.460234 | 10.642972 | 10.834968 | ... | 41.102913 | 42.280241 | 43.476383 | 44.684304 | 45.902116 | 47.129178 | 48.361113 | 49.593113 | 50.821477 | NaN |
South Africa | ZAF | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 14.796892 | 15.216878 | 15.609838 | 15.984431 | 16.348334 | 16.708236 | ... | 40.060326 | 40.636905 | 41.234300 | 41.853305 | 42.494751 | 43.159519 | 43.848532 | 44.562767 | 45.303251 | NaN |
Congo, Dem. Rep. | COD | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 6.897825 | 7.075824 | 7.261381 | 7.456342 | 7.661877 | 7.878900 | ... | 26.393275 | 27.264188 | 28.162192 | 29.085689 | 30.033469 | 31.005562 | 32.003203 | 33.028398 | 34.082536 | NaN |
Zambia | ZMB | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 4.227724 | 4.359305 | 4.496824 | 4.639914 | 4.788452 | 4.942343 | ... | 17.135926 | 17.641587 | 18.170609 | 18.721585 | 19.294752 | 19.890745 | 20.508866 | 21.148177 | 21.807890 | NaN |
Zimbabwe | ZWE | Population density (people per sq. km of land ... | EN.POP.DNST | NaN | 10.021037 | 10.356112 | 10.703901 | 11.062585 | 11.431128 | 11.809022 | ... | 34.374559 | 34.885516 | 35.468520 | 36.122262 | 36.850438 | 37.651498 | 38.511289 | 39.410249 | 40.332819 | NaN |
264 rows × 60 columns
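Note: only the year columns have names that start with a digit; every other column name starts with a letter. The preprocessing below relies on this to pick out the year columns.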
Preprocessing#
Netherlands#
# Preparing our data for the Netherlands.
# The year columns are the only ones whose label starts with a digit; the
# slice [:1] keeps the check safe even for an empty column label.
years = [col for col in dataset.columns if col[:1].isdigit()]
# Select the country's row once, restricted to the year columns, and convert
# it to a flat list of floats. Looking up dataset.loc[["Netherlands"]][year]
# per year produced a list of one-element Series rather than plain numbers.
nl_vals = dataset.loc["Netherlands", years].tolist()
# Line chart of the Netherlands' population density over the available years
plot = figure(title="Population Density of the Netherlands")
plot.xaxis.axis_label = "Year"
plot.yaxis.axis_label = "Population Density"

plot.line(x=years, y=nl_vals, legend_label="Netherlands", line_width=2)

show(plot)
France#
# Preparing the data for the second country: France.
# Take the France row restricted to the year columns as a flat list of floats
# (the per-year dataset.loc[["France"]][year] lookup yielded one-element
# Series objects instead of plain numbers).
fr_vals = dataset.loc["France", years].tolist()
# Plotting the data for the Netherlands and France in one visualization,
# adding a circle marker on each data point for France.
plot = figure(
    title="Population Density of the Netherlands and France",
    x_axis_label="Year",
    y_axis_label="Population Density",
)

plot.line(years, nl_vals, line_width=2, legend_label="Netherlands")
plot.line(years, fr_vals, line_width=2, color="orange", legend_label="France")
# scatter() with marker="circle" draws the same screen-sized circles as
# circle(size=...), which newer Bokeh releases deprecate.
plot.scatter(
    years,
    fr_vals,
    marker="circle",
    size=4,
    line_color="orange",
    fill_color="white",
    legend_label="France",
)

show(plot)
Visualisation#
# Plotting the Netherlands and France in two separate figures whose
# viewports are linked to each other.

# `height` replaces the `plot_height` keyword, which newer Bokeh releases
# no longer accept.
plot_nl = figure(
    title="Population Density of the Netherlands",
    x_axis_label="Year",
    y_axis_label="Population Density",
    height=300,
)

plot_fr = figure(
    title="Population Density of France",
    x_axis_label="Year",
    y_axis_label="Population Density",
    height=300,
    # Reusing the Netherlands figure's range objects is what ties the two
    # viewports together.
    x_range=plot_nl.x_range,
    y_range=plot_nl.y_range,
)

plot_fr.line(years, fr_vals, line_width=2)
plot_nl.line(years, nl_vals, line_width=2)

# One row containing both figures side by side
plot = gridplot([[plot_nl, plot_fr]])

show(plot)
# Stack the two figures declared above vertically: one figure per grid row
stacked_rows = [[plot_nl], [plot_fr]]
plot_v = gridplot(stacked_rows)

show(plot_v)