Olympia 2016#
Creating an interactive visualization for exploring the results of the 2016 Rio Olympics.
Importing libraries and packages#
1# Warnings
2import warnings
3
4# Mathematical operations and data manipulation
5import pandas as pd
6import random
7
8# Visualisation
9from bokeh.plotting import figure, show, ColumnDataSource
10
11# https://ipywidgets.readthedocs.io/en/latest/examples/Using%20Interact.html
12from ipywidgets import interact
13import ipywidgets as widgets
14
15# Output
16from bokeh.io import output_notebook
17
18output_notebook()
19warnings.filterwarnings("ignore")
20%matplotlib inline
Set paths#
# Input directory holding the datasets, and output directory that
# results are saved to.
data_path, assets_path = "./datasets", "./assets"
Loading dataset#
# Read the athletes CSV from the datasets directory into a DataFrame
athletes_csv = f"{data_path}/olympia2016_athletes.csv"
dataset = pd.read_csv(athletes_csv)
Exploring dataset#
# Report the (rows, columns) dimensions of the dataset
dataset_shape = dataset.shape
print("Shape of the dataset: ", dataset_shape)
# Preview the first records (rendered as the cell's output)
dataset.head()
Shape of the dataset: (11538, 11)
| | id | name | nationality | sex | dob | height | weight | sport | gold | silver | bronze |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 736041664 | A Jesus Garcia | ESP | male | 10/17/69 | 1.72 | 64.0 | athletics | 0 | 0 | 0 |
| 1 | 532037425 | A Lam Shin | KOR | female | 9/23/86 | 1.68 | 56.0 | fencing | 0 | 0 | 0 |
| 2 | 435962603 | Aaron Brown | CAN | male | 5/27/92 | 1.98 | 79.0 | athletics | 0 | 0 | 1 |
| 3 | 521041435 | Aaron Cook | MDA | male | 1/2/91 | 1.83 | 80.0 | taekwondo | 0 | 0 | 0 |
| 4 | 33922579 | Aaron Gate | NZL | male | 11/26/90 | 1.81 | 71.0 | cycling | 0 | 0 | 0 |
Preprocessing#
# Extract the distinct countries, then aggregate per country: the number
# of athletes and the summed gold/silver/bronze medal counts.
countries = dataset["nationality"].unique()
athletes_per_country = dataset.groupby("nationality").size()
# Select the medal columns with a list (double brackets). Indexing a
# GroupBy with a bare tuple of column names was deprecated in pandas 1.0
# and raises an error in pandas 2.0+.
medals_per_country = dataset.groupby("nationality")[
    ["gold", "silver", "bronze"]
].sum()
Visualisation#
# creating the scatter plot
def get_plot(max_athletes, max_medals):
    """Build the medals-vs-athletes scatter plot for the selected countries.

    A country is included when it has at most ``max_athletes`` athletes
    and at most ``max_medals`` medals in total (gold + silver + bronze).

    Args:
        max_athletes: Inclusive upper bound on the athletes per country.
        max_medals: Inclusive upper bound on the total medals per country.

    Returns:
        The Bokeh figure with one translucent marker per included country.
    """
    filtered_countries = [
        country
        for country in countries
        if athletes_per_country[country] <= max_athletes
        and medals_per_country.loc[country].sum() <= max_medals
    ]

    data_source = get_datasource(filtered_countries)
    # Hover tooltip rows; each "@name" refers to a column of the data source.
    tooltips = [
        ("Country", "@countries"),
        ("Num of Athletes", "@y"),
        ("Gold", "@gold"),
        ("Silver", "@silver"),
        ("Bronze", "@bronze"),
    ]

    plot = figure(
        title="Rio Olympics 2016 - Medal comparison",
        x_axis_label="Number of Medals",
        y_axis_label="Num of Athletes",
        # `plot_width` / `plot_height` were removed in Bokeh 3.0;
        # `width` / `height` are the supported names.
        width=800,
        height=500,
        tooltips=tooltips,
    )

    # scatter() draws circle markers by default; calling circle() with a
    # screen-unit `size` is deprecated in Bokeh 3.4+.
    plot.scatter(
        "x", "y", source=data_source, size=20, color="color", alpha=0.5
    )

    return plot
# get a 6 digit random hex color to differentiate the countries better
def get_random_color():
    """Return a random colour as a ``#`` followed by six lowercase hex digits."""
    rgb_value = random.randint(0, 0xFFFFFF)
    return f"#{rgb_value:06x}"
# build the datasource
def get_datasource(filtered_countries):
    """Assemble the ColumnDataSource backing the scatter plot.

    Every column holds one entry per country in ``filtered_countries``:
    a random ``color``, the country code (``countries``), the ``gold`` /
    ``silver`` / ``bronze`` counts, the total medals (``x``) and the
    number of athletes (``y``).

    Args:
        filtered_countries: Country codes to include in the plot.

    Returns:
        A ColumnDataSource with the columns described above.
    """
    columns = {
        "color": [get_random_color() for _ in filtered_countries],
        "countries": filtered_countries,
    }

    # One column per medal type, read from the per-country medal table.
    for medal in ("gold", "silver", "bronze"):
        columns[medal] = [
            medals_per_country.loc[code][medal] for code in filtered_countries
        ]

    # x: total medals of the country, y: its number of athletes.
    columns["x"] = [
        medals_per_country.loc[code].sum() for code in filtered_countries
    ]
    columns["y"] = [
        athletes_per_country.loc[code].sum() for code in filtered_countries
    ]

    return ColumnDataSource(data=columns)
# Largest medal total and largest team size over all countries
medal_totals = medals_per_country.sum(axis=1)
max_medals = medal_totals.max()
max_athletes = athletes_per_country.max()
# Interaction elements: one slider per filter, each starting at its
# maximum value.
# Options common to both sliders.
shared_slider_options = {"min": 0, "step": 1, "continuous_update": False}

max_athletes_slider = widgets.IntSlider(
    value=max_athletes,
    max=max_athletes,
    description="Max. Athletes:",
    orientation="vertical",
    layout={"width": "100px"},
    **shared_slider_options,
)

max_medals_slider = widgets.IntSlider(
    value=max_medals,
    max=max_medals,
    description="Max. Medals:",
    orientation="horizontal",
    **shared_slider_options,
)
# Hook the two sliders up to the plot through ipywidgets' interact
@interact(max_athletes=max_athletes_slider, max_medals=max_medals_slider)
def get_olympia_stats(max_athletes, max_medals):
    """Build the plot for the given limits and display it."""
    filtered_plot = get_plot(max_athletes, max_medals)
    show(filtered_plot)