Olympia 2016

Creating an interactive visualization for exploring the results of the 2016 Rio Olympics.

Importing libraries and packages

 1# Warnings
 2import warnings
 3
 4# Mathematical operations and data manipulation
 5import pandas as pd
 6import random
 7
 8# Visualisation
 9from bokeh.plotting import figure, show, ColumnDataSource
10
11# https://ipywidgets.readthedocs.io/en/latest/examples/Using%20Interact.html
12from ipywidgets import interact
13import ipywidgets as widgets
14
15# Output
16from bokeh.io import output_notebook
17
18output_notebook()
19warnings.filterwarnings("ignore")
20%matplotlib inline
Loading BokehJS ...

Set paths

# Input directory (datasets) and output directory (assets, for saved results)
data_path, assets_path = "./datasets", "./assets"

Loading dataset

# Read the athletes CSV from the datasets directory into a DataFrame
dataset = pd.read_csv(
    filepath_or_buffer=f"{data_path}/olympia2016_athletes.csv"
)

Exploring dataset

# Report the dimensions of the table as (rows, columns)
print("Shape of the dataset: ", dataset.shape)
# Preview the first five rows
dataset.head(n=5)
Shape of the dataset:  (11538, 11)
id name nationality sex dob height weight sport gold silver bronze
0 736041664 A Jesus Garcia ESP male 10/17/69 1.72 64.0 athletics 0 0 0
1 532037425 A Lam Shin KOR female 9/23/86 1.68 56.0 fencing 0 0 0
2 435962603 Aaron Brown CAN male 5/27/92 1.98 79.0 athletics 0 0 1
3 521041435 Aaron Cook MDA male 1/2/91 1.83 80.0 taekwondo 0 0 0
4 33922579 Aaron Gate NZL male 11/26/90 1.81 71.0 cycling 0 0 0

Preprocessing

# Distinct country codes found in the "nationality" column
countries = dataset["nationality"].unique()

# Number of athletes per country (Series indexed by country code)
athletes_per_country = dataset.groupby("nationality").size()

# Gold / silver / bronze totals per country (DataFrame indexed by country code).
# The columns are selected with a list: indexing a GroupBy with a bare tuple of
# column names was deprecated and raises an error on pandas 2.x.
medals_per_country = dataset.groupby("nationality")[
    ["gold", "silver", "bronze"]
].sum()

Visualisation

 1# creating the scatter plot
 2def get_plot(max_athletes, max_medals):
 3    filtered_countries = []
 4
 5    for country in countries:
 6        if (
 7            athletes_per_country[country] <= max_athletes
 8            and medals_per_country.loc[country].sum() <= max_medals
 9        ):
10            filtered_countries.append(country)
11
12    data_source = get_datasource(filtered_countries)
13    TOOLTIPS = [
14        ("Country", "@countries"),
15        ("Num of Athletes", "@y"),
16        ("Gold", "@gold"),
17        ("Silver", "@silver"),
18        ("Bronze", "@bronze"),
19    ]
20
21    plot = figure(
22        title="Rio Olympics 2016 - Medal comparison",
23        x_axis_label="Number of Medals",
24        y_axis_label="Num of Athletes",
25        plot_width=800,
26        plot_height=500,
27        tooltips=TOOLTIPS,
28    )
29
30    plot.circle(
31        "x", "y", source=data_source, size=20, color="color", alpha=0.5
32    )
33
34    return plot
# get a 6 digit random hex color to differentiate the countries better
def get_random_color():
    """Return a random color formatted as a "#rrggbb" hex string."""
    rgb_value = random.randint(0, 0xFFFFFF)
    return f"#{rgb_value:06x}"
 1# build the datasource
 2def get_datasource(filtered_countries):
 3    return ColumnDataSource(
 4        data=dict(
 5            color=[get_random_color() for _ in filtered_countries],
 6            countries=filtered_countries,
 7            gold=[
 8                medals_per_country.loc[country]["gold"]
 9                for country in filtered_countries
10            ],
11            silver=[
12                medals_per_country.loc[country]["silver"]
13                for country in filtered_countries
14            ],
15            bronze=[
16                medals_per_country.loc[country]["bronze"]
17                for country in filtered_countries
18            ],
19            x=[
20                medals_per_country.loc[country].sum()
21                for country in filtered_countries
22            ],
23            y=[
24                athletes_per_country.loc[country].sum()
25                for country in filtered_countries
26            ],
27        )
28    )
# largest athlete count and largest medal total found for any single country
max_athletes = athletes_per_country.max()
max_medals = medals_per_country.sum(axis="columns").max()
 1# setting up the interaction elements
 2max_athletes_slider = widgets.IntSlider(
 3    value=max_athletes,
 4    min=0,
 5    max=max_athletes,
 6    step=1,
 7    description="Max. Athletes:",
 8    continuous_update=False,
 9    orientation="vertical",
10    layout={"width": "100px"},
11)
12
13max_medals_slider = widgets.IntSlider(
14    value=max_medals,
15    min=0,
16    max=max_medals,
17    step=1,
18    description="Max. Medals:",
19    continuous_update=False,
20    orientation="horizontal",
21)
# creating the interact method
@interact(max_athletes=max_athletes_slider, max_medals=max_medals_slider)
def get_olympia_stats(max_athletes, max_medals):
    """Render the scatter plot for the current slider values."""
    filtered_plot = get_plot(max_athletes, max_medals)
    show(filtered_plot)