SQL refresher#

The database used here is SQLite. The main data science tricks will not change much depending on the database in use because they all adhere to PEP249 (commonly known as Python DB API 2). For different databases, different third-party Python libraries (such as Psycopg2 for PostgreSQL) must be installed.

Importing libraries and packages#

1# Mathematical operations and data manipulation
2
3# Database
4import sqlite3

Set paths#

1# Path to datasets directory
2data_path = "./datasets"
3# Path to assets directory (for saving results to)
4assets_path = "./assets"

Connect (and create database)#

1with sqlite3.connect(f"{assets_path}/discworld.db") as conn:
2    cursor = conn.cursor()

Using DDL and DML#

 1sql = """DROP TABLE IF EXISTS user"""
 2
 3cursor.execute(sql)
 4
 5sql = """CREATE TABLE user(
 6            email TEXT PRIMARY KEY,
 7            first_name TEXT,
 8            last_name TEXT,
 9            address TEXT,
10            age INTEGER)"""
11
12cursor.execute(sql)
<sqlite3.Cursor at 0x7f3e711432d0>
 1sql = """INSERT INTO user
 2        (email, first_name, last_name, address, age)
 3        VALUES
 4        ('agnes@lspace.org', 'Agnes', 'Nitt',
 5        '123 Amper Sands, Mad Stoat', 31),
 6        ('millie@lspace.org', 'Millie', 'Hopgood',
 7        'Right there, Slice', 39),
 8        ('granny@lspace.org', 'Esmeralda', 'Weatherwax',
 9        'The cottage, Bad Ass', 31),
10        ('grodley@lspace.com', 'Sister', 'Grodley',
11        '456 Le Foie Heureux, Slice', 39)
12        """
13
14cursor.execute(sql)
15conn.commit()

Sorting values#

1rows = cursor.execute("SELECT * FROM user ORDER BY age DESC")
2for row in rows:
3    print(row)
('millie@lspace.org', 'Millie', 'Hopgood', 'Right there, Slice', 39)
('grodley@lspace.com', 'Sister', 'Grodley', '456 Le Foie Heureux, Slice', 39)
('agnes@lspace.org', 'Agnes', 'Nitt', '123 Amper Sands, Mad Stoat', 31)
('granny@lspace.org', 'Esmeralda', 'Weatherwax', 'The cottage, Bad Ass', 31)

Altering the structure of a table and updating the new fields#

1cursor.execute("ALTER TABLE user ADD COLUMN gender text")
2conn.commit()
3cursor.execute("UPDATE user SET gender='F'")
4conn.commit()
5
6rows = cursor.execute("SELECT * FROM user ORDER BY age")
7for row in rows:
8    print(row)
('agnes@lspace.org', 'Agnes', 'Nitt', '123 Amper Sands, Mad Stoat', 31, 'F')
('granny@lspace.org', 'Esmeralda', 'Weatherwax', 'The cottage, Bad Ass', 31, 'F')
('millie@lspace.org', 'Millie', 'Hopgood', 'Right there, Slice', 39, 'F')
('grodley@lspace.com', 'Sister', 'Grodley', '456 Le Foie Heureux, Slice', 39, 'F')

Grouping values in tables#

1cursor.execute(
2    "INSERT INTO user VALUES ('swivel@lspace.org', 'Geoffrey', "
3    "'Swivel', 'The Shires', 20, 'M')"
4)
5conn.commit()
6cursor = conn.cursor()
7rows = cursor.execute("SELECT * FROM user ORDER BY age DESC")
8for row in rows:
9    print(row)
('millie@lspace.org', 'Millie', 'Hopgood', 'Right there, Slice', 39, 'F')
('grodley@lspace.com', 'Sister', 'Grodley', '456 Le Foie Heureux, Slice', 39, 'F')
('agnes@lspace.org', 'Agnes', 'Nitt', '123 Amper Sands, Mad Stoat', 31, 'F')
('granny@lspace.org', 'Esmeralda', 'Weatherwax', 'The cottage, Bad Ass', 31, 'F')
('swivel@lspace.org', 'Geoffrey', 'Swivel', 'The Shires', 20, 'M')
1rows = cursor.execute("SELECT COUNT(*), gender FROM user GROUP BY gender")
2for row in rows:
3    print(row)
(4, 'F')
(1, 'M')
1conn.close()