# CSV file headers
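This section shows how to handle CSV file headers with pandas: reading a file that already has a header row, adding proper column names to a file that has none, and bypassing or renaming the existing headers of a CSV file.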
## Importing libraries and packages
1# Mathematical operations and data manipulation
2import pandas as pd
## Set paths
1# Path to datasets directory
2data_path = "./datasets"
3# Path to assets directory (for saving results to)
4assets_path = "./assets"
## Loading datasets
1dataset_1 = pd.read_csv(f"{data_path}/example_1.csv")
2dataset_1
 | Column 1 | Column 2 | Column 3 | Column 4 |
---|---|---|---|---|
0 | 2 | 1500 | Good | 300000 |
1 | 3 | 1300 | Fair | 240000 |
2 | 3 | 1900 | Very good | 450000 |
3 | 3 | 1850 | Bad | 280000 |
4 | 2 | 1640 | Good | 310000 |
1dataset_2 = pd.read_csv(f"{data_path}/example_2.csv")
2dataset_2
 | 2 | 1500 | Good | 300000 |
---|---|---|---|---|
0 | 3 | 1300 | Fair | 240000 |
1 | 3 | 1900 | Very good | 450000 |
2 | 3 | 1850 | Bad | 280000 |
3 | 2 | 1640 | Good | 310000 |
1dataset_2 = pd.read_csv(f"{data_path}/example_2.csv", header=None)
2dataset_2
 | 0 | 1 | 2 | 3 |
---|---|---|---|---|
0 | 2 | 1500 | Good | 300000 |
1 | 3 | 1300 | Fair | 240000 |
2 | 3 | 1900 | Very good | 450000 |
3 | 3 | 1850 | Bad | 280000 |
4 | 2 | 1640 | Good | 310000 |
1dataset_2 = pd.read_csv(
2 f"{data_path}/example_2.csv",
3 header=None,
4 names=["Column 1", "Column 2", "Column 3", "Column 4"],
5)
6dataset_2
 | Column 1 | Column 2 | Column 3 | Column 4 |
---|---|---|---|---|
0 | 2 | 1500 | Good | 300000 |
1 | 3 | 1300 | Fair | 240000 |
2 | 3 | 1900 | Very good | 450000 |
3 | 3 | 1850 | Bad | 280000 |
4 | 2 | 1640 | Good | 310000 |
1dataset_4 = pd.read_csv(
2 f"{data_path}/example_1.csv",
3 names=["Column 1", "Column 2", "Column 3", "Column 4"],
4)
5dataset_4
 | Column 1 | Column 2 | Column 3 | Column 4 |
---|---|---|---|---|
0 | Column 1 | Column 2 | Column 3 | Column 4 |
1 | 2 | 1500 | Good | 300000 |
2 | 3 | 1300 | Fair | 240000 |
3 | 3 | 1900 | Very good | 450000 |
4 | 3 | 1850 | Bad | 280000 |
5 | 2 | 1640 | Good | 310000 |
1dataset_4 = pd.read_csv(
2 f"{data_path}/example_1.csv",
3 header=0,
4 names=["Column 1", "Column 2", "Column 3", "Column 4"],
5)
6dataset_4
 | Column 1 | Column 2 | Column 3 | Column 4 |
---|---|---|---|---|
0 | 2 | 1500 | Good | 300000 |
1 | 3 | 1300 | Fair | 240000 |
2 | 3 | 1900 | Very good | 450000 |
3 | 3 | 1850 | Bad | 280000 |
4 | 2 | 1640 | Good | 310000 |