import pandas as pd


# Paths of the monthly yellow-taxi trip files (January through March 2019).
data_list = [
    f'./data/yellow_tripdata_2019-{month:02d}.csv'
    for month in (1, 2, 3)
]
data_list

['./data/yellow_tripdata_2019-01.csv',
 './data/yellow_tripdata_2019-02.csv',
 './data/yellow_tripdata_2019-03.csv']


# Read every CSV named in data_list and stack the rows into one DataFrame.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# file's own row labels are kept, so the same label appears once per file.
df = pd.concat(map(pd.read_csv, data_list), ignore_index=True)
df.head()


import glob
import pandas as pd


# Folder that holds the CSV files to combine.
path = './yellow_taxi_data'
# glob.glob() returns matches in arbitrary order, so sort them to keep the
# file order (and therefore the row order after concatenation) deterministic.
files = sorted(glob.glob(path + "/*.csv"))
files

['./yellow_taxi_data\\yellow_tripdata_2019-01.csv',
 './yellow_taxi_data\\yellow_tripdata_2019-02.csv',
 './yellow_taxi_data\\yellow_tripdata_2019-03.csv']


# Lazily parse the CSVs: the generator yields one DataFrame per path in
# `files`, and no file is read until pd.concat() consumes the generator.
df_list = (pd.read_csv(csv_path) for csv_path in files)


# Stack the per-file frames row-wise and renumber the rows with a fresh index.
df = pd.concat(objs=df_list, ignore_index=True)
df.head()


# Folder that holds the CSV files to combine.
path = './yellow_taxi_data'
# Compact form: find the CSVs, read each one, and concatenate the results.
# sorted() fixes the file order (glob.glob() returns matches in arbitrary
# order), and ignore_index=True gives the combined frame a fresh 0..n-1 index
# instead of repeating each file's own row labels.
df = pd.concat(
    map(pd.read_csv, sorted(glob.glob(path + "/*.csv"))),
    ignore_index=True,
)
df.head()

	VendorID	tpep_pickup_datetime	tpep_dropoff_datetime	passenger_count	trip_distance	RatecodeID	store_and_fwd_flag	PULocationID	DOLocationID	payment_type	fare_amount	extra	mta_tax	tip_amount	improvement_surcharge	total_amount	congestion_surcharge
0	1	2019-01-01 00:46:40	2019-01-01 00:53:20	1	1.5	1	N	151	239	1	7.0	0.5	0.5	1.65	0.3	9.95	NaN
1	1	2019-01-01 00:59:47	2019-01-01 01:18:59	1	2.6	1	N	239	246	1	14.0	0.5	0.5	1.00	0.3	16.30	NaN
2	2	2018-12-21 13:48:30	2018-12-21 13:52:40	3	0.0	1	N	236	236	1	4.5	0.5	0.5	0.00	0.3	5.80	NaN
3	2	2018-11-28 15:52:25	2018-11-28 15:55:45	5	0.0	1	N	193	193	2	3.5	0.5	0.5	0.00	0.3	7.55	NaN
4	2	2018-11-28 15:56:57	2018-11-28 15:58:33	5	0.0	2	N	193	193	2	52.0	0.0	0.5	0.00	0.3	55.55	NaN

	VendorID	tpep_pickup_datetime	tpep_dropoff_datetime	passenger_count	trip_distance	RatecodeID	store_and_fwd_flag	PULocationID	DOLocationID	payment_type	fare_amount	extra	mta_tax	tip_amount	improvement_surcharge	total_amount	congestion_surcharge
0	1	2019-01-01 00:46:40	2019-01-01 00:53:20	1	1.5	1	N	151	239	1	7.0	0.5	0.5	1.65	0.3	9.95	NaN
1	1	2019-01-01 00:59:47	2019-01-01 01:18:59	1	2.6	1	N	239	246	1	14.0	0.5	0.5	1.00	0.3	16.30	NaN
2	2	2018-12-21 13:48:30	2018-12-21 13:52:40	3	0.0	1	N	236	236	1	4.5	0.5	0.5	0.00	0.3	5.80	NaN
3	2	2018-11-28 15:52:25	2018-11-28 15:55:45	5	0.0	1	N	193	193	2	3.5	0.5	0.5	0.00	0.3	7.55	NaN
4	2	2018-11-28 15:56:57	2018-11-28 15:58:33	5	0.0	2	N	193	193	2	52.0	0.0	0.5	0.00	0.3	55.55	NaN

	VendorID	tpep_pickup_datetime	tpep_dropoff_datetime	passenger_count	trip_distance	RatecodeID	store_and_fwd_flag	PULocationID	DOLocationID	payment_type	fare_amount	extra	mta_tax	tip_amount	improvement_surcharge	total_amount	congestion_surcharge
0	1	2019-01-01 00:46:40	2019-01-01 00:53:20	1	1.5	1	N	151	239	1	7.0	0.5	0.5	1.65	0.3	9.95	NaN
1	1	2019-01-01 00:59:47	2019-01-01 01:18:59	1	2.6	1	N	239	246	1	14.0	0.5	0.5	1.00	0.3	16.30	NaN
2	2	2018-12-21 13:48:30	2018-12-21 13:52:40	3	0.0	1	N	236	236	1	4.5	0.5	0.5	0.00	0.3	5.80	NaN
3	2	2018-11-28 15:52:25	2018-11-28 15:55:45	5	0.0	1	N	193	193	2	3.5	0.5	0.5	0.00	0.3	7.55	NaN
4	2	2018-11-28 15:56:57	2018-11-28 15:58:33	5	0.0	2	N	193	193	2	52.0	0.0	0.5	0.00	0.3	55.55	NaN

Table of Contents

1. Read datasets from a list¶

(1) Import the required library¶

(2) Create a list¶

(3) Read and combine the files from the list¶

2. Read datasets from a folder¶

(1) Import required libraries¶

(3) Obtain a list of all files from a folder¶

(4) Read each CSV file into DataFrame¶

(5) Combine all DataFrames into one¶

3. Using the `map()` function instead of a loop¶

Summary¶

Table of Contents

1. Read datasets from a list¶

(1) Import the required library¶

(2) Create a list¶

(3) Read and combine the files from the list¶

2. Read datasets from a folder¶

(1) Import required libraries¶

(3) Obtain a list of all files from a folder¶

(4) Read each CSV file into DataFrame¶

(5) Combine all DataFrames into one¶

3. Using the map() function instead of a loop¶

Summary¶

3. Using the `map()` function instead of a loop¶