Need to fill the gaps in time series data? Nothing easier than that with pandas!
import pandas as pd; import numpy as np
days = pd.date_range(pd.datetime(2018,1,3), pd.datetime(2018,1,10), freq='B')
df = pd.DataFrame({
'price' : [ 8, 9, 10, 11, 12, 13 ],
'volume' : [125, 110, 90, 100, 140, 120 ]
}, index = days)
df
|
price |
volume |
2018-01-03 |
8 |
125 |
2018-01-04 |
9 |
110 |
2018-01-05 |
10 |
90 |
2018-01-08 |
11 |
100 |
2018-01-09 |
12 |
140 |
2018-01-10 |
13 |
120 |
ds = df.resample('D').last()
ds
|
price |
volume |
2018-01-03 |
8.0 |
125.0 |
2018-01-04 |
9.0 |
110.0 |
2018-01-05 |
10.0 |
90.0 |
2018-01-06 |
NaN |
NaN |
2018-01-07 |
NaN |
NaN |
2018-01-08 |
11.0 |
100.0 |
2018-01-09 |
12.0 |
140.0 |
2018-01-10 |
13.0 |
120.0 |
ds['price' ].fillna(method='ffill', inplace=True)
ds['volume'].fillna(0, inplace=True)
ds
|
price |
volume |
2018-01-03 |
8.0 |
125.0 |
2018-01-04 |
9.0 |
110.0 |
2018-01-05 |
10.0 |
90.0 |
2018-01-06 |
10.0 |
0.0 |
2018-01-07 |
10.0 |
0.0 |
2018-01-08 |
11.0 |
100.0 |
2018-01-09 |
12.0 |
140.0 |
2018-01-10 |
13.0 |
120.0 |
Panel Data
df1 = pd.DataFrame({
'price' : [ 8, 9, 10, 11, 12, 13 ],
'volume' : [125, 110, 90, 100, 140, 120 ]
}, index = days)
df1['stock'] = 'A'
df2 = pd.DataFrame({
'price' : [ 8, 9, 10, 11, 12 ],
'volume' : [125, 110, 90, 100, 140 ]
}, index = days[:-1])
df2['stock'] = 'B'
df = pd.concat([df1, df2])
df.index.name = 'day'
df = df.reset_index().set_index(['stock', 'day'])
df
|
|
price |
volume |
stock |
day |
|
|
A |
2018-01-03 |
8 |
125 |
2018-01-04 |
9 |
110 |
2018-01-05 |
10 |
90 |
2018-01-08 |
11 |
100 |
2018-01-09 |
12 |
140 |
2018-01-10 |
13 |
120 |
B |
2018-01-03 |
8 |
125 |
2018-01-04 |
9 |
110 |
2018-01-05 |
10 |
90 |
2018-01-08 |
11 |
100 |
2018-01-09 |
12 |
140 |
df.reset_index().set_index('day').groupby('stock').resample("D").last()
|
|
stock |
price |
volume |
stock |
day |
|
|
|
A |
2018-01-03 |
A |
8.0 |
125.0 |
2018-01-04 |
A |
9.0 |
110.0 |
2018-01-05 |
A |
10.0 |
90.0 |
2018-01-06 |
NaN |
NaN |
NaN |
2018-01-07 |
NaN |
NaN |
NaN |
2018-01-08 |
A |
11.0 |
100.0 |
2018-01-09 |
A |
12.0 |
140.0 |
2018-01-10 |
A |
13.0 |
120.0 |
B |
2018-01-03 |
B |
8.0 |
125.0 |
2018-01-04 |
B |
9.0 |
110.0 |
2018-01-05 |
B |
10.0 |
90.0 |
2018-01-06 |
NaN |
NaN |
NaN |
2018-01-07 |
NaN |
NaN |
NaN |
2018-01-08 |
B |
11.0 |
100.0 |
2018-01-09 |
B |
12.0 |
140.0 |