1067 lines
41 KiB
Python
1067 lines
41 KiB
Python
from datetime import datetime, timedelta, timezone
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import pytest
|
|
import pytz
|
|
|
|
import vectorbt as vbt
|
|
from vectorbt.utils.config import merge_dicts
|
|
from vectorbt.utils.datetime_ import to_timezone
|
|
|
|
# Default RNG seed: MyData.download_symbol seeds np.random with it, and
# MyData.update_symbol falls back to it (plus one) when no seed was stored.
seed = 42
|
|
|
|
|
|
# ############# Global ############# #
|
|
|
|
def setup_module():
    """Module-level setup hook: adjust the global vectorbt settings for these tests.

    Turns on ``check_func_suffix`` in the numba settings, disables caching and
    empties both the caching whitelist and blacklist.
    """
    cfg = vbt.settings
    cfg.numba['check_func_suffix'] = True
    cfg.caching.enabled = False
    cfg.caching.whitelist = []
    cfg.caching.blacklist = []
|
|
|
|
|
|
def teardown_module():
    """Module-level teardown hook: reset the global vectorbt settings changed in ``setup_module``."""
    vbt.settings.reset()
|
|
|
|
|
|
# ############# base.py ############# #
|
|
|
|
|
|
class MyData(vbt.Data):
    """``vbt.Data`` subclass that fabricates seeded uniform random data for the tests below."""

    @classmethod
    def download_symbol(cls, symbol, shape=(5, 3), start_date=datetime(2020, 1, 1), columns=None, index_mask=None,
                        column_mask=None, return_arr=False, tz_localize=None, seed=seed):
        """Generate data for one symbol.

        Seeds ``np.random`` with ``seed``, draws uniform values of the given
        ``shape`` and adds ``symbol`` to them, so each symbol gets its own offset.

        Args:
            symbol: Numeric symbol; added to every generated value.
            shape: Shape of the generated array (1-dim -> Series, otherwise DataFrame).
            start_date: First timestamp of the daily index.
            columns: Series name (1-dim) or DataFrame columns.
            index_mask: Optional row selector applied with ``.loc``.
            column_mask: Optional column selector applied with ``.loc`` (DataFrame only).
            return_arr: If true, return the raw NumPy array without any index.
            tz_localize: Optional timezone passed to ``tz_localize``.
            seed: Seed for ``np.random.seed``.

        Returns:
            The raw array if ``return_arr`` is set, else a ``pd.Series`` or ``pd.DataFrame``.
        """
        np.random.seed(seed)
        values = np.random.uniform(size=shape) + symbol
        if return_arr:
            return values

        # One timestamp per row, spaced one day apart from start_date.
        n_rows = values.shape[0]
        index = [start_date + timedelta(days=offset) for offset in range(n_rows)]

        one_dim = values.ndim == 1
        if one_dim:
            obj = pd.Series(values, index=index, name=columns)
        else:
            obj = pd.DataFrame(values, index=index, columns=columns)

        if index_mask is not None:
            obj = obj.loc[index_mask]
        # The column mask is only meaningful (and only applied) for frames.
        if not one_dim and column_mask is not None:
            obj = obj.loc[:, column_mask]
        if tz_localize is not None:
            obj = obj.tz_localize(tz_localize)
        return obj

    def update_symbol(self, symbol, n=1, **kwargs):
        """Generate ``n`` fresh rows for ``symbol``, starting at its last known timestamp.

        Reuses the keyword arguments stored from the download, but replaces the
        row count with ``n`` and bumps the seed by one so the new rows differ
        from the originally downloaded ones. ``kwargs`` override stored arguments.
        """
        params = self.select_symbol_kwargs(symbol, self.download_kwargs)
        params['start_date'] = self.data[symbol].index[-1]

        old_shape = params.pop('shape', (5, 3))
        if len(old_shape) > 1:
            new_shape = (n, old_shape[1])
        else:
            new_shape = (n,)
        next_seed = params.pop('seed', seed) + 1

        merged = merge_dicts(params, kwargs)
        return self.download_symbol(symbol, shape=new_shape, seed=next_seed, **merged)
|
|
|
|
|
|
class TestData:
    """Tests for ``vbt.Data`` (download, update, concat, get, indexing, stats) driven through ``MyData``."""

    # Six consecutive days: the first five back a default download, the sixth
    # is the extra row that shows up after ``update(n=2)``.
    _DAYS = [
        '2020-01-01 00:00:00',
        '2020-01-02 00:00:00',
        '2020-01-03 00:00:00',
        '2020-01-04 00:00:00',
        '2020-01-05 00:00:00',
        '2020-01-06 00:00:00',
    ]

    # Expected values of a default-seed download with shape (5,), per symbol.
    _SYM0_1D = [
        0.3745401188473625,
        0.9507143064099162,
        0.7319939418114051,
        0.5986584841970366,
        0.15601864044243652,
    ]
    _SYM1_1D = [
        1.3745401188473625,
        1.9507143064099162,
        1.7319939418114051,
        1.5986584841970366,
        1.15601864044243652,
    ]

    # Expected values of a default-seed download with shape (5, 3), per symbol.
    _SYM0_2D = [
        [0.3745401188473625, 0.9507143064099162, 0.7319939418114051],
        [0.5986584841970366, 0.15601864044243652, 0.15599452033620265],
        [0.05808361216819946, 0.8661761457749352, 0.6011150117432088],
        [0.7080725777960455, 0.020584494295802447, 0.9699098521619943],
        [0.8324426408004217, 0.21233911067827616, 0.18182496720710062],
    ]
    _SYM1_2D = [
        [1.3745401188473625, 1.9507143064099162, 1.7319939418114051],
        [1.5986584841970366, 1.15601864044243652, 1.15599452033620265],
        [1.05808361216819946, 1.8661761457749352, 1.6011150117432088],
        [1.7080725777960455, 1.020584494295802447, 1.9699098521619943],
        [1.8324426408004217, 1.21233911067827616, 1.18182496720710062],
    ]

    # Expected (5, 3) frames after applying the index/column masks with 'nan' filling.
    _SYM0_MASKED = [
        [np.nan, np.nan, np.nan],
        [np.nan, 0.15601864044243652, 0.15599452033620265],
        [np.nan, 0.8661761457749352, 0.6011150117432088],
        [np.nan, 0.020584494295802447, 0.9699098521619943],
        [np.nan, 0.21233911067827616, 0.18182496720710062],
    ]
    _SYM1_MASKED = [
        [1.3745401188473625, 1.9507143064099162, np.nan],
        [1.5986584841970366, 1.15601864044243652, np.nan],
        [1.05808361216819946, 1.8661761457749352, np.nan],
        [1.7080725777960455, 1.020584494295802447, np.nan],
        [np.nan, np.nan, np.nan],
    ]

    # Expected frames after applying the same masks with 'drop': only the
    # shared rows and the single shared column survive.
    _SYM0_DROPPED = [
        [0.15601864044243652],
        [0.8661761457749352],
        [0.020584494295802447],
    ]
    _SYM1_DROPPED = [
        [1.15601864044243652],
        [1.8661761457749352],
        [1.020584494295802447],
    ]

    # Rows expected from an update of symbol 0 (first and second new row).
    _UPD_ROW1 = [0.11505456638977896, 0.6090665392794814, 0.13339096418598828]
    _UPD_ROW2 = [0.24058961996534878, 0.3271390558111398, 0.8591374909485977]

    @classmethod
    def _utc_index(cls, periods=5):
        """Daily UTC index over the first ``periods`` entries of ``_DAYS``."""
        return pd.DatetimeIndex(cls._DAYS[:periods], freq='D', tz=timezone.utc)

    @classmethod
    def _berlin_index(cls):
        """Five-day UTC index converted to the Europe/Berlin timezone."""
        utc_index = pd.DatetimeIndex(cls._DAYS[:5], freq='D', tz=pytz.utc)
        return utc_index.tz_convert(to_timezone('Europe/Berlin'))

    @staticmethod
    def _index_mask():
        """Per-symbol row mask: symbol 0 lacks the first row, symbol 1 the last."""
        return vbt.symbol_dict({
            0: [False, True, True, True, True],
            1: [True, True, True, True, False]
        })

    @staticmethod
    def _column_mask():
        """Per-symbol column mask: symbol 0 lacks the first column, symbol 1 the last."""
        return vbt.symbol_dict({
            0: [False, True, True],
            1: [True, True, False]
        })

    @classmethod
    def _feature(cls, pos):
        """Column ``pos`` of the expected (5, 3) data of symbol 0."""
        return [row[pos] for row in cls._SYM0_2D]

    @classmethod
    def _feature_by_symbol(cls, pos):
        """Column ``pos`` of the expected (5, 3) data as ``[symbol 0, symbol 1]`` rows."""
        return [[row0[pos], row1[pos]] for row0, row1 in zip(cls._SYM0_2D, cls._SYM1_2D)]

    @classmethod
    def _series_by_symbol(cls):
        """Expected (5,) data as ``[symbol 0, symbol 1]`` rows."""
        return [[val0, val1] for val0, val1 in zip(cls._SYM0_1D, cls._SYM1_1D)]

    def test_config(self, tmp_path):
        """Round trips through ``dumps``/``loads`` and ``save``/``load`` must compare equal."""
        original = MyData.download([0, 1], shape=(5, 3), columns=['feat0', 'feat1', 'feat2'])
        assert MyData.loads(original.dumps()) == original

        target = tmp_path / 'data'
        original.save(target)
        assert MyData.load(target) == original

    def test_download(self):
        """Check ``download`` for 1-dim/2-dim shapes, one/two symbols, timezones and masks."""
        nan = np.nan
        feats = ['feat0', 'feat1', 'feat2']
        series_eq = pd.testing.assert_series_equal
        frame_eq = pd.testing.assert_frame_equal

        # return_arr=True: expected objects carry no datetime index.
        series_eq(
            MyData.download(0, shape=(5,), return_arr=True).data[0],
            pd.Series(self._SYM0_1D)
        )
        frame_eq(
            MyData.download(0, shape=(5, 3), return_arr=True).data[0],
            pd.DataFrame(self._SYM0_2D)
        )

        # Single symbol with the default daily index (expected in UTC).
        index = self._utc_index()
        series_eq(
            MyData.download(0, shape=(5,)).data[0],
            pd.Series(self._SYM0_1D, index=index)
        )
        series_eq(
            MyData.download(0, shape=(5,), columns='feat0').data[0],
            pd.Series(self._SYM0_1D, index=index, name='feat0')
        )
        frame_eq(
            MyData.download(0, shape=(5, 3)).data[0],
            pd.DataFrame(self._SYM0_2D, index=index)
        )
        frame_eq(
            MyData.download(0, shape=(5, 3), columns=feats).data[0],
            pd.DataFrame(self._SYM0_2D, index=index, columns=pd.Index(feats, dtype='object'))
        )

        # Two symbols.
        both = MyData.download([0, 1], shape=(5,))
        series_eq(both.data[0], pd.Series(self._SYM0_1D, index=index))
        series_eq(both.data[1], pd.Series(self._SYM1_1D, index=index))
        both = MyData.download([0, 1], shape=(5, 3))
        frame_eq(both.data[0], pd.DataFrame(self._SYM0_2D, index=index))
        frame_eq(both.data[1], pd.DataFrame(self._SYM1_2D, index=index))

        # Localize to UTC, then convert to Berlin time.
        series_eq(
            MyData.download(0, shape=(5,), tz_localize='UTC', tz_convert='Europe/Berlin').data[0],
            pd.Series(self._SYM0_1D, index=self._berlin_index())
        )

        # Symbols with differing rows: fill with NaN or drop.
        index_mask = self._index_mask()
        filled = MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index='nan')
        series_eq(filled.data[0], pd.Series([nan] + self._SYM0_1D[1:], index=index))
        series_eq(filled.data[1], pd.Series(self._SYM1_1D[:4] + [nan], index=index))
        dropped = MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index='drop')
        series_eq(dropped.data[0], pd.Series(self._SYM0_1D[1:4], index=index[1:4]))
        series_eq(dropped.data[1], pd.Series(self._SYM1_1D[1:4], index=index[1:4]))

        # Symbols with differing rows and columns.
        column_mask = self._column_mask()

        def masked(how_index, how_columns):
            return MyData.download(
                [0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                missing_index=how_index, missing_columns=how_columns
            )

        filled = masked('nan', 'nan')
        frame_eq(filled.data[0], pd.DataFrame(self._SYM0_MASKED, index=index))
        frame_eq(filled.data[1], pd.DataFrame(self._SYM1_MASKED, index=index))

        kept_column = pd.Index([1], dtype='int64')
        dropped = masked('drop', 'drop')
        frame_eq(
            dropped.data[0],
            pd.DataFrame(self._SYM0_DROPPED, index=index[1:4], columns=kept_column)
        )
        frame_eq(
            dropped.data[1],
            pd.DataFrame(self._SYM1_DROPPED, index=index[1:4], columns=kept_column)
        )

        # 'raise' and unknown modes must fail, for the index as well as the columns.
        failing_modes = (
            ('raise', 'nan'),
            ('nan', 'raise'),
            ('test', 'nan'),
            ('nan', 'test'),
        )
        for how_index, how_columns in failing_modes:
            with pytest.raises(Exception):
                masked(how_index, how_columns)

    def test_update(self):
        """Check ``update`` after downloads of every flavour covered in ``test_download``."""
        nan = np.nan
        series_eq = pd.testing.assert_series_equal
        frame_eq = pd.testing.assert_frame_equal
        upd1, upd2 = self._UPD_ROW1, self._UPD_ROW2
        head_1d = self._SYM0_1D[:4]
        head_2d = self._SYM0_2D[:4]

        # Raw arrays (return_arr=True).
        series_eq(
            MyData.download(0, shape=(5,), return_arr=True).update().data[0],
            pd.Series(head_1d + upd1[:1])
        )
        series_eq(
            MyData.download(0, shape=(5,), return_arr=True).update(n=2).data[0],
            pd.Series(head_1d + upd1[:2])
        )
        frame_eq(
            MyData.download(0, shape=(5, 3), return_arr=True).update().data[0],
            pd.DataFrame(head_2d + [upd1])
        )
        frame_eq(
            MyData.download(0, shape=(5, 3), return_arr=True).update(n=2).data[0],
            pd.DataFrame(head_2d + [upd1, upd2])
        )

        # Datetime-indexed data: one new row overwrites the last day,
        # two new rows additionally append a sixth day.
        index = self._utc_index()
        updated_index = self._utc_index(6)
        series_eq(
            MyData.download(0, shape=(5,)).update().data[0],
            pd.Series(head_1d + upd1[:1], index=index)
        )
        series_eq(
            MyData.download(0, shape=(5,)).update(n=2).data[0],
            pd.Series(head_1d + upd1[:2], index=updated_index)
        )
        series_eq(
            MyData.download(0, shape=(5,), tz_localize='UTC', tz_convert='Europe/Berlin')
            .update(tz_localize=None).data[0],
            pd.Series(head_1d + upd1[:1], index=self._berlin_index())
        )

        # Masked 1-dim data.
        index_mask = self._index_mask()
        update_index_mask = vbt.symbol_dict({
            0: [True],
            1: [False]
        })
        update_index_mask2 = vbt.symbol_dict({
            0: [True, False],
            1: [False, True]
        })
        update_cases = (
            dict(index_mask=update_index_mask),
            dict(n=2, index_mask=update_index_mask2),
        )

        def masked_1d(how_index):
            return MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index=how_index)

        updated = masked_1d('nan').update(index_mask=update_index_mask)
        series_eq(
            updated.data[0],
            pd.Series([nan] + self._SYM0_1D[1:4] + upd1[:1], index=index)
        )
        series_eq(
            updated.data[1],
            pd.Series(self._SYM1_1D[:4] + [nan], index=index)
        )
        updated = masked_1d('nan').update(n=2, index_mask=update_index_mask2)
        series_eq(
            updated.data[0],
            pd.Series([nan] + self._SYM0_1D[1:4] + upd1[:1] + [nan], index=updated_index)
        )
        series_eq(
            updated.data[1],
            pd.Series(self._SYM1_1D[:4] + [nan, 1.6090665392794814], index=updated_index)
        )
        # With 'drop' the expected result is the same three shared rows in both update cases.
        for update_kwargs in update_cases:
            updated = masked_1d('drop').update(**update_kwargs)
            series_eq(updated.data[0], pd.Series(self._SYM0_1D[1:4], index=index[1:4]))
            series_eq(updated.data[1], pd.Series(self._SYM1_1D[1:4], index=index[1:4]))

        # Masked 2-dim data.
        column_mask = self._column_mask()

        def masked_2d(how):
            return MyData.download(
                [0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                missing_index=how, missing_columns=how
            )

        sym0_new_row = [nan] + upd1[1:]
        updated = masked_2d('nan').update(index_mask=update_index_mask)
        frame_eq(
            updated.data[0],
            pd.DataFrame(self._SYM0_MASKED[:4] + [sym0_new_row], index=index)
        )
        frame_eq(
            updated.data[1],
            pd.DataFrame(self._SYM1_MASKED, index=index)
        )
        updated = masked_2d('nan').update(n=2, index_mask=update_index_mask2)
        frame_eq(
            updated.data[0],
            pd.DataFrame(self._SYM0_MASKED[:4] + [sym0_new_row, [nan, nan, nan]], index=updated_index)
        )
        frame_eq(
            updated.data[1],
            pd.DataFrame(
                self._SYM1_MASKED + [[1.2405896199653488, 1.3271390558111398, nan]],
                index=updated_index
            )
        )
        kept_column = pd.Index([1], dtype='int64')
        for update_kwargs in update_cases:
            updated = masked_2d('drop').update(**update_kwargs)
            frame_eq(
                updated.data[0],
                pd.DataFrame(self._SYM0_DROPPED, index=index[1:4], columns=kept_column)
            )
            frame_eq(
                updated.data[1],
                pd.DataFrame(self._SYM1_DROPPED, index=index[1:4], columns=kept_column)
            )

    def test_concat(self):
        """Check ``concat``: one entry per feature, holding that feature across symbols."""
        series_eq = pd.testing.assert_series_equal
        frame_eq = pd.testing.assert_frame_equal
        index = self._utc_index()
        feats = ['feat0', 'feat1', 'feat2']
        symbol_columns = pd.Index([0, 1], dtype='int64', name='symbol')

        # 1-dim downloads: a single symbol yields a Series named after the symbol.
        series_eq(
            MyData.download(0, shape=(5,), columns='feat0').concat()['feat0'],
            pd.Series(self._SYM0_1D, index=index, name=0)
        )
        frame_eq(
            MyData.download([0, 1], shape=(5,), columns='feat0').concat()['feat0'],
            pd.DataFrame(self._series_by_symbol(), index=index, columns=symbol_columns)
        )

        # 2-dim downloads, feature by feature.
        single = MyData.download(0, shape=(5, 3), columns=feats).concat()
        for pos, feat in enumerate(feats):
            series_eq(
                single[feat],
                pd.Series(self._feature(pos), index=index, name=0)
            )
        both = MyData.download([0, 1], shape=(5, 3), columns=feats).concat()
        for pos, feat in enumerate(feats):
            frame_eq(
                both[feat],
                pd.DataFrame(self._feature_by_symbol(pos), index=index, columns=symbol_columns)
            )

    def test_get(self):
        """Check ``get`` with no column, a single column and a list of columns."""
        series_eq = pd.testing.assert_series_equal
        frame_eq = pd.testing.assert_frame_equal
        index = self._utc_index()
        feats = ['feat0', 'feat1', 'feat2']
        symbol_columns = pd.Index([0, 1], dtype='int64', name='symbol')

        # Single symbol: the data comes back as downloaded.
        series_eq(
            MyData.download(0, shape=(5,), columns='feat0').get(),
            pd.Series(self._SYM0_1D, index=index, name='feat0')
        )
        single = MyData.download(0, shape=(5, 3), columns=feats)
        frame_eq(
            single.get(),
            pd.DataFrame(self._SYM0_2D, index=index, columns=pd.Index(feats, dtype='object'))
        )
        series_eq(
            single.get('feat0'),
            pd.Series(self._feature(0), index=index, name='feat0')
        )

        # Two symbols: each feature becomes a frame with one column per symbol.
        frame_eq(
            MyData.download([0, 1], shape=(5,), columns='feat0').get(),
            pd.DataFrame(self._series_by_symbol(), index=index, columns=symbol_columns)
        )
        both = MyData.download([0, 1], shape=(5, 3), columns=feats)
        frame_eq(
            both.get('feat0'),
            pd.DataFrame(self._feature_by_symbol(0), index=index, columns=symbol_columns)
        )
        # A list of columns, or no column at all, yields a sequence whose
        # first element is the 'feat0' frame.
        frame_eq(
            both.get(['feat0', 'feat1'])[0],
            pd.DataFrame(self._feature_by_symbol(0), index=index, columns=symbol_columns)
        )
        frame_eq(
            both.get()[0],
            pd.DataFrame(self._feature_by_symbol(0), index=index, columns=symbol_columns)
        )

    def test_indexing(self):
        """Indexing a data instance must give the wrapper of an equivalently shaped download."""
        feats = ['feat0', 'feat1', 'feat2']
        narrow = MyData.download([0, 1], shape=(5,), columns='feat0')
        wide = MyData.download([0, 1], shape=(5, 3), columns=feats)

        # Row slicing.
        shorter_narrow = MyData.download([0, 1], shape=(3,), columns='feat0')
        assert narrow.iloc[:3].wrapper == shorter_narrow.wrapper
        shorter_wide = MyData.download([0, 1], shape=(3, 3), columns=feats)
        assert wide.iloc[:3].wrapper == shorter_wide.wrapper

        # Column selection: a scalar key reduces to 1-dim, a list key keeps 2-dim.
        assert wide['feat0'].wrapper == narrow.wrapper
        one_column = MyData.download([0, 1], shape=(5, 1), columns=['feat0'])
        assert wide[['feat0']].wrapper == one_column.wrapper

    def test_stats(self):
        """Check ``stats`` aggregated, per column, grouped and without aggregation."""
        series_eq = pd.testing.assert_series_equal
        data = MyData.download(
            [0, 1], shape=(5, 3), index_mask=self._index_mask(), column_mask=self._column_mask(),
            missing_index='nan', missing_columns='nan', columns=['feat0', 'feat1', 'feat2'])

        stats_index = pd.Index([
            'Start', 'End', 'Period', 'Total Symbols', 'Null Counts: 0', 'Null Counts: 1'
        ], dtype='object')

        def expected_stats(null_counts, name):
            # Start/End/Period/Total Symbols are the same in every variant;
            # only the two null counts and the series name differ.
            return pd.Series(
                [
                    pd.Timestamp('2020-01-01 00:00:00+0000', tz='UTC'),
                    pd.Timestamp('2020-01-05 00:00:00+0000', tz='UTC'),
                    pd.Timedelta('5 days 00:00:00'),
                    2, *null_counts
                ],
                index=stats_index,
                name=name
            )

        series_eq(
            data.stats(),
            expected_stats((2.3333333333333335, 2.3333333333333335), 'agg_func_mean')
        )
        series_eq(
            data.stats(column='feat0'),
            expected_stats((5, 1), 'feat0')
        )
        series_eq(
            data.stats(group_by=True),
            expected_stats((7, 7), 'group')
        )

        # Selecting or grouping up front must match passing the option to stats().
        series_eq(
            data['feat0'].stats(),
            data.stats(column='feat0')
        )
        grouped = data.replace(wrapper=data.wrapper.replace(group_by=True))
        series_eq(
            grouped.stats(),
            data.stats(group_by=True)
        )

        # Without aggregation: one row per column, one column per metric.
        stats_df = data.stats(agg_func=None)
        assert stats_df.shape == (3, 6)
        pd.testing.assert_index_equal(stats_df.index, data.wrapper.columns)
        pd.testing.assert_index_equal(stats_df.columns, stats_index)
|
|
|
|
|
|
# ############# updater.py ############# #
|
|
|
|
class TestDataUpdater:
    """Tests for ``vbt.DataUpdater`` driven through ``MyData``."""

    def test_update(self):
        """A single ``update`` must leave the updater holding the once-updated data."""
        initial = MyData.download(0, shape=(5,), return_arr=True)
        updater = vbt.DataUpdater(initial)
        updater.update()

        # Both the attribute and the config entry must reflect the update.
        assert updater.data == initial.update()
        assert updater.config['data'] == initial.update()

    def test_update_every(self):
        """``update_every`` must keep updating until the update hook cancels it."""
        data = MyData.download(0, shape=(5,), return_arr=True)
        counter = dict(call_count=0)

        class CountingUpdater(vbt.DataUpdater):
            # The parameter must be called ``kwargs``: it is passed by keyword below.
            def update(self, kwargs):
                super().update()
                kwargs['call_count'] += 1
                # NOTE(review): raising CancelledError presumably stops update_every;
                # the assertions below expect exactly five updates to have happened.
                if kwargs['call_count'] == 5:
                    raise vbt.CancelledError

        updater = CountingUpdater(data)
        updater.update_every(kwargs=counter)

        # Replay the same five updates by hand to build the expected data.
        for _ in range(5):
            data = data.update()
        assert updater.data == data
        assert updater.config['data'] == data
|