# quant/vectorbt/tests/test_data.py

from datetime import datetime, timedelta, timezone

import numpy as np
import pandas as pd
import pytest
import pytz

import vectorbt as vbt
from vectorbt.utils.config import merge_dicts
from vectorbt.utils.datetime_ import to_timezone

# Default RNG seed for MyData.download_symbol; MyData.update_symbol falls back to it and adds 1.
seed = 42


# ############# Global ############# #

def setup_module():
    """Module-level setup hook: put vectorbt's global settings into the state these tests use.

    Switches on the ``check_func_suffix`` numba option, turns caching off and
    assigns empty lists to the caching whitelist and blacklist.
    """
    settings = vbt.settings
    settings.numba['check_func_suffix'] = True

    caching = settings.caching
    caching.enabled = False
    caching.whitelist = []
    caching.blacklist = []


def teardown_module():
    """Module-level teardown hook: call ``reset()`` on vectorbt's global settings."""
    settings = vbt.settings
    settings.reset()


# ############# base.py ############# #


class MyData(vbt.Data):
    """``vbt.Data`` subclass for tests that fabricates its data from a seeded NumPy RNG."""

    @classmethod
    def download_symbol(cls, symbol, shape=(5, 3), start_date=datetime(2020, 1, 1), columns=None, index_mask=None,
                        column_mask=None, return_arr=False, tz_localize=None, seed=seed):
        """Fabricate data for `symbol` from NumPy's global RNG.

        After ``np.random.seed(seed)``, draws ``np.random.uniform(size=shape)`` and adds
        `symbol` to it. If `return_arr` is true, that array is returned as-is.

        Otherwise the array gets one label per row, starting at `start_date` and advancing
        in one-day steps, and is wrapped in a ``pd.Series`` named `columns` (1-dim array)
        or a ``pd.DataFrame`` with `columns` (any other array). `index_mask` and, for
        frames only, `column_mask` are applied through ``.loc`` when given; the result is
        then passed through ``tz_localize`` unless `tz_localize` is None.
        """
        np.random.seed(seed)
        values = np.random.uniform(size=shape) + symbol
        if return_arr:
            return values

        n_rows = values.shape[0]
        dates = [start_date + timedelta(days=offset) for offset in range(n_rows)]

        is_series = values.ndim == 1
        if is_series:
            obj = pd.Series(values, index=dates, name=columns)
        else:
            obj = pd.DataFrame(values, index=dates, columns=columns)

        # Row selection comes first, then column selection (frames only), then localization.
        if index_mask is not None:
            obj = obj.loc[index_mask]
        if not is_series and column_mask is not None:
            obj = obj.loc[:, column_mask]
        if tz_localize is not None:
            obj = obj.tz_localize(tz_localize)
        return obj

    def update_symbol(self, symbol, n=1, **kwargs):
        """Fabricate `n` new rows for `symbol`, starting at its last stored index label.

        Takes this symbol's download keyword arguments, sets ``start_date`` to the last
        index entry of ``self.data[symbol]``, replaces the row count of ``shape`` by `n`
        (keeping the second dimension if there is one), increments the seed by one and
        combines the remainder with `kwargs` through ``merge_dicts`` before delegating
        to `download_symbol`.
        """
        symbol_kwargs = self.select_symbol_kwargs(symbol, self.download_kwargs)
        symbol_kwargs['start_date'] = self.data[symbol].index[-1]

        old_shape = symbol_kwargs.pop('shape', (5, 3))
        if len(old_shape) > 1:
            updated_shape = (n, old_shape[1])
        else:
            updated_shape = (n,)

        # A different seed than the download, so the new rows carry different values.
        updated_seed = symbol_kwargs.pop('seed', seed) + 1

        merged_kwargs = merge_dicts(symbol_kwargs, kwargs)
        return self.download_symbol(symbol, shape=updated_shape, seed=updated_seed, **merged_kwargs)


class TestData:
    def test_config(self, tmp_path):
        """Round-trip a downloaded `MyData` through ``dumps``/``loads`` and ``save``/``load``."""
        original = MyData.download([0, 1], shape=(5, 3), columns=['feat0', 'feat1', 'feat2'])

        restored = MyData.loads(original.dumps())
        assert restored == original

        target = tmp_path / 'data'
        original.save(target)
        assert MyData.load(target) == original

    def test_download(self):
        """Check `MyData.download` for raw arrays, labelled data, several symbols, timezones and masks."""
        series_eq = pd.testing.assert_series_equal
        frame_eq = pd.testing.assert_frame_equal
        nan = np.nan

        # Expected samples for the default seed: one list/grid per symbol.
        flat_0 = [
            0.3745401188473625,
            0.9507143064099162,
            0.7319939418114051,
            0.5986584841970366,
            0.15601864044243652,
        ]
        flat_1 = [
            1.3745401188473625,
            1.9507143064099162,
            1.7319939418114051,
            1.5986584841970366,
            1.15601864044243652,
        ]
        grid_0 = [
            [0.3745401188473625, 0.9507143064099162, 0.7319939418114051],
            [0.5986584841970366, 0.15601864044243652, 0.15599452033620265],
            [0.05808361216819946, 0.8661761457749352, 0.6011150117432088],
            [0.7080725777960455, 0.020584494295802447, 0.9699098521619943],
            [0.8324426408004217, 0.21233911067827616, 0.18182496720710062],
        ]
        grid_1 = [
            [1.3745401188473625, 1.9507143064099162, 1.7319939418114051],
            [1.5986584841970366, 1.15601864044243652, 1.15599452033620265],
            [1.05808361216819946, 1.8661761457749352, 1.6011150117432088],
            [1.7080725777960455, 1.020584494295802447, 1.9699098521619943],
            [1.8324426408004217, 1.21233911067827616, 1.18182496720710062],
        ]
        days = [
            '2020-01-01 00:00:00',
            '2020-01-02 00:00:00',
            '2020-01-03 00:00:00',
            '2020-01-04 00:00:00',
            '2020-01-05 00:00:00',
        ]
        features = ['feat0', 'feat1', 'feat2']

        # Raw arrays: the expected objects carry default labels.
        series_eq(
            MyData.download(0, shape=(5,), return_arr=True).data[0],
            pd.Series(flat_0)
        )
        frame_eq(
            MyData.download(0, shape=(5, 3), return_arr=True).data[0],
            pd.DataFrame(grid_0)
        )

        # Labelled data for a single symbol, with and without explicit columns.
        index = pd.DatetimeIndex(days, freq='D', tz=timezone.utc)
        series_eq(
            MyData.download(0, shape=(5,)).data[0],
            pd.Series(flat_0, index=index)
        )
        series_eq(
            MyData.download(0, shape=(5,), columns='feat0').data[0],
            pd.Series(flat_0, index=index, name='feat0')
        )
        frame_eq(
            MyData.download(0, shape=(5, 3)).data[0],
            pd.DataFrame(grid_0, index=index)
        )
        frame_eq(
            MyData.download(0, shape=(5, 3), columns=features).data[0],
            pd.DataFrame(grid_0, index=index, columns=pd.Index(features, dtype='object'))
        )

        # Two symbols at once.
        for sym, expected in ((0, flat_0), (1, flat_1)):
            series_eq(
                MyData.download([0, 1], shape=(5,)).data[sym],
                pd.Series(expected, index=index)
            )
        for sym, expected in ((0, grid_0), (1, grid_1)):
            frame_eq(
                MyData.download([0, 1], shape=(5, 3)).data[sym],
                pd.DataFrame(expected, index=index)
            )

        # Localize to UTC, then convert to another timezone.
        berlin_index = pd.DatetimeIndex(days, freq='D', tz=pytz.utc).tz_convert(to_timezone('Europe/Berlin'))
        series_eq(
            MyData.download(0, shape=(5,), tz_localize='UTC', tz_convert='Europe/Berlin').data[0],
            pd.Series(flat_0, index=berlin_index)
        )

        # Symbol 0 lacks the first row, symbol 1 lacks the last row.
        index_mask = vbt.symbol_dict({
            0: [False, True, True, True, True],
            1: [True, True, True, True, False]
        })
        shared_index = index[1:4]

        nan_flat = ((0, [nan] + flat_0[1:]), (1, flat_1[:4] + [nan]))
        for sym, expected in nan_flat:
            series_eq(
                MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index='nan').data[sym],
                pd.Series(expected, index=index)
            )
        drop_flat = ((0, flat_0[1:4]), (1, flat_1[1:4]))
        for sym, expected in drop_flat:
            series_eq(
                MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index='drop').data[sym],
                pd.Series(expected, index=shared_index)
            )

        # Symbol 0 additionally lacks the first column, symbol 1 the last column.
        column_mask = vbt.symbol_dict({
            0: [False, True, True],
            1: [True, True, False]
        })
        blank_row = [nan, nan, nan]

        nan_grid = (
            (0, [blank_row] + [[nan] + row[1:] for row in grid_0[1:]]),
            (1, [row[:2] + [nan] for row in grid_1[:4]] + [blank_row]),
        )
        for sym, expected in nan_grid:
            frame_eq(
                MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                missing_index='nan', missing_columns='nan').data[sym],
                pd.DataFrame(expected, index=index)
            )
        drop_grid = (
            (0, [[row[1]] for row in grid_0[1:4]]),
            (1, [[row[1]] for row in grid_1[1:4]]),
        )
        for sym, expected in drop_grid:
            frame_eq(
                MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                missing_index='drop', missing_columns='drop').data[sym],
                pd.DataFrame(expected, index=shared_index, columns=pd.Index([1], dtype='int64'))
            )

        # 'raise' and unrecognized options must both end in an exception.
        failing_modes = (('raise', 'nan'), ('nan', 'raise'), ('test', 'nan'), ('nan', 'test'))
        for index_mode, columns_mode in failing_modes:
            with pytest.raises(Exception):
                MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                missing_index=index_mode, missing_columns=columns_mode)

    def test_update(self):
        """Check `update()` after each kind of download: raw arrays, labelled data, timezones and masks."""
        series_eq = pd.testing.assert_series_equal
        frame_eq = pd.testing.assert_frame_equal
        nan = np.nan

        # First four downloaded rows per symbol; the fifth downloaded row never
        # appears in the expectations below.
        kept_flat_0 = [0.3745401188473625, 0.9507143064099162, 0.7319939418114051, 0.5986584841970366]
        kept_flat_1 = [1.3745401188473625, 1.9507143064099162, 1.7319939418114051, 1.5986584841970366]
        kept_grid_0 = [
            [0.3745401188473625, 0.9507143064099162, 0.7319939418114051],
            [0.5986584841970366, 0.15601864044243652, 0.15599452033620265],
            [0.05808361216819946, 0.8661761457749352, 0.6011150117432088],
            [0.7080725777960455, 0.020584494295802447, 0.9699098521619943],
        ]
        kept_grid_1 = [
            [1.3745401188473625, 1.9507143064099162, 1.7319939418114051],
            [1.5986584841970366, 1.15601864044243652, 1.15599452033620265],
            [1.05808361216819946, 1.8661761457749352, 1.6011150117432088],
            [1.7080725777960455, 1.020584494295802447, 1.9699098521619943],
        ]
        # Rows expected from the update for symbol 0 (MyData.update_symbol uses seed + 1).
        fresh_flat = [0.11505456638977896, 0.6090665392794814]
        fresh_grid = [
            [0.11505456638977896, 0.6090665392794814, 0.13339096418598828],
            [0.24058961996534878, 0.3271390558111398, 0.8591374909485977],
        ]
        days = [
            '2020-01-01 00:00:00',
            '2020-01-02 00:00:00',
            '2020-01-03 00:00:00',
            '2020-01-04 00:00:00',
            '2020-01-05 00:00:00',
        ]

        # Raw arrays: one new row replaces the last one, two new rows also append one.
        series_eq(
            MyData.download(0, shape=(5,), return_arr=True).update().data[0],
            pd.Series(kept_flat_0 + fresh_flat[:1])
        )
        series_eq(
            MyData.download(0, shape=(5,), return_arr=True).update(n=2).data[0],
            pd.Series(kept_flat_0 + fresh_flat)
        )
        frame_eq(
            MyData.download(0, shape=(5, 3), return_arr=True).update().data[0],
            pd.DataFrame(kept_grid_0 + fresh_grid[:1])
        )
        frame_eq(
            MyData.download(0, shape=(5, 3), return_arr=True).update(n=2).data[0],
            pd.DataFrame(kept_grid_0 + fresh_grid)
        )

        # Labelled data.
        index = pd.DatetimeIndex(days, freq='D', tz=timezone.utc)
        series_eq(
            MyData.download(0, shape=(5,)).update().data[0],
            pd.Series(kept_flat_0 + fresh_flat[:1], index=index)
        )
        updated_index = pd.DatetimeIndex(days + ['2020-01-06 00:00:00'], freq='D', tz=timezone.utc)
        series_eq(
            MyData.download(0, shape=(5,)).update(n=2).data[0],
            pd.Series(kept_flat_0 + fresh_flat, index=updated_index)
        )

        # Timezone-converted data, updated without localizing again.
        berlin_index = pd.DatetimeIndex(days, freq='D', tz=pytz.utc).tz_convert(to_timezone('Europe/Berlin'))
        series_eq(
            MyData.download(0, shape=(5,), tz_localize='UTC', tz_convert='Europe/Berlin')
            .update(tz_localize=None).data[0],
            pd.Series(kept_flat_0 + fresh_flat[:1], index=berlin_index)
        )

        # Row masks for the download and for the one-row and two-row updates.
        index_mask = vbt.symbol_dict({
            0: [False, True, True, True, True],
            1: [True, True, True, True, False]
        })
        update_index_mask = vbt.symbol_dict({
            0: [True],
            1: [False]
        })
        update_index_mask2 = vbt.symbol_dict({
            0: [True, False],
            1: [False, True]
        })
        update_variants = (
            dict(index_mask=update_index_mask),
            dict(n=2, index_mask=update_index_mask2),
        )
        shared_index = index[1:4]

        one_row_flat = (
            (0, [nan] + kept_flat_0[1:] + fresh_flat[:1]),
            (1, kept_flat_1 + [nan]),
        )
        for sym, expected in one_row_flat:
            series_eq(
                MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index='nan')
                .update(index_mask=update_index_mask).data[sym],
                pd.Series(expected, index=index)
            )
        two_row_flat = (
            (0, [nan] + kept_flat_0[1:] + fresh_flat[:1] + [nan]),
            (1, kept_flat_1 + [nan, 1.6090665392794814]),
        )
        for sym, expected in two_row_flat:
            series_eq(
                MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index='nan')
                .update(n=2, index_mask=update_index_mask2).data[sym],
                pd.Series(expected, index=updated_index)
            )
        # With 'drop', both update variants leave only the three shared rows.
        for update_kwargs in update_variants:
            for sym, expected in ((0, kept_flat_0[1:]), (1, kept_flat_1[1:])):
                series_eq(
                    MyData.download([0, 1], shape=(5,), index_mask=index_mask, missing_index='drop')
                    .update(**update_kwargs).data[sym],
                    pd.Series(expected, index=shared_index)
                )

        # Row and column masks combined.
        column_mask = vbt.symbol_dict({
            0: [False, True, True],
            1: [True, True, False]
        })
        blank_row = [nan, nan, nan]
        masked_grid_0 = [blank_row] + [[nan] + row[1:] for row in kept_grid_0[1:]] + [[nan] + fresh_grid[0][1:]]
        masked_grid_1 = [row[:2] + [nan] for row in kept_grid_1] + [blank_row]

        for sym, expected in ((0, masked_grid_0), (1, masked_grid_1)):
            frame_eq(
                MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                missing_index='nan', missing_columns='nan')
                .update(index_mask=update_index_mask).data[sym],
                pd.DataFrame(expected, index=index)
            )
        two_row_grid = (
            (0, masked_grid_0 + [blank_row]),
            (1, masked_grid_1 + [[1.2405896199653488, 1.3271390558111398, nan]]),
        )
        for sym, expected in two_row_grid:
            frame_eq(
                MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                missing_index='nan', missing_columns='nan')
                .update(n=2, index_mask=update_index_mask2).data[sym],
                pd.DataFrame(expected, index=updated_index)
            )
        drop_grid = (
            (0, [[row[1]] for row in kept_grid_0[1:]]),
            (1, [[row[1]] for row in kept_grid_1[1:]]),
        )
        for update_kwargs in update_variants:
            for sym, expected in drop_grid:
                frame_eq(
                    MyData.download([0, 1], shape=(5, 3), index_mask=index_mask, column_mask=column_mask,
                                    missing_index='drop', missing_columns='drop')
                    .update(**update_kwargs).data[sym],
                    pd.DataFrame(expected, index=shared_index, columns=pd.Index([1], dtype='int64'))
                )

    def test_concat(self):
        """Check `concat()`: one entry per column, labelled by symbol (Series name or frame columns)."""
        series_eq = pd.testing.assert_series_equal
        frame_eq = pd.testing.assert_frame_equal

        # Expected samples for the default seed: one list/grid per symbol.
        flat_0 = [
            0.3745401188473625,
            0.9507143064099162,
            0.7319939418114051,
            0.5986584841970366,
            0.15601864044243652,
        ]
        flat_1 = [
            1.3745401188473625,
            1.9507143064099162,
            1.7319939418114051,
            1.5986584841970366,
            1.15601864044243652,
        ]
        grid_0 = [
            [0.3745401188473625, 0.9507143064099162, 0.7319939418114051],
            [0.5986584841970366, 0.15601864044243652, 0.15599452033620265],
            [0.05808361216819946, 0.8661761457749352, 0.6011150117432088],
            [0.7080725777960455, 0.020584494295802447, 0.9699098521619943],
            [0.8324426408004217, 0.21233911067827616, 0.18182496720710062],
        ]
        grid_1 = [
            [1.3745401188473625, 1.9507143064099162, 1.7319939418114051],
            [1.5986584841970366, 1.15601864044243652, 1.15599452033620265],
            [1.05808361216819946, 1.8661761457749352, 1.6011150117432088],
            [1.7080725777960455, 1.020584494295802447, 1.9699098521619943],
            [1.8324426408004217, 1.21233911067827616, 1.18182496720710062],
        ]
        features = ['feat0', 'feat1', 'feat2']

        index = pd.DatetimeIndex(
            [
                '2020-01-01 00:00:00',
                '2020-01-02 00:00:00',
                '2020-01-03 00:00:00',
                '2020-01-04 00:00:00',
                '2020-01-05 00:00:00',
            ],
            freq='D',
            tz=timezone.utc
        )
        symbol_columns = pd.Index([0, 1], dtype='int64', name='symbol')

        # One-dimensional data with a single named column.
        series_eq(
            MyData.download(0, shape=(5,), columns='feat0').concat()['feat0'],
            pd.Series(flat_0, index=index, name=0)
        )
        frame_eq(
            MyData.download([0, 1], shape=(5,), columns='feat0').concat()['feat0'],
            pd.DataFrame(
                [[first, second] for first, second in zip(flat_0, flat_1)],
                index=index,
                columns=symbol_columns
            )
        )

        # Two-dimensional data, one symbol: every feature is a Series named after the symbol.
        for pos, feature in enumerate(features):
            series_eq(
                MyData.download(0, shape=(5, 3), columns=features).concat()[feature],
                pd.Series([row[pos] for row in grid_0], index=index, name=0)
            )

        # Two-dimensional data, two symbols: every feature is a frame with one column per symbol.
        for pos, feature in enumerate(features):
            frame_eq(
                MyData.download([0, 1], shape=(5, 3), columns=features).concat()[feature],
                pd.DataFrame(
                    [[first[pos], second[pos]] for first, second in zip(grid_0, grid_1)],
                    index=index,
                    columns=symbol_columns
                )
            )

    def test_get(self):
        """Check what `get()` returns for one or two symbols, with and without a column selection."""
        expected_index = pd.DatetimeIndex(
            [
                '2020-01-01 00:00:00', '2020-01-02 00:00:00', '2020-01-03 00:00:00',
                '2020-01-04 00:00:00', '2020-01-05 00:00:00'
            ],
            freq='D',
            tz=timezone.utc
        )
        # Expected 'feat0' values, one column per symbol (0, 1), for downloads of shape (5, 3).
        feat0_rows = [
            [0.3745401188473625, 1.3745401188473625],
            [0.5986584841970366, 1.5986584841970366],
            [0.05808361216819946, 1.05808361216819946],
            [0.7080725777960455, 1.7080725777960455],
            [0.8324426408004217, 1.8324426408004217]
        ]

        def download_wide(symbols):
            # Each check downloads afresh with three named features.
            return MyData.download(symbols, shape=(5, 3), columns=['feat0', 'feat1', 'feat2'])

        def per_symbol_frame(rows):
            # Expected frame whose columns are the symbols 0 and 1.
            return pd.DataFrame(
                rows,
                index=expected_index,
                columns=pd.Index([0, 1], dtype='int64', name='symbol')
            )

        # One symbol, shape (5,): get() is expected to be a series named 'feat0'.
        actual = MyData.download(0, shape=(5,), columns='feat0').get()
        expected = pd.Series(
            [
                0.3745401188473625,
                0.9507143064099162,
                0.7319939418114051,
                0.5986584841970366,
                0.15601864044243652
            ],
            index=expected_index,
            name='feat0'
        )
        pd.testing.assert_series_equal(actual, expected)

        # One symbol, shape (5, 3): get() without arguments is expected to be the full frame.
        actual = download_wide(0).get()
        expected = pd.DataFrame(
            [
                [0.3745401188473625, 0.9507143064099162, 0.7319939418114051],
                [0.5986584841970366, 0.15601864044243652, 0.15599452033620265],
                [0.05808361216819946, 0.8661761457749352, 0.6011150117432088],
                [0.7080725777960455, 0.020584494295802447, 0.9699098521619943],
                [0.8324426408004217, 0.21233911067827616, 0.18182496720710062]
            ],
            index=expected_index,
            columns=pd.Index(['feat0', 'feat1', 'feat2'], dtype='object')
        )
        pd.testing.assert_frame_equal(actual, expected)

        # One symbol, shape (5, 3): get('feat0') is expected to be that column as a series.
        actual = download_wide(0).get('feat0')
        expected = pd.Series(
            [
                0.3745401188473625,
                0.5986584841970366,
                0.05808361216819946,
                0.7080725777960455,
                0.8324426408004217
            ],
            index=expected_index,
            name='feat0'
        )
        pd.testing.assert_series_equal(actual, expected)

        # Two symbols, shape (5,): get() is expected to be a frame with one column per symbol.
        actual = MyData.download([0, 1], shape=(5,), columns='feat0').get()
        expected = per_symbol_frame(
            [
                [0.3745401188473625, 1.3745401188473625],
                [0.9507143064099162, 1.9507143064099162],
                [0.7319939418114051, 1.7319939418114051],
                [0.5986584841970366, 1.5986584841970366],
                [0.15601864044243652, 1.15601864044243652]
            ]
        )
        pd.testing.assert_frame_equal(actual, expected)

        # Two symbols, shape (5, 3): the 'feat0' frame must be the same whether it is requested
        # by name, as the first item of a list selection, or as the first item of a bare get().
        pd.testing.assert_frame_equal(
            download_wide([0, 1]).get('feat0'),
            per_symbol_frame(feat0_rows)
        )
        pd.testing.assert_frame_equal(
            download_wide([0, 1]).get(['feat0', 'feat1'])[0],
            per_symbol_frame(feat0_rows)
        )
        pd.testing.assert_frame_equal(
            download_wide([0, 1]).get()[0],
            per_symbol_frame(feat0_rows)
        )

    def test_indexing(self):
        """Indexing downloaded data must give the same wrapper as downloading that shape directly."""
        # Row slice of one-dimensional data vs. a shorter one-dimensional download.
        sliced = MyData.download([0, 1], shape=(5,), columns='feat0').iloc[:3]
        direct = MyData.download([0, 1], shape=(3,), columns='feat0')
        assert sliced.wrapper == direct.wrapper

        # Row slice of three-feature data vs. a shorter three-feature download.
        sliced = MyData.download([0, 1], shape=(5, 3), columns=['feat0', 'feat1', 'feat2']).iloc[:3]
        direct = MyData.download([0, 1], shape=(3, 3), columns=['feat0', 'feat1', 'feat2'])
        assert sliced.wrapper == direct.wrapper

        # Selecting one feature by scalar key vs. a one-dimensional download.
        selected = MyData.download([0, 1], shape=(5, 3), columns=['feat0', 'feat1', 'feat2'])['feat0']
        direct = MyData.download([0, 1], shape=(5,), columns='feat0')
        assert selected.wrapper == direct.wrapper

        # Selecting one feature by list key vs. a single-column two-dimensional download.
        selected = MyData.download([0, 1], shape=(5, 3), columns=['feat0', 'feat1', 'feat2'])[['feat0']]
        direct = MyData.download([0, 1], shape=(5, 1), columns=['feat0'])
        assert selected.wrapper == direct.wrapper

    def test_stats(self):
        """Check `stats()` on two symbols downloaded with per-symbol index/column masks.

        The download uses `missing_index='nan'` and `missing_columns='nan'`, and the
        expected series pin the resulting 'Null Counts' per symbol.
        """
        row_masks = vbt.symbol_dict({
            0: [False, True, True, True, True],
            1: [True, True, True, True, False]
        })
        col_masks = vbt.symbol_dict({
            0: [False, True, True],
            1: [True, True, False]
        })
        data = MyData.download(
            [0, 1],
            shape=(5, 3),
            index_mask=row_masks,
            column_mask=col_masks,
            missing_index='nan',
            missing_columns='nan',
            columns=['feat0', 'feat1', 'feat2']
        )

        stats_index = pd.Index(
            ['Start', 'End', 'Period', 'Total Symbols', 'Null Counts: 0', 'Null Counts: 1'],
            dtype='object'
        )

        def expected_stats(null_counts, name):
            # Start, end, period and symbol count are the same in every expectation below;
            # only the two null counts and the series name vary.
            return pd.Series(
                [
                    pd.Timestamp('2020-01-01 00:00:00+0000', tz='UTC'),
                    pd.Timestamp('2020-01-05 00:00:00+0000', tz='UTC'),
                    pd.Timedelta('5 days 00:00:00'),
                    2
                ] + null_counts,
                index=stats_index,
                name=name
            )

        # Default call, a single column, and all columns treated as one group.
        pd.testing.assert_series_equal(
            data.stats(),
            expected_stats([2.3333333333333335, 2.3333333333333335], 'agg_func_mean')
        )
        pd.testing.assert_series_equal(
            data.stats(column='feat0'),
            expected_stats([5, 1], 'feat0')
        )
        pd.testing.assert_series_equal(
            data.stats(group_by=True),
            expected_stats([7, 7], 'group')
        )

        # Selecting or grouping up front must match passing the same choice to stats().
        pd.testing.assert_series_equal(
            data['feat0'].stats(),
            data.stats(column='feat0')
        )
        grouped = data.replace(wrapper=data.wrapper.replace(group_by=True))
        pd.testing.assert_series_equal(
            grouped.stats(),
            data.stats(group_by=True)
        )

        # Without an aggregation function the result has one row per column.
        per_column = data.stats(agg_func=None)
        assert per_column.shape == (3, 6)
        pd.testing.assert_index_equal(per_column.index, data.wrapper.columns)
        pd.testing.assert_index_equal(per_column.columns, stats_index)


# ############# updater.py ############# #

class TestDataUpdater:
    """Tests for `vbt.DataUpdater`."""

    def test_update(self):
        """After one `update()` the updater must hold the same data as `data.update()`."""
        source = MyData.download(0, shape=(5,), return_arr=True)
        updater = vbt.DataUpdater(source)
        updater.update()
        # The refreshed data is checked both via the attribute and via the config entry.
        assert updater.data == source.update()
        assert updater.config['data'] == source.update()

    def test_update_every(self):
        """After `update_every()` stops, the updater must hold data updated five times.

        The overridden `update()` counts its calls in a shared dict and raises
        `vbt.CancelledError` on the fifth one.
        """
        n_updates = 5
        source = MyData.download(0, shape=(5,), return_arr=True)
        call_state = dict(call_count=0)

        class DataUpdater(vbt.DataUpdater):
            def update(self, kwargs):
                # `kwargs` is the dict handed to `update_every(kwargs=...)` below.
                super().update()
                calls_so_far = kwargs['call_count'] + 1
                kwargs['call_count'] = calls_so_far
                if calls_so_far == n_updates:
                    raise vbt.CancelledError

        updater = DataUpdater(source)
        updater.update_every(kwargs=call_state)

        # Apply the same number of updates by hand to get the expected data.
        expected = source
        for _ in range(n_updates):
            expected = expected.update()
        assert updater.data == expected
        assert updater.config['data'] == expected