from datetime import datetime import numpy as np import pandas as pd import pytest from numba import njit import vectorbt as vbt from vectorbt.base import ( array_wrapper, column_grouper, combine_fns, index_fns, indexing, reshape_fns ) ray_available = True try: import ray except: ray_available = False day_dt = np.timedelta64(86400000000000) # Initialize global variables a1 = np.array([1]) a2 = np.array([1, 2, 3]) a3 = np.array([[1, 2, 3]]) a4 = np.array([[1], [2], [3]]) a5 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) sr_none = pd.Series([1]) sr1 = pd.Series([1], index=pd.Index(['x1'], name='i1'), name='a1') sr2 = pd.Series([1, 2, 3], index=pd.Index(['x2', 'y2', 'z2'], name='i2'), name='a2') df_none = pd.DataFrame([[1]]) df1 = pd.DataFrame( [[1]], index=pd.Index(['x3'], name='i3'), columns=pd.Index(['a3'], name='c3')) df2 = pd.DataFrame( [[1], [2], [3]], index=pd.Index(['x4', 'y4', 'z4'], name='i4'), columns=pd.Index(['a4'], name='c4')) df3 = pd.DataFrame( [[1, 2, 3]], index=pd.Index(['x5'], name='i5'), columns=pd.Index(['a5', 'b5', 'c5'], name='c5')) df4 = pd.DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=pd.Index(['x6', 'y6', 'z6'], name='i6'), columns=pd.Index(['a6', 'b6', 'c6'], name='c6')) multi_i = pd.MultiIndex.from_arrays([['x7', 'y7', 'z7'], ['x8', 'y8', 'z8']], names=['i7', 'i8']) multi_c = pd.MultiIndex.from_arrays([['a7', 'b7', 'c7'], ['a8', 'b8', 'c8']], names=['c7', 'c8']) df5 = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=multi_i, columns=multi_c) # ############# Global ############# # def setup_module(): vbt.settings.numba['check_func_suffix'] = True vbt.settings.broadcasting['index_from'] = 'stack' vbt.settings.broadcasting['columns_from'] = 'stack' vbt.settings.caching.enabled = False vbt.settings.caching.whitelist = [] vbt.settings.caching.blacklist = [] if ray_available: ray.init(local_mode=True, num_cpus=1) def teardown_module(): if ray_available: ray.shutdown() vbt.settings.reset() # ############# column_grouper.py ############# # grouped_columns = pd.MultiIndex.from_arrays([ [1, 1, 1, 1, 0, 0, 0, 0], [3, 3, 2, 2, 1, 1, 0, 0], [7, 6, 5, 4, 3, 2, 1, 0] ], names=['first', 'second', 'third']) class TestColumnGrouper: def test_group_by_to_index(self): assert not column_grouper.group_by_to_index(grouped_columns, group_by=False) assert column_grouper.group_by_to_index(grouped_columns, group_by=None) is None pd.testing.assert_index_equal( column_grouper.group_by_to_index(grouped_columns, group_by=True), pd.Index(['group'] * len(grouped_columns)) ) pd.testing.assert_index_equal( column_grouper.group_by_to_index(grouped_columns, group_by=0), pd.Index([1, 1, 1, 1, 0, 0, 0, 0], dtype='int64', name='first') ) pd.testing.assert_index_equal( column_grouper.group_by_to_index(grouped_columns, group_by='first'), pd.Index([1, 1, 1, 1, 0, 0, 0, 0], dtype='int64', name='first') ) pd.testing.assert_index_equal( column_grouper.group_by_to_index(grouped_columns, group_by=[0, 1]), pd.MultiIndex.from_tuples([ (1, 3), (1, 3), (1, 2), (1, 2), (0, 1), (0, 1), (0, 0), (0, 0) ], names=['first', 'second']) ) pd.testing.assert_index_equal( column_grouper.group_by_to_index(grouped_columns, group_by=['first', 'second']), pd.MultiIndex.from_tuples([ (1, 3), (1, 3), (1, 2), (1, 2), (0, 1), (0, 1), (0, 0), (0, 0) ], names=['first', 'second']) ) pd.testing.assert_index_equal( column_grouper.group_by_to_index( grouped_columns, group_by=np.array([3, 2, 1, 1, 1, 0, 0, 0])), pd.Index([3, 2, 1, 1, 1, 0, 0, 0], dtype='int64') ) pd.testing.assert_index_equal( column_grouper.group_by_to_index( grouped_columns, group_by=pd.Index([3, 2, 1, 1, 1, 0, 0, 0], name='fourth')), pd.Index([3, 2, 1, 1, 1, 0, 0, 0], dtype='int64', name='fourth') ) def test_get_groups_and_index(self): a, b = column_grouper.get_groups_and_index(grouped_columns, group_by=None) np.testing.assert_array_equal(a, np.array([0, 1, 2, 3, 4, 5, 6, 7])) pd.testing.assert_index_equal(b, grouped_columns) a, b = column_grouper.get_groups_and_index(grouped_columns, group_by=0) np.testing.assert_array_equal(a, np.array([0, 0, 0, 0, 1, 1, 1, 1])) pd.testing.assert_index_equal(b, pd.Index([1, 0], dtype='int64', name='first')) a, b = column_grouper.get_groups_and_index(grouped_columns, group_by=[0, 1]) np.testing.assert_array_equal(a, np.array([0, 0, 1, 1, 2, 2, 3, 3])) pd.testing.assert_index_equal(b, pd.MultiIndex.from_tuples([ (1, 3), (1, 2), (0, 1), (0, 0) ], names=['first', 'second'])) def test_get_group_lens_nb(self): np.testing.assert_array_equal( column_grouper.get_group_lens_nb(np.array([0, 0, 0, 0, 1, 1, 1, 1])), np.array([4, 4]) ) np.testing.assert_array_equal( column_grouper.get_group_lens_nb(np.array([0, 1])), np.array([1, 1]) ) np.testing.assert_array_equal( column_grouper.get_group_lens_nb(np.array([0, 0])), np.array([2]) ) np.testing.assert_array_equal( column_grouper.get_group_lens_nb(np.array([0])), np.array([1]) ) np.testing.assert_array_equal( column_grouper.get_group_lens_nb(np.array([])), np.array([]) ) with pytest.raises(Exception): column_grouper.get_group_lens_nb(np.array([1, 1, 0, 0])) with pytest.raises(Exception): column_grouper.get_group_lens_nb(np.array([0, 1, 0, 1])) def test_is_grouped(self): assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouped() assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouped(group_by=True) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouped(group_by=1) assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouped(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouped() assert column_grouper.ColumnGrouper(grouped_columns).is_grouped(group_by=0) assert column_grouper.ColumnGrouper(grouped_columns).is_grouped(group_by=True) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouped(group_by=False) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0) \ .is_grouped(group_by=grouped_columns.get_level_values(0) + 1) # only labels def test_is_grouping_enabled(self): assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_enabled() assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_enabled(group_by=True) assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_enabled(group_by=1) assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_enabled(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_enabled() assert column_grouper.ColumnGrouper(grouped_columns).is_grouping_enabled(group_by=0) assert column_grouper.ColumnGrouper(grouped_columns).is_grouping_enabled(group_by=True) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_enabled(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0) \ .is_grouping_enabled(group_by=grouped_columns.get_level_values(0) + 1) # only labels def test_is_grouping_disabled(self): assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_disabled() assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_disabled(group_by=True) assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_disabled(group_by=1) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_disabled(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_disabled() assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_disabled(group_by=0) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_disabled(group_by=True) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_disabled(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0) \ .is_grouping_disabled(group_by=grouped_columns.get_level_values(0) + 1) # only labels def test_is_grouping_modified(self): assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_modified() assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_modified(group_by=True) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_modified(group_by=1) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_modified(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_modified() assert column_grouper.ColumnGrouper(grouped_columns).is_grouping_modified(group_by=0) assert column_grouper.ColumnGrouper(grouped_columns).is_grouping_modified(group_by=True) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_modified(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0) \ .is_grouping_modified(group_by=grouped_columns.get_level_values(0) + 1) # only labels def test_is_grouping_changed(self): assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_changed() assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_changed(group_by=True) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_changed(group_by=1) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_grouping_changed(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_changed() assert column_grouper.ColumnGrouper(grouped_columns).is_grouping_changed(group_by=0) assert column_grouper.ColumnGrouper(grouped_columns).is_grouping_changed(group_by=True) assert not column_grouper.ColumnGrouper(grouped_columns).is_grouping_changed(group_by=False) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0) \ .is_grouping_changed(group_by=grouped_columns.get_level_values(0) + 1) # only labels def test_is_group_count_changed(self): assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_group_count_changed() assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_group_count_changed(group_by=True) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_group_count_changed(group_by=1) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0).is_group_count_changed(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns).is_group_count_changed() assert column_grouper.ColumnGrouper(grouped_columns).is_group_count_changed(group_by=0) assert column_grouper.ColumnGrouper(grouped_columns).is_group_count_changed(group_by=True) assert not column_grouper.ColumnGrouper(grouped_columns).is_group_count_changed(group_by=False) assert not column_grouper.ColumnGrouper(grouped_columns, group_by=0) \ .is_group_count_changed(group_by=grouped_columns.get_level_values(0) + 1) # only labels def test_check_group_by(self): column_grouper.ColumnGrouper(grouped_columns, group_by=None, allow_enable=True).check_group_by(group_by=0) with pytest.raises(Exception): column_grouper.ColumnGrouper(grouped_columns, group_by=None, allow_enable=False).check_group_by(group_by=0) column_grouper.ColumnGrouper(grouped_columns, group_by=0, allow_disable=True).check_group_by(group_by=False) with pytest.raises(Exception): column_grouper.ColumnGrouper(grouped_columns, group_by=0, allow_disable=False).check_group_by( group_by=False) column_grouper.ColumnGrouper(grouped_columns, group_by=0, allow_modify=True).check_group_by(group_by=1) column_grouper.ColumnGrouper(grouped_columns, group_by=0, allow_modify=False).check_group_by( group_by=np.array([2, 2, 2, 2, 3, 3, 3, 3])) with pytest.raises(Exception): column_grouper.ColumnGrouper(grouped_columns, group_by=0, allow_modify=False).check_group_by(group_by=1) def test_resolve_group_by(self): assert column_grouper.ColumnGrouper(grouped_columns, group_by=None).resolve_group_by() is None # default pd.testing.assert_index_equal( column_grouper.ColumnGrouper(grouped_columns, group_by=None).resolve_group_by(group_by=0), # overrides pd.Index([1, 1, 1, 1, 0, 0, 0, 0], dtype='int64', name='first') ) pd.testing.assert_index_equal( column_grouper.ColumnGrouper(grouped_columns, group_by=0).resolve_group_by(), # default pd.Index([1, 1, 1, 1, 0, 0, 0, 0], dtype='int64', name='first') ) pd.testing.assert_index_equal( column_grouper.ColumnGrouper(grouped_columns, group_by=0).resolve_group_by(group_by=1), # overrides pd.Index([3, 3, 2, 2, 1, 1, 0, 0], dtype='int64', name='second') ) def test_get_groups(self): np.testing.assert_array_equal( column_grouper.ColumnGrouper(grouped_columns).get_groups(), np.array([0, 1, 2, 3, 4, 5, 6, 7]) ) np.testing.assert_array_equal( column_grouper.ColumnGrouper(grouped_columns).get_groups(group_by=0), np.array([0, 0, 0, 0, 1, 1, 1, 1]) ) def test_get_columns(self): pd.testing.assert_index_equal( column_grouper.ColumnGrouper(grouped_columns).get_columns(), column_grouper.ColumnGrouper(grouped_columns).columns ) pd.testing.assert_index_equal( column_grouper.ColumnGrouper(grouped_columns).get_columns(group_by=0), pd.Index([1, 0], dtype='int64', name='first') ) def test_get_group_lens(self): np.testing.assert_array_equal( column_grouper.ColumnGrouper(grouped_columns).get_group_lens(), np.array([1, 1, 1, 1, 1, 1, 1, 1]) ) np.testing.assert_array_equal( column_grouper.ColumnGrouper(grouped_columns).get_group_lens(group_by=0), np.array([4, 4]) ) def test_get_group_start_idxs(self): np.testing.assert_array_equal( column_grouper.ColumnGrouper(grouped_columns).get_group_start_idxs(), np.array([0, 1, 2, 3, 4, 5, 6, 7]) ) np.testing.assert_array_equal( column_grouper.ColumnGrouper(grouped_columns).get_group_start_idxs(group_by=0), np.array([0, 4]) ) def test_get_group_end_idxs(self): np.testing.assert_array_equal( column_grouper.ColumnGrouper(grouped_columns).get_group_end_idxs(), np.array([1, 2, 3, 4, 5, 6, 7, 8]) ) np.testing.assert_array_equal( column_grouper.ColumnGrouper(grouped_columns).get_group_end_idxs(group_by=0), np.array([4, 8]) ) def test_eq(self): assert column_grouper.ColumnGrouper(grouped_columns) == column_grouper.ColumnGrouper(grouped_columns) assert column_grouper.ColumnGrouper(grouped_columns, group_by=0) == column_grouper.ColumnGrouper( grouped_columns, group_by=0) assert column_grouper.ColumnGrouper(grouped_columns) != 0 assert column_grouper.ColumnGrouper(grouped_columns) != column_grouper.ColumnGrouper(grouped_columns, group_by=0) assert column_grouper.ColumnGrouper(grouped_columns) != column_grouper.ColumnGrouper(pd.Index([0])) assert column_grouper.ColumnGrouper(grouped_columns) != column_grouper.ColumnGrouper( grouped_columns, allow_enable=False) assert column_grouper.ColumnGrouper(grouped_columns) != column_grouper.ColumnGrouper( grouped_columns, allow_disable=False) assert column_grouper.ColumnGrouper(grouped_columns) != column_grouper.ColumnGrouper( grouped_columns, allow_modify=False) # ############# array_wrapper.py ############# # sr2_wrapper = array_wrapper.ArrayWrapper.from_obj(sr2) df2_wrapper = array_wrapper.ArrayWrapper.from_obj(df2) df4_wrapper = array_wrapper.ArrayWrapper.from_obj(df4) sr2_wrapper_co = sr2_wrapper.replace(column_only_select=True) df4_wrapper_co = df4_wrapper.replace(column_only_select=True) sr2_grouped_wrapper = sr2_wrapper.replace(group_by=np.array(['g1']), group_select=True) df4_grouped_wrapper = df4_wrapper.replace(group_by=np.array(['g1', 'g1', 'g2']), group_select=True) sr2_grouped_wrapper_co = sr2_grouped_wrapper.replace(column_only_select=True, group_select=True) df4_grouped_wrapper_co = df4_grouped_wrapper.replace(column_only_select=True, group_select=True) class TestArrayWrapper: def test_config(self, tmp_path): assert array_wrapper.ArrayWrapper.loads(sr2_wrapper.dumps()) == sr2_wrapper assert array_wrapper.ArrayWrapper.loads(sr2_wrapper_co.dumps()) == sr2_wrapper_co assert array_wrapper.ArrayWrapper.loads(sr2_grouped_wrapper.dumps()) == sr2_grouped_wrapper assert array_wrapper.ArrayWrapper.loads(sr2_grouped_wrapper_co.dumps()) == sr2_grouped_wrapper_co sr2_grouped_wrapper_co.save(tmp_path / 'sr2_grouped_wrapper_co') assert array_wrapper.ArrayWrapper.load(tmp_path / 'sr2_grouped_wrapper_co') == sr2_grouped_wrapper_co def test_indexing_func_meta(self): # not grouped a, b, c = sr2_wrapper.indexing_func_meta(lambda x: x.iloc[:2])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) assert b == 0 assert c == 0 a, b, c = df4_wrapper.indexing_func_meta(lambda x: x.iloc[0, :2])[1:] assert a == 0 np.testing.assert_array_equal(b, np.array([0, 1])) np.testing.assert_array_equal(c, np.array([0, 1])) a, b, c = df4_wrapper.indexing_func_meta(lambda x: x.iloc[:2, 0])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) assert b == 0 assert c == 0 a, b, c = df4_wrapper.indexing_func_meta(lambda x: x.iloc[:2, [0]])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) np.testing.assert_array_equal(b, np.array([0])) np.testing.assert_array_equal(c, np.array([0])) a, b, c = df4_wrapper.indexing_func_meta(lambda x: x.iloc[:2, :2])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) np.testing.assert_array_equal(b, np.array([0, 1])) np.testing.assert_array_equal(c, np.array([0, 1])) with pytest.raises(Exception): _ = df4_wrapper.indexing_func_meta(lambda x: x.iloc[0, 0])[1:] with pytest.raises(Exception): _ = df4_wrapper.indexing_func_meta(lambda x: x.iloc[[0], 0])[1:] # not grouped, column only a, b, c = df4_wrapper_co.indexing_func_meta(lambda x: x.iloc[0])[1:] np.testing.assert_array_equal(a, np.array([0, 1, 2])) assert b == 0 assert c == 0 a, b, c = df4_wrapper_co.indexing_func_meta(lambda x: x.iloc[[0]])[1:] np.testing.assert_array_equal(a, np.array([0, 1, 2])) np.testing.assert_array_equal(b, np.array([0])) np.testing.assert_array_equal(c, np.array([0])) a, b, c = df4_wrapper_co.indexing_func_meta(lambda x: x.iloc[:2])[1:] np.testing.assert_array_equal(a, np.array([0, 1, 2])) np.testing.assert_array_equal(b, np.array([0, 1])) np.testing.assert_array_equal(c, np.array([0, 1])) with pytest.raises(Exception): _ = sr2_wrapper_co.indexing_func_meta(lambda x: x.iloc[:2])[1:] with pytest.raises(Exception): _ = df4_wrapper_co.indexing_func_meta(lambda x: x.iloc[:, :2])[1:] # grouped a, b, c = sr2_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) assert b == 0 assert c == 0 a, b, c = df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2, 0])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) assert b == 0 np.testing.assert_array_equal(c, np.array([0, 1])) a, b, c = df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2, 1])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) assert b == 1 assert c == 2 a, b, c = df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2, [1]])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) np.testing.assert_array_equal(b, np.array([1])) np.testing.assert_array_equal(c, np.array([2])) a, b, c = df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2, :2])[1:] np.testing.assert_array_equal(a, np.array([0, 1])) np.testing.assert_array_equal(b, np.array([0, 1])) np.testing.assert_array_equal(c, np.array([0, 1, 2])) with pytest.raises(Exception): _ = df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[0, :2])[1:] # grouped, column only a, b, c = df4_grouped_wrapper_co.indexing_func_meta(lambda x: x.iloc[0])[1:] np.testing.assert_array_equal(a, np.array([0, 1, 2])) assert b == 0 np.testing.assert_array_equal(c, np.array([0, 1])) a, b, c = df4_grouped_wrapper_co.indexing_func_meta(lambda x: x.iloc[1])[1:] np.testing.assert_array_equal(a, np.array([0, 1, 2])) assert b == 1 assert c == 2 a, b, c = df4_grouped_wrapper_co.indexing_func_meta(lambda x: x.iloc[[1]])[1:] np.testing.assert_array_equal(a, np.array([0, 1, 2])) np.testing.assert_array_equal(b, np.array([1])) np.testing.assert_array_equal(c, np.array([2])) a, b, c = df4_grouped_wrapper_co.indexing_func_meta(lambda x: x.iloc[:2])[1:] np.testing.assert_array_equal(a, np.array([0, 1, 2])) np.testing.assert_array_equal(b, np.array([0, 1])) np.testing.assert_array_equal(c, np.array([0, 1, 2])) def test_indexing(self): # not grouped pd.testing.assert_index_equal( sr2_wrapper.iloc[:2].index, pd.Index(['x2', 'y2'], dtype='object', name='i2')) pd.testing.assert_index_equal( sr2_wrapper.iloc[:2].columns, pd.Index(['a2'], dtype='object')) assert sr2_wrapper.iloc[:2].ndim == 1 pd.testing.assert_index_equal( df4_wrapper.iloc[0, :2].index, pd.Index(['a6', 'b6'], dtype='object', name='c6')) pd.testing.assert_index_equal( df4_wrapper.iloc[0, :2].columns, pd.Index(['x6'], dtype='object', name='i6')) assert df4_wrapper.iloc[0, :2].ndim == 1 pd.testing.assert_index_equal( df4_wrapper.iloc[:2, 0].index, pd.Index(['x6', 'y6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_wrapper.iloc[:2, 0].columns, pd.Index(['a6'], dtype='object', name='c6')) assert df4_wrapper.iloc[:2, 0].ndim == 1 pd.testing.assert_index_equal( df4_wrapper.iloc[:2, [0]].index, pd.Index(['x6', 'y6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_wrapper.iloc[:2, [0]].columns, pd.Index(['a6'], dtype='object', name='c6')) assert df4_wrapper.iloc[:2, [0]].ndim == 2 pd.testing.assert_index_equal( df4_wrapper.iloc[:2, :2].index, pd.Index(['x6', 'y6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_wrapper.iloc[:2, :2].columns, pd.Index(['a6', 'b6'], dtype='object', name='c6')) assert df4_wrapper.iloc[:2, :2].ndim == 2 # not grouped, column only pd.testing.assert_index_equal( df4_wrapper_co.iloc[0].index, pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_wrapper_co.iloc[0].columns, pd.Index(['a6'], dtype='object', name='c6')) assert df4_wrapper_co.iloc[0].ndim == 1 pd.testing.assert_index_equal( df4_wrapper_co.iloc[[0]].index, pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_wrapper_co.iloc[[0]].columns, pd.Index(['a6'], dtype='object', name='c6')) assert df4_wrapper_co.iloc[[0]].ndim == 2 pd.testing.assert_index_equal( df4_wrapper_co.iloc[:2].index, pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_wrapper_co.iloc[:2].columns, pd.Index(['a6', 'b6'], dtype='object', name='c6')) assert df4_wrapper_co.iloc[:2].ndim == 2 # grouped pd.testing.assert_index_equal( sr2_grouped_wrapper.iloc[:2].index, pd.Index(['x2', 'y2'], dtype='object', name='i2')) pd.testing.assert_index_equal( sr2_grouped_wrapper.iloc[:2].columns, pd.Index(['a2'], dtype='object')) assert sr2_grouped_wrapper.iloc[:2].ndim == 1 assert sr2_grouped_wrapper.iloc[:2].grouped_ndim == 1 pd.testing.assert_index_equal( sr2_grouped_wrapper.iloc[:2].grouper.group_by, pd.Index(['g1'], dtype='object')) pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, 0].index, pd.Index(['x6', 'y6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, 0].columns, pd.Index(['a6', 'b6'], dtype='object', name='c6')) assert df4_grouped_wrapper.iloc[:2, 0].ndim == 2 assert df4_grouped_wrapper.iloc[:2, 0].grouped_ndim == 1 pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, 0].grouper.group_by, pd.Index(['g1', 'g1'], dtype='object')) pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, 1].index, pd.Index(['x6', 'y6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, 1].columns, pd.Index(['c6'], dtype='object', name='c6')) assert df4_grouped_wrapper.iloc[:2, 1].ndim == 1 assert df4_grouped_wrapper.iloc[:2, 1].grouped_ndim == 1 pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, 1].grouper.group_by, pd.Index(['g2'], dtype='object')) pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, [1]].index, pd.Index(['x6', 'y6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, [1]].columns, pd.Index(['c6'], dtype='object', name='c6')) assert df4_grouped_wrapper.iloc[:2, [1]].ndim == 2 assert df4_grouped_wrapper.iloc[:2, [1]].grouped_ndim == 2 pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, [1]].grouper.group_by, pd.Index(['g2'], dtype='object')) pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, :2].index, pd.Index(['x6', 'y6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, :2].columns, pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6')) assert df4_grouped_wrapper.iloc[:2, :2].ndim == 2 assert df4_grouped_wrapper.iloc[:2, :2].grouped_ndim == 2 pd.testing.assert_index_equal( df4_grouped_wrapper.iloc[:2, :2].grouper.group_by, pd.Index(['g1', 'g1', 'g2'], dtype='object')) # grouped, column only pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[0].index, pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[0].columns, pd.Index(['a6', 'b6'], dtype='object', name='c6')) assert df4_grouped_wrapper_co.iloc[0].ndim == 2 assert df4_grouped_wrapper_co.iloc[0].grouped_ndim == 1 pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[0].grouper.group_by, pd.Index(['g1', 'g1'], dtype='object')) pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[1].index, pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[1].columns, pd.Index(['c6'], dtype='object', name='c6')) assert df4_grouped_wrapper_co.iloc[1].ndim == 1 assert df4_grouped_wrapper_co.iloc[1].grouped_ndim == 1 pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[1].grouper.group_by, pd.Index(['g2'], dtype='object')) pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[[1]].index, pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[[1]].columns, pd.Index(['c6'], dtype='object', name='c6')) assert df4_grouped_wrapper_co.iloc[[1]].ndim == 2 assert df4_grouped_wrapper_co.iloc[[1]].grouped_ndim == 2 pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[[1]].grouper.group_by, pd.Index(['g2'], dtype='object')) pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[:2].index, pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6')) pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[:2].columns, pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6')) assert df4_grouped_wrapper_co.iloc[:2].ndim == 2 assert df4_grouped_wrapper_co.iloc[:2].grouped_ndim == 2 pd.testing.assert_index_equal( df4_grouped_wrapper_co.iloc[:2].grouper.group_by, pd.Index(['g1', 'g1', 'g2'], dtype='object')) def test_from_obj(self): assert array_wrapper.ArrayWrapper.from_obj(sr2) == sr2_wrapper assert array_wrapper.ArrayWrapper.from_obj(df4) == df4_wrapper assert array_wrapper.ArrayWrapper.from_obj(sr2, column_only_select=True) == sr2_wrapper_co assert array_wrapper.ArrayWrapper.from_obj(df4, column_only_select=True) == df4_wrapper_co def test_from_shape(self): assert array_wrapper.ArrayWrapper.from_shape((3,)) == \ array_wrapper.ArrayWrapper( pd.RangeIndex(start=0, stop=3, step=1), pd.RangeIndex(start=0, stop=1, step=1), 1) assert array_wrapper.ArrayWrapper.from_shape((3, 3)) == \ array_wrapper.ArrayWrapper.from_obj(pd.DataFrame(np.empty((3, 3)))) def test_columns(self): pd.testing.assert_index_equal(df4_wrapper.columns, df4.columns) pd.testing.assert_index_equal(df4_grouped_wrapper.columns, df4.columns) pd.testing.assert_index_equal(df4_grouped_wrapper.get_columns(), pd.Index(['g1', 'g2'], dtype='object')) def test_name(self): assert sr2_wrapper.name == 'a2' assert df4_wrapper.name is None assert array_wrapper.ArrayWrapper.from_obj(pd.Series([0])).name is None assert sr2_grouped_wrapper.name == 'a2' assert sr2_grouped_wrapper.get_name() == 'g1' assert df4_grouped_wrapper.name is None assert df4_grouped_wrapper.get_name() is None def test_ndim(self): assert sr2_wrapper.ndim == 1 assert df4_wrapper.ndim == 2 assert sr2_grouped_wrapper.ndim == 1 assert sr2_grouped_wrapper.get_ndim() == 1 assert df4_grouped_wrapper.ndim == 2 assert df4_grouped_wrapper.get_ndim() == 2 assert df4_grouped_wrapper['g1'].ndim == 2 assert df4_grouped_wrapper['g1'].get_ndim() == 1 assert df4_grouped_wrapper['g2'].ndim == 1 assert df4_grouped_wrapper['g2'].get_ndim() == 1 def test_shape(self): assert sr2_wrapper.shape == (3,) assert df4_wrapper.shape == (3, 3) assert sr2_grouped_wrapper.shape == (3,) assert sr2_grouped_wrapper.get_shape() == (3,) assert df4_grouped_wrapper.shape == (3, 3) assert df4_grouped_wrapper.get_shape() == (3, 2) def test_shape_2d(self): assert sr2_wrapper.shape_2d == (3, 1) assert df4_wrapper.shape_2d == (3, 3) assert sr2_grouped_wrapper.shape_2d == (3, 1) assert sr2_grouped_wrapper.get_shape_2d() == (3, 1) assert df4_grouped_wrapper.shape_2d == (3, 3) assert df4_grouped_wrapper.get_shape_2d() == (3, 2) def test_freq(self): assert sr2_wrapper.freq is None assert sr2_wrapper.replace(freq='1D').freq == day_dt assert sr2_wrapper.replace(index=pd.DatetimeIndex([ datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3) ], freq='1D')).freq == day_dt assert sr2_wrapper.replace(index=pd.Index([ datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3) ])).freq == day_dt def test_to_timedelta(self): sr = pd.Series([1, 2, np.nan], index=['x', 'y', 'z'], name='name') pd.testing.assert_series_equal( array_wrapper.ArrayWrapper.from_obj(sr, freq='1 days').to_timedelta(sr), pd.Series( np.array([86400000000000, 172800000000000, 'NaT'], dtype='timedelta64[ns]'), index=sr.index, name=sr.name ) ) df = sr.to_frame() pd.testing.assert_frame_equal( array_wrapper.ArrayWrapper.from_obj(df, freq='1 days').to_timedelta(df), pd.DataFrame( np.array([86400000000000, 172800000000000, 'NaT'], dtype='timedelta64[ns]'), index=df.index, columns=df.columns ) ) def test_wrap(self): pd.testing.assert_series_equal( array_wrapper.ArrayWrapper(index=sr1.index, columns=[0], ndim=1).wrap(a1), # empty pd.Series(a1, index=sr1.index, name=None) ) pd.testing.assert_series_equal( array_wrapper.ArrayWrapper(index=sr1.index, columns=[sr1.name], ndim=1).wrap(a1), pd.Series(a1, index=sr1.index, name=sr1.name) ) pd.testing.assert_frame_equal( array_wrapper.ArrayWrapper(index=sr1.index, columns=[sr1.name], ndim=2).wrap(a1), pd.DataFrame(a1, index=sr1.index, columns=[sr1.name]) ) pd.testing.assert_series_equal( array_wrapper.ArrayWrapper(index=sr2.index, columns=[sr2.name], ndim=1).wrap(a2), pd.Series(a2, index=sr2.index, name=sr2.name) ) pd.testing.assert_frame_equal( array_wrapper.ArrayWrapper(index=sr2.index, columns=[sr2.name], ndim=2).wrap(a2), pd.DataFrame(a2, index=sr2.index, columns=[sr2.name]) ) pd.testing.assert_series_equal( array_wrapper.ArrayWrapper(index=df2.index, columns=df2.columns, ndim=1).wrap(a2), pd.Series(a2, index=df2.index, name=df2.columns[0]) ) pd.testing.assert_frame_equal( array_wrapper.ArrayWrapper(index=df2.index, columns=df2.columns, ndim=2).wrap(a2), pd.DataFrame(a2, index=df2.index, columns=df2.columns) ) pd.testing.assert_frame_equal( array_wrapper.ArrayWrapper.from_obj(df2).wrap(a2, index=df4.index), pd.DataFrame(a2, index=df4.index, columns=df2.columns) ) pd.testing.assert_frame_equal( array_wrapper.ArrayWrapper(index=df4.index, columns=df4.columns, ndim=2).wrap( np.array([[0, 0, np.nan], [1, np.nan, 1], [2, 2, np.nan]]), fillna=-1 ), pd.DataFrame([ [0., 0., -1.], [1., -1., 1.], [2., 2., -1.] ], index=df4.index, columns=df4.columns) ) pd.testing.assert_frame_equal( array_wrapper.ArrayWrapper(index=df4.index, columns=df4.columns, ndim=2).wrap( np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]]), to_index=True ), pd.DataFrame([ ['x6', 'x6', 'x6'], ['y6', 'y6', 'y6'], ['z6', 'z6', 'z6'] ], index=df4.index, columns=df4.columns) ) pd.testing.assert_frame_equal( array_wrapper.ArrayWrapper(index=df4.index, columns=df4.columns, ndim=2, freq='d').wrap( np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2]]), to_timedelta=True ), pd.DataFrame([ [pd.Timedelta(days=0), pd.Timedelta(days=0), pd.Timedelta(days=0)], [pd.Timedelta(days=1), pd.Timedelta(days=1), pd.Timedelta(days=1)], [pd.Timedelta(days=2), pd.Timedelta(days=2), pd.Timedelta(days=2)] ], index=df4.index, columns=df4.columns) ) def test_wrap_reduced(self): # sr to value assert sr2_wrapper.wrap_reduced(0) == 0 assert sr2_wrapper.wrap_reduced(np.array([0])) == 0 # result of computation on 2d # sr to array pd.testing.assert_series_equal( sr2_wrapper.wrap_reduced(np.array([0, 1])), pd.Series(np.array([0, 1]), name=sr2.name) ) pd.testing.assert_series_equal( sr2_wrapper.wrap_reduced(np.array([0, 1]), name_or_index=['x', 'y']), pd.Series(np.array([0, 1]), index=['x', 'y'], name=sr2.name) ) pd.testing.assert_series_equal( sr2_wrapper.wrap_reduced(np.array([0, 1]), name_or_index=['x', 'y'], columns=[0]), pd.Series(np.array([0, 1]), index=['x', 'y'], name=None) ) # df to value assert df2_wrapper.wrap_reduced(0) == 0 assert df4_wrapper.wrap_reduced(0) == 0 # df to value per column pd.testing.assert_series_equal( df4_wrapper.wrap_reduced(np.array([0, 1, 2]), name_or_index='test'), pd.Series(np.array([0, 1, 2]), index=df4.columns, name='test') ) pd.testing.assert_series_equal( df4_wrapper.wrap_reduced(np.array([0, 1, 2]), columns=['m', 'n', 'l'], name_or_index='test'), pd.Series(np.array([0, 1, 2]), index=['m', 'n', 'l'], name='test') ) # df to array per column pd.testing.assert_frame_equal( df4_wrapper.wrap_reduced(np.array([[0, 1, 2], [3, 4, 5]]), name_or_index=['x', 'y']), pd.DataFrame(np.array([[0, 1, 2], [3, 4, 5]]), index=['x', 'y'], columns=df4.columns) ) pd.testing.assert_frame_equal( df4_wrapper.wrap_reduced( np.array([[0, 1, 2], [3, 4, 5]]), name_or_index=['x', 'y'], columns=['m', 'n', 'l']), pd.DataFrame(np.array([[0, 1, 2], [3, 4, 5]]), index=['x', 'y'], columns=['m', 'n', 'l']) ) def test_grouped_wrapping(self): pd.testing.assert_frame_equal( df4_grouped_wrapper_co.wrap(np.array([[1, 2], [3, 4], [5, 6]])), pd.DataFrame(np.array([ [1, 2], [3, 4], [5, 6] ]), index=df4.index, columns=pd.Index(['g1', 'g2'], dtype='object')) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.wrap_reduced(np.array([1, 2])), pd.Series(np.array([1, 2]), index=pd.Index(['g1', 'g2'], dtype='object')) ) pd.testing.assert_frame_equal( df4_grouped_wrapper_co.wrap(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), group_by=False), pd.DataFrame(np.array([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ]), index=df4.index, columns=df4.columns) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.wrap_reduced(np.array([1, 2, 3]), group_by=False), pd.Series(np.array([1, 2, 3]), index=df4.columns) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[0].wrap(np.array([1, 2, 3])), pd.Series(np.array([1, 2, 3]), index=df4.index, name='g1') ) assert df4_grouped_wrapper_co.iloc[0].wrap_reduced(np.array([1])) == 1 pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[0].wrap(np.array([[1], [2], [3]])), pd.Series(np.array([1, 2, 3]), index=df4.index, name='g1') ) pd.testing.assert_frame_equal( df4_grouped_wrapper_co.iloc[0].wrap(np.array([[1, 2], [3, 4], [5, 6]]), group_by=False), pd.DataFrame(np.array([ [1, 2], [3, 4], [5, 6] ]), index=df4.index, columns=df4.columns[:2]) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[0].wrap_reduced(np.array([1, 2]), group_by=False), pd.Series(np.array([1, 2]), index=df4.columns[:2]) ) pd.testing.assert_frame_equal( df4_grouped_wrapper_co.iloc[[0]].wrap(np.array([1, 2, 3])), pd.DataFrame(np.array([ [1], [2], [3] ]), index=df4.index, columns=pd.Index(['g1'], dtype='object')) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[[0]].wrap_reduced(np.array([1])), pd.Series(np.array([1]), index=pd.Index(['g1'], dtype='object')) ) pd.testing.assert_frame_equal( df4_grouped_wrapper_co.iloc[[0]].wrap(np.array([[1, 2], [3, 4], [5, 6]]), group_by=False), pd.DataFrame(np.array([ [1, 2], [3, 4], [5, 6] ]), index=df4.index, columns=df4.columns[:2]) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[[0]].wrap_reduced(np.array([1, 2]), group_by=False), pd.Series(np.array([1, 2]), index=df4.columns[:2]) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[1].wrap(np.array([1, 2, 3])), pd.Series(np.array([1, 2, 3]), index=df4.index, name='g2') ) assert df4_grouped_wrapper_co.iloc[1].wrap_reduced(np.array([1])) == 1 pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[1].wrap(np.array([1, 2, 3]), group_by=False), pd.Series(np.array([1, 2, 3]), index=df4.index, name=df4.columns[2]) ) assert df4_grouped_wrapper_co.iloc[1].wrap_reduced(np.array([1]), group_by=False) == 1 pd.testing.assert_frame_equal( df4_grouped_wrapper_co.iloc[[1]].wrap(np.array([1, 2, 3])), pd.DataFrame(np.array([ [1], [2], [3] ]), index=df4.index, columns=pd.Index(['g2'], dtype='object')) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[[1]].wrap_reduced(np.array([1])), pd.Series(np.array([1]), index=pd.Index(['g2'], dtype='object')) ) pd.testing.assert_frame_equal( df4_grouped_wrapper_co.iloc[[1]].wrap(np.array([1, 2, 3]), group_by=False), pd.DataFrame(np.array([ [1], [2], [3] ]), index=df4.index, columns=df4.columns[2:]) ) pd.testing.assert_series_equal( df4_grouped_wrapper_co.iloc[[1]].wrap_reduced(np.array([1]), group_by=False), pd.Series(np.array([1]), index=df4.columns[2:]) ) def test_dummy(self): pd.testing.assert_index_equal( sr2_wrapper.dummy().index, sr2_wrapper.index ) pd.testing.assert_index_equal( sr2_wrapper.dummy().to_frame().columns, sr2_wrapper.columns ) pd.testing.assert_index_equal( df4_wrapper.dummy().index, df4_wrapper.index ) pd.testing.assert_index_equal( df4_wrapper.dummy().columns, df4_wrapper.columns ) pd.testing.assert_index_equal( sr2_grouped_wrapper.dummy().index, sr2_grouped_wrapper.index ) pd.testing.assert_index_equal( sr2_grouped_wrapper.dummy().to_frame().columns, sr2_grouped_wrapper.get_columns() ) pd.testing.assert_index_equal( df4_grouped_wrapper.dummy().index, df4_grouped_wrapper.index ) pd.testing.assert_index_equal( df4_grouped_wrapper.dummy().columns, df4_grouped_wrapper.get_columns() ) sr2_wrapping = array_wrapper.Wrapping(sr2_wrapper) df4_wrapping = array_wrapper.Wrapping(df4_wrapper) sr2_grouped_wrapping = array_wrapper.Wrapping(sr2_grouped_wrapper) df4_grouped_wrapping = array_wrapper.Wrapping(df4_grouped_wrapper) class TestWrapping: def test_regroup(self): assert df4_wrapping.regroup(None) == df4_wrapping assert df4_wrapping.regroup(False) == df4_wrapping assert df4_grouped_wrapping.regroup(None) == df4_grouped_wrapping assert df4_grouped_wrapping.regroup(df4_grouped_wrapper.grouper.group_by) == df4_grouped_wrapping pd.testing.assert_index_equal( df4_wrapping.regroup(df4_grouped_wrapper.grouper.group_by).wrapper.grouper.group_by, df4_grouped_wrapper.grouper.group_by ) assert df4_grouped_wrapping.regroup(False).wrapper.grouper.group_by is None def test_select_one(self): assert sr2_wrapping.select_one() == sr2_wrapping assert sr2_grouped_wrapping.select_one() == sr2_grouped_wrapping pd.testing.assert_index_equal( df4_wrapping.select_one(column='a6').wrapper.get_columns(), pd.Index(['a6'], dtype='object', name='c6') ) pd.testing.assert_index_equal( df4_grouped_wrapping.select_one(column='g1').wrapper.get_columns(), pd.Index(['g1'], dtype='object') ) with pytest.raises(Exception): df4_wrapping.select_one() with pytest.raises(Exception): df4_grouped_wrapping.select_one() # ############# index_fns.py ############# # class TestIndexFns: def test_get_index(self): pd.testing.assert_index_equal(index_fns.get_index(sr1, 0), sr1.index) pd.testing.assert_index_equal(index_fns.get_index(sr1, 1), pd.Index([sr1.name])) pd.testing.assert_index_equal(index_fns.get_index(pd.Series([1, 2, 3]), 1), pd.Index([0])) # empty pd.testing.assert_index_equal(index_fns.get_index(df1, 0), df1.index) pd.testing.assert_index_equal(index_fns.get_index(df1, 1), df1.columns) def test_index_from_values(self): pd.testing.assert_index_equal( index_fns.index_from_values([0.1, 0.2], name='a'), pd.Index([0.1, 0.2], dtype='float64', name='a') ) pd.testing.assert_index_equal( index_fns.index_from_values(np.tile(np.arange(1, 4)[:, None][:, None], (1, 3, 3)), name='b'), pd.Index([1, 2, 3], dtype='int64', name='b') ) pd.testing.assert_index_equal( index_fns.index_from_values(np.random.uniform(size=(3, 3, 3)), name='c'), pd.Index(['array_0', 'array_1', 'array_2'], dtype='object', name='c') ) pd.testing.assert_index_equal( index_fns.index_from_values([(1, 2), (3, 4), (5, 6)], name='c'), pd.Index(['tuple_0', 'tuple_1', 'tuple_2'], dtype='object', name='c') ) class A: pass class B: pass class C: pass pd.testing.assert_index_equal( index_fns.index_from_values([A(), B(), C()], name='c'), pd.Index(['A_0', 'B_1', 'C_2'], dtype='object', name='c') ) def test_repeat_index(self): i = pd.Index([1, 2, 3], name='i') pd.testing.assert_index_equal( index_fns.repeat_index(i, 3), pd.Index([1, 1, 1, 2, 2, 2, 3, 3, 3], dtype='int64', name='i') ) pd.testing.assert_index_equal( index_fns.repeat_index(multi_i, 3), pd.MultiIndex.from_tuples([ ('x7', 'x8'), ('x7', 'x8'), ('x7', 'x8'), ('y7', 'y8'), ('y7', 'y8'), ('y7', 'y8'), ('z7', 'z8'), ('z7', 'z8'), ('z7', 'z8') ], names=['i7', 'i8']) ) pd.testing.assert_index_equal( index_fns.repeat_index([0], 3), # empty pd.Index([0, 1, 2], dtype='int64') ) pd.testing.assert_index_equal( index_fns.repeat_index(sr_none.index, 3), # simple range pd.RangeIndex(start=0, stop=3, step=1) ) def test_tile_index(self): i = pd.Index([1, 2, 3], name='i') pd.testing.assert_index_equal( index_fns.tile_index(i, 3), pd.Index([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype='int64', name='i') ) pd.testing.assert_index_equal( index_fns.tile_index(multi_i, 3), pd.MultiIndex.from_tuples([ ('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8'), ('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8'), ('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8') ], names=['i7', 'i8']) ) pd.testing.assert_index_equal( index_fns.tile_index([0], 3), # empty pd.Index([0, 1, 2], dtype='int64') ) pd.testing.assert_index_equal( index_fns.tile_index(sr_none.index, 3), # simple range pd.RangeIndex(start=0, stop=3, step=1) ) def test_stack_indexes(self): pd.testing.assert_index_equal( index_fns.stack_indexes([sr2.index, df2.index, df5.index]), pd.MultiIndex.from_tuples([ ('x2', 'x4', 'x7', 'x8'), ('y2', 'y4', 'y7', 'y8'), ('z2', 'z4', 'z7', 'z8') ], names=['i2', 'i4', 'i7', 'i8']) ) pd.testing.assert_index_equal( index_fns.stack_indexes([sr2.index, df2.index, sr2.index], drop_duplicates=False), pd.MultiIndex.from_tuples([ ('x2', 'x4', 'x2'), ('y2', 'y4', 'y2'), ('z2', 'z4', 'z2') ], names=['i2', 'i4', 'i2']) ) pd.testing.assert_index_equal( index_fns.stack_indexes([sr2.index, df2.index, sr2.index], drop_duplicates=True), pd.MultiIndex.from_tuples([ ('x4', 'x2'), ('y4', 'y2'), ('z4', 'z2') ], names=['i4', 'i2']) ) pd.testing.assert_index_equal( index_fns.stack_indexes([pd.Index([1, 1]), pd.Index([2, 3])], drop_redundant=True), pd.Index([2, 3]) ) def test_combine_indexes(self): pd.testing.assert_index_equal( index_fns.combine_indexes([pd.Index([1]), pd.Index([2, 3])], drop_redundant=False), pd.MultiIndex.from_tuples([ (1, 2), (1, 3) ]) ) pd.testing.assert_index_equal( index_fns.combine_indexes([pd.Index([1]), pd.Index([2, 3])], drop_redundant=True), pd.Index([2, 3], dtype='int64') ) pd.testing.assert_index_equal( index_fns.combine_indexes([pd.Index([1], name='i'), pd.Index([2, 3])], drop_redundant=True), pd.MultiIndex.from_tuples([ (1, 2), (1, 3) ], names=['i', None]) ) pd.testing.assert_index_equal( index_fns.combine_indexes([pd.Index([1, 2]), pd.Index([3])], drop_redundant=False), pd.MultiIndex.from_tuples([ (1, 3), (2, 3) ]) ) pd.testing.assert_index_equal( index_fns.combine_indexes([pd.Index([1, 2]), pd.Index([3])], drop_redundant=True), pd.Index([1, 2], dtype='int64') ) pd.testing.assert_index_equal( index_fns.combine_indexes([pd.Index([1]), pd.Index([2, 3])], drop_redundant=(False, True)), pd.Index([2, 3], dtype='int64') ) pd.testing.assert_index_equal( index_fns.combine_indexes([df2.index, df5.index]), pd.MultiIndex.from_tuples([ ('x4', 'x7', 'x8'), ('x4', 'y7', 'y8'), ('x4', 'z7', 'z8'), ('y4', 'x7', 'x8'), ('y4', 'y7', 'y8'), ('y4', 'z7', 'z8'), ('z4', 'x7', 'x8'), ('z4', 'y7', 'y8'), ('z4', 'z7', 'z8') ], names=['i4', 'i7', 'i8']) ) def test_drop_levels(self): pd.testing.assert_index_equal( index_fns.drop_levels(multi_i, 'i7'), pd.Index(['x8', 'y8', 'z8'], dtype='object', name='i8') ) pd.testing.assert_index_equal( index_fns.drop_levels(multi_i, 'i8'), pd.Index(['x7', 'y7', 'z7'], dtype='object', name='i7') ) pd.testing.assert_index_equal( index_fns.drop_levels(multi_i, 'i9', strict=False), multi_i ) with pytest.raises(Exception): _ = index_fns.drop_levels(multi_i, 'i9') pd.testing.assert_index_equal( index_fns.drop_levels(multi_i, ['i7', 'i8'], strict=False), # won't do anything pd.MultiIndex.from_tuples([ ('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8') ], names=['i7', 'i8']) ) with pytest.raises(Exception): _ = index_fns.drop_levels(multi_i, ['i7', 'i8']) def test_rename_levels(self): i = pd.Index([1, 2, 3], name='i') pd.testing.assert_index_equal( index_fns.rename_levels(i, {'i': 'f'}), pd.Index([1, 2, 3], dtype='int64', name='f') ) pd.testing.assert_index_equal( index_fns.rename_levels(i, {'a': 'b'}, strict=False), i ) with pytest.raises(Exception): _ = index_fns.rename_levels(i, {'a': 'b'}, strict=True) pd.testing.assert_index_equal( index_fns.rename_levels(multi_i, {'i7': 'f7', 'i8': 'f8'}), pd.MultiIndex.from_tuples([ ('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8') ], names=['f7', 'f8']) ) def test_select_levels(self): pd.testing.assert_index_equal( index_fns.select_levels(multi_i, 'i7'), pd.Index(['x7', 'y7', 'z7'], dtype='object', name='i7') ) pd.testing.assert_index_equal( index_fns.select_levels(multi_i, ['i7']), pd.MultiIndex.from_tuples([ ('x7',), ('y7',), ('z7',) ], names=['i7']) ) pd.testing.assert_index_equal( index_fns.select_levels(multi_i, ['i7', 'i8']), pd.MultiIndex.from_tuples([ ('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8') ], names=['i7', 'i8']) ) def test_drop_redundant_levels(self): pd.testing.assert_index_equal( index_fns.drop_redundant_levels(pd.Index(['a', 'a'])), pd.Index(['a', 'a'], dtype='object') ) # if one unnamed, leaves as-is pd.testing.assert_index_equal( index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([['a', 'a'], ['b', 'b']])), pd.MultiIndex.from_tuples([ ('a', 'b'), ('a', 'b') ]) # if all unnamed, leaves as-is ) pd.testing.assert_index_equal( index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([['a', 'a'], ['b', 'b']], names=['hi', None])), pd.Index(['a', 'a'], dtype='object', name='hi') # removes level with single unnamed value ) pd.testing.assert_index_equal( index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([['a', 'b'], ['a', 'b']], names=['hi', 'hi2'])), pd.MultiIndex.from_tuples([ ('a', 'a'), ('b', 'b') ], names=['hi', 'hi2']) # legit ) pd.testing.assert_index_equal( # ignores 0-to-n index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 1], ['a', 'b']], names=[None, 'hi2'])), pd.Index(['a', 'b'], dtype='object', name='hi2') ) pd.testing.assert_index_equal( # legit index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 2], ['a', 'b']], names=[None, 'hi2'])), pd.MultiIndex.from_tuples([ (0, 'a'), (2, 'b') ], names=[None, 'hi2']) ) pd.testing.assert_index_equal( # legit (w/ name) index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 1], ['a', 'b']], names=['hi', 'hi2'])), pd.MultiIndex.from_tuples([ (0, 'a'), (1, 'b') ], names=['hi', 'hi2']) ) def test_drop_duplicate_levels(self): pd.testing.assert_index_equal( index_fns.drop_duplicate_levels(pd.MultiIndex.from_arrays( [[1, 2, 3], [1, 2, 3]], names=['a', 'a'])), pd.Index([1, 2, 3], dtype='int64', name='a') ) pd.testing.assert_index_equal( index_fns.drop_duplicate_levels(pd.MultiIndex.from_tuples( [(0, 1, 2, 1), ('a', 'b', 'c', 'b')], names=['x', 'y', 'z', 'y']), keep='last'), pd.MultiIndex.from_tuples([ (0, 2, 1), ('a', 'c', 'b') ], names=['x', 'z', 'y']) ) pd.testing.assert_index_equal( index_fns.drop_duplicate_levels(pd.MultiIndex.from_tuples( [(0, 1, 2, 1), ('a', 'b', 'c', 'b')], names=['x', 'y', 'z', 'y']), keep='first'), pd.MultiIndex.from_tuples([ (0, 1, 2), ('a', 'b', 'c') ], names=['x', 'y', 'z']) ) def test_align_index_to(self): index1 = pd.Index(['c', 'b', 'a'], name='name1') assert index_fns.align_index_to(index1, index1) == pd.IndexSlice[:] index2 = pd.Index(['a', 'b', 'c', 'a', 'b', 'c'], name='name1') np.testing.assert_array_equal( index_fns.align_index_to(index1, index2), np.array([2, 1, 0, 2, 1, 0]) ) with pytest.raises(Exception): index_fns.align_index_to(pd.Index(['a']), pd.Index(['a', 'b', 'c'])) index3 = pd.MultiIndex.from_tuples([ (0, 'c'), (0, 'b'), (0, 'a'), (1, 'c'), (1, 'b'), (1, 'a') ], names=['name2', 'name1']) np.testing.assert_array_equal( index_fns.align_index_to(index1, index3), np.array([0, 1, 2, 0, 1, 2]) ) with pytest.raises(Exception): index_fns.align_index_to( pd.Index(['b', 'a'], name='name1'), index3 ) with pytest.raises(Exception): index_fns.align_index_to( pd.Index(['c', 'b', 'a', 'a'], name='name1'), index3 ) index4 = pd.MultiIndex.from_tuples([ (0, 'a'), (0, 'b'), (0, 'c'), (1, 'a'), (1, 'b'), (1, 'c') ], names=['name2', 'name1']) np.testing.assert_array_equal( index_fns.align_index_to(index1, index4), np.array([2, 1, 0, 2, 1, 0]) ) def test_align_indexes(self): index1 = pd.Index(['a', 'b', 'c']) index2 = pd.MultiIndex.from_tuples([ (0, 'a'), (0, 'b'), (0, 'c'), (1, 'a'), (1, 'b'), (1, 'c') ]) index3 = pd.MultiIndex.from_tuples([ (2, 0, 'a'), (2, 0, 'b'), (2, 0, 'c'), (2, 1, 'a'), (2, 1, 'b'), (2, 1, 'c'), (3, 0, 'a'), (3, 0, 'b'), (3, 0, 'c'), (3, 1, 'a'), (3, 1, 'b'), (3, 1, 'c') ]) indices1, indices2, indices3 = index_fns.align_indexes([index1, index2, index3]) np.testing.assert_array_equal( indices1, np.array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]) ) np.testing.assert_array_equal( indices2, np.array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]) ) assert indices3 == pd.IndexSlice[:] def test_pick_levels(self): index = index_fns.stack_indexes([multi_i, multi_c]) assert index_fns.pick_levels(index, required_levels=[], optional_levels=[]) \ == ([], []) assert index_fns.pick_levels(index, required_levels=['c8', 'c7', 'i8', 'i7'], optional_levels=[]) \ == ([3, 2, 1, 0], []) assert index_fns.pick_levels(index, required_levels=['c8', None, 'i8', 'i7'], optional_levels=[]) \ == ([3, 2, 1, 0], []) assert index_fns.pick_levels(index, required_levels=[None, 'c7', 'i8', 'i7'], optional_levels=[]) \ == ([3, 2, 1, 0], []) assert index_fns.pick_levels(index, required_levels=[None, None, None, None], optional_levels=[]) \ == ([0, 1, 2, 3], []) assert index_fns.pick_levels(index, required_levels=['c8', 'c7', 'i8'], optional_levels=['i7']) \ == ([3, 2, 1], [0]) assert index_fns.pick_levels(index, required_levels=['c8', None, 'i8'], optional_levels=['i7']) \ == ([3, 2, 1], [0]) assert index_fns.pick_levels(index, required_levels=[None, 'c7', 'i8'], optional_levels=['i7']) \ == ([3, 2, 1], [0]) assert index_fns.pick_levels(index, required_levels=[None, None, None, None], optional_levels=[None]) \ == ([0, 1, 2, 3], [None]) with pytest.raises(Exception): index_fns.pick_levels(index, required_levels=['i8', 'i8', 'i8', 'i8'], optional_levels=[]) with pytest.raises(Exception): index_fns.pick_levels(index, required_levels=['c8', 'c7', 'i8', 'i7'], optional_levels=['i7']) # ############# reshape_fns.py ############# # class TestReshapeFns: def test_soft_to_ndim(self): np.testing.assert_array_equal(reshape_fns.soft_to_ndim(a2, 1), a2) pd.testing.assert_series_equal(reshape_fns.soft_to_ndim(sr2, 1), sr2) pd.testing.assert_series_equal(reshape_fns.soft_to_ndim(df2, 1), df2.iloc[:, 0]) pd.testing.assert_frame_equal(reshape_fns.soft_to_ndim(df4, 1), df4) # cannot -> do nothing np.testing.assert_array_equal(reshape_fns.soft_to_ndim(a2, 2), a2[:, None]) pd.testing.assert_frame_equal(reshape_fns.soft_to_ndim(sr2, 2), sr2.to_frame()) pd.testing.assert_frame_equal(reshape_fns.soft_to_ndim(df2, 2), df2) def test_to_1d(self): np.testing.assert_array_equal(reshape_fns.to_1d(None), np.asarray([None])) np.testing.assert_array_equal(reshape_fns.to_1d(0), np.asarray([0])) np.testing.assert_array_equal(reshape_fns.to_1d(a2), a2) pd.testing.assert_series_equal(reshape_fns.to_1d(sr2), sr2) pd.testing.assert_series_equal(reshape_fns.to_1d(df2), df2.iloc[:, 0]) np.testing.assert_array_equal(reshape_fns.to_1d(df2, raw=True), df2.iloc[:, 0].values) def test_to_2d(self): np.testing.assert_array_equal(reshape_fns.to_2d(None), np.asarray([[None]])) np.testing.assert_array_equal(reshape_fns.to_2d(0), np.asarray([[0]])) np.testing.assert_array_equal(reshape_fns.to_2d(a2), a2[:, None]) pd.testing.assert_frame_equal(reshape_fns.to_2d(sr2), sr2.to_frame()) pd.testing.assert_frame_equal(reshape_fns.to_2d(df2), df2) np.testing.assert_array_equal(reshape_fns.to_2d(df2, raw=True), df2.values) def test_repeat_axis0(self): target = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]) np.testing.assert_array_equal(reshape_fns.repeat(0, 3, axis=0), np.full(3, 0)) np.testing.assert_array_equal( reshape_fns.repeat(a2, 3, axis=0), target) pd.testing.assert_series_equal( reshape_fns.repeat(sr2, 3, axis=0), pd.Series(target, index=index_fns.repeat_index(sr2.index, 3), name=sr2.name)) pd.testing.assert_frame_equal( reshape_fns.repeat(df2, 3, axis=0), pd.DataFrame(target, index=index_fns.repeat_index(df2.index, 3), columns=df2.columns)) def test_repeat_axis1(self): target = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]) np.testing.assert_array_equal(reshape_fns.repeat(0, 3, axis=1), np.full((1, 3), 0)) np.testing.assert_array_equal( reshape_fns.repeat(a2, 3, axis=1), target) pd.testing.assert_frame_equal( reshape_fns.repeat(sr2, 3, axis=1), pd.DataFrame(target, index=sr2.index, columns=index_fns.repeat_index([sr2.name], 3))) pd.testing.assert_frame_equal( reshape_fns.repeat(df2, 3, axis=1), pd.DataFrame(target, index=df2.index, columns=index_fns.repeat_index(df2.columns, 3))) def test_tile_axis0(self): target = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]) np.testing.assert_array_equal(reshape_fns.tile(0, 3, axis=0), np.full(3, 0)) np.testing.assert_array_equal( reshape_fns.tile(a2, 3, axis=0), target) pd.testing.assert_series_equal( reshape_fns.tile(sr2, 3, axis=0), pd.Series(target, index=index_fns.tile_index(sr2.index, 3), name=sr2.name)) pd.testing.assert_frame_equal( reshape_fns.tile(df2, 3, axis=0), pd.DataFrame(target, index=index_fns.tile_index(df2.index, 3), columns=df2.columns)) def test_tile_axis1(self): target = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]) np.testing.assert_array_equal(reshape_fns.tile(0, 3, axis=1), np.full((1, 3), 0)) np.testing.assert_array_equal( reshape_fns.tile(a2, 3, axis=1), target) pd.testing.assert_frame_equal( reshape_fns.tile(sr2, 3, axis=1), pd.DataFrame(target, index=sr2.index, columns=index_fns.tile_index([sr2.name], 3))) pd.testing.assert_frame_equal( reshape_fns.tile(df2, 3, axis=1), pd.DataFrame(target, index=df2.index, columns=index_fns.tile_index(df2.columns, 3))) def test_broadcast_numpy(self): # 1d to_broadcast = 0, a1, a2 broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast)) broadcasted = reshape_fns.broadcast(*to_broadcast) for i in range(len(broadcasted)): np.testing.assert_array_equal( broadcasted[i], broadcasted_arrs[i] ) # 2d to_broadcast = 0, a1, a2, a3, a4, a5 broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast)) broadcasted = reshape_fns.broadcast(*to_broadcast) for i in range(len(broadcasted)): np.testing.assert_array_equal( broadcasted[i], broadcasted_arrs[i] ) def test_broadcast_stack(self): # 1d to_broadcast = 0, a1, a2, sr_none, sr1, sr2 broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast)) broadcasted = reshape_fns.broadcast( *to_broadcast, index_from='stack', columns_from='stack', drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): pd.testing.assert_series_equal( broadcasted[i], pd.Series( broadcasted_arrs[i], index=pd.MultiIndex.from_tuples([ ('x1', 'x2'), ('x1', 'y2'), ('x1', 'z2') ], names=['i1', 'i2']), name=None ) ) # 2d to_broadcast_a = 0, a1, a2, a3, a4, a5 to_broadcast_sr = sr_none, sr1, sr2 to_broadcast_df = df_none, df1, df2, df3, df4 broadcasted_arrs = list(np.broadcast_arrays( *to_broadcast_a, *[x.to_frame() for x in to_broadcast_sr], # here is the difference *to_broadcast_df )) broadcasted = reshape_fns.broadcast( *to_broadcast_a, *to_broadcast_sr, *to_broadcast_df, index_from='stack', columns_from='stack', drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): pd.testing.assert_frame_equal( broadcasted[i], pd.DataFrame( broadcasted_arrs[i], index=pd.MultiIndex.from_tuples([ ('x1', 'x2', 'x3', 'x4', 'x5', 'x6'), ('x1', 'y2', 'x3', 'y4', 'x5', 'y6'), ('x1', 'z2', 'x3', 'z4', 'x5', 'z6') ], names=['i1', 'i2', 'i3', 'i4', 'i5', 'i6']), columns=pd.MultiIndex.from_tuples([ ('a3', 'a4', 'a5', 'a6'), ('a3', 'a4', 'b5', 'b6'), ('a3', 'a4', 'c5', 'c6') ], names=['c3', 'c4', 'c5', 'c6']) ) ) broadcasted = reshape_fns.broadcast( pd.DataFrame([[1, 2, 3]], columns=pd.Index(['a', 'b', 'c'], name='i1')), pd.DataFrame([[4, 5, 6]], columns=pd.Index(['a', 'b', 'c'], name='i2')), index_from='stack', columns_from='stack', drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) pd.testing.assert_frame_equal( broadcasted[0], pd.DataFrame([[1, 2, 3]], columns=pd.MultiIndex.from_tuples([ ('a', 'a'), ('b', 'b'), ('c', 'c') ], names=['i1', 'i2'])) ) pd.testing.assert_frame_equal( broadcasted[1], pd.DataFrame([[4, 5, 6]], columns=pd.MultiIndex.from_tuples([ ('a', 'a'), ('b', 'b'), ('c', 'c') ], names=['i1', 'i2'])) ) def test_broadcast_keep(self): # 1d to_broadcast = 0, a1, a2, sr_none, sr1, sr2 broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast)) broadcasted = reshape_fns.broadcast( *to_broadcast, index_from='keep', columns_from='keep', drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(4): pd.testing.assert_series_equal( broadcasted[i], pd.Series(broadcasted_arrs[i], index=pd.RangeIndex(start=0, stop=3, step=1)) ) pd.testing.assert_series_equal( broadcasted[4], pd.Series(broadcasted_arrs[4], index=pd.Index(['x1', 'x1', 'x1'], name='i1'), name=sr1.name) ) pd.testing.assert_series_equal( broadcasted[5], pd.Series(broadcasted_arrs[5], index=sr2.index, name=sr2.name) ) # 2d to_broadcast_a = 0, a1, a2, a3, a4, a5 to_broadcast_sr = sr_none, sr1, sr2 to_broadcast_df = df_none, df1, df2, df3, df4 broadcasted_arrs = list(np.broadcast_arrays( *to_broadcast_a, *[x.to_frame() for x in to_broadcast_sr], # here is the difference *to_broadcast_df )) broadcasted = reshape_fns.broadcast( *to_broadcast_a, *to_broadcast_sr, *to_broadcast_df, index_from='keep', columns_from='keep', drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(7): pd.testing.assert_frame_equal( broadcasted[i], pd.DataFrame( broadcasted_arrs[i], index=pd.RangeIndex(start=0, stop=3, step=1), columns=pd.RangeIndex(start=0, stop=3, step=1) ) ) pd.testing.assert_frame_equal( broadcasted[7], pd.DataFrame( broadcasted_arrs[7], index=pd.Index(['x1', 'x1', 'x1'], dtype='object', name='i1'), columns=pd.Index(['a1', 'a1', 'a1'], dtype='object') ) ) pd.testing.assert_frame_equal( broadcasted[8], pd.DataFrame( broadcasted_arrs[8], index=sr2.index, columns=pd.Index(['a2', 'a2', 'a2'], dtype='object') ) ) pd.testing.assert_frame_equal( broadcasted[9], pd.DataFrame( broadcasted_arrs[9], index=pd.RangeIndex(start=0, stop=3, step=1), columns=pd.RangeIndex(start=0, stop=3, step=1) ) ) pd.testing.assert_frame_equal( broadcasted[10], pd.DataFrame( broadcasted_arrs[10], index=pd.Index(['x3', 'x3', 'x3'], dtype='object', name='i3'), columns=pd.Index(['a3', 'a3', 'a3'], dtype='object', name='c3') ) ) pd.testing.assert_frame_equal( broadcasted[11], pd.DataFrame( broadcasted_arrs[11], index=df2.index, columns=pd.Index(['a4', 'a4', 'a4'], dtype='object', name='c4') ) ) pd.testing.assert_frame_equal( broadcasted[12], pd.DataFrame( broadcasted_arrs[12], index=pd.Index(['x5', 'x5', 'x5'], dtype='object', name='i5'), columns=df3.columns ) ) pd.testing.assert_frame_equal( broadcasted[13], pd.DataFrame( broadcasted_arrs[13], index=df4.index, columns=df4.columns ) ) def test_broadcast_specify(self): # 1d to_broadcast = 0, a1, a2, sr_none, sr1, sr2 broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast)) broadcasted = reshape_fns.broadcast( *to_broadcast, index_from=multi_i, columns_from=['name'], # should translate to Series name drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): pd.testing.assert_series_equal( broadcasted[i], pd.Series( broadcasted_arrs[i], index=multi_i, name='name' ) ) broadcasted = reshape_fns.broadcast( *to_broadcast, index_from=multi_i, columns_from=[0], # should translate to None drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): pd.testing.assert_series_equal( broadcasted[i], pd.Series( broadcasted_arrs[i], index=multi_i, name=None ) ) # 2d to_broadcast_a = 0, a1, a2, a3, a4, a5 to_broadcast_sr = sr_none, sr1, sr2 to_broadcast_df = df_none, df1, df2, df3, df4 broadcasted_arrs = list(np.broadcast_arrays( *to_broadcast_a, *[x.to_frame() for x in to_broadcast_sr], # here is the difference *to_broadcast_df )) broadcasted = reshape_fns.broadcast( *to_broadcast_a, *to_broadcast_sr, *to_broadcast_df, index_from=multi_i, columns_from=multi_c, drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): pd.testing.assert_frame_equal( broadcasted[i], pd.DataFrame( broadcasted_arrs[i], index=multi_i, columns=multi_c ) ) def test_broadcast_idx(self): # 1d to_broadcast = 0, a1, a2, sr_none, sr1, sr2 broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast)) broadcasted = reshape_fns.broadcast( *to_broadcast, index_from=-1, columns_from=-1, # should translate to Series name drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): pd.testing.assert_series_equal( broadcasted[i], pd.Series( broadcasted_arrs[i], index=sr2.index, name=sr2.name ) ) with pytest.raises(Exception): _ = reshape_fns.broadcast( *to_broadcast, index_from=0, columns_from=0, drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) # 2d to_broadcast_a = 0, a1, a2, a3, a4, a5 to_broadcast_sr = sr_none, sr1, sr2 to_broadcast_df = df_none, df1, df2, df3, df4 broadcasted_arrs = list(np.broadcast_arrays( *to_broadcast_a, *[x.to_frame() for x in to_broadcast_sr], # here is the difference *to_broadcast_df )) broadcasted = reshape_fns.broadcast( *to_broadcast_a, *to_broadcast_sr, *to_broadcast_df, index_from=-1, columns_from=-1, drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): pd.testing.assert_frame_equal( broadcasted[i], pd.DataFrame( broadcasted_arrs[i], index=df4.index, columns=df4.columns ) ) def test_broadcast_strict(self): # 1d to_broadcast = sr1, sr2 with pytest.raises(Exception): _ = reshape_fns.broadcast( *to_broadcast, index_from='strict', # changing index not allowed columns_from='stack', drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) # 2d to_broadcast = df1, df2 with pytest.raises(Exception): _ = reshape_fns.broadcast( *to_broadcast, index_from='stack', columns_from='strict', # changing columns not allowed drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) def test_broadcast_dirty(self): # 1d to_broadcast = sr2, 0, a1, a2, sr_none, sr1, sr2 broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast)) broadcasted = reshape_fns.broadcast( *to_broadcast, index_from='stack', columns_from='stack', drop_duplicates=False, drop_redundant=False, ignore_sr_names=False ) for i in range(len(broadcasted)): pd.testing.assert_series_equal( broadcasted[i], pd.Series( broadcasted_arrs[i], index=pd.MultiIndex.from_tuples([ ('x2', 'x1', 'x2'), ('y2', 'x1', 'y2'), ('z2', 'x1', 'z2') ], names=['i2', 'i1', 'i2']), name=('a2', 'a1', 'a2') ) ) def test_broadcast_to_shape(self): to_broadcast = 0, a1, a2, sr_none, sr1, sr2 broadcasted_arrs = [ np.broadcast_to(x.to_frame() if isinstance(x, pd.Series) else x, (3, 3)) for x in to_broadcast ] broadcasted = reshape_fns.broadcast( *to_broadcast, to_shape=(3, 3), index_from='stack', columns_from='stack', drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): pd.testing.assert_frame_equal( broadcasted[i], pd.DataFrame( broadcasted_arrs[i], index=pd.MultiIndex.from_tuples([ ('x1', 'x2'), ('x1', 'y2'), ('x1', 'z2') ], names=['i1', 'i2']), columns=None ) ) @pytest.mark.parametrize( "test_to_pd", [False, [False, False, False, False, False, False]], ) def test_broadcast_to_pd(self, test_to_pd): to_broadcast = 0, a1, a2, sr_none, sr1, sr2 broadcasted_arrs = list(np.broadcast_arrays(*to_broadcast)) broadcasted = reshape_fns.broadcast( *to_broadcast, to_pd=test_to_pd, # to NumPy index_from='stack', columns_from='stack', drop_duplicates=True, drop_redundant=True, ignore_sr_names=True ) for i in range(len(broadcasted)): np.testing.assert_array_equal( broadcasted[i], broadcasted_arrs[i] ) def test_broadcast_require_kwargs(self): a, b = reshape_fns.broadcast(np.empty((1,)), np.empty((1,))) # readonly assert not a.flags.writeable assert not b.flags.writeable a, b = reshape_fns.broadcast( np.empty((1,)), np.empty((1,)), require_kwargs=[{'requirements': 'W'}, {}]) # writeable assert a.flags.writeable assert not b.flags.writeable a, b = reshape_fns.broadcast( np.empty((1,)), np.empty((1,)), require_kwargs=[{'requirements': ('W', 'C')}, {}]) # writeable, C order assert a.flags.writeable # writeable since it was copied to make C order assert not b.flags.writeable assert not np.isfortran(a) assert not np.isfortran(b) def test_broadcast_meta(self): _0, _a2, _sr2, _df2 = reshape_fns.broadcast(0, a2, sr2, df2, keep_raw=True) assert _0 == 0 np.testing.assert_array_equal(_a2, a2) np.testing.assert_array_equal(_sr2, sr2.values[:, None]) np.testing.assert_array_equal(_df2, df2.values) _0, _a2, _sr2, _df2 = reshape_fns.broadcast(0, a2, sr2, df2, keep_raw=[False, True, True, True]) test_shape = (3, 3) test_index = pd.MultiIndex.from_tuples([ ('x2', 'x4'), ('y2', 'y4'), ('z2', 'z4') ], names=['i2', 'i4']) test_columns = pd.Index(['a4', 'a4', 'a4'], name='c4', dtype='object') pd.testing.assert_frame_equal( _0, pd.DataFrame( np.zeros(test_shape, dtype=int), index=test_index, columns=test_columns ) ) np.testing.assert_array_equal(_a2, a2) np.testing.assert_array_equal(_sr2, sr2.values[:, None]) np.testing.assert_array_equal(_df2, df2.values) _, new_shape, new_index, new_columns = reshape_fns.broadcast(0, a2, sr2, df2, return_meta=True) assert new_shape == test_shape pd.testing.assert_index_equal(new_index, test_index) pd.testing.assert_index_equal(new_columns, test_columns) def test_broadcast_align(self): index1 = pd.Index(['a', 'b', 'c']) index2 = pd.MultiIndex.from_tuples([ (0, 'a'), (0, 'b'), (0, 'c'), (1, 'a'), (1, 'b'), (1, 'c') ]) index3 = pd.MultiIndex.from_tuples([ (2, 0, 'a'), (2, 0, 'b'), (2, 0, 'c'), (2, 1, 'a'), (2, 1, 'b'), (2, 1, 'c'), (3, 0, 'a'), (3, 0, 'b'), (3, 0, 'c'), (3, 1, 'a'), (3, 1, 'b'), (3, 1, 'c') ]) sr1 = pd.Series(np.arange(len(index1)), index=index1) df2 = pd.DataFrame( np.reshape(np.arange(len(index2) * len(index2)), (len(index2), len(index2))), index=index2, columns=index2 ) df3 = pd.DataFrame( np.reshape(np.arange(len(index3) * len(index3)), (len(index3), len(index3))), index=index3, columns=index3 ) _df1, _df2, _df3 = reshape_fns.broadcast(sr1, df2, df3, align_index=True, align_columns=True) pd.testing.assert_frame_equal( _df1, pd.DataFrame(np.array([ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] ]), index=index3, columns=index3) ) pd.testing.assert_frame_equal( _df2, pd.DataFrame(np.array([ [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29, 24, 25, 26, 27, 28, 29], [30, 31, 32, 33, 34, 35, 30, 31, 32, 33, 34, 35], [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11, 6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17, 12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23, 18, 19, 20, 21, 22, 23], [24, 25, 26, 27, 28, 29, 24, 25, 26, 27, 28, 29], [30, 31, 32, 33, 34, 35, 30, 31, 32, 33, 34, 35] ]), index=index3, columns=index3) ) pd.testing.assert_frame_equal(_df3, df3) def test_broadcast_to(self): np.testing.assert_array_equal(reshape_fns.broadcast_to(0, a5), np.broadcast_to(0, a5.shape)) pd.testing.assert_series_equal( reshape_fns.broadcast_to(0, sr2), pd.Series(np.broadcast_to(0, sr2.shape), index=sr2.index, name=sr2.name) ) pd.testing.assert_frame_equal( reshape_fns.broadcast_to(0, df5), pd.DataFrame(np.broadcast_to(0, df5.shape), index=df5.index, columns=df5.columns) ) pd.testing.assert_frame_equal( reshape_fns.broadcast_to(sr2, df5), pd.DataFrame(np.broadcast_to(sr2.to_frame(), df5.shape), index=df5.index, columns=df5.columns) ) pd.testing.assert_frame_equal( reshape_fns.broadcast_to(sr2, df5, index_from=0, columns_from=0), pd.DataFrame( np.broadcast_to(sr2.to_frame(), df5.shape), index=sr2.index, columns=pd.Index(['a2', 'a2', 'a2'], dtype='object')) ) @pytest.mark.parametrize( "test_input", [0, a2, a5, sr2, df5, np.zeros((2, 2, 2))], ) def test_broadcast_to_array_of(self, test_input): # broadcasting first element to be an array out of the second argument np.testing.assert_array_equal( reshape_fns.broadcast_to_array_of(0.1, test_input), np.full((1, *np.asarray(test_input).shape), 0.1) ) np.testing.assert_array_equal( reshape_fns.broadcast_to_array_of([0.1], test_input), np.full((1, *np.asarray(test_input).shape), 0.1) ) np.testing.assert_array_equal( reshape_fns.broadcast_to_array_of([0.1, 0.2], test_input), np.concatenate(( np.full((1, *np.asarray(test_input).shape), 0.1), np.full((1, *np.asarray(test_input).shape), 0.2) )) ) np.testing.assert_array_equal( reshape_fns.broadcast_to_array_of(np.expand_dims(np.asarray(test_input), 0), test_input), # do nothing np.expand_dims(np.asarray(test_input), 0) ) def test_broadcast_to_axis_of(self): np.testing.assert_array_equal( reshape_fns.broadcast_to_axis_of(10, np.empty((2,)), 0), np.full(2, 10) ) assert reshape_fns.broadcast_to_axis_of(10, np.empty((2,)), 1) == 10 np.testing.assert_array_equal( reshape_fns.broadcast_to_axis_of(10, np.empty((2, 3)), 0), np.full(2, 10) ) np.testing.assert_array_equal( reshape_fns.broadcast_to_axis_of(10, np.empty((2, 3)), 1), np.full(3, 10) ) assert reshape_fns.broadcast_to_axis_of(10, np.empty((2, 3)), 2) == 10 def test_unstack_to_array(self): i = pd.MultiIndex.from_arrays([[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']]) sr = pd.Series([1, 2, 3, 4], index=i) np.testing.assert_array_equal( reshape_fns.unstack_to_array(sr), np.asarray([[ [1., np.nan, np.nan, np.nan], [np.nan, 2., np.nan, np.nan] ], [ [np.nan, np.nan, 3., np.nan], [np.nan, np.nan, np.nan, 4.] ]]) ) np.testing.assert_array_equal( reshape_fns.unstack_to_array(sr, levels=(0,)), np.asarray([2., 4.]) ) np.testing.assert_array_equal( reshape_fns.unstack_to_array(sr, levels=(2, 0)), np.asarray([ [1., np.nan], [2., np.nan], [np.nan, 3.], [np.nan, 4.], ]) ) def test_make_symmetric(self): pd.testing.assert_frame_equal( reshape_fns.make_symmetric(sr2), pd.DataFrame( np.array([ [np.nan, 1.0, 2.0, 3.0], [1.0, np.nan, np.nan, np.nan], [2.0, np.nan, np.nan, np.nan], [3.0, np.nan, np.nan, np.nan] ]), index=pd.Index(['a2', 'x2', 'y2', 'z2'], dtype='object', name=('i2', None)), columns=pd.Index(['a2', 'x2', 'y2', 'z2'], dtype='object', name=('i2', None)) ) ) pd.testing.assert_frame_equal( reshape_fns.make_symmetric(df2), pd.DataFrame( np.array([ [np.nan, 1.0, 2.0, 3.0], [1.0, np.nan, np.nan, np.nan], [2.0, np.nan, np.nan, np.nan], [3.0, np.nan, np.nan, np.nan] ]), index=pd.Index(['a4', 'x4', 'y4', 'z4'], dtype='object', name=('i4', 'c4')), columns=pd.Index(['a4', 'x4', 'y4', 'z4'], dtype='object', name=('i4', 'c4')) ) ) pd.testing.assert_frame_equal( reshape_fns.make_symmetric(df5), pd.DataFrame( np.array([ [np.nan, np.nan, np.nan, 1.0, 4.0, 7.0], [np.nan, np.nan, np.nan, 2.0, 5.0, 8.0], [np.nan, np.nan, np.nan, 3.0, 6.0, 9.0], [1.0, 2.0, 3.0, np.nan, np.nan, np.nan], [4.0, 5.0, 6.0, np.nan, np.nan, np.nan], [7.0, 8.0, 9.0, np.nan, np.nan, np.nan] ]), index=pd.MultiIndex.from_tuples([ ('a7', 'a8'), ('b7', 'b8'), ('c7', 'c8'), ('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8') ], names=[('i7', 'c7'), ('i8', 'c8')]), columns=pd.MultiIndex.from_tuples([ ('a7', 'a8'), ('b7', 'b8'), ('c7', 'c8'), ('x7', 'x8'), ('y7', 'y8'), ('z7', 'z8') ], names=[('i7', 'c7'), ('i8', 'c8')]) ) ) pd.testing.assert_frame_equal( reshape_fns.make_symmetric(pd.Series([1, 2, 3], name='yo'), sort=False), pd.DataFrame( np.array([ [np.nan, np.nan, np.nan, 1.0], [np.nan, np.nan, np.nan, 2.0], [np.nan, np.nan, np.nan, 3.0], [1.0, 2.0, 3.0, np.nan] ]), index=pd.Index([0, 1, 2, 'yo'], dtype='object'), columns=pd.Index([0, 1, 2, 'yo'], dtype='object') ) ) def test_unstack_to_df(self): pd.testing.assert_frame_equal( reshape_fns.unstack_to_df(df5.iloc[0]), pd.DataFrame( np.array([ [1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0] ]), index=pd.Index(['a7', 'b7', 'c7'], dtype='object', name='c7'), columns=pd.Index(['a8', 'b8', 'c8'], dtype='object', name='c8') ) ) i = pd.MultiIndex.from_arrays([[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']]) sr = pd.Series([1, 2, 3, 4], index=i) pd.testing.assert_frame_equal( reshape_fns.unstack_to_df(sr, index_levels=0, column_levels=1), pd.DataFrame( np.array([ [1.0, 2.0], [3.0, 4.0] ]), index=pd.Index([1, 2], dtype='int64'), columns=pd.Index([3, 4], dtype='int64') ) ) pd.testing.assert_frame_equal( reshape_fns.unstack_to_df(sr, index_levels=(0, 1), column_levels=2), pd.DataFrame( np.array([ [1.0, np.nan, np.nan, np.nan], [np.nan, 2.0, np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan], [np.nan, np.nan, np.nan, 4.0] ]), index=pd.MultiIndex.from_tuples([ (1, 3), (1, 4), (2, 3), (2, 4) ]), columns=pd.Index(['a', 'b', 'c', 'd'], dtype='object') ) ) pd.testing.assert_frame_equal( reshape_fns.unstack_to_df(sr, index_levels=0, column_levels=1, symmetric=True), pd.DataFrame( np.array([ [np.nan, np.nan, 1.0, 2.0], [np.nan, np.nan, 3.0, 4.0], [1.0, 3.0, np.nan, np.nan], [2.0, 4.0, np.nan, np.nan] ]), index=pd.Index([1, 2, 3, 4], dtype='int64'), columns=pd.Index([1, 2, 3, 4], dtype='int64') ) ) @pytest.mark.parametrize( "test_inputs", [ (0, a1, a2, sr_none, sr1, sr2), (0, a1, a2, a3, a4, a5, sr_none, sr1, sr2, df_none, df1, df2, df3, df4) ], ) def test_flex(self, test_inputs): raw_args = reshape_fns.broadcast(*test_inputs, keep_raw=True) bc_args = reshape_fns.broadcast(*test_inputs, keep_raw=False) for r in range(len(test_inputs)): raw_arg = raw_args[r] bc_arg = np.array(bc_args[r]) bc_arg_2d = reshape_fns.to_2d(bc_arg) def_i, def_col = reshape_fns.flex_choose_i_and_col_nb(raw_arg, flex_2d=bc_arg.ndim == 2) for col in range(bc_arg_2d.shape[1]): for i in range(bc_arg_2d.shape[0]): assert bc_arg_2d[i, col] == reshape_fns.flex_select_nb( raw_arg, i, col, def_i, def_col, bc_arg.ndim == 2) # ############# indexing.py ############# # called_dict = {} PandasIndexer = indexing.PandasIndexer ParamIndexer = indexing.build_param_indexer(['param1', 'param2', 'tuple']) class H(PandasIndexer, ParamIndexer): def __init__(self, a, param1_mapper, param2_mapper, tuple_mapper, level_names): self.a = a self._param1_mapper = param1_mapper self._param2_mapper = param2_mapper self._tuple_mapper = tuple_mapper self._level_names = level_names PandasIndexer.__init__(self, calling='PandasIndexer') ParamIndexer.__init__( self, [param1_mapper, param2_mapper, tuple_mapper], level_names=[level_names[0], level_names[1], level_names], calling='ParamIndexer' ) def indexing_func(self, pd_indexing_func, calling=None): # As soon as you call iloc etc., performs it on each dataframe and mapper and returns a new class instance called_dict[calling] = True param1_mapper = indexing.indexing_on_mapper(self._param1_mapper, self.a, pd_indexing_func) param2_mapper = indexing.indexing_on_mapper(self._param2_mapper, self.a, pd_indexing_func) tuple_mapper = indexing.indexing_on_mapper(self._tuple_mapper, self.a, pd_indexing_func) return H(pd_indexing_func(self.a), param1_mapper, param2_mapper, tuple_mapper, self._level_names) @classmethod def run(cls, a, params1, params2, level_names=('p1', 'p2')): a = reshape_fns.to_2d(a) # Build column hierarchy params1_idx = pd.Index(params1, name=level_names[0]) params2_idx = pd.Index(params2, name=level_names[1]) params_idx = index_fns.stack_indexes([params1_idx, params2_idx]) new_columns = index_fns.combine_indexes([params_idx, a.columns]) # Build mappers param1_mapper = np.repeat(params1, len(a.columns)) param1_mapper = pd.Series(param1_mapper, index=new_columns) param2_mapper = np.repeat(params2, len(a.columns)) param2_mapper = pd.Series(param2_mapper, index=new_columns) tuple_mapper = list(zip(*list(map(lambda x: x.values, [param1_mapper, param2_mapper])))) tuple_mapper = pd.Series(tuple_mapper, index=new_columns) # Tile a to match the length of new_columns a = array_wrapper.ArrayWrapper(a.index, new_columns, 2).wrap(reshape_fns.tile(a.values, 4, axis=1)) return cls(a, param1_mapper, param2_mapper, tuple_mapper, level_names) # Similate an indicator with two params h = H.run(df4, [0.1, 0.1, 0.2, 0.2], [0.3, 0.4, 0.5, 0.6]) class TestIndexing: def test_kwargs(self): _ = h[(0.1, 0.3, 'a6')] assert called_dict['PandasIndexer'] _ = h.param1_loc[0.1] assert called_dict['ParamIndexer'] def test_pandas_indexing(self): # __getitem__ pd.testing.assert_series_equal( h[(0.1, 0.3, 'a6')].a, pd.Series( np.array([1, 4, 7]), index=pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6'), name=(0.1, 0.3, 'a6') ) ) # loc pd.testing.assert_frame_equal( h.loc[:, (0.1, 0.3, 'a6'):(0.1, 0.3, 'c6')].a, pd.DataFrame( np.array([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ]), index=pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6'), columns=pd.MultiIndex.from_tuples([ (0.1, 0.3, 'a6'), (0.1, 0.3, 'b6'), (0.1, 0.3, 'c6') ], names=['p1', 'p2', 'c6']) ) ) # iloc pd.testing.assert_frame_equal( h.iloc[-2:, -2:].a, pd.DataFrame( np.array([ [5, 6], [8, 9] ]), index=pd.Index(['y6', 'z6'], dtype='object', name='i6'), columns=pd.MultiIndex.from_tuples([ (0.2, 0.6, 'b6'), (0.2, 0.6, 'c6') ], names=['p1', 'p2', 'c6']) ) ) # xs pd.testing.assert_frame_equal( h.xs((0.1, 0.3), level=('p1', 'p2'), axis=1).a, pd.DataFrame( np.array([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ]), index=pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6'), columns=pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6') ) ) def test_param_indexing(self): # param1 pd.testing.assert_frame_equal( h.param1_loc[0.1].a, pd.DataFrame( np.array([ [1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6], [7, 8, 9, 7, 8, 9] ]), index=pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6'), columns=pd.MultiIndex.from_tuples([ (0.3, 'a6'), (0.3, 'b6'), (0.3, 'c6'), (0.4, 'a6'), (0.4, 'b6'), (0.4, 'c6') ], names=['p2', 'c6']) ) ) # param2 pd.testing.assert_frame_equal( h.param2_loc[0.3].a, pd.DataFrame( np.array([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ]), index=pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6'), columns=pd.MultiIndex.from_tuples([ (0.1, 'a6'), (0.1, 'b6'), (0.1, 'c6') ], names=['p1', 'c6']) ) ) # tuple pd.testing.assert_frame_equal( h.tuple_loc[(0.1, 0.3)].a, pd.DataFrame( np.array([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ]), index=pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6'), columns=pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6') ) ) pd.testing.assert_frame_equal( h.tuple_loc[(0.1, 0.3):(0.1, 0.3)].a, pd.DataFrame( np.array([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ]), index=pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6'), columns=pd.MultiIndex.from_tuples([ (0.1, 0.3, 'a6'), (0.1, 0.3, 'b6'), (0.1, 0.3, 'c6') ], names=['p1', 'p2', 'c6']) ) ) pd.testing.assert_frame_equal( h.tuple_loc[[(0.1, 0.3), (0.1, 0.3)]].a, pd.DataFrame( np.array([ [1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6], [7, 8, 9, 7, 8, 9] ]), index=pd.Index(['x6', 'y6', 'z6'], dtype='object', name='i6'), columns=pd.MultiIndex.from_tuples([ (0.1, 0.3, 'a6'), (0.1, 0.3, 'b6'), (0.1, 0.3, 'c6'), (0.1, 0.3, 'a6'), (0.1, 0.3, 'b6'), (0.1, 0.3, 'c6') ], names=['p1', 'p2', 'c6']) ) ) # ############# combine_fns.py ############# # class TestCombineFns: def test_apply_and_concat_one(self): def apply_func(i, x, a): return x + a[i] @njit def apply_func_nb(i, x, a): return x + a[i] # 1d target = np.array([ [11, 21, 31], [12, 22, 32], [13, 23, 33] ]) np.testing.assert_array_equal( combine_fns.apply_and_concat_one(3, apply_func, sr2.values, [10, 20, 30]), target ) np.testing.assert_array_equal( combine_fns.apply_and_concat_one_nb(3, apply_func_nb, sr2.values, (10, 20, 30)), target ) # 2d target2 = np.array([ [11, 12, 13, 21, 22, 23, 31, 32, 33], [14, 15, 16, 24, 25, 26, 34, 35, 36], [17, 18, 19, 27, 28, 29, 37, 38, 39] ]) np.testing.assert_array_equal( combine_fns.apply_and_concat_one(3, apply_func, df4.values, [10, 20, 30]), target2 ) np.testing.assert_array_equal( combine_fns.apply_and_concat_one_nb(3, apply_func_nb, df4.values, (10, 20, 30)), target2 ) def test_apply_and_concat_multiple(self): def apply_func(i, x, a): return (x, x + a[i]) @njit def apply_func_nb(i, x, a): return (x, x + a[i]) # 1d target_a = np.array([ [1, 1, 1], [2, 2, 2], [3, 3, 3] ]) target_b = np.array([ [11, 21, 31], [12, 22, 32], [13, 23, 33] ]) a, b = combine_fns.apply_and_concat_multiple(3, apply_func, sr2.values, [10, 20, 30]) np.testing.assert_array_equal(a, target_a) np.testing.assert_array_equal(b, target_b) a, b = combine_fns.apply_and_concat_multiple_nb(3, apply_func_nb, sr2.values, (10, 20, 30)) np.testing.assert_array_equal(a, target_a) np.testing.assert_array_equal(b, target_b) # 2d target_a = np.array([ [1, 2, 3, 1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6, 4, 5, 6], [7, 8, 9, 7, 8, 9, 7, 8, 9] ]) target_b = np.array([ [11, 12, 13, 21, 22, 23, 31, 32, 33], [14, 15, 16, 24, 25, 26, 34, 35, 36], [17, 18, 19, 27, 28, 29, 37, 38, 39] ]) a, b = combine_fns.apply_and_concat_multiple(3, apply_func, df4.values, [10, 20, 30]) np.testing.assert_array_equal(a, target_a) np.testing.assert_array_equal(b, target_b) a, b = combine_fns.apply_and_concat_multiple_nb(3, apply_func_nb, df4.values, (10, 20, 30)) np.testing.assert_array_equal(a, target_a) np.testing.assert_array_equal(b, target_b) def test_combine_and_concat(self): def combine_func(x, y, a): return x + y + a @njit def combine_func_nb(x, y, a): return x + y + a # 1d target = np.array([ [103, 104], [106, 108], [109, 112] ]) np.testing.assert_array_equal( combine_fns.combine_and_concat( sr2.values, (sr2.values * 2, sr2.values * 3), combine_func, 100), target ) np.testing.assert_array_equal( combine_fns.combine_and_concat_nb( sr2.values, (sr2.values * 2, sr2.values * 3), combine_func_nb, 100), target ) # 2d target2 = np.array([ [103, 106, 109, 104, 108, 112], [112, 115, 118, 116, 120, 124], [121, 124, 127, 128, 132, 136] ]) np.testing.assert_array_equal( combine_fns.combine_and_concat( df4.values, (df4.values * 2, df4.values * 3), combine_func, 100), target2 ) np.testing.assert_array_equal( combine_fns.combine_and_concat_nb( df4.values, (df4.values * 2, df4.values * 3), combine_func_nb, 100), target2 ) def test_combine_multiple(self): def combine_func(x, y, a): return x + y + a @njit def combine_func_nb(x, y, a): return x + y + a # 1d target = np.array([206, 212, 218]) np.testing.assert_array_equal( combine_fns.combine_multiple( (sr2.values, sr2.values * 2, sr2.values * 3), combine_func, 100), target ) np.testing.assert_array_equal( combine_fns.combine_multiple_nb( (sr2.values, sr2.values * 2, sr2.values * 3), combine_func_nb, 100), target ) # 2d target2 = np.array([ [206, 212, 218], [224, 230, 236], [242, 248, 254] ]) np.testing.assert_array_equal( combine_fns.combine_multiple( (df4.values, df4.values * 2, df4.values * 3), combine_func, 100), target2 ) np.testing.assert_array_equal( combine_fns.combine_multiple_nb( (df4.values, df4.values * 2, df4.values * 3), combine_func_nb, 100), target2 ) # ############# accessors.py ############# # class TestAccessors: def test_indexing(self): pd.testing.assert_series_equal(df4.vbt['a6'].obj, df4['a6'].vbt.obj) def test_freq(self): ts = pd.Series([1, 2, 3], index=pd.DatetimeIndex([ datetime(2018, 1, 1), datetime(2018, 1, 2), datetime(2018, 1, 3) ])) assert ts.vbt.wrapper.freq == day_dt assert ts.vbt(freq='2D').wrapper.freq == day_dt * 2 assert pd.Series([1, 2, 3]).vbt.wrapper.freq is None assert pd.Series([1, 2, 3]).vbt(freq='3D').wrapper.freq == day_dt * 3 assert pd.Series([1, 2, 3]).vbt(freq=np.timedelta64(4, 'D')).wrapper.freq == day_dt * 4 def test_props(self): assert sr1.vbt.is_series() assert not sr1.vbt.is_frame() assert not df1.vbt.is_series() assert df2.vbt.is_frame() def test_wrapper(self): pd.testing.assert_index_equal(sr2.vbt.wrapper.index, sr2.index) pd.testing.assert_index_equal(sr2.vbt.wrapper.columns, sr2.to_frame().columns) assert sr2.vbt.wrapper.ndim == sr2.ndim assert sr2.vbt.wrapper.name == sr2.name assert pd.Series([1, 2, 3]).vbt.wrapper.name is None assert sr2.vbt.wrapper.shape == sr2.shape assert sr2.vbt.wrapper.shape_2d == (sr2.shape[0], 1) pd.testing.assert_index_equal(df4.vbt.wrapper.index, df4.index) pd.testing.assert_index_equal(df4.vbt.wrapper.columns, df4.columns) assert df4.vbt.wrapper.ndim == df4.ndim assert df4.vbt.wrapper.name is None assert df4.vbt.wrapper.shape == df4.shape assert df4.vbt.wrapper.shape_2d == df4.shape pd.testing.assert_series_equal(sr2.vbt.wrapper.wrap(a2), sr2) pd.testing.assert_series_equal(sr2.vbt.wrapper.wrap(df2), sr2) pd.testing.assert_series_equal( sr2.vbt.wrapper.wrap(df2.values, index=df2.index, columns=df2.columns), pd.Series(df2.values[:, 0], index=df2.index, name=df2.columns[0]) ) pd.testing.assert_frame_equal( sr2.vbt.wrapper.wrap(df4.values, columns=df4.columns), pd.DataFrame(df4.values, index=sr2.index, columns=df4.columns) ) pd.testing.assert_frame_equal(df2.vbt.wrapper.wrap(a2), df2) pd.testing.assert_frame_equal(df2.vbt.wrapper.wrap(sr2), df2) pd.testing.assert_frame_equal( df2.vbt.wrapper.wrap(df4.values, columns=df4.columns), pd.DataFrame(df4.values, index=df2.index, columns=df4.columns) ) def test_empty(self): pd.testing.assert_series_equal( pd.Series.vbt.empty(5, index=np.arange(10, 15), name='a', fill_value=5), pd.Series(np.full(5, 5), index=np.arange(10, 15), name='a') ) pd.testing.assert_frame_equal( pd.DataFrame.vbt.empty((5, 3), index=np.arange(10, 15), columns=['a', 'b', 'c'], fill_value=5), pd.DataFrame(np.full((5, 3), 5), index=np.arange(10, 15), columns=['a', 'b', 'c']) ) pd.testing.assert_series_equal( pd.Series.vbt.empty_like(sr2, fill_value=5), pd.Series(np.full(sr2.shape, 5), index=sr2.index, name=sr2.name) ) pd.testing.assert_frame_equal( pd.DataFrame.vbt.empty_like(df4, fill_value=5), pd.DataFrame(np.full(df4.shape, 5), index=df4.index, columns=df4.columns) ) def test_apply_func_on_index(self): pd.testing.assert_frame_equal( df1.vbt.apply_on_index(lambda idx: idx + '_yo', axis=0), pd.DataFrame( np.asarray([1]), index=pd.Index(['x3_yo'], dtype='object', name='i3'), columns=pd.Index(['a3'], dtype='object', name='c3') ) ) pd.testing.assert_frame_equal( df1.vbt.apply_on_index(lambda idx: idx + '_yo', axis=1), pd.DataFrame( np.asarray([1]), index=pd.Index(['x3'], dtype='object', name='i3'), columns=pd.Index(['a3_yo'], dtype='object', name='c3') ) ) df1_copy = df1.copy() df1_copy.vbt.apply_on_index(lambda idx: idx + '_yo', axis=0, inplace=True) pd.testing.assert_frame_equal( df1_copy, pd.DataFrame( np.asarray([1]), index=pd.Index(['x3_yo'], dtype='object', name='i3'), columns=pd.Index(['a3'], dtype='object', name='c3') ) ) df1_copy2 = df1.copy() df1_copy2.vbt.apply_on_index(lambda idx: idx + '_yo', axis=1, inplace=True) pd.testing.assert_frame_equal( df1_copy2, pd.DataFrame( np.asarray([1]), index=pd.Index(['x3'], dtype='object', name='i3'), columns=pd.Index(['a3_yo'], dtype='object', name='c3') ) ) def test_stack_index(self): pd.testing.assert_frame_equal( df5.vbt.stack_index([1, 2, 3], on_top=True), pd.DataFrame( df5.values, index=df5.index, columns=pd.MultiIndex.from_tuples([ (1, 'a7', 'a8'), (2, 'b7', 'b8'), (3, 'c7', 'c8') ], names=[None, 'c7', 'c8']) ) ) pd.testing.assert_frame_equal( df5.vbt.stack_index([1, 2, 3], on_top=False), pd.DataFrame( df5.values, index=df5.index, columns=pd.MultiIndex.from_tuples([ ('a7', 'a8', 1), ('b7', 'b8', 2), ('c7', 'c8', 3) ], names=['c7', 'c8', None]) ) ) def test_drop_levels(self): pd.testing.assert_frame_equal( df5.vbt.drop_levels('c7'), pd.DataFrame( df5.values, index=df5.index, columns=pd.Index(['a8', 'b8', 'c8'], dtype='object', name='c8') ) ) def test_rename_levels(self): pd.testing.assert_frame_equal( df5.vbt.rename_levels({'c8': 'c9'}), pd.DataFrame( df5.values, index=df5.index, columns=pd.MultiIndex.from_tuples([ ('a7', 'a8'), ('b7', 'b8'), ('c7', 'c8') ], names=['c7', 'c9']) ) ) def test_select_levels(self): pd.testing.assert_frame_equal( df5.vbt.select_levels('c8'), pd.DataFrame( df5.values, index=df5.index, columns=pd.Index(['a8', 'b8', 'c8'], dtype='object', name='c8') ) ) def test_drop_redundant_levels(self): pd.testing.assert_frame_equal( df5.vbt.stack_index(pd.RangeIndex(start=0, step=1, stop=3)).vbt.drop_redundant_levels(), df5 ) def test_drop_duplicate_levels(self): pd.testing.assert_frame_equal( df5.vbt.stack_index(df5.columns.get_level_values(0)).vbt.drop_duplicate_levels(), df5 ) def test_to_array(self): np.testing.assert_array_equal(sr2.vbt.to_1d_array(), sr2.values) np.testing.assert_array_equal(sr2.vbt.to_2d_array(), sr2.to_frame().values) np.testing.assert_array_equal(df2.vbt.to_1d_array(), df2.iloc[:, 0].values) np.testing.assert_array_equal(df2.vbt.to_2d_array(), df2.values) def test_tile(self): pd.testing.assert_frame_equal( df4.vbt.tile(2, keys=['a', 'b'], axis=0), pd.DataFrame( np.asarray([ [1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 2, 3], [4, 5, 6], [7, 8, 9] ]), index=pd.MultiIndex.from_tuples([ ('a', 'x6'), ('a', 'y6'), ('a', 'z6'), ('b', 'x6'), ('b', 'y6'), ('b', 'z6') ], names=[None, 'i6']), columns=df4.columns ) ) pd.testing.assert_frame_equal( df4.vbt.tile(2, keys=['a', 'b'], axis=1), pd.DataFrame( np.asarray([ [1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6], [7, 8, 9, 7, 8, 9] ]), index=df4.index, columns=pd.MultiIndex.from_tuples([ ('a', 'a6'), ('a', 'b6'), ('a', 'c6'), ('b', 'a6'), ('b', 'b6'), ('b', 'c6') ], names=[None, 'c6']) ) ) def test_repeat(self): pd.testing.assert_frame_equal( df4.vbt.repeat(2, keys=['a', 'b'], axis=0), pd.DataFrame( np.asarray([ [1, 2, 3], [1, 2, 3], [4, 5, 6], [4, 5, 6], [7, 8, 9], [7, 8, 9] ]), index=pd.MultiIndex.from_tuples([ ('x6', 'a'), ('x6', 'b'), ('y6', 'a'), ('y6', 'b'), ('z6', 'a'), ('z6', 'b') ], names=['i6', None]), columns=df4.columns ) ) pd.testing.assert_frame_equal( df4.vbt.repeat(2, keys=['a', 'b'], axis=1), pd.DataFrame( np.asarray([ [1, 1, 2, 2, 3, 3], [4, 4, 5, 5, 6, 6], [7, 7, 8, 8, 9, 9] ]), index=df4.index, columns=pd.MultiIndex.from_tuples([ ('a6', 'a'), ('a6', 'b'), ('b6', 'a'), ('b6', 'b'), ('c6', 'a'), ('c6', 'b') ], names=['c6', None]) ) ) def test_align_to(self): multi_c1 = pd.MultiIndex.from_arrays([['a8', 'b8']], names=['c8']) multi_c2 = pd.MultiIndex.from_arrays([['a7', 'a7', 'c7', 'c7'], ['a8', 'b8', 'a8', 'b8']], names=['c7', 'c8']) df10 = pd.DataFrame([[1, 2], [4, 5], [7, 8]], columns=multi_c1) df20 = pd.DataFrame([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]], columns=multi_c2) pd.testing.assert_frame_equal( df10.vbt.align_to(df20), pd.DataFrame( np.asarray([ [1, 2, 1, 2], [4, 5, 4, 5], [7, 8, 7, 8] ]), index=pd.RangeIndex(start=0, stop=3, step=1), columns=multi_c2 ) ) def test_broadcast(self): a, b = pd.Series.vbt.broadcast(sr2, 10) b_target = pd.Series(np.full(sr2.shape, 10), index=sr2.index, name=sr2.name) pd.testing.assert_series_equal(a, sr2) pd.testing.assert_series_equal(b, b_target) a, b = sr2.vbt.broadcast(10) pd.testing.assert_series_equal(a, sr2) pd.testing.assert_series_equal(b, b_target) def test_broadcast_to(self): pd.testing.assert_frame_equal(sr2.vbt.broadcast_to(df2), df2) pd.testing.assert_frame_equal(sr2.vbt.broadcast_to(df2.vbt), df2) def test_apply(self): pd.testing.assert_series_equal(sr2.vbt.apply(apply_func=lambda x: x ** 2), sr2 ** 2) pd.testing.assert_series_equal(sr2.vbt.apply(apply_func=lambda x: x ** 2, to_2d=True), sr2 ** 2) pd.testing.assert_frame_equal(df4.vbt.apply(apply_func=lambda x: x ** 2), df4 ** 2) def test_concat(self): pd.testing.assert_frame_equal( pd.DataFrame.vbt.concat(pd.Series([1, 2, 3]), pd.Series([1, 2, 3])), pd.DataFrame({0: pd.Series([1, 2, 3]), 1: pd.Series([1, 2, 3])}) ) target = pd.DataFrame( np.array([ [1, 1, 1, 10, 10, 10, 1, 2, 3], [2, 2, 2, 10, 10, 10, 4, 5, 6], [3, 3, 3, 10, 10, 10, 7, 8, 9] ]), index=pd.MultiIndex.from_tuples([ ('x2', 'x6'), ('y2', 'y6'), ('z2', 'z6') ], names=['i2', 'i6']), columns=pd.MultiIndex.from_tuples([ ('a', 'a6'), ('a', 'b6'), ('a', 'c6'), ('b', 'a6'), ('b', 'b6'), ('b', 'c6'), ('c', 'a6'), ('c', 'b6'), ('c', 'c6') ], names=[None, 'c6']) ) pd.testing.assert_frame_equal( pd.DataFrame.vbt.concat(sr2, 10, df4, keys=['a', 'b', 'c']), target ) pd.testing.assert_frame_equal( sr2.vbt.concat(10, df4, keys=['a', 'b', 'c']), target ) def test_apply_and_concat(self): def apply_func(i, x, y, c, d=1): return x + y[i] + c + d @njit def apply_func_nb(i, x, y, c, d): return x + y[i] + c + d target = pd.DataFrame( np.array([ [112, 113, 114], [113, 114, 115], [114, 115, 116] ]), index=pd.Index(['x2', 'y2', 'z2'], dtype='object', name='i2'), columns=pd.Index(['a', 'b', 'c'], dtype='object') ) pd.testing.assert_frame_equal( sr2.vbt.apply_and_concat( 3, np.array([1, 2, 3]), 10, apply_func=apply_func, d=100, keys=['a', 'b', 'c'] ), target ) pd.testing.assert_frame_equal( sr2.vbt.apply_and_concat( 3, np.array([1, 2, 3]), 10, 100, apply_func=apply_func_nb, numba_loop=True, keys=['a', 'b', 'c'] ), target ) if ray_available: with pytest.raises(Exception): sr2.vbt.apply_and_concat( 3, np.array([1, 2, 3]), 10, 100, apply_func=apply_func_nb, numba_loop=True, use_ray=True, keys=['a', 'b', 'c'] ) pd.testing.assert_frame_equal( sr2.vbt.apply_and_concat( 3, np.array([1, 2, 3]), 10, apply_func=apply_func, d=100, keys=['a', 'b', 'c'], use_ray=True ), target ) pd.testing.assert_frame_equal( sr2.vbt.apply_and_concat( 3, np.array([1, 2, 3]), 10, apply_func=apply_func, d=100 ), pd.DataFrame( target.values, index=target.index, columns=pd.Index([0, 1, 2], dtype='int64', name='apply_idx') ) ) def apply_func2(i, x, y, c, d=1): return x + y + c + d pd.testing.assert_frame_equal( sr2.vbt.apply_and_concat( 3, np.array([[1], [2], [3]]), 10, apply_func=apply_func2, d=100, keys=['a', 'b', 'c'], to_2d=True # otherwise (3, 1) + (1, 3) = (3, 3) != (3, 1) -> error ), pd.DataFrame( np.array([ [112, 112, 112], [114, 114, 114], [116, 116, 116] ]), index=target.index, columns=target.columns ) ) target2 = pd.DataFrame( np.array([ [112, 113, 114], [113, 114, 115], [114, 115, 116] ]), index=pd.Index(['x4', 'y4', 'z4'], dtype='object', name='i4'), columns=pd.MultiIndex.from_tuples([ ('a', 'a4'), ('b', 'a4'), ('c', 'a4') ], names=[None, 'c4']) ) pd.testing.assert_frame_equal( df2.vbt.apply_and_concat( 3, np.array([1, 2, 3]), 10, apply_func=apply_func, d=100, keys=['a', 'b', 'c'] ), target2 ) pd.testing.assert_frame_equal( df2.vbt.apply_and_concat( 3, np.array([1, 2, 3]), 10, 100, apply_func=apply_func_nb, numba_loop=True, keys=['a', 'b', 'c'] ), target2 ) if ray_available: pd.testing.assert_frame_equal( df2.vbt.apply_and_concat( 3, np.array([1, 2, 3]), 10, apply_func=apply_func, d=100, keys=['a', 'b', 'c'], use_ray=True ), target2 ) def test_combine(self): def combine_func(x, y, a, b=1): return x + y + a + b @njit def combine_func_nb(x, y, a, b): return x + y + a + b pd.testing.assert_series_equal( sr2.vbt.combine(10, 100, b=1000, combine_func=combine_func), pd.Series( np.array([1111, 1112, 1113]), index=pd.Index(['x2', 'y2', 'z2'], dtype='object', name='i2'), name=sr2.name ) ) pd.testing.assert_series_equal( sr2.vbt.combine(10, 100, 1000, combine_func=combine_func_nb), pd.Series( np.array([1111, 1112, 1113]), index=pd.Index(['x2', 'y2', 'z2'], dtype='object', name='i2'), name=sr2.name ) ) @njit def combine_func2_nb(x, y): return x + y + np.array([[1], [2], [3]]) pd.testing.assert_series_equal( sr2.vbt.combine(10, combine_func=combine_func2_nb, to_2d=True), pd.Series( np.array([12, 14, 16]), index=pd.Index(['x2', 'y2', 'z2'], dtype='object', name='i2'), name='a2' ) ) @njit def combine_func3_nb(x, y): return x + y pd.testing.assert_frame_equal( df4.vbt.combine(sr2, combine_func=combine_func3_nb), pd.DataFrame( np.array([ [2, 3, 4], [6, 7, 8], [10, 11, 12] ]), index=pd.MultiIndex.from_tuples([ ('x6', 'x2'), ('y6', 'y2'), ('z6', 'z2') ], names=['i6', 'i2']), columns=pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6') ) ) target = pd.DataFrame( np.array([ [232, 233, 234], [236, 237, 238], [240, 241, 242] ]), index=pd.MultiIndex.from_tuples([ ('x2', 'x6'), ('y2', 'y6'), ('z2', 'z6') ], names=['i2', 'i6']), columns=pd.Index(['a6', 'b6', 'c6'], dtype='object', name='c6') ) pd.testing.assert_frame_equal( sr2.vbt.combine( [10, df4], 10, b=100, combine_func=combine_func ), target ) pd.testing.assert_frame_equal( sr2.vbt.combine( [10, df4], 10, 100, combine_func=combine_func_nb, numba_loop=True ), target ) if ray_available: with pytest.raises(Exception): sr2.vbt.combine( [10, df4], 10, 100, combine_func=combine_func_nb, numba_loop=True, use_ray=True ) pd.testing.assert_frame_equal( df4.vbt.combine( [10, sr2], 10, b=100, combine_func=combine_func ), pd.DataFrame( target.values, index=pd.MultiIndex.from_tuples([ ('x6', 'x2'), ('y6', 'y2'), ('z6', 'z2') ], names=['i6', 'i2']), columns=target.columns ) ) target2 = pd.DataFrame( np.array([ [121, 121, 121, 112, 113, 114], [122, 122, 122, 116, 117, 118], [123, 123, 123, 120, 121, 122] ]), index=pd.MultiIndex.from_tuples([ ('x2', 'x6'), ('y2', 'y6'), ('z2', 'z6') ], names=['i2', 'i6']), columns=pd.MultiIndex.from_tuples([ (0, 'a6'), (0, 'b6'), (0, 'c6'), (1, 'a6'), (1, 'b6'), (1, 'c6') ], names=['combine_idx', 'c6']) ) pd.testing.assert_frame_equal( sr2.vbt.combine( [10, df4], 10, b=100, combine_func=combine_func, concat=True ), target2 ) pd.testing.assert_frame_equal( sr2.vbt.combine( [10, df4], 10, 100, combine_func=combine_func_nb, numba_loop=True, concat=True ), target2 ) if ray_available: pd.testing.assert_frame_equal( sr2.vbt.combine( [10, df4], 10, b=100, combine_func=combine_func, concat=True, use_ray=True ), target2 ) pd.testing.assert_frame_equal( sr2.vbt.combine( [10, df4], 10, b=100, combine_func=lambda x, y, a, b=1: x + y + a + b, concat=True, keys=['a', 'b'] ), pd.DataFrame( target2.values, index=target2.index, columns=pd.MultiIndex.from_tuples([ ('a', 'a6'), ('a', 'b6'), ('a', 'c6'), ('b', 'a6'), ('b', 'b6'), ('b', 'c6') ], names=[None, 'c6']) ) )