{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# base" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import vectorbt as vbt\n", "\n", "from vectorbt.base import column_grouper, array_wrapper, combine_fns, index_fns, indexing, reshape_fns" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "Collapsed": "false" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from datetime import datetime\n", "from numba import njit\n", "import itertools" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 1\n", "dtype: int64\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", " 0\n", "0 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "c7 a7 b7 c7\n", "c8 a8 b8 c8\n", "i7 i8 \n", "x7 x8 1 2 3\n", "y7 y8 4 5 6\n", "z7 z8 7 8 9\n" ] } ], "source": [ "v1 = 0\n", "a1 = np.array([1])\n", "a2 = np.array([1, 2, 3])\n", "a3 = np.array([[1, 2, 3]])\n", "a4 = np.array([[1], [2], [3]])\n", "a5 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n", "sr_none = pd.Series([1])\n", "print(sr_none)\n", "sr1 = pd.Series([1], index=pd.Index(['x1'], name='i1'), name='a1')\n", "print(sr1)\n", "sr2 = pd.Series([1, 2, 3], index=pd.Index(['x2', 'y2', 'z2'], name='i2'), name='a2')\n", "print(sr2)\n", "df_none = pd.DataFrame([[1]])\n", "print(df_none)\n", "df1 = pd.DataFrame(\n", " [[1]], \n", " index=pd.Index(['x3'], name='i3'), \n", " columns=pd.Index(['a3'], name='c3'))\n", "print(df1)\n", "df2 = pd.DataFrame(\n", " [[1], [2], [3]], \n", " index=pd.Index(['x4', 'y4', 'z4'], name='i4'), \n", " columns=pd.Index(['a4'], name='c4'))\n", "print(df2)\n", "df3 = pd.DataFrame(\n", " [[1, 2, 3]], \n", " 
index=pd.Index(['x5'], name='i5'), \n", " columns=pd.Index(['a5', 'b5', 'c5'], name='c5'))\n", "print(df3)\n", "df4 = pd.DataFrame(\n", " [[1, 2, 3], [4, 5, 6], [7, 8, 9]], \n", " index=pd.Index(['x6', 'y6', 'z6'], name='i6'), \n", " columns=pd.Index(['a6', 'b6', 'c6'], name='c6'))\n", "print(df4)\n", "\n", "multi_i = pd.MultiIndex.from_arrays([['x7', 'y7', 'z7'], ['x8', 'y8', 'z8']], names=['i7', 'i8']) \n", "multi_c = pd.MultiIndex.from_arrays([['a7', 'b7', 'c7'], ['a8', 'b8', 'c8']], names=['c7', 'c8'])\n", "df5 = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=multi_i, columns=multi_c)\n", "print(df5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## column_grouper" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "some_columns = pd.MultiIndex.from_arrays([\n", " [1, 1, 1, 1, 0, 0, 0, 0],\n", " [3, 3, 2, 2, 1, 1, 0, 0],\n", " [7, 6, 5, 4, 3, 2, 1, 0]\n", "], names=['first', 'second', 'third'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([1, 1, 1, 1, 0, 0, 0, 0], dtype='int64', name='first')\n", "Index([1, 1, 1, 1, 0, 0, 0, 0], dtype='int64', name='first')\n", "MultiIndex([(1, 3),\n", " (1, 3),\n", " (1, 2),\n", " (1, 2),\n", " (0, 1),\n", " (0, 1),\n", " (0, 0),\n", " (0, 0)],\n", " names=['first', 'second'])\n", "MultiIndex([(1, 3),\n", " (1, 3),\n", " (1, 2),\n", " (1, 2),\n", " (0, 1),\n", " (0, 1),\n", " (0, 0),\n", " (0, 0)],\n", " names=['first', 'second'])\n", "Index([3, 2, 1, 1, 1, 0, 0, 0], dtype='int64')\n", "Index([3, 2, 1, 1, 1, 0, 0, 0], dtype='int64', name='fourth')\n" ] } ], "source": [ "print(column_grouper.group_by_to_index(some_columns, group_by=0))\n", "print(column_grouper.group_by_to_index(some_columns, group_by='first'))\n", "print(column_grouper.group_by_to_index(some_columns, group_by=[0, 1]))\n", "print(column_grouper.group_by_to_index(some_columns, group_by=['first', 
'second']))\n", "print(column_grouper.group_by_to_index(some_columns, group_by=np.array([3, 2, 1, 1, 1, 0, 0, 0])))\n", "print(column_grouper.group_by_to_index(some_columns, group_by=pd.Index([3, 2, 1, 1, 1, 0, 0, 0], name='fourth')))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(array([0, 0, 0, 0, 1, 1, 1, 1]), Index([1, 0], dtype='int64', name='first'))\n", "(array([0, 0, 1, 1, 2, 2, 3, 3]), MultiIndex([(1, 3),\n", " (1, 2),\n", " (0, 1),\n", " (0, 0)],\n", " names=['first', 'second']))\n", "(array([0, 1, 2, 2, 2, 3, 3, 3]), Index([3, 2, 1, 0], dtype='int64'))\n" ] } ], "source": [ "# group_arr comes always from 0 to n, also keeps order\n", "print(column_grouper.get_groups_and_index(some_columns, 0))\n", "print(column_grouper.get_groups_and_index(some_columns, [0, 1]))\n", "print(column_grouper.get_groups_and_index(some_columns, np.array([3, 2, 1, 1, 1, 0, 0, 0])))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[4 4]\n", "[1 1]\n", "[2]\n", "[1]\n", "[]\n" ] } ], "source": [ "print(column_grouper.get_group_lens_nb(np.array([0, 0, 0, 0, 1, 1, 1, 1])))\n", "print(column_grouper.get_group_lens_nb(np.array([0, 1])))\n", "print(column_grouper.get_group_lens_nb(np.array([0, 0])))\n", "print(column_grouper.get_group_lens_nb(np.array([0])))\n", "print(column_grouper.get_group_lens_nb(np.array([])))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([0], dtype='int64')\n", "(array([0]), Index([0], dtype='int64'))\n", "[0]\n", "Index([0], dtype='int64')\n", "[1]\n", "[0]\n", "[1]\n" ] } ], "source": [ "print(column_grouper.ColumnGrouper(sr2.to_frame().columns, group_by=np.array([0])).group_by)\n", "print(column_grouper.ColumnGrouper(sr2.to_frame().columns, 
group_by=np.array([0])).get_groups_and_columns())\n", "print(column_grouper.ColumnGrouper(sr2.to_frame().columns, group_by=np.array([0])).get_groups())\n", "print(column_grouper.ColumnGrouper(sr2.to_frame().columns, group_by=np.array([0])).get_columns())\n", "print(column_grouper.ColumnGrouper(sr2.to_frame().columns, group_by=np.array([0])).get_group_lens())\n", "print(column_grouper.ColumnGrouper(sr2.to_frame().columns, group_by=np.array([0])).get_group_start_idxs())\n", "print(column_grouper.ColumnGrouper(sr2.to_frame().columns, group_by=np.array([0])).get_group_end_idxs())" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([0, 0, 1], dtype='int64')\n", "(array([0, 0, 1]), Index([0, 1], dtype='int64'))\n", "[0 0 1]\n", "Index([0, 1], dtype='int64')\n", "[2 1]\n", "[0 2]\n", "[2 3]\n" ] } ], "source": [ "print(column_grouper.ColumnGrouper(df4.columns, group_by=np.array([0, 0, 1])).group_by)\n", "print(column_grouper.ColumnGrouper(df4.columns, group_by=np.array([0, 0, 1])).get_groups_and_columns())\n", "print(column_grouper.ColumnGrouper(df4.columns, group_by=np.array([0, 0, 1])).get_groups())\n", "print(column_grouper.ColumnGrouper(df4.columns, group_by=np.array([0, 0, 1])).get_columns())\n", "print(column_grouper.ColumnGrouper(df4.columns, group_by=np.array([0, 0, 1])).get_group_lens())\n", "print(column_grouper.ColumnGrouper(df4.columns, group_by=np.array([0, 0, 1])).get_group_start_idxs())\n", "print(column_grouper.ColumnGrouper(df4.columns, group_by=np.array([0, 0, 1])).get_group_end_idxs())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## array_wrapper" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "sr2_wrapper = array_wrapper.ArrayWrapper.from_obj(sr2)\n", "df4_wrapper = array_wrapper.ArrayWrapper.from_obj(df4)\n", "\n", "sr2_wrapper_co = sr2_wrapper.copy(column_only_select=True)\n", "df4_wrapper_co = 
df4_wrapper.copy(column_only_select=True)\n", "\n", "sr2_grouped_wrapper = sr2_wrapper.copy(group_by=np.array([0]))\n", "df4_grouped_wrapper = df4_wrapper.copy(group_by=np.array([0, 0, 1]))\n", "\n", "sr2_grouped_wrapper_co = sr2_grouped_wrapper.copy(column_only_select=True)\n", "df4_grouped_wrapper_co = df4_grouped_wrapper.copy(column_only_select=True)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(array([0, 1]), 0, 0)\n", "(0, array([0, 1]), array([0, 1]))\n", "(array([0, 1]), 0, 0)\n", "(array([0, 1]), array([0]), array([0]))\n", "(array([0, 1]), array([0, 1]), array([0, 1]))\n" ] } ], "source": [ "# test indexing\n", "print(sr2_wrapper.indexing_func_meta(lambda x: x.iloc[:2])[1:])\n", "print(df4_wrapper.indexing_func_meta(lambda x: x.iloc[0, :2])[1:])\n", "print(df4_wrapper.indexing_func_meta(lambda x: x.iloc[:2, 0])[1:])\n", "print(df4_wrapper.indexing_func_meta(lambda x: x.iloc[:2, [0]])[1:])\n", "print(df4_wrapper.indexing_func_meta(lambda x: x.iloc[:2, :2])[1:])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(array([0, 1, 2]), 0, 0)\n", "(array([0, 1, 2]), array([0]), array([0]))\n", "(array([0, 1, 2]), array([0, 1]), array([0, 1]))\n" ] } ], "source": [ "print(df4_wrapper_co.indexing_func_meta(lambda x: x.iloc[0])[1:])\n", "print(df4_wrapper_co.indexing_func_meta(lambda x: x.iloc[[0]])[1:])\n", "print(df4_wrapper_co.indexing_func_meta(lambda x: x.iloc[:2])[1:])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(array([0, 1]), 0, 0)\n", "(array([0, 1]), 0, array([0, 1]))\n", "(array([0, 1]), 1, 2)\n", "(array([0, 1]), array([1]), array([2]))\n", "(array([0, 1]), array([0, 1]), array([0, 1, 2]))\n" ] } ], "source": [ "print(sr2_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2])[1:])\n", 
"print(df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2, 0])[1:])\n", "print(df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2, 1])[1:])\n", "print(df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2, [1]])[1:])\n", "print(df4_grouped_wrapper.indexing_func_meta(lambda x: x.iloc[:2, :2])[1:])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(array([0, 1, 2]), 0, array([0, 1]))\n", "(array([0, 1, 2]), 1, 2)\n", "(array([0, 1, 2]), array([1]), array([2]))\n", "(array([0, 1, 2]), array([0, 1]), array([0, 1, 2]))\n" ] } ], "source": [ "print(df4_grouped_wrapper_co.indexing_func_meta(lambda x: x.iloc[0])[1:])\n", "print(df4_grouped_wrapper_co.indexing_func_meta(lambda x: x.iloc[1])[1:])\n", "print(df4_grouped_wrapper_co.indexing_func_meta(lambda x: x.iloc[[1]])[1:])\n", "print(df4_grouped_wrapper_co.indexing_func_meta(lambda x: x.iloc[:2])[1:])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['x2', 'y2'], dtype='object', name='i2')\n", "Index(['a2'], dtype='object')\n", "1\n", "Index(['a6', 'b6'], dtype='object', name='c6')\n", "Index(['x6'], dtype='object', name='i6')\n", "1\n", "Index(['x6', 'y6'], dtype='object', name='i6')\n", "Index(['a6'], dtype='object', name='c6')\n", "1\n", "Index(['x6', 'y6'], dtype='object', name='i6')\n", "Index(['a6'], dtype='object', name='c6')\n", "2\n", "Index(['x6', 'y6'], dtype='object', name='i6')\n", "Index(['a6', 'b6'], dtype='object', name='c6')\n", "2\n" ] } ], "source": [ "print(sr2_wrapper.iloc[:2].index)\n", "print(sr2_wrapper.iloc[:2].columns)\n", "print(sr2_wrapper.iloc[:2].ndim)\n", "\n", "print(df4_wrapper.iloc[0, :2].index)\n", "print(df4_wrapper.iloc[0, :2].columns)\n", "print(df4_wrapper.iloc[0, :2].ndim)\n", "\n", "print(df4_wrapper.iloc[:2, 0].index)\n", "print(df4_wrapper.iloc[:2, 0].columns)\n", 
"print(df4_wrapper.iloc[:2, 0].ndim)\n", "\n", "print(df4_wrapper.iloc[:2, [0]].index)\n", "print(df4_wrapper.iloc[:2, [0]].columns)\n", "print(df4_wrapper.iloc[:2, [0]].ndim)\n", "\n", "print(df4_wrapper.iloc[:2, :2].index)\n", "print(df4_wrapper.iloc[:2, :2].columns)\n", "print(df4_wrapper.iloc[:2, :2].ndim)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['x6', 'y6', 'z6'], dtype='object', name='i6')\n", "Index(['a6'], dtype='object', name='c6')\n", "1\n", "Index(['x6', 'y6', 'z6'], dtype='object', name='i6')\n", "Index(['a6'], dtype='object', name='c6')\n", "2\n", "Index(['x6', 'y6', 'z6'], dtype='object', name='i6')\n", "Index(['a6', 'b6'], dtype='object', name='c6')\n", "2\n" ] } ], "source": [ "print(df4_wrapper_co.iloc[0].index)\n", "print(df4_wrapper_co.iloc[0].columns)\n", "print(df4_wrapper_co.iloc[0].ndim)\n", "\n", "print(df4_wrapper_co.iloc[[0]].index)\n", "print(df4_wrapper_co.iloc[[0]].columns)\n", "print(df4_wrapper_co.iloc[[0]].ndim)\n", "\n", "print(df4_wrapper_co.iloc[:2].index)\n", "print(df4_wrapper_co.iloc[:2].columns)\n", "print(df4_wrapper_co.iloc[:2].ndim)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['x2', 'y2'], dtype='object', name='i2')\n", "Index(['a2'], dtype='object')\n", "1\n", "1\n", "Index([0], dtype='int64')\n", "Index(['x6', 'y6'], dtype='object', name='i6')\n", "Index(['a6', 'b6'], dtype='object', name='c6')\n", "2\n", "1\n", "Index([0, 0], dtype='int64')\n", "Index(['x6', 'y6'], dtype='object', name='i6')\n", "Index(['c6'], dtype='object', name='c6')\n", "1\n", "1\n", "Index([1], dtype='int64')\n", "Index(['x6', 'y6'], dtype='object', name='i6')\n", "Index(['c6'], dtype='object', name='c6')\n", "2\n", "2\n", "Index([1], dtype='int64')\n", "Index(['x6', 'y6'], dtype='object', name='i6')\n", "Index(['a6', 'b6', 'c6'], dtype='object', 
name='c6')\n", "2\n", "2\n", "Index([0, 0, 1], dtype='int64')\n" ] } ], "source": [ "print(sr2_grouped_wrapper.iloc[:2].index)\n", "print(sr2_grouped_wrapper.iloc[:2].columns)\n", "print(sr2_grouped_wrapper.iloc[:2].ndim)\n", "print(sr2_grouped_wrapper.iloc[:2].grouped_ndim)\n", "print(sr2_grouped_wrapper.iloc[:2].grouper.group_by)\n", "\n", "print(df4_grouped_wrapper.iloc[:2, 0].index)\n", "print(df4_grouped_wrapper.iloc[:2, 0].columns)\n", "print(df4_grouped_wrapper.iloc[:2, 0].ndim)\n", "print(df4_grouped_wrapper.iloc[:2, 0].grouped_ndim)\n", "print(df4_grouped_wrapper.iloc[:2, 0].grouper.group_by)\n", "\n", "print(df4_grouped_wrapper.iloc[:2, 1].index)\n", "print(df4_grouped_wrapper.iloc[:2, 1].columns)\n", "print(df4_grouped_wrapper.iloc[:2, 1].ndim)\n", "print(df4_grouped_wrapper.iloc[:2, 1].grouped_ndim)\n", "print(df4_grouped_wrapper.iloc[:2, 1].grouper.group_by)\n", "\n", "print(df4_grouped_wrapper.iloc[:2, [1]].index)\n", "print(df4_grouped_wrapper.iloc[:2, [1]].columns)\n", "print(df4_grouped_wrapper.iloc[:2, [1]].ndim)\n", "print(df4_grouped_wrapper.iloc[:2, [1]].grouped_ndim)\n", "print(df4_grouped_wrapper.iloc[:2, [1]].grouper.group_by)\n", "\n", "print(df4_grouped_wrapper.iloc[:2, :2].index)\n", "print(df4_grouped_wrapper.iloc[:2, :2].columns)\n", "print(df4_grouped_wrapper.iloc[:2, :2].ndim)\n", "print(df4_grouped_wrapper.iloc[:2, :2].grouped_ndim)\n", "print(df4_grouped_wrapper.iloc[:2, :2].grouper.group_by)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['x6', 'y6', 'z6'], dtype='object', name='i6')\n", "Index(['a6', 'b6'], dtype='object', name='c6')\n", "2\n", "1\n", "Index([0, 0], dtype='int64')\n", "Index(['x6', 'y6', 'z6'], dtype='object', name='i6')\n", "Index(['c6'], dtype='object', name='c6')\n", "1\n", "1\n", "Index([1], dtype='int64')\n", "Index(['x6', 'y6', 'z6'], dtype='object', name='i6')\n", "Index(['c6'], dtype='object', name='c6')\n", 
"2\n", "2\n", "Index([1], dtype='int64')\n", "Index(['x6', 'y6', 'z6'], dtype='object', name='i6')\n", "Index(['a6', 'b6', 'c6'], dtype='object', name='c6')\n", "2\n", "2\n", "Index([0, 0, 1], dtype='int64')\n" ] } ], "source": [ "print(df4_grouped_wrapper_co.iloc[0].index)\n", "print(df4_grouped_wrapper_co.iloc[0].columns)\n", "print(df4_grouped_wrapper_co.iloc[0].ndim)\n", "print(df4_grouped_wrapper_co.iloc[0].grouped_ndim)\n", "print(df4_grouped_wrapper_co.iloc[0].grouper.group_by)\n", "\n", "print(df4_grouped_wrapper_co.iloc[1].index)\n", "print(df4_grouped_wrapper_co.iloc[1].columns)\n", "print(df4_grouped_wrapper_co.iloc[1].ndim)\n", "print(df4_grouped_wrapper_co.iloc[1].grouped_ndim)\n", "print(df4_grouped_wrapper_co.iloc[1].grouper.group_by)\n", "\n", "print(df4_grouped_wrapper_co.iloc[[1]].index)\n", "print(df4_grouped_wrapper_co.iloc[[1]].columns)\n", "print(df4_grouped_wrapper_co.iloc[[1]].ndim)\n", "print(df4_grouped_wrapper_co.iloc[[1]].grouped_ndim)\n", "print(df4_grouped_wrapper_co.iloc[[1]].grouper.group_by)\n", "\n", "print(df4_grouped_wrapper_co.iloc[:2].index)\n", "print(df4_grouped_wrapper_co.iloc[:2].columns)\n", "print(df4_grouped_wrapper_co.iloc[:2].ndim)\n", "print(df4_grouped_wrapper_co.iloc[:2].grouped_ndim)\n", "print(df4_grouped_wrapper_co.iloc[:2].grouper.group_by)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# Benchmark fixtures: wrappers around a 1000x1000 frame (only its shape/labels matter, so np.empty is fine).\n", "big_df = pd.DataFrame(np.empty((1000, 1000)))\n", "\n", "big_df_wrapper = array_wrapper.ArrayWrapper.from_obj(big_df)\n", "big_df_wrapper_co = big_df_wrapper.copy(column_only_select=True)\n", "# Build the grouped wrappers on big_df too (not on the 3-column df4 wrapper):\n", "# consecutive column pairs share a group label -> 0, 0, 1, 1, 2, 2, ...\n", "big_df_grouped_wrapper = big_df_wrapper.copy(group_by=np.arange(big_df.shape[1]) // 2)\n", "big_df_grouped_wrapper_co = big_df_grouped_wrapper.copy(column_only_select=True)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "759 µs ± 10.6 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each)\n", "680 µs ± 15.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", "569 µs ± 8.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", "579 µs ± 6.89 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", "839 µs ± 4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", "748 µs ± 7.08 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", "648 µs ± 6.35 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", "658 µs ± 4.24 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" ] } ], "source": [ "%timeit big_df_wrapper.iloc[:, 0]\n", "%timeit big_df_wrapper.iloc[:, :]\n", "\n", "%timeit big_df_wrapper_co.iloc[0]\n", "%timeit big_df_wrapper_co.iloc[:]\n", "\n", "%timeit big_df_grouped_wrapper.iloc[:, 0]\n", "%timeit big_df_grouped_wrapper.iloc[:, :]\n", "\n", "%timeit big_df_grouped_wrapper_co.iloc[0]\n", "%timeit big_df_grouped_wrapper_co.iloc[:]" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 1\n", "i6 \n", "x6 1 2\n", "y6 3 4\n", "z6 5 6\n", "0 1\n", "1 2\n", "dtype: int64\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "c6\n", "a6 1\n", "b6 2\n", "c6 3\n", "dtype: int64\n" ] } ], "source": [ "print(df4_grouped_wrapper_co.wrap(np.array([[1, 2], [3, 4], [5, 6]])))\n", "print(df4_grouped_wrapper_co.wrap_reduced(np.array([1, 2])))\n", "\n", "print(df4_grouped_wrapper_co.wrap(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), group_by=False))\n", "print(df4_grouped_wrapper_co.wrap_reduced(np.array([1, 2, 3]), group_by=False))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "i6\n", "x6 1\n", "y6 2\n", "z6 3\n", "dtype: int64\n", "1\n", "c6 a6 b6\n", "i6 \n", "x6 1 2\n", "y6 3 4\n", "z6 5 6\n", "c6\n", "a6 1\n", "b6 2\n", "dtype: int64\n" ] } ], "source": [ 
"print(df4_grouped_wrapper_co.iloc[0].wrap(np.array([1, 2, 3])))\n", "print(df4_grouped_wrapper_co.iloc[0].wrap_reduced(np.array([1])))\n", "\n", "print(df4_grouped_wrapper_co.iloc[0].wrap(np.array([[1, 2], [3, 4], [5, 6]]), group_by=False))\n", "print(df4_grouped_wrapper_co.iloc[0].wrap_reduced(np.array([1, 2]), group_by=False))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0\n", "i6 \n", "x6 1\n", "y6 2\n", "z6 3\n", "0 1\n", "dtype: int64\n", "c6 a6 b6\n", "i6 \n", "x6 1 2\n", "y6 3 4\n", "z6 5 6\n", "c6\n", "a6 1\n", "b6 2\n", "dtype: int64\n" ] } ], "source": [ "print(df4_grouped_wrapper_co.iloc[[0]].wrap(np.array([1, 2, 3])))\n", "print(df4_grouped_wrapper_co.iloc[[0]].wrap_reduced(np.array([1])))\n", "\n", "print(df4_grouped_wrapper_co.iloc[[0]].wrap(np.array([[1, 2], [3, 4], [5, 6]]), group_by=False))\n", "print(df4_grouped_wrapper_co.iloc[[0]].wrap_reduced(np.array([1, 2]), group_by=False))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "i6\n", "x6 1\n", "y6 2\n", "z6 3\n", "Name: 1, dtype: int64\n", "1\n", "i6\n", "x6 1\n", "y6 2\n", "z6 3\n", "Name: c6, dtype: int64\n", "1\n" ] } ], "source": [ "print(df4_grouped_wrapper_co.iloc[1].wrap(np.array([1, 2, 3])))\n", "print(df4_grouped_wrapper_co.iloc[1].wrap_reduced(np.array([1])))\n", "\n", "print(df4_grouped_wrapper_co.iloc[1].wrap(np.array([1, 2, 3]), group_by=False))\n", "print(df4_grouped_wrapper_co.iloc[1].wrap_reduced(np.array([1]), group_by=False))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1\n", "i6 \n", "x6 1\n", "y6 2\n", "z6 3\n", "1 1\n", "dtype: int64\n", "c6 c6\n", "i6 \n", "x6 1\n", "y6 2\n", "z6 3\n", "c6\n", "c6 1\n", "dtype: int64\n" ] } ], "source": [ "print(df4_grouped_wrapper_co.iloc[[1]].wrap(np.array([1, 2, 
3])))\n", "print(df4_grouped_wrapper_co.iloc[[1]].wrap_reduced(np.array([1])))\n", "\n", "print(df4_grouped_wrapper_co.iloc[[1]].wrap(np.array([1, 2, 3]), group_by=False))\n", "print(df4_grouped_wrapper_co.iloc[[1]].wrap_reduced(np.array([1]), group_by=False))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## index_fns" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([0.1, 0.2], dtype='float64', name='a')\n", "Index([1, 2, 3], dtype='int64', name='b')\n", "Index(['array_0', 'array_1', 'array_2'], dtype='object', name='c')\n" ] } ], "source": [ "i1 = index_fns.index_from_values([0.1, 0.2], name='a')\n", "i2 = index_fns.index_from_values(np.tile(np.arange(1, 4)[:, None][:, None], (1, 3, 3)), name='b')\n", "i3 = index_fns.index_from_values(np.random.uniform(size=(3, 3, 3)), name='c')\n", "\n", "print(i1)\n", "print(i2)\n", "print(i3)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([1, 1, 1, 2, 2, 2, 3, 3, 3], dtype='int64', name='b')\n", "MultiIndex([('x7', 'x8'),\n", " ('x7', 'x8'),\n", " ('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('y7', 'y8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8'),\n", " ('z7', 'z8'),\n", " ('z7', 'z8')],\n", " names=['i7', 'i8'])\n" ] } ], "source": [ "print(index_fns.repeat_index(i2, 3))\n", "print(index_fns.repeat_index(multi_i, 3))" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([1, 2, 3, 1, 2, 3, 1, 2, 3], dtype='int64', name='b')\n", "MultiIndex([('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8'),\n", " ('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8'),\n", " ('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8')],\n", " names=['i7', 'i8'])\n" ] } ], "source": [ "print(index_fns.tile_index(i2, 3))\n", "print(index_fns.tile_index(multi_i, 3))" ] }, { 
"cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MultiIndex([(1, 'array_0'),\n", " (2, 'array_1'),\n", " (3, 'array_2')],\n", " names=['b', 'c'])\n", "MultiIndex([('array_0', 1),\n", " ('array_1', 2),\n", " ('array_2', 3)],\n", " names=['c', 'b'])\n", "MultiIndex([('x7', 'x8', 'x7', 'x8'),\n", " ('y7', 'y8', 'y7', 'y8'),\n", " ('z7', 'z8', 'z7', 'z8')],\n", " names=['i7', 'i8', 'i7', 'i8'])\n", "MultiIndex([('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8')],\n", " names=['i7', 'i8'])\n", "MultiIndex([(0, 'a'),\n", " (1, 'b')],\n", " )\n", "Index(['a', 'b'], dtype='object')\n", "MultiIndex([(0, 'a'),\n", " (1, 'b')],\n", " names=['test_name', None])\n", "Index(['a', 'b'], dtype='object')\n", "MultiIndex([('a', 'a'),\n", " ('a', 'b')],\n", " names=['test_name', None])\n" ] } ], "source": [ "i23 = index_fns.stack_indexes((i2, i3))\n", "i32 = index_fns.stack_indexes((i3, i2))\n", "\n", "print(i23)\n", "print(i32)\n", "\n", "print(index_fns.stack_indexes((multi_i, multi_i), drop_duplicates=False))\n", "print(index_fns.stack_indexes((multi_i, multi_i), drop_duplicates=True))\n", "print(index_fns.stack_indexes(([0, 1], ['a', 'b']), drop_redundant=False))\n", "print(index_fns.stack_indexes(([0, 1], ['a', 'b']), drop_redundant=True))\n", "print(index_fns.stack_indexes((pd.Index([0, 1], name='test_name'), ['a', 'b']), drop_redundant=True))\n", "print(index_fns.stack_indexes((['a', 'a'], ['a', 'b']), drop_redundant=True))\n", "print(index_fns.stack_indexes((pd.Index(['a', 'a'], name='test_name'), ['a', 'b']), drop_redundant=True))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([2, 3], dtype='int64')\n", "Index([2, 3], dtype='int64')\n", "Index([1, 2], dtype='int64')\n", "Index([1, 2], dtype='int64')\n", "MultiIndex([(0.1, 1),\n", " (0.1, 2),\n", " (0.1, 3),\n", " (0.2, 1),\n", " (0.2, 2),\n", " (0.2, 
3)],\n", " names=['a', 'b'])\n", "MultiIndex([(1, 'array_0'),\n", " (1, 'array_1'),\n", " (1, 'array_2'),\n", " (2, 'array_0'),\n", " (2, 'array_1'),\n", " (2, 'array_2'),\n", " (3, 'array_0'),\n", " (3, 'array_1'),\n", " (3, 'array_2')],\n", " names=['b', 'c'])\n", "MultiIndex([(1, 'array_0', 1, 'array_0'),\n", " (1, 'array_0', 2, 'array_1'),\n", " (1, 'array_0', 3, 'array_2'),\n", " (2, 'array_1', 1, 'array_0'),\n", " (2, 'array_1', 2, 'array_1'),\n", " (2, 'array_1', 3, 'array_2'),\n", " (3, 'array_2', 1, 'array_0'),\n", " (3, 'array_2', 2, 'array_1'),\n", " (3, 'array_2', 3, 'array_2')],\n", " names=['b', 'c', 'b', 'c'])\n" ] } ], "source": [ "print(index_fns.combine_indexes((pd.Index([1]), pd.Index([2, 3])), drop_duplicates=False))\n", "print(index_fns.combine_indexes((pd.Index([1]), pd.Index([2, 3])), drop_duplicates=True))\n", "print(index_fns.combine_indexes((pd.Index([1, 2]), pd.Index([3])), drop_duplicates=False))\n", "print(index_fns.combine_indexes((pd.Index([1, 2]), pd.Index([3])), drop_duplicates=True))\n", "print(index_fns.combine_indexes((i1, i2))) # combine_fns uses stack\n", "print(index_fns.combine_indexes((i2, i3)))\n", "print(index_fns.combine_indexes((i23, i23)))" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MultiIndex([('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8')],\n", " names=['i7', 'i8'])\n", "MultiIndex([('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8')],\n", " names=['i7', 'i8'])\n" ] } ], "source": [ "print(index_fns.drop_levels(multi_i, 'i10'))\n", "print(index_fns.drop_levels(multi_i, ['i7', 'i8']))" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([1, 2, 3], dtype='int64', name='f')\n", "MultiIndex([('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8')],\n", " names=['f7', 'f8'])\n" ] } ], "source": [ "print(index_fns.rename_levels(pd.Index([1, 
2, 3], name='i'), {'i': 'f'}))\n", "print(index_fns.rename_levels(multi_i, {'i7': 'f7', 'i8': 'f8'}))" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['x7', 'y7', 'z7'], dtype='object', name='i7')\n", "MultiIndex([('x7',),\n", " ('y7',),\n", " ('z7',)],\n", " names=['i7'])\n", "MultiIndex([('x7', 'x8'),\n", " ('y7', 'y8'),\n", " ('z7', 'z8')],\n", " names=['i7', 'i8'])\n" ] } ], "source": [ "print(index_fns.select_levels(multi_i, 'i7'))\n", "print(index_fns.select_levels(multi_i, ['i7']))\n", "print(index_fns.select_levels(multi_i, ['i7', 'i8']))" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['a', 'a'], dtype='object')\n", "Index(['a', 'a'], dtype='object', name='hi')\n", "MultiIndex([('a', 'b'),\n", " ('a', 'b')],\n", " names=['hi', 'hi2'])\n", "MultiIndex([('a', 'a'),\n", " ('b', 'b')],\n", " names=['hi', 'hi2'])\n", "Index(['a', 'b'], dtype='object', name='hi2')\n", "MultiIndex([(0, 'a'),\n", " (2, 'b')],\n", " names=[None, 'hi2'])\n", "MultiIndex([(0, 'a'),\n", " (1, 'b')],\n", " names=['hi', 'hi2'])\n" ] } ], "source": [ "print(index_fns.drop_redundant_levels(pd.Index(['a', 'a']))) # ignores levels with single element\n", "print(index_fns.drop_redundant_levels(pd.Index(['a', 'a'], name='hi')))\n", "print(index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([['a', 'a'], ['b', 'b']], names=['hi', 'hi2'])))\n", "print(index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([['a', 'b'], ['a', 'b']], names=['hi', 'hi2'])))\n", "print(index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 1], ['a', 'b']], names=[None, 'hi2']))) # ignores 0-to-n\n", "print(index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 2], ['a', 'b']], names=[None, 'hi2']))) # legit\n", "print(index_fns.drop_redundant_levels(pd.MultiIndex.from_arrays([[0, 1], ['a', 'b']], 
names=['hi', 'hi2']))) # legit (w/ name)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index([1, 2, 3], dtype='int64', name='a')\n", "MultiIndex([( 0, 2, 1),\n", " ('a', 'c', 'b')],\n", " names=['x', 'z', 'y'])\n", "MultiIndex([( 0, 1, 2),\n", " ('a', 'b', 'c')],\n", " names=['x', 'y', 'z'])\n" ] } ], "source": [ "print(index_fns.drop_duplicate_levels(pd.MultiIndex.from_arrays(\n", " [[1, 2, 3], [1, 2, 3]], names=['a', 'a'])))\n", "print(index_fns.drop_duplicate_levels(pd.MultiIndex.from_tuples(\n", " [(0, 1, 2, 1), ('a', 'b', 'c', 'b')], names=['x', 'y', 'z', 'y']), keep='last'))\n", "print(index_fns.drop_duplicate_levels(pd.MultiIndex.from_tuples(\n", " [(0, 1, 2, 1), ('a', 'b', 'c', 'b')], names=['x', 'y', 'z', 'y']), keep='first'))" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 0, 1])" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "multi_c1 = pd.MultiIndex.from_arrays([['a8', 'b8']], names=['c8'])\n", "multi_c2 = pd.MultiIndex.from_arrays([['a7', 'a7', 'c7', 'c7'], ['a8', 'b8', 'a8', 'b8']], names=['c7', 'c8'])\n", "\n", "index_fns.align_index_to(multi_c1, multi_c2)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "([], [])\n", "([1], [])\n", "([1], [])\n", "([0, 1], [])\n", "([1, 0], [])\n", "([0, 1], [])\n", "([0], [1])\n", "([1], [None])\n", "([], [0, 1])\n" ] } ], "source": [ "print(index_fns.pick_levels(multi_c, required_levels=[], optional_levels=[]))\n", "print(index_fns.pick_levels(multi_c, required_levels=['c8'], optional_levels=[]))\n", "print(index_fns.pick_levels(multi_c, required_levels=['c8'], optional_levels=[]))\n", "print(index_fns.pick_levels(multi_c, required_levels=['c7', 'c8'], optional_levels=[]))\n", "print(index_fns.pick_levels(multi_c, 
required_levels=['c8', None], optional_levels=[]))\n", "print(index_fns.pick_levels(multi_c, required_levels=[None, None], optional_levels=[]))\n", "print(index_fns.pick_levels(multi_c, required_levels=[None], optional_levels=['c8']))\n", "print(index_fns.pick_levels(multi_c, required_levels=['c8'], optional_levels=[None]))\n", "print(index_fns.pick_levels(multi_c, required_levels=[], optional_levels=['c7', 'c8']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## reshape_fns" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1 2 3]\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "i4\n", "x4 1\n", "y4 2\n", "z4 3\n", "Name: a4, dtype: int64\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "[[1]\n", " [2]\n", " [3]]\n", " a2\n", "i2 \n", "x2 1\n", "y2 2\n", "z2 3\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n" ] } ], "source": [ "print(reshape_fns.soft_to_ndim(a2, 1))\n", "print(reshape_fns.soft_to_ndim(sr2, 1))\n", "print(reshape_fns.soft_to_ndim(df2, 1))\n", "print(reshape_fns.soft_to_ndim(df4, 1)) # cannot -> do nothing\n", "print(reshape_fns.soft_to_ndim(a2, 2))\n", "print(reshape_fns.soft_to_ndim(sr2, 2))\n", "print(reshape_fns.soft_to_ndim(df2, 2))" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[None]\n", "[0]\n", "[1]\n", "[1 2 3]\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "i3\n", "x3 1\n", "Name: a3, dtype: int64\n", "i4\n", "x4 1\n", "y4 2\n", "z4 3\n", "Name: a4, dtype: int64\n" ] } ], "source": [ "print(reshape_fns.to_1d(None))\n", "print(reshape_fns.to_1d(v1))\n", "print(reshape_fns.to_1d(a1))\n", "print(reshape_fns.to_1d(a2))\n", "print(reshape_fns.to_1d(sr1))\n", "print(reshape_fns.to_1d(sr2))\n", "print(reshape_fns.to_1d(df1))\n", 
"print(reshape_fns.to_1d(df2))" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[None]]\n", "[[0]]\n", "[[1]]\n", "[[1]\n", " [2]\n", " [3]]\n", " a1\n", "i1 \n", "x1 1\n", " a2\n", "i2 \n", "x2 1\n", "y2 2\n", "z2 3\n", "i2 x2 y2 z2\n", "0 1 2 3\n" ] } ], "source": [ "print(reshape_fns.to_2d(None))\n", "print(reshape_fns.to_2d(v1))\n", "print(reshape_fns.to_2d(a1))\n", "print(reshape_fns.to_2d(a2))\n", "print(reshape_fns.to_2d(sr1))\n", "print(reshape_fns.to_2d(sr2))\n", "print(reshape_fns.to_2d(sr2, expand_axis=0))" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 0]\n", "[1 1 1]\n", "[1 1 1 2 2 2 3 3 3]\n", "[[1 2 3]\n", " [1 2 3]\n", " [1 2 3]]\n", "[[1]\n", " [1]\n", " [1]\n", " [2]\n", " [2]\n", " [2]\n", " [3]\n", " [3]\n", " [3]]\n", "[[1 2 3]\n", " [1 2 3]\n", " [1 2 3]\n", " [4 5 6]\n", " [4 5 6]\n", " [4 5 6]\n", " [7 8 9]\n", " [7 8 9]\n", " [7 8 9]]\n", "0 1\n", "1 1\n", "2 1\n", "dtype: int64\n", "i1\n", "x1 1\n", "x1 1\n", "x1 1\n", "Name: a1, dtype: int64\n", "i2\n", "x2 1\n", "x2 1\n", "x2 1\n", "y2 2\n", "y2 2\n", "y2 2\n", "z2 3\n", "z2 3\n", "z2 3\n", "Name: a2, dtype: int64\n", " 0\n", "0 1\n", "1 1\n", "2 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "x3 1\n", "x3 1\n", "c4 a4\n", "i4 \n", "x4 1\n", "x4 1\n", "x4 1\n", "y4 2\n", "y4 2\n", "y4 2\n", "z4 3\n", "z4 3\n", "z4 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "x5 1 2 3\n", "x5 1 2 3\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "x6 1 2 3\n", "x6 1 2 3\n", "y6 4 5 6\n", "y6 4 5 6\n", "y6 4 5 6\n", "z6 7 8 9\n", "z6 7 8 9\n", "z6 7 8 9\n" ] } ], "source": [ "print(reshape_fns.repeat(v1, 3, axis=0))\n", "print(reshape_fns.repeat(a1, 3, axis=0))\n", "print(reshape_fns.repeat(a2, 3, axis=0))\n", "print(reshape_fns.repeat(a3, 3, axis=0))\n", "print(reshape_fns.repeat(a4, 3, axis=0))\n", "print(reshape_fns.repeat(a5, 3, 
axis=0))\n", "print(reshape_fns.repeat(sr_none, 3, axis=0))\n", "print(reshape_fns.repeat(sr1, 3, axis=0))\n", "print(reshape_fns.repeat(sr2, 3, axis=0))\n", "print(reshape_fns.repeat(df_none, 3, axis=0))\n", "print(reshape_fns.repeat(df1, 3, axis=0))\n", "print(reshape_fns.repeat(df2, 3, axis=0))\n", "print(reshape_fns.repeat(df3, 3, axis=0))\n", "print(reshape_fns.repeat(df4, 3, axis=0))" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0 0 0]]\n", "[[1 1 1]]\n", "[[1 1 1]\n", " [2 2 2]\n", " [3 3 3]]\n", "[[1 1 1 2 2 2 3 3 3]]\n", "[[1 1 1]\n", " [2 2 2]\n", " [3 3 3]]\n", "[[1 1 1 2 2 2 3 3 3]\n", " [4 4 4 5 5 5 6 6 6]\n", " [7 7 7 8 8 8 9 9 9]]\n", " 0 1 2\n", "0 1 1 1\n", " a1 a1 a1\n", "i1 \n", "x1 1 1 1\n", " a2 a2 a2\n", "i2 \n", "x2 1 1 1\n", "y2 2 2 2\n", "z2 3 3 3\n", " 0 1 2\n", "0 1 1 1\n", "c3 a3 a3 a3\n", "i3 \n", "x3 1 1 1\n", "c4 a4 a4 a4\n", "i4 \n", "x4 1 1 1\n", "y4 2 2 2\n", "z4 3 3 3\n", "c5 a5 a5 a5 b5 b5 b5 c5 c5 c5\n", "i5 \n", "x5 1 1 1 2 2 2 3 3 3\n", "c6 a6 a6 a6 b6 b6 b6 c6 c6 c6\n", "i6 \n", "x6 1 1 1 2 2 2 3 3 3\n", "y6 4 4 4 5 5 5 6 6 6\n", "z6 7 7 7 8 8 8 9 9 9\n" ] } ], "source": [ "print(reshape_fns.repeat(v1, 3, axis=1))\n", "print(reshape_fns.repeat(a1, 3, axis=1))\n", "print(reshape_fns.repeat(a2, 3, axis=1))\n", "print(reshape_fns.repeat(a3, 3, axis=1))\n", "print(reshape_fns.repeat(a4, 3, axis=1))\n", "print(reshape_fns.repeat(a5, 3, axis=1))\n", "print(reshape_fns.repeat(sr_none, 3, axis=1))\n", "print(reshape_fns.repeat(sr1, 3, axis=1))\n", "print(reshape_fns.repeat(sr2, 3, axis=1))\n", "print(reshape_fns.repeat(df_none, 3, axis=1))\n", "print(reshape_fns.repeat(df1, 3, axis=1))\n", "print(reshape_fns.repeat(df2, 3, axis=1))\n", "print(reshape_fns.repeat(df3, 3, axis=1))\n", "print(reshape_fns.repeat(df4, 3, axis=1))" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "[0 0 0]\n", "[1 1 1]\n", "[1 2 3 1 2 3 1 2 3]\n", "[[1 2 3]\n", " [1 2 3]\n", " [1 2 3]]\n", "[[1]\n", " [2]\n", " [3]\n", " [1]\n", " [2]\n", " [3]\n", " [1]\n", " [2]\n", " [3]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]\n", " [1 2 3]\n", " [4 5 6]\n", " [7 8 9]\n", " [1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "0 1\n", "1 1\n", "2 1\n", "dtype: int64\n", "i1\n", "x1 1\n", "x1 1\n", "x1 1\n", "Name: a1, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "x2 1\n", "y2 2\n", "z2 3\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", " 0\n", "0 1\n", "1 1\n", "2 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "x3 1\n", "x3 1\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "x4 1\n", "y4 2\n", "z4 3\n", "x4 1\n", "y4 2\n", "z4 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "x5 1 2 3\n", "x5 1 2 3\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n" ] } ], "source": [ "print(reshape_fns.tile(v1, 3, axis=0))\n", "print(reshape_fns.tile(a1, 3, axis=0))\n", "print(reshape_fns.tile(a2, 3, axis=0))\n", "print(reshape_fns.tile(a3, 3, axis=0))\n", "print(reshape_fns.tile(a4, 3, axis=0))\n", "print(reshape_fns.tile(a5, 3, axis=0))\n", "print(reshape_fns.tile(sr_none, 3, axis=0))\n", "print(reshape_fns.tile(sr1, 3, axis=0))\n", "print(reshape_fns.tile(sr2, 3, axis=0))\n", "print(reshape_fns.tile(df_none, 3, axis=0))\n", "print(reshape_fns.tile(df1, 3, axis=0))\n", "print(reshape_fns.tile(df2, 3, axis=0))\n", "print(reshape_fns.tile(df3, 3, axis=0))\n", "print(reshape_fns.tile(df4, 3, axis=0))" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[0 0 0]]\n", "[[1 1 1]]\n", "[[1 1 1]\n", " [2 2 2]\n", " [3 3 3]]\n", "[[1 2 3 1 2 3 1 2 3]]\n", "[[1 1 1]\n", " [2 2 2]\n", " [3 3 3]]\n", "[[1 2 3 1 2 3 1 2 3]\n", " [4 5 6 4 5 6 4 5 6]\n", " [7 8 9 7 8 9 7 8 9]]\n", " 0 1 2\n", 
"0 1 1 1\n", " a1 a1 a1\n", "i1 \n", "x1 1 1 1\n", " a2 a2 a2\n", "i2 \n", "x2 1 1 1\n", "y2 2 2 2\n", "z2 3 3 3\n", " 0 1 2\n", "0 1 1 1\n", "c3 a3 a3 a3\n", "i3 \n", "x3 1 1 1\n", "c4 a4 a4 a4\n", "i4 \n", "x4 1 1 1\n", "y4 2 2 2\n", "z4 3 3 3\n", "c5 a5 b5 c5 a5 b5 c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3 1 2 3 1 2 3\n", "c6 a6 b6 c6 a6 b6 c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3 1 2 3 1 2 3\n", "y6 4 5 6 4 5 6 4 5 6\n", "z6 7 8 9 7 8 9 7 8 9\n" ] } ], "source": [ "print(reshape_fns.tile(v1, 3, axis=1))\n", "print(reshape_fns.tile(a1, 3, axis=1))\n", "print(reshape_fns.tile(a2, 3, axis=1))\n", "print(reshape_fns.tile(a3, 3, axis=1))\n", "print(reshape_fns.tile(a4, 3, axis=1))\n", "print(reshape_fns.tile(a5, 3, axis=1))\n", "print(reshape_fns.tile(sr_none, 3, axis=1))\n", "print(reshape_fns.tile(sr1, 3, axis=1))\n", "print(reshape_fns.tile(sr2, 3, axis=1))\n", "print(reshape_fns.tile(df_none, 3, axis=1))\n", "print(reshape_fns.tile(df1, 3, axis=1))\n", "print(reshape_fns.tile(df2, 3, axis=1))\n", "print(reshape_fns.tile(df3, 3, axis=1))\n", "print(reshape_fns.tile(df4, 3, axis=1))" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Config({\n", " \"align_index\": false,\n", " \"align_columns\": true,\n", " \"index_from\": \"stack\",\n", " \"columns_from\": \"stack\",\n", " \"ignore_sr_names\": true,\n", " \"drop_duplicates\": true,\n", " \"keep\": \"last\",\n", " \"drop_redundant\": true,\n", " \"ignore_default\": true\n", "})\n" ] } ], "source": [ "# Change broadcasting rules globally\n", "vbt.settings.broadcasting['index_from'] = 'stack' # default is 'strict'\n", "vbt.settings.broadcasting['columns_from'] = 'stack'\n", "\n", "print(vbt.settings.broadcasting)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "0\n", "================\n", "0\n", "0\n", "\n", "0\n", "[1]\n", "================\n", 
"[0]\n", "[1]\n", "\n", "0\n", "[1 2 3]\n", "================\n", "[0 0 0]\n", "[1 2 3]\n", "\n", "0\n", "[[1 2 3]]\n", "================\n", "[[0 0 0]]\n", "[[1 2 3]]\n", "\n", "0\n", "[[1]\n", " [2]\n", " [3]]\n", "================\n", "[[0]\n", " [0]\n", " [0]]\n", "[[1]\n", " [2]\n", " [3]]\n", "\n", "0\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "================\n", "[[0 0 0]\n", " [0 0 0]\n", " [0 0 0]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "\n", "[1]\n", "[1]\n", "================\n", "[1]\n", "[1]\n", "\n", "[1]\n", "[1 2 3]\n", "================\n", "[1 1 1]\n", "[1 2 3]\n", "\n", "[1]\n", "[[1 2 3]]\n", "================\n", "[[1 1 1]]\n", "[[1 2 3]]\n", "\n", "[1]\n", "[[1]\n", " [2]\n", " [3]]\n", "================\n", "[[1]\n", " [1]\n", " [1]]\n", "[[1]\n", " [2]\n", " [3]]\n", "\n", "[1]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "================\n", "[[1 1 1]\n", " [1 1 1]\n", " [1 1 1]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "\n", "[1 2 3]\n", "[1 2 3]\n", "================\n", "[1 2 3]\n", "[1 2 3]\n", "\n", "[1 2 3]\n", "[[1 2 3]]\n", "================\n", "[[1 2 3]]\n", "[[1 2 3]]\n", "\n", "[1 2 3]\n", "[[1]\n", " [2]\n", " [3]]\n", "================\n", "[[1 2 3]\n", " [1 2 3]\n", " [1 2 3]]\n", "[[1 1 1]\n", " [2 2 2]\n", " [3 3 3]]\n", "\n", "[1 2 3]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "================\n", "[[1 2 3]\n", " [1 2 3]\n", " [1 2 3]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "\n", "[[1 2 3]]\n", "[[1 2 3]]\n", "================\n", "[[1 2 3]]\n", "[[1 2 3]]\n", "\n", "[[1 2 3]]\n", "[[1]\n", " [2]\n", " [3]]\n", "================\n", "[[1 2 3]\n", " [1 2 3]\n", " [1 2 3]]\n", "[[1 1 1]\n", " [2 2 2]\n", " [3 3 3]]\n", "\n", "[[1 2 3]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "================\n", "[[1 2 3]\n", " [1 2 3]\n", " [1 2 3]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "\n", "[[1]\n", " [2]\n", " [3]]\n", "[[1]\n", " [2]\n", " [3]]\n", "================\n", "[[1]\n", " 
[2]\n", " [3]]\n", "[[1]\n", " [2]\n", " [3]]\n", "\n", "[[1]\n", " [2]\n", " [3]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "================\n", "[[1 1 1]\n", " [2 2 2]\n", " [3 3 3]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "================\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "\n" ] } ], "source": [ "# Broadcasting arrays\n", "args = [\n", " ('v1', v1),\n", " ('a1', a1),\n", " ('a2', a2),\n", " ('a3', a3),\n", " ('a4', a4),\n", " ('a5', a5)\n", "]\n", "arg_combs = list(itertools.combinations_with_replacement(args, 2))\n", "\n", "for (n1, arg1), (n2, arg2) in arg_combs:\n", " print(arg1)\n", " print(arg2)\n", " print(\"================\")\n", " arg1, arg2 = reshape_fns.broadcast(arg1, arg2)\n", " print(arg1)\n", " print(arg2)\n", " print()" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "sr_none+sr_none\n", "0 1\n", "dtype: int64\n", "0 1\n", "dtype: int64\n", "================\n", "0 1\n", "dtype: int64\n", "0 1\n", "dtype: int64\n", "\n", "sr_none+sr1\n", "0 1\n", "dtype: int64\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "================\n", "i1\n", "x1 1\n", "dtype: int64\n", "i1\n", "x1 1\n", "dtype: int64\n", "\n", "sr_none+sr2\n", "0 1\n", "dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "================\n", "i2\n", "x2 1\n", "y2 1\n", "z2 1\n", "dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "dtype: int64\n", "\n", "sr1+sr1\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "================\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "\n", "sr1+sr2\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", 
"================\n", "i1 i2\n", "x1 x2 1\n", " y2 1\n", " z2 1\n", "dtype: int64\n", "i1 i2\n", "x1 x2 1\n", " y2 2\n", " z2 3\n", "dtype: int64\n", "\n", "sr2+sr2\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "================\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "\n" ] } ], "source": [ "# Broadcasting series\n", "args = [\n", " ('sr_none', sr_none),\n", " ('sr1', sr1),\n", " ('sr2', sr2)\n", "]\n", "arg_combs = list(itertools.combinations_with_replacement(args, 2))\n", "\n", "for (n1, arg1), (n2, arg2) in arg_combs:\n", " print(n1 + '+' + n2)\n", " print(arg1)\n", " print(arg2)\n", " print(\"================\")\n", " arg1, arg2 = reshape_fns.broadcast(arg1, arg2)\n", " print(arg1)\n", " print(arg2)\n", " print()" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "v1+sr_none\n", "0\n", "0 1\n", "dtype: int64\n", "================\n", "0 0\n", "dtype: int64\n", "0 1\n", "dtype: int64\n", "\n", "v1+sr1\n", "0\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "================\n", "i1\n", "x1 0\n", "Name: a1, dtype: int64\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "\n", "v1+sr2\n", "0\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "================\n", "i2\n", "x2 0\n", "y2 0\n", "z2 0\n", "Name: a2, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "\n", "a1+sr_none\n", "[1]\n", "0 1\n", "dtype: int64\n", "================\n", "0 1\n", "dtype: int64\n", "0 1\n", "dtype: int64\n", "\n", "a1+sr1\n", "[1]\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "================\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "\n", "a1+sr2\n", "[1]\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: 
a2, dtype: int64\n", "================\n", "i2\n", "x2 1\n", "y2 1\n", "z2 1\n", "Name: a2, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "\n", "a2+sr_none\n", "[1 2 3]\n", "0 1\n", "dtype: int64\n", "================\n", "0 1\n", "1 2\n", "2 3\n", "dtype: int64\n", "0 1\n", "1 1\n", "2 1\n", "dtype: int64\n", "\n", "a2+sr1\n", "[1 2 3]\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "================\n", "i1\n", "x1 1\n", "x1 2\n", "x1 3\n", "Name: a1, dtype: int64\n", "i1\n", "x1 1\n", "x1 1\n", "x1 1\n", "Name: a1, dtype: int64\n", "\n", "a2+sr2\n", "[1 2 3]\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "================\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "\n", "a3+sr_none\n", "[[1 2 3]]\n", "0 1\n", "dtype: int64\n", "================\n", " 0 1 2\n", "0 1 2 3\n", " 0 1 2\n", "0 1 1 1\n", "\n", "a3+sr1\n", "[[1 2 3]]\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "================\n", " a1 a1 a1\n", "i1 \n", "x1 1 2 3\n", " a1 a1 a1\n", "i1 \n", "x1 1 1 1\n", "\n", "a3+sr2\n", "[[1 2 3]]\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "================\n", " a2 a2 a2\n", "i2 \n", "x2 1 2 3\n", "y2 1 2 3\n", "z2 1 2 3\n", " a2 a2 a2\n", "i2 \n", "x2 1 1 1\n", "y2 2 2 2\n", "z2 3 3 3\n", "\n", "a4+sr_none\n", "[[1]\n", " [2]\n", " [3]]\n", "0 1\n", "dtype: int64\n", "================\n", " 0\n", "0 1\n", "1 2\n", "2 3\n", " 0\n", "0 1\n", "1 1\n", "2 1\n", "\n", "a4+sr1\n", "[[1]\n", " [2]\n", " [3]]\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "================\n", " a1\n", "i1 \n", "x1 1\n", "x1 2\n", "x1 3\n", " a1\n", "i1 \n", "x1 1\n", "x1 1\n", "x1 1\n", "\n", "a4+sr2\n", "[[1]\n", " [2]\n", " [3]]\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "================\n", " a2\n", "i2 \n", "x2 1\n", "y2 2\n", "z2 3\n", " a2\n", "i2 \n", "x2 1\n", "y2 
2\n", "z2 3\n", "\n", "a5+sr_none\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "0 1\n", "dtype: int64\n", "================\n", " 0 1 2\n", "0 1 2 3\n", "1 4 5 6\n", "2 7 8 9\n", " 0 1 2\n", "0 1 1 1\n", "1 1 1 1\n", "2 1 1 1\n", "\n", "a5+sr1\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "================\n", " a1 a1 a1\n", "i1 \n", "x1 1 2 3\n", "x1 4 5 6\n", "x1 7 8 9\n", " a1 a1 a1\n", "i1 \n", "x1 1 1 1\n", "x1 1 1 1\n", "x1 1 1 1\n", "\n", "a5+sr2\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "================\n", " a2 a2 a2\n", "i2 \n", "x2 1 2 3\n", "y2 4 5 6\n", "z2 7 8 9\n", " a2 a2 a2\n", "i2 \n", "x2 1 1 1\n", "y2 2 2 2\n", "z2 3 3 3\n", "\n" ] } ], "source": [ "# Broadcasting arrays and series\n", "a_args = [\n", " ('v1', v1),\n", " ('a1', a1),\n", " ('a2', a2),\n", " ('a3', a3),\n", " ('a4', a4),\n", " ('a5', a5)\n", "]\n", "sr_args = [\n", " ('sr_none', sr_none),\n", " ('sr1', sr1),\n", " ('sr2', sr2)\n", "]\n", "arg_combs = list(itertools.product(a_args, sr_args))\n", "\n", "for (n1, arg1), (n2, arg2) in arg_combs:\n", " print(n1 + '+' + n2)\n", " print(arg1)\n", " print(arg2)\n", " print(\"================\")\n", " arg1, arg2 = reshape_fns.broadcast(arg1, arg2)\n", " print(arg1)\n", " print(arg2)\n", " print()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df_none+df_none\n", " 0\n", "0 1\n", " 0\n", "0 1\n", "================\n", " 0\n", "0 1\n", " 0\n", "0 1\n", "\n", "df_none+df1\n", " 0\n", "0 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3\n", "i3 \n", "x3 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "\n", "df_none+df2\n", " 0\n", "0 1\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 1\n", "z4 1\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "\n", "df_none+df3\n", " 
0\n", "0 1\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 1 1\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "\n", "df_none+df4\n", " 0\n", "0 1\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 1 1\n", "y6 1 1 1\n", "z6 1 1 1\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n", "df1+df1\n", "c3 a3\n", "i3 \n", "x3 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3\n", "i3 \n", "x3 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "\n", "df1+df2\n", "c3 a3\n", "i3 \n", "x3 1\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c3 a3\n", "c4 a4\n", "i3 i4 \n", "x3 x4 1\n", " y4 1\n", " z4 1\n", "c3 a3\n", "c4 a4\n", "i3 i4 \n", "x3 x4 1\n", " y4 2\n", " z4 3\n", "\n", "df1+df3\n", "c3 a3\n", "i3 \n", "x3 1\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c3 a3 \n", "c5 a5 b5 c5\n", "i3 i5 \n", "x3 x5 1 1 1\n", "c3 a3 \n", "c5 a5 b5 c5\n", "i3 i5 \n", "x3 x5 1 2 3\n", "\n", "df1+df4\n", "c3 a3\n", "i3 \n", "x3 1\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c3 a3 \n", "c6 a6 b6 c6\n", "i3 i6 \n", "x3 x6 1 1 1\n", " y6 1 1 1\n", " z6 1 1 1\n", "c3 a3 \n", "c6 a6 b6 c6\n", "i3 i6 \n", "x3 x6 1 2 3\n", " y6 4 5 6\n", " z6 7 8 9\n", "\n", "df2+df2\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "\n", "df2+df3\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c4 a4 \n", "c5 a5 b5 c5\n", "i4 i5 \n", "x4 x5 1 1 1\n", "y4 x5 2 2 2\n", "z4 x5 3 3 3\n", "c4 a4 \n", "c5 a5 b5 c5\n", "i4 i5 \n", "x4 x5 1 2 3\n", "y4 x5 1 2 3\n", "z4 x5 1 2 3\n", "\n", "df2+df4\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", 
"z4 3\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c4 a4 \n", "c6 a6 b6 c6\n", "i4 i6 \n", "x4 x6 1 1 1\n", "y4 y6 2 2 2\n", "z4 z6 3 3 3\n", "c4 a4 \n", "c6 a6 b6 c6\n", "i4 i6 \n", "x4 x6 1 2 3\n", "y4 y6 4 5 6\n", "z4 z6 7 8 9\n", "\n", "df3+df3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "\n", "df3+df4\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i5 i6 \n", "x5 x6 1 2 3\n", " y6 1 2 3\n", " z6 1 2 3\n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i5 i6 \n", "x5 x6 1 2 3\n", " y6 4 5 6\n", " z6 7 8 9\n", "\n", "df4+df4\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n" ] } ], "source": [ "# Broadcasting dataframes\n", "args = [\n", " ('df_none', df_none),\n", " ('df1', df1),\n", " ('df2', df2),\n", " ('df3', df3),\n", " ('df4', df4)\n", "]\n", "arg_combs = list(itertools.combinations_with_replacement(args, 2))\n", "\n", "for (n1, arg1), (n2, arg2) in arg_combs:\n", " print(n1 + '+' + n2)\n", " print(arg1)\n", " print(arg2)\n", " print(\"================\")\n", " arg1, arg2 = reshape_fns.broadcast(arg1, arg2)\n", " print(arg1)\n", " print(arg2)\n", " print()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "v1+df_none\n", "0\n", " 0\n", "0 1\n", "================\n", " 0\n", "0 0\n", " 0\n", "0 1\n", "\n", "v1+df1\n", "0\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3\n", "i3 \n", "x3 0\n", "c3 a3\n", "i3 \n", "x3 
1\n", "\n", "v1+df2\n", "0\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4\n", "i4 \n", "x4 0\n", "y4 0\n", "z4 0\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "\n", "v1+df3\n", "0\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 0 0 0\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "\n", "v1+df4\n", "0\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 0 0 0\n", "y6 0 0 0\n", "z6 0 0 0\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n", "a1+df_none\n", "[1]\n", " 0\n", "0 1\n", "================\n", " 0\n", "0 1\n", " 0\n", "0 1\n", "\n", "a1+df1\n", "[1]\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3\n", "i3 \n", "x3 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "\n", "a1+df2\n", "[1]\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 1\n", "z4 1\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "\n", "a1+df3\n", "[1]\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 1 1\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "\n", "a1+df4\n", "[1]\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 1 1\n", "y6 1 1 1\n", "z6 1 1 1\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n", "a2+df_none\n", "[1 2 3]\n", " 0\n", "0 1\n", "================\n", " 0 1 2\n", "0 1 2 3\n", " 0 1 2\n", "0 1 1 1\n", "\n", "a2+df1\n", "[1 2 3]\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3 a3 a3\n", "i3 \n", "x3 1 2 3\n", "c3 a3 a3 a3\n", "i3 \n", "x3 1 1 1\n", "\n", "a2+df2\n", "[1 2 3]\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4 a4 a4\n", "i4 \n", "x4 1 2 3\n", "y4 1 2 3\n", "z4 1 2 3\n", "c4 a4 a4 a4\n", "i4 \n", "x4 1 1 1\n", "y4 2 2 2\n", 
"z4 3 3 3\n", "\n", "a2+df3\n", "[1 2 3]\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "\n", "a2+df4\n", "[1 2 3]\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 1 2 3\n", "z6 1 2 3\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n", "a3+df_none\n", "[[1 2 3]]\n", " 0\n", "0 1\n", "================\n", " 0 1 2\n", "0 1 2 3\n", " 0 1 2\n", "0 1 1 1\n", "\n", "a3+df1\n", "[[1 2 3]]\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3 a3 a3\n", "i3 \n", "x3 1 2 3\n", "c3 a3 a3 a3\n", "i3 \n", "x3 1 1 1\n", "\n", "a3+df2\n", "[[1 2 3]]\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4 a4 a4\n", "i4 \n", "x4 1 2 3\n", "y4 1 2 3\n", "z4 1 2 3\n", "c4 a4 a4 a4\n", "i4 \n", "x4 1 1 1\n", "y4 2 2 2\n", "z4 3 3 3\n", "\n", "a3+df3\n", "[[1 2 3]]\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "\n", "a3+df4\n", "[[1 2 3]]\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 1 2 3\n", "z6 1 2 3\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n", "a4+df_none\n", "[[1]\n", " [2]\n", " [3]]\n", " 0\n", "0 1\n", "================\n", " 0\n", "0 1\n", "1 2\n", "2 3\n", " 0\n", "0 1\n", "1 1\n", "2 1\n", "\n", "a4+df1\n", "[[1]\n", " [2]\n", " [3]]\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3\n", "i3 \n", "x3 1\n", "x3 2\n", "x3 3\n", "c3 a3\n", "i3 \n", "x3 1\n", "x3 1\n", "x3 1\n", "\n", "a4+df2\n", "[[1]\n", " [2]\n", " [3]]\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "\n", 
"a4+df3\n", "[[1]\n", " [2]\n", " [3]]\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 1 1\n", "x5 2 2 2\n", "x5 3 3 3\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "x5 1 2 3\n", "x5 1 2 3\n", "\n", "a4+df4\n", "[[1]\n", " [2]\n", " [3]]\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 1 1\n", "y6 2 2 2\n", "z6 3 3 3\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n", "a5+df_none\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", " 0\n", "0 1\n", "================\n", " 0 1 2\n", "0 1 2 3\n", "1 4 5 6\n", "2 7 8 9\n", " 0 1 2\n", "0 1 1 1\n", "1 1 1 1\n", "2 1 1 1\n", "\n", "a5+df1\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3 a3 a3\n", "i3 \n", "x3 1 2 3\n", "x3 4 5 6\n", "x3 7 8 9\n", "c3 a3 a3 a3\n", "i3 \n", "x3 1 1 1\n", "x3 1 1 1\n", "x3 1 1 1\n", "\n", "a5+df2\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4 a4 a4\n", "i4 \n", "x4 1 2 3\n", "y4 4 5 6\n", "z4 7 8 9\n", "c4 a4 a4 a4\n", "i4 \n", "x4 1 1 1\n", "y4 2 2 2\n", "z4 3 3 3\n", "\n", "a5+df3\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "x5 4 5 6\n", "x5 7 8 9\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "x5 1 2 3\n", "x5 1 2 3\n", "\n", "a5+df4\n", "[[1 2 3]\n", " [4 5 6]\n", " [7 8 9]]\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n" ] } ], "source": [ "# Broadcasting arrays and dataframes\n", "a_args = [\n", " ('v1', v1),\n", " ('a1', a1),\n", " ('a2', a2),\n", " ('a3', a3),\n", " ('a4', a4),\n", " ('a5', a5)\n", "]\n", "sr_args = [\n", 
" ('df_none', df_none),\n", " ('df1', df1),\n", " ('df2', df2),\n", " ('df3', df3),\n", " ('df4', df4)\n", "]\n", "arg_combs = list(itertools.product(a_args, sr_args))\n", "\n", "for (n1, arg1), (n2, arg2) in arg_combs:\n", " print(n1 + '+' + n2)\n", " print(arg1)\n", " print(arg2)\n", " print(\"================\")\n", " arg1, arg2 = reshape_fns.broadcast(arg1, arg2)\n", " print(arg1)\n", " print(arg2)\n", " print()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "sr_none+df_none\n", "0 1\n", "dtype: int64\n", " 0\n", "0 1\n", "================\n", " 0\n", "0 1\n", " 0\n", "0 1\n", "\n", "sr_none+df1\n", "0 1\n", "dtype: int64\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3\n", "i3 \n", "x3 1\n", "c3 a3\n", "i3 \n", "x3 1\n", "\n", "sr_none+df2\n", "0 1\n", "dtype: int64\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 1\n", "z4 1\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "\n", "sr_none+df3\n", "0 1\n", "dtype: int64\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 1 1\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "\n", "sr_none+df4\n", "0 1\n", "dtype: int64\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 1 1\n", "y6 1 1 1\n", "z6 1 1 1\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "\n", "sr1+df_none\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", " 0\n", "0 1\n", "================\n", " 0\n", "i1 \n", "x1 1\n", " 0\n", "i1 \n", "x1 1\n", "\n", "sr1+df1\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3\n", "i1 i3 \n", "x1 x3 1\n", "c3 a3\n", "i1 i3 \n", "x1 x3 1\n", "\n", "sr1+df2\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", 
"================\n", "c4 a4\n", "i1 i4 \n", "x1 x4 1\n", " y4 1\n", " z4 1\n", "c4 a4\n", "i1 i4 \n", "x1 x4 1\n", " y4 2\n", " z4 3\n", "\n", "sr1+df3\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i1 i5 \n", "x1 x5 1 1 1\n", "c5 a5 b5 c5\n", "i1 i5 \n", "x1 x5 1 2 3\n", "\n", "sr1+df4\n", "i1\n", "x1 1\n", "Name: a1, dtype: int64\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i1 i6 \n", "x1 x6 1 1 1\n", " y6 1 1 1\n", " z6 1 1 1\n", "c6 a6 b6 c6\n", "i1 i6 \n", "x1 x6 1 2 3\n", " y6 4 5 6\n", " z6 7 8 9\n", "\n", "sr2+df_none\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", " 0\n", "0 1\n", "================\n", " 0\n", "i2 \n", "x2 1\n", "y2 2\n", "z2 3\n", " 0\n", "i2 \n", "x2 1\n", "y2 1\n", "z2 1\n", "\n", "sr2+df1\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "c3 a3\n", "i3 \n", "x3 1\n", "================\n", "c3 a3\n", "i2 i3 \n", "x2 x3 1\n", "y2 x3 2\n", "z2 x3 3\n", "c3 a3\n", "i2 i3 \n", "x2 x3 1\n", "y2 x3 1\n", "z2 x3 1\n", "\n", "sr2+df2\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 2\n", "z4 3\n", "================\n", "c4 a4\n", "i2 i4 \n", "x2 x4 1\n", "y2 y4 2\n", "z2 z4 3\n", "c4 a4\n", "i2 i4 \n", "x2 x4 1\n", "y2 y4 2\n", "z2 z4 3\n", "\n", "sr2+df3\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "c5 a5 b5 c5\n", "i5 \n", "x5 1 2 3\n", "================\n", "c5 a5 b5 c5\n", "i2 i5 \n", "x2 x5 1 1 1\n", "y2 x5 2 2 2\n", "z2 x5 3 3 3\n", "c5 a5 b5 c5\n", "i2 i5 \n", "x2 x5 1 2 3\n", "y2 x5 1 2 3\n", "z2 x5 1 2 3\n", "\n", "sr2+df4\n", "i2\n", "x2 1\n", "y2 2\n", "z2 3\n", "Name: a2, dtype: int64\n", "c6 a6 b6 c6\n", "i6 \n", "x6 1 2 3\n", "y6 4 5 6\n", "z6 7 8 9\n", "================\n", "c6 a6 b6 c6\n", "i2 i6 \n", "x2 x6 1 1 1\n", "y2 y6 2 2 2\n", "z2 z6 3 3 3\n", "c6 a6 b6 
c6\n", "i2 i6 \n", "x2 x6 1 2 3\n", "y2 y6 4 5 6\n", "z2 z6 7 8 9\n", "\n" ] } ], "source": [ "# Broadcasting series and dataframes:\n", "# every (Series, DataFrame) pair is printed before and after reshape_fns.broadcast\n", "sr_args = [\n", " ('sr_none', sr_none),\n", " ('sr1', sr1),\n", " ('sr2', sr2)\n", "]\n", "df_args = [\n", " ('df_none', df_none),\n", " ('df1', df1),\n", " ('df2', df2),\n", " ('df3', df3),\n", " ('df4', df4)\n", "]\n", "arg_combs = list(itertools.product(sr_args, df_args))\n", "\n", "for (n1, arg1), (n2, arg2) in arg_combs:\n", " print(n1 + '+' + n2)\n", " print(arg1)\n", " print(arg2)\n", " print(\"================\")\n", " arg1, arg2 = reshape_fns.broadcast(arg1, arg2)\n", " print(arg1)\n", " print(arg2)\n", " print()" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[array([[0, 0, 0],\n", " [0, 0, 0],\n", " [0, 0, 0]]),\n", " array([[1, 1, 1],\n", " [1, 1, 1],\n", " [1, 1, 1]]),\n", " array([[1, 2, 3],\n", " [1, 2, 3],\n", " [1, 2, 3]]),\n", " array([[1, 1, 1],\n", " [1, 1, 1],\n", " [1, 1, 1]]),\n", " array([[1, 1, 1],\n", " [1, 1, 1],\n", " [1, 1, 1]]),\n", " array([[1, 2, 3],\n", " [1, 2, 3],\n", " [1, 2, 3]])]" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[np.broadcast_to(x, (3, 3)) for x in (0, a1, a2, sr_none, sr1, sr2)]" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0 1 2\n", "i1 i2 \n", "x1 x2 0 0 0\n", " y2 0 0 0\n", " z2 0 0 0\n", " 0 1 2\n", "i1 i2 \n", "x1 x2 1 1 1\n", " y2 1 1 1\n", " z2 1 1 1\n", " 0 1 2\n", "i1 i2 \n", "x1 x2 1 2 3\n", " y2 1 2 3\n", " z2 1 2 3\n", " 0 1 2\n", "i1 i2 \n", "x1 x2 1 1 1\n", " y2 1 1 1\n", " z2 1 1 1\n", " 0 1 2\n", "i1 i2 \n", "x1 x2 1 1 1\n", " y2 1 1 1\n", " z2 1 1 1\n", " 0 1 2\n", "i1 i2 \n", "x1 x2 1 1 1\n", " y2 2 2 2\n", " z2 3 3 3\n" ] } ], "source": [ "# Broadcasting all at once\n", "for i in reshape_fns.broadcast(\n", " 0, a1, a2, sr_none, sr1, sr2,\n", " to_shape=(3, 3),\n",
" index_from='stack',\n", " columns_from='stack'\n", "):\n", " print(i)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 0 0 0\n", " y2 x3 y4 x5 y6 0 0 0\n", " z2 x3 z4 x5 z6 0 0 0\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 1 1\n", " y2 x3 y4 x5 y6 1 1 1\n", " z2 x3 z4 x5 z6 1 1 1\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 2 3\n", " y2 x3 y4 x5 y6 1 2 3\n", " z2 x3 z4 x5 z6 1 2 3\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 2 3\n", " y2 x3 y4 x5 y6 1 2 3\n", " z2 x3 z4 x5 z6 1 2 3\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 1 1\n", " y2 x3 y4 x5 y6 2 2 2\n", " z2 x3 z4 x5 z6 3 3 3\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 2 3\n", " y2 x3 y4 x5 y6 4 5 6\n", " z2 x3 z4 x5 z6 7 8 9\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 1 1\n", " y2 x3 y4 x5 y6 1 1 1\n", " z2 x3 z4 x5 z6 1 1 1\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 1 1\n", " y2 x3 y4 x5 y6 1 1 1\n", " z2 x3 z4 x5 z6 1 1 1\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 1 1\n", " y2 x3 y4 x5 y6 2 2 2\n", " z2 x3 z4 x5 z6 3 3 3\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 1 1\n", " y2 x3 y4 x5 y6 1 1 1\n", " z2 x3 z4 x5 z6 1 1 1\n", "c3 a3 \n", "c4 a4 \n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i1 i2 i3 i4 i5 i6 \n", "x1 x2 x3 x4 x5 x6 1 1 1\n", " y2 x3 y4 x5 y6 1 1 1\n", " 
# In[54]:
# Broadcasting all fixtures at once, stacking their index/column levels
all_inputs = (v1, a1, a2, a3, a4, a5, sr_none, sr1, sr2, df_none, df1, df2, df3, df4)
stacked = reshape_fns.broadcast(
    *all_inputs,
    index_from='stack',
    columns_from='stack'
)
for broadcasted in stacked:
    print(broadcasted)
# In[55]:
# index_from=None / columns_from=None: use as-is
as_is = reshape_fns.broadcast(
    v1, a1, a2, a3, a4, a5, sr_none, sr1, sr2, df_none, df1, df2, df3, df4,
    index_from=None,
    columns_from=None
)
for broadcasted in as_is:
    print(broadcasted)

# In[56]:
# index_from=-1 / columns_from=-1: take index and columns from the last dataframe
from_last = reshape_fns.broadcast(
    v1, a1, a2, a3, a4, a5, sr_none, sr1, sr2, df_none, df1, df2, df3, df4,
    index_from=-1,
    columns_from=-1
)
for broadcasted in from_last:
    print(broadcasted)
# In[57]:
# Index and columns specified manually via the MultiIndex fixtures
manual_labels = dict(index_from=multi_i, columns_from=multi_c)
for broadcasted in reshape_fns.broadcast(
    v1, a1, a2, a3, a4, a5, sr_none, sr1, sr2, df_none, df1, df2, df3, df4,
    **manual_labels
):
    print(broadcasted)
# In[58]:
# Do not clean columns: stack index/column levels with the clean-up options
# switched off. The settings object is global, so the overrides are undone in
# a `finally` clause; otherwise a failure inside the loop would leave them
# active for every later cell.
try:
    vbt.settings.broadcasting['drop_duplicates'] = False
    vbt.settings.broadcasting['drop_redundant'] = False
    vbt.settings.broadcasting['ignore_sr_names'] = False

    for i in reshape_fns.broadcast(
        v1, a1, a2, a3, a4, a5, sr_none, sr1, sr2, df_none, df1, df2, df3, df4,
        index_from='stack',  # stack but do not clean
        columns_from='stack'
    ):
        print(i)
finally:
    # Restore the broadcasting defaults whether or not the demo succeeded.
    vbt.settings.broadcasting.reset()

# In[59]:
# Large array used by the timing cell that follows.
big_a = np.empty((1000, 1000))
# In[60]:
# For each `requirements` setting, print the memory flags of the first
# broadcast result and time the call against the (1000, 1000) `big_a`.
# Per the recorded output, the default call returns a read-only array that
# does not own its data, whereas 'W', 'C' and 'F' each return a writeable
# array that owns its data (and are correspondingly slower).
# NOTE(review): `require_kwargs` is presumably forwarded to `np.require` - confirm.

# Default: no requirements
print(reshape_fns.broadcast(np.empty((1,)), big_a)[0].flags)
%timeit reshape_fns.broadcast(np.empty((1,)), big_a)

# 'W': writeable
print(reshape_fns.broadcast(np.empty((1,)), big_a, require_kwargs={'requirements': 'W'})[0].flags)
%timeit reshape_fns.broadcast(np.empty((1,)), big_a, require_kwargs={'requirements': 'W'})

# 'C': C-contiguous
print(reshape_fns.broadcast(np.empty((1,)), big_a, require_kwargs={'requirements': 'C'})[0].flags)
%timeit reshape_fns.broadcast(np.empty((1,)), big_a, require_kwargs={'requirements': 'C'})

# 'F': Fortran-contiguous
print(reshape_fns.broadcast(np.empty((1,)), big_a, require_kwargs={'requirements': 'F'})[0].flags)
%timeit reshape_fns.broadcast(np.empty((1,)), big_a, require_kwargs={'requirements': 'F'})
# In[61]:
# Same broadcast, returned as raw arrays (to_pd=False) and as pandas objects (to_pd=True)
for as_pandas in (False, True):
    print(reshape_fns.broadcast(v1, df4, to_pd=as_pandas))

# In[62]:
# One-side broadcasting, default behaviour is copying index/columns from the second argument
for target in (sr1, sr2, df1, df2, df3, df4):
    print(reshape_fns.broadcast_to(sr1, target))

# In[63]:
# Broadcast the first argument into an array of elements shaped like the
# second argument (here the scalar v1)
for value in (0.1, [0.1], [0.1, 0.2]):
    print(reshape_fns.broadcast_to_array_of(value, v1))

# In[64]:
# Same against a 3-element series
for value in (
    0.1,
    [0.1],
    [0.1, 0.2],
    [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
):
    print(reshape_fns.broadcast_to_array_of(value, sr2))

# In[65]:
# Same against a 3x1 dataframe
for value in (
    0.1,
    [0.1],
    [0.1, 0.2],
    [[[0.1], [0.2], [0.3]], [[0.4], [0.5], [0.6]]],
):
    print(reshape_fns.broadcast_to_array_of(value, df2))

# In[66]:
cube = np.empty((2, 2, 2))
print(reshape_fns.broadcast_to_array_of(0.1, cube))  # works even for ndim > 2

# In[67]:
# Broadcast a scalar along one axis of an array; per the recorded output the
# scalar comes back unchanged when the axis does not exist in the array.
for shape, axis in (
    ((2,), 0),
    ((2,), 1),
    ((2, 3), 0),
    ((2, 3), 1),
    ((2, 3), 2),
):
    print(reshape_fns.broadcast_to_axis_of(10, np.empty(shape), axis))
# In[68]:
# Series with a three-level MultiIndex, unstacked into an n-dimensional array
i = pd.MultiIndex.from_arrays([[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']])
sr = pd.Series([1, 2, 3, 4], index=i)
print(reshape_fns.unstack_to_array(sr))

# In[69]:
print(reshape_fns.make_symmetric(sr1))
print(reshape_fns.make_symmetric(sr2))
print(reshape_fns.make_symmetric(df1))
print(reshape_fns.make_symmetric(df2))
print(reshape_fns.make_symmetric(df3))
print(reshape_fns.make_symmetric(df4))
print(reshape_fns.make_symmetric(df5))
print(reshape_fns.make_symmetric(pd.Series([1, 2, 3], name='yo'), sort=False))

# In[70]:
print(reshape_fns.unstack_to_df(df5.iloc[0]))
print(reshape_fns.unstack_to_df(sr, index_levels=0, column_levels=1))
print(reshape_fns.unstack_to_df(sr, index_levels=(0, 1), column_levels=2))
print(reshape_fns.unstack_to_df(sr, index_levels=0, column_levels=1, symmetric=True).columns)

# ## indexing

# In[71]:
PandasIndexer = indexing.PandasIndexer
ParamIndexer = indexing.build_param_indexer(['param1', 'param2', 'tuple'])


class H(PandasIndexer, ParamIndexer):
    """Toy two-parameter "indicator" used to exercise the indexing helpers.

    Holds a dataframe `a` plus three mapper series (one per parameter and one
    for the parameter tuple) that share `a`'s columns as their index.
    """

    def __init__(self, a, param1_mapper, param2_mapper, tuple_mapper):
        """Store the data and mappers and initialise both indexer bases.

        `my_kw` is passed to each base so that `indexing_func` can report
        which indexer triggered it.
        """
        self.a = a

        self._param1_mapper = param1_mapper
        self._param2_mapper = param2_mapper
        self._tuple_mapper = tuple_mapper

        PandasIndexer.__init__(self, my_kw='PandasIndexer')
        ParamIndexer.__init__(self, [param1_mapper, param2_mapper, tuple_mapper], my_kw='ParamIndexer')

    def indexing_func(self, pd_indexing_func, my_kw=None):
        """Apply `pd_indexing_func` to the data and every mapper.

        As soon as you call iloc etc., performs it on each dataframe and
        mapper and returns a new instance. Prints `my_kw` so the notebook
        output shows which indexer was used.
        """
        print(my_kw)
        param1_mapper = indexing.indexing_on_mapper(self._param1_mapper, self.a, pd_indexing_func)
        param2_mapper = indexing.indexing_on_mapper(self._param2_mapper, self.a, pd_indexing_func)
        tuple_mapper = indexing.indexing_on_mapper(self._tuple_mapper, self.a, pd_indexing_func)
        # Build the result from the runtime class (as `run` does with `cls`)
        # so that subclasses are preserved by indexing.
        return type(self)(pd_indexing_func(self.a), param1_mapper, param2_mapper, tuple_mapper)

    @classmethod
    def run(cls, a, params1, params2, level_names=('p1', 'p2')):
        """Build an instance with `a` tiled once per parameter pair.

        Args:
            a: Input data; converted with `reshape_fns.to_2d` before use.
            params1: Values of the first parameter, one per pair.
            params2: Values of the second parameter, one per pair.
            level_names: Names of the two parameter column levels.

        Returns:
            A new `cls` instance whose columns are the parameter levels
            combined with the columns of `a`.
        """
        a = reshape_fns.to_2d(a)
        n_cols = len(a.columns)

        # Build column hierarchy
        params1_idx = pd.Index(params1, name=level_names[0])
        params2_idx = pd.Index(params2, name=level_names[1])
        params_idx = index_fns.stack_indexes((params1_idx, params2_idx))
        new_columns = index_fns.combine_indexes((params_idx, a.columns))

        # Build mappers: each parameter value is repeated once per column of `a`
        param1_mapper = pd.Series(np.repeat(params1, n_cols), index=new_columns, name=params1_idx.name)
        param2_mapper = pd.Series(np.repeat(params2, n_cols), index=new_columns, name=params2_idx.name)
        tuple_mapper = pd.Series(
            list(zip(param1_mapper.values, param2_mapper.values)),
            index=new_columns,
            name=(params1_idx.name, params2_idx.name)
        )

        # Tile a to match the length of new_columns: one copy per parameter
        # pair (previously hard-coded to 4, which only suited four pairs).
        n_tiles = len(params1)
        # NOTE(review): the third ArrayWrapper argument looks like ndim - confirm.
        a = array_wrapper.ArrayWrapper(a.index, new_columns, 2).wrap(reshape_fns.tile(a.values, n_tiles, axis=1))
        return cls(a, param1_mapper, param2_mapper, tuple_mapper)


# Simulate an indicator with two params
h = H.run(df4, [0.1, 0.1, 0.2, 0.2], [0.3, 0.4, 0.5, 0.6])

print(df4)
print(h.a)
print(h._param1_mapper)
print(h._param2_mapper)
print(h._tuple_mapper)

# In[72]:
# Indexing operations are delegated to the underlying dataframes
print(h[(0.1, 0.3, 'a6')].a)
print(h.loc[:, (0.1, 0.3, 'a6'):(0.1, 0.3, 'c6')].a)
print(h.iloc[-2:, -2:].a)
print(h.xs((0.1, 0.3), level=('p1', 'p2'), axis=1).a.columns)
# In[73]:
# Select by the first parameter: scalar, slice and list keys.
# The flag says whether only the resulting columns are printed.
for key, columns_only in (
    (0.1, True),
    (slice(0.1, 0.1), False),
    ([0.1, 0.1], False),
):
    selected = h.param1_loc[key].a
    print(selected.columns if columns_only else selected)

# In[74]:
# Select by the second parameter
for key, columns_only in (
    (0.3, False),
    (slice(0.3, 0.3), False),
    ([0.3, 0.3], True),
):
    selected = h.param2_loc[key].a
    print(selected.columns if columns_only else selected)

# In[75]:
# Select by the (param1, param2) tuple
for key, columns_only in (
    ((0.1, 0.3), False),
    (slice((0.1, 0.3), (0.1, 0.3)), True),
    ([(0.1, 0.3), (0.1, 0.3)], True),
):
    selected = h.tuple_loc[key].a
    print(selected.columns if columns_only else selected)

# ## combine_fns

# In[76]:
# Stack index/column levels by default for the cells below (global setting)
for axis_setting in ('index_from', 'columns_from'):
    vbt.settings.broadcasting[axis_setting] = 'stack'

# In[77]:
# Pure-Python and Numba variants side by side, first on 1-dim then on 2-dim values
for values in (sr2.values, df4.values):
    print(combine_fns.apply_and_concat_one(3, lambda i, x, a: x + a[i], values, [10, 20, 30]))
    print(combine_fns.apply_and_concat_one_nb(3, njit(lambda i, x, a: x + a[i]), values, (10, 20, 30)))

# In[78]:
# Apply function returning two outputs per call
for values in (sr2.values, df4.values):
    print(combine_fns.apply_and_concat_multiple(3, lambda i, x, a: (x, x + a[i]), values, [10, 20, 30]))
    print(combine_fns.apply_and_concat_multiple_nb(3, njit(lambda i, x, a: (x, x + a[i])), values, (10, 20, 30)))

# In[79]:
# Combine one array with each of several others and concatenate the results
for values in (sr2.values, df4.values):
    others = (values*2, values*3)
    print(combine_fns.combine_and_concat(values, others, lambda x, y, a: x + y + a, 100))
    print(combine_fns.combine_and_concat_nb(values, others, njit(lambda x, y, a: x + y + a), 100))

# In[80]:
# Combine several arrays into a single one
for values in (sr2.values, df4.values):
    operands = (values, values*2, values*3)
    print(combine_fns.combine_multiple(operands, lambda x, y, a: x + y + a, 100))
    print(combine_fns.combine_multiple_nb(operands, njit(lambda x, y, a: x + y + a), 100))

# ## accessors

# In[81]:
# Constant-filled objects built from scratch...
filled_sr = pd.Series.vbt.empty(5, index=np.arange(10, 15), name='a', fill_value=5)
print(filled_sr)
filled_df = pd.DataFrame.vbt.empty((5, 3), index=np.arange(10, 15), columns=['a', 'b', 'c'], fill_value=5)
print(filled_df)

# ...and built like an existing object
filled_like_sr = pd.Series.vbt.empty_like(sr2, fill_value=5)
print(filled_like_sr)
filled_like_df = pd.DataFrame.vbt.empty_like(df4, fill_value=5)
print(filled_like_df)

# In[82]:
for type_check in (sr1.vbt.is_series, sr1.vbt.is_frame, df1.vbt.is_series, df2.vbt.is_frame):
    print(type_check())

# In[83]:
# Index and columns exposed by the wrapper of a series and of a dataframe
for pd_obj in (sr2, df4):
    print(pd_obj.vbt.wrapper.index)
    print(pd_obj.vbt.wrapper.columns)

# In[84]:
# Returning a new object: suffix the index (axis=0), then the columns (axis=1)
for axis in (0, 1):
    print(df1.vbt.apply_on_index(lambda idx: idx + '_yo', axis=axis))

# In place on a copy: the second pass shows both axes modified
df1_copy = df1.copy()
for axis in (0, 1):
    df1_copy.vbt.apply_on_index(lambda idx: idx + '_yo', axis=axis, inplace=True)
    print(df1_copy)

# In[85]:
for to_array in (sr2.vbt.to_1d_array, sr2.vbt.to_2d_array):
    print(to_array())

# In[86]:
# It will try to return pd.Series
sr_wrapper = sr2.vbt.wrapper
for values, labels in (
    (a2, {}),                                                      # returns sr
    (df2.values, {}),                                              # returns sr
    (df2.values, dict(index=df2.index, columns=df2.columns)),      # returns sr
    (df4.values, dict(columns=df4.columns)),                       # returns df
    (df4.values, dict(index=df4.index, columns=df4.columns)),      # returns df
):
    print(sr_wrapper.wrap(values, **labels))

# In[87]:
# It will try to return pd.DataFrame
df_wrapper = df2.vbt.wrapper
for values, labels in (
    (a2, {}),                                                      # returns df
    (sr2.values, {}),                                              # returns df
    (df4.values, dict(columns=df4.columns)),                       # returns df
    (df4.values, dict(index=df4.index, columns=df4.columns)),      # returns df
):
    print(df_wrapper.wrap(values, **labels))

# In[88]:
new_level_keys = ['a', 'b']
print(df4.vbt.tile(2, keys=new_level_keys))
print(df4.vbt.repeat(2, keys=['a', 'b']))

# In[89]:
# `multi_c1` / `multi_c2` are column indexes defined in an earlier cell
df10 = pd.DataFrame(
    [[1, 2], [4, 5], [7, 8]],
    columns=multi_c1
)
df20 = pd.DataFrame(
    [[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]],
    columns=multi_c2
)

for frame in (df10, df20, df10.vbt.align_to(df20)):
    print(frame)

# In[90]:
# Broadcasting through the accessor: class-level call, instance call, one-sided
class_level = pd.DataFrame.vbt.broadcast(sr2, 10)
print(class_level)
instance_level = sr2.vbt.broadcast(10)
print(instance_level)
one_sided = sr2.vbt.broadcast_to(df2)
print(one_sided)
"y2 2.0 NaN NaN NaN\n", "z2 3.0 NaN NaN NaN\n", "('i4', 'c4') a4 x4 y4 z4\n", "(i4, c4) \n", "a4 NaN 1.0 2.0 3.0\n", "x4 1.0 NaN NaN NaN\n", "y4 2.0 NaN NaN NaN\n", "z4 3.0 NaN NaN NaN\n", "('i5', 'c5') a5 b5 c5 x5\n", "(i5, c5) \n", "a5 NaN NaN NaN 1.0\n", "b5 NaN NaN NaN 2.0\n", "c5 NaN NaN NaN 3.0\n", "x5 1.0 2.0 3.0 NaN\n", "('i6', 'c6') a6 b6 c6 x6 y6 z6\n", "(i6, c6) \n", "a6 NaN NaN NaN 1.0 4.0 7.0\n", "b6 NaN NaN NaN 2.0 5.0 8.0\n", "c6 NaN NaN NaN 3.0 6.0 9.0\n", "x6 1.0 2.0 3.0 NaN NaN NaN\n", "y6 4.0 5.0 6.0 NaN NaN NaN\n", "z6 7.0 8.0 9.0 NaN NaN NaN\n" ] } ], "source": [ "print(sr2.vbt.make_symmetric())\n", "print(df2.vbt.make_symmetric())\n", "print(df3.vbt.make_symmetric())\n", "print(df4.vbt.make_symmetric())" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 1. nan nan]\n", " [nan 4. nan]\n", " [nan nan 7.]]\n" ] } ], "source": [ "print(df5.iloc[:, 0].vbt.unstack_to_array())" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "i8 x8 y8 z8\n", "i7 \n", "x7 1.0 NaN NaN\n", "y7 NaN 4.0 NaN\n", "z7 NaN NaN 7.0\n" ] } ], "source": [ "print(df5.iloc[:, 0].vbt.unstack_to_df())" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "i2\n", "x2 1\n", "y2 4\n", "z2 9\n", "Name: a2, dtype: int64\n", "i2\n", "x2 1\n", "y2 4\n", "z2 9\n", "Name: a2, dtype: int64\n", "c4 a4\n", "i4 \n", "x4 1\n", "y4 4\n", "z4 9\n" ] } ], "source": [ "print(sr2.vbt.apply(apply_func=lambda x: x ** 2))\n", "print(sr2.vbt.apply(apply_func=lambda x: x ** 2, to_2d=True))\n", "print(df2.vbt.apply(apply_func=lambda x: x ** 2))" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " a b c \n", "c6 a6 b6 c6 a6 b6 c6 a6 b6 c6\n", "i2 i6 \n", "x2 x6 1 1 1 10 
10 10 1 2 3\n", "y2 y6 2 2 2 10 10 10 4 5 6\n", "z2 z6 3 3 3 10 10 10 7 8 9\n", " a b c \n", "c6 a6 b6 c6 a6 b6 c6 a6 b6 c6\n", "i2 i6 \n", "x2 x6 1 1 1 10 10 10 1 2 3\n", "y2 y6 2 2 2 10 10 10 4 5 6\n", "z2 z6 3 3 3 10 10 10 7 8 9\n" ] } ], "source": [ "print(pd.DataFrame.vbt.concat(sr2, 10, df4, keys=['a', 'b', 'c']))\n", "print(sr2.vbt.concat(10, df4, keys=['a', 'b', 'c']))" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "apply_idx 0 1 2\n", "i2 \n", "x2 112 113 114\n", "y2 113 114 115\n", "z2 114 115 116\n", "apply_idx 0 1 2\n", "i2 \n", "x2 112 113 114\n", "y2 113 114 115\n", "z2 114 115 116\n", "apply_idx 0 1 2\n", "i2 \n", "x2 112 113 114\n", "y2 116 117 118\n", "z2 120 121 122\n", "apply_idx 0 1 2\n", "i2 \n", "x2 112 113 114\n", "y2 116 117 118\n", "z2 120 121 122\n", "apply_idx 0 1 2 \n", "c6 a6 b6 c6 a6 b6 c6 a6 b6 c6\n", "i6 \n", "x6 112 116 120 113 117 121 114 118 122\n", "y6 115 119 123 116 120 124 117 121 125\n", "z6 118 122 126 119 123 127 120 124 128\n", "hello a b c \n", "c6 a6 b6 c6 a6 b6 c6 a6 b6 c6\n", "i6 \n", "x6 112 116 120 113 117 121 114 118 122\n", "y6 115 119 123 116 120 124 117 121 125\n", "z6 118 122 126 119 123 127 120 124 128\n" ] } ], "source": [ "print(sr2.vbt.apply_and_concat(3, sr2.values, 10, apply_func=lambda i, x, y, c, d=1: x + y[i] + c + d, d=100))\n", "print(sr2.vbt.apply_and_concat(3, sr2.values, 10, apply_func=njit(lambda i, x, y, c: x + y[i] + c + 100)))\n", "print(sr2.vbt.apply_and_concat(3, df4.values, 10, apply_func=lambda i, x, y, c, d=1: x + y[:, i] + c + d, d=100))\n", "print(sr2.vbt.apply_and_concat(3, df4.values, 10, apply_func=njit(lambda i, x, y, c: x + y[:, i] + c + 100)))\n", "print(df4.vbt.apply_and_concat(3, df4.values, 10, apply_func=lambda i, x, y, c, d=1: x + y[:, i] + c + d, d=100))\n", "print(df4.vbt.apply_and_concat(\n", " 3, \n", " df4.values, \n", " 10, \n", " apply_func=njit(lambda i, x, y, c: x + y[:, i] + c + 
100), \n", " keys=pd.Index(['a', 'b', 'c'], name='hello')))" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "i2\n", "x2 11.0\n", "y2 12.0\n", "z2 13.0\n", "Name: a2, dtype: float64\n", "i2\n", "x2 1111\n", "y2 1112\n", "z2 1113\n", "Name: a2, dtype: int64\n", "i2\n", "x2 11\n", "y2 22\n", "z2 33\n", "Name: a2, dtype: int64\n", " a2 a2 a2\n", "i2 \n", "x2 11 21 31\n", "y2 12 22 32\n", "z2 13 23 33\n", "i2 i1\n", "x2 x1 2\n", "y2 x1 3\n", "z2 x1 4\n", "dtype: int64\n", "i2\n", "x2 2\n", "y2 4\n", "z2 6\n", "Name: a2, dtype: int64\n", "c4 a4\n", "i2 i4 \n", "x2 x4 2\n", "y2 y4 4\n", "z2 z4 6\n", "c5 a5 b5 c5\n", "i2 i5 \n", "x2 x5 2 3 4\n", "y2 x5 3 4 5\n", "z2 x5 4 5 6\n", "c6 a6 b6 c6\n", "i2 i6 \n", "x2 x6 2 3 4\n", "y2 y6 6 7 8\n", "z2 z6 10 11 12\n", "c7 a7 b7 c7\n", "c8 a8 b8 c8\n", "i2 i7 i8 \n", "x2 x7 x8 2 3 4\n", "y2 y7 y8 6 7 8\n", "z2 z7 z8 10 11 12\n" ] } ], "source": [ "print(sr2.vbt.combine(10., combine_func=lambda x, y: x + y))\n", "print(sr2.vbt.combine(10, 100, d=1000, combine_func=lambda x, y, c, d=1: x + y + c + d)) # test args and kwargs\n", "print(sr2.vbt.combine(np.array([10, 20, 30]), combine_func=lambda x, y: x + y))\n", "print(sr2.vbt.combine(np.array([[10, 20, 30]]), combine_func=lambda x, y: x + y))\n", "print(sr2.vbt.combine(sr1, combine_func=lambda x, y: x + y, broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(sr2, combine_func=lambda x, y: x + y, broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(df2, combine_func=lambda x, y: x + y, broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(df3, combine_func=lambda x, y: x + y, broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(df4, combine_func=lambda x, y: x + y, broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(df5, combine_func=lambda x, y: x + y, broadcast_kwargs=dict(index_from='stack')))" ] }, { 
"cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "i2\n", "x2 361\n", "y2 382\n", "z2 403\n", "dtype: int64\n", "c3 a3 \n", "c5 a5 b5 c5\n", "i2 i3 i5 \n", "x2 x3 x5 703 724 745\n", "y2 x3 x5 714 735 756\n", "z2 x3 x5 725 746 767\n", "c3 a3 \n", "c5 a5 b5 c5\n", "i2 i3 i5 \n", "x2 x3 x5 703 724 745\n", "y2 x3 x5 714 735 756\n", "z2 x3 x5 725 746 767\n", "c3 a3 \n", "c5 a5 b5 c5\n", "i2 i3 i5 \n", "x2 x3 x5 703 724 745\n", "y2 x3 x5 714 735 756\n", "z2 x3 x5 725 746 767\n" ] } ], "source": [ "print(sr2.vbt.combine(\n", " [10, [10, 20, 30], pd.Series([10, 20, 30])],\n", " 10, b=100,\n", " combine_func=lambda x, y, a, b=1: x + y + a + b, \n", " broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(\n", " [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],\n", " 10, b=100,\n", " combine_func=lambda x, y, a, b=1: x + y + a + b, \n", " broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(\n", " [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],\n", " 10,\n", " combine_func=njit(lambda x, y, a, b=1: x + y + a + 100), \n", " broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(\n", " [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],\n", " 10,\n", " combine_func=njit(lambda x, y, a, b=1: x + y + a + 100), \n", " broadcast_kwargs=dict(index_from='stack')))" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "combine_idx 0 1 2\n", "i2 \n", "x2 121 121 121\n", "y2 122 132 132\n", "z2 123 143 143\n", "combine_idx 0 1 2 3 4 \\\n", "c3 a3 a3 a3 a3 a3 \n", "c5 a5 b5 c5 a5 b5 c5 a5 b5 c5 a5 b5 c5 a5 \n", "i2 i3 i5 \n", "x2 x3 x5 121 121 121 121 131 141 121 131 141 121 121 121 112 \n", "y2 x3 x5 122 122 122 122 132 142 122 132 142 132 132 132 113 \n", "z2 x3 x5 123 123 123 123 133 143 123 133 143 143 143 143 114 
\n", "\n", "combine_idx 5 \n", "c3 a3 \n", "c5 b5 c5 a5 b5 c5 \n", "i2 i3 i5 \n", "x2 x3 x5 112 112 112 113 114 \n", "y2 x3 x5 113 113 113 114 115 \n", "z2 x3 x5 114 114 114 115 116 \n", "combine_idx 0 1 2 3 4 \\\n", "c3 a3 a3 a3 a3 a3 \n", "c5 a5 b5 c5 a5 b5 c5 a5 b5 c5 a5 b5 c5 a5 \n", "i2 i3 i5 \n", "x2 x3 x5 121 121 121 121 131 141 121 131 141 121 121 121 112 \n", "y2 x3 x5 122 122 122 122 132 142 122 132 142 132 132 132 113 \n", "z2 x3 x5 123 123 123 123 133 143 123 133 143 143 143 143 114 \n", "\n", "combine_idx 5 \n", "c3 a3 \n", "c5 b5 c5 a5 b5 c5 \n", "i2 i3 i5 \n", "x2 x3 x5 112 112 112 113 114 \n", "y2 x3 x5 113 113 113 114 115 \n", "z2 x3 x5 114 114 114 115 116 \n", " a b c d e \\\n", "c3 a3 a3 a3 a3 a3 \n", "c5 a5 b5 c5 a5 b5 c5 a5 b5 c5 a5 b5 c5 a5 \n", "i2 i3 i5 \n", "x2 x3 x5 121 121 121 121 131 141 121 131 141 121 121 121 112 \n", "y2 x3 x5 122 122 122 122 132 142 122 132 142 132 132 132 113 \n", "z2 x3 x5 123 123 123 123 133 143 123 133 143 143 143 143 114 \n", "\n", " f \n", "c3 a3 \n", "c5 b5 c5 a5 b5 c5 \n", "i2 i3 i5 \n", "x2 x3 x5 112 112 112 113 114 \n", "y2 x3 x5 113 113 113 114 115 \n", "z2 x3 x5 114 114 114 115 116 \n" ] } ], "source": [ "# Test concat=True\n", "print(sr2.vbt.combine(\n", " [10, [10, 20, 30], pd.Series([10, 20, 30])],\n", " 10, b=100,\n", " combine_func=lambda x, y, a, b=1: x + y + a + b, \n", " concat=True,\n", " broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(\n", " [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],\n", " 10, b=100,\n", " combine_func=lambda x, y, a, b=1: x + y + a + b, \n", " concat=True,\n", " broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(\n", " [10, [10, 20, 30], [[10, 20, 30]], pd.Series([10, 20, 30]), df1, df3],\n", " 10,\n", " combine_func=njit(lambda x, y, a, b=1: x + y + a + 100),\n", " concat=True,\n", " broadcast_kwargs=dict(index_from='stack')))\n", "print(sr2.vbt.combine(\n", " [10, [10, 20, 30], [[10, 20, 30]], 
pd.Series([10, 20, 30]), df1, df3],\n", " 10,\n", " combine_func=njit(lambda x, y, a, b=1: x + y + a + 100),\n", " concat=True,\n", " keys=['a', 'b', 'c', 'd', 'e', 'f'],\n", " broadcast_kwargs=dict(index_from='stack')))" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 2\n", "1 4\n", "2 6\n", "dtype: int64\n", "c5 a5 b5 c5\n", "c6 a6 b6 c6\n", "i5 i6 \n", "x5 x6 2 4 6\n", " y6 5 7 9\n", " z6 8 10 12\n" ] } ], "source": [ "# Use magic methods with .vbt to do operations with custom broadcasting\n", "print(pd.Series([1, 2, 3]).vbt + [1, 2, 3])\n", "print(df3.vbt + df4.vbt) # a plain df3 + df4 would return all NaNs because the labels do not align" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }