신호생성 repo (24. 1. 5 ~).
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

1296 lines
94 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": 158,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1. 데이터를 로드합니다.\n",
"1-1. 네트워크가 로드되었습니다.\n",
"1-2. 테이블들이 로드되었습니다.\n",
"1-3. 네트워크의 모든 clean state requirement들을 체크했습니다.\n",
"1-4. 테이블들의 무결성 검사를 완료했습니다.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"이동류정보 불러오는 중: 100%|██████████| 17280/17280 [00:13<00:00, 1269.15it/s]\n"
]
}
],
"source": [
"import sys\n",
"from datetime import datetime\n",
"sys.path.append('../../Scripts')\n",
"from preprocess_daily import DailyPreprocessor\n",
"self = DailyPreprocessor()\n",
"\n",
"# 1. 데이터 준비\n",
"self.load_data()\n",
"\n",
"self.make_match1()\n",
"self.make_match2()\n",
"self.make_match3()\n",
"self.make_match4()\n",
"self.make_match5()"
]
},
{
"cell_type": "code",
"execution_count": 159,
"metadata": {},
"outputs": [],
"source": [
"# self.node2inter = dict(zip(self.inter_node['node_id'], self.inter_node['inter_no']))\n",
"\n",
"# child_ids = self.inter_node[self.inter_node.inter_type=='child'].node_id.unique()\n",
"# ch2pa = {} # child to parent\n",
"# for child_id in child_ids:\n",
"# parent_no = self.inter_node[self.inter_node.node_id==child_id].inter_no.iloc[0]\n",
"# sub_inter_node = self.inter_node[self.inter_node.inter_no==parent_no]\n",
"# ch2pa[child_id] = sub_inter_node[sub_inter_node.inter_type=='parent'].iloc[0].node_id\n",
"# directions = ['북', '북동', '동', '남동', '남', '남서', '서', '북서'] # 정북기준 시계방향으로 8방향\n",
"\n",
"# # 각 uturn node에 대하여 (inc_edge_id, out_edge_id) 부여\n",
"# cmatches = []\n",
"# for _, row in self.uturn.iterrows():\n",
"# child_id = row.child_id\n",
"# parent_id = row.parent_id\n",
"# direction = row.direction\n",
"# condition = row.condition\n",
"# inc_edge_id = row.inc_edge\n",
"# out_edge_id = row.out_edge\n",
"# # match5에서 parent_id에 해당하는 행들을 가져옴\n",
"# cmatch = self.match5.copy()[self.match5.node_id==parent_id] # match dataframe for a child node\n",
"# cmatch = cmatch.sort_values(by=['phase_no', 'ring_type']).reset_index(drop=True)\n",
"# cmatch['node_id'] = child_id\n",
"# cmatch[['inc_edge', 'out_edge']] = np.nan\n",
"\n",
"# # condition 별로 inc_dire, out_dire_A, out_dire_B를 정함\n",
"# ind = directions.index(direction)\n",
"# if condition == \"좌회전시\":\n",
"# inc_dire = direction\n",
"# out_dire_A = out_dire_B = directions[(ind + 2) % len(directions)]\n",
"# elif condition == \"보행신호시\":\n",
"# inc_dire = directions[(ind + 2) % len(directions)]\n",
"# out_dire_A = directions[(ind - 2) % len(directions)]\n",
"# out_dire_B = directions[(ind - 2) % len(directions)]\n",
"# print(child_id, ((cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_A)).any())\n",
"# # (inc_dire, out_dire_A, out_dire_B) 별로 inc_edge_id, out_edge_id를 정함\n",
"# cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_A), ['inc_edge', 'out_edge']] = [inc_edge_id, out_edge_id]\n",
"# cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_B), ['inc_edge', 'out_edge']] = [inc_edge_id, out_edge_id]\n",
"# if condition == '보행신호시':\n",
"# # 이동류번호가 17(보행신호)이면서 유턴노드방향으로 가는 신호가 없으면 (inc_edge_id, out_edge_id)를 부여한다.\n",
"# cmatch.loc[(cmatch.move_no==17) & (cmatch.out_dir!=direction), ['inc_edge', 'out_edge']] = [inc_edge_id, out_edge_id]\n",
"# # 유턴신호의 이동류번호를 19로 부여한다.\n",
"# cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_A), 'move_no'] = 19\n",
"# cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_B), 'move_no'] = 19\n",
"# cmatches.append(cmatch)\n",
"\n",
"# # 각 coordination node에 대하여 (inc_edge_id, out_edge_id) 부여\n",
"# self.coord['inter_no'] = self.coord['parent_id'].map(self.node2inter)\n",
"# self.coord = self.coord.rename(columns={'child_id':'node_id'})\n",
"# self.coord[['inc_dir', 'out_dir', 'inc_angle','out_angle']] = np.nan\n",
"# self.coord['move_no'] = 20\n",
"# self.coord = self.coord[['inter_no', 'phase_no', 'ring_type', 'move_no', 'inc_dir', 'out_dir', 'inc_angle','out_angle', 'inc_edge', 'out_edge', 'node_id']]\n",
"\n",
"# # display(coord)\n",
"# cmatches = pd.concat(cmatches)\n",
"# self.match6 = pd.concat([self.match5, cmatches, self.coord]).drop_duplicates().sort_values(by=['inter_no', 'node_id', 'phase_no', 'ring_type'])\n",
"# # self.match6.to_csv(os.path.join(self.path_intermediates, 'match6.csv'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"splits 딕셔너리 다시 만들기"
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[37 39 55 29 0 0 0 0]\n",
"[37 39 25 59 0 0 0 0]\n",
"[37 39 25 30 29]\n",
"{(1, 1): 1, (2, 2): 2, (3, 3): 3, (3, 4): 4, (4, 4): 5}\n",
"{(1, 1): 1, (2, 2): 2, (3, 3): 3, (3, 4): 4, (4, 4): 5}\n"
]
}
],
"source": [
"import numpy as np\n",
"row = self.plan.iloc[0]\n",
"# print(row)\n",
"inter_no = row.inter_no\n",
"start_hour = row.start_hour\n",
"start_minute = row.start_minute\n",
"cycle = row.cycle\n",
"\n",
"dura_A = np.array(row[[f'dura_A{j}' for j in range(1, 9)]])\n",
"dura_B = np.array(row[[f'dura_B{j}' for j in range(1, 9)]])\n",
"\n",
"print(dura_A)\n",
"print(dura_B)\n",
"\n",
"cums_A = dura_A.cumsum()\n",
"cums_B = dura_B.cumsum()\n",
"\n",
"combined_row = np.unique(np.concatenate((cums_A,cums_B)))\n",
"detailed_durations = np.concatenate(([combined_row[0]], np.diff(combined_row)))\n",
"\n",
"print(detailed_durations)\n",
"\n",
"split = {}\n",
"ja = 0\n",
"jb = 0\n",
"for k in range(len(detailed_durations)):\n",
" dura_A[ja] -= detailed_durations[k]\n",
" dura_B[jb] -= detailed_durations[k]\n",
" split[(ja+1, jb+1)] = k+1\n",
" if dura_A[ja] == 0:\n",
" ja += 1\n",
" if dura_B[jb] == 0:\n",
" jb += 1\n",
"print(split)\n",
"print({(1, 1): 1, (2, 2): 2, (3, 3): 3, (3, 4): 4, (4, 4): 5})"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[37 39 55 29 0 0 0 0]\n",
"[37 39 25 59 0 0 0 0]\n",
"[ 37 76 131 160 160 160 160 160]\n",
"[ 37 76 101 160 160 160 160 160]\n",
"[ 37 76 101 131 160]\n",
"[37 39 25 30 29]\n",
"{(1, 1): 37, (2, 2): 39, (3, 3): 25, (3, 4): 55, (4, 4): 29, (4, 5): 59, (5, 5): 0, (6, 6): 0, (7, 7): 0, (8, 8): 0}\n"
]
}
],
"source": [
"import numpy as np\n",
"row = self.plan.iloc[0]\n",
"inter_no = row.inter_no\n",
"start_hour = row.start_hour\n",
"start_minute = row.start_minute\n",
"cycle = row.cycle\n",
"\n",
"dura_A = row[[f'dura_A{j}' for j in range(1, 9)]]\n",
"dura_B = row[[f'dura_B{j}' for j in range(1, 9)]]\n",
"\n",
"print(np.array(dura_A))\n",
"print(np.array(dura_B))\n",
"\n",
"cums_A = row[[f'dura_A{j}' for j in range(1,9)]].cumsum()\n",
"cums_B = row[[f'dura_B{j}' for j in range(1,9)]].cumsum()\n",
"\n",
"print(np.array(cums_A))\n",
"print(np.array(cums_B))\n",
"\n",
"detailed_cums = []\n",
"combined_row = np.unique(np.concatenate((cums_A,cums_B)))\n",
"print(combined_row)\n",
"detailed_durations = np.concatenate(([combined_row[0]], np.diff(combined_row)))\n",
"\n",
"print(detailed_durations)\n",
"\n",
"duration_dict = {}\n",
"# 두 시리즈의 길이가 같다고 가정합니다.\n",
"for i in range(len(dura_A)):\n",
" # A와 B의 현시시간이 같은 경우\n",
" if dura_A[i] == dura_B[i]:\n",
" duration_dict[(i+1, i+1)] = dura_A[i]\n",
" # A와 B의 현시시간이 다른 경우\n",
" else:\n",
" duration_dict[(i+1, i+1)] = min(dura_A[i], dura_B[i])\n",
" duration_dict[(i+1, i+2)] = max(dura_A[i], dura_B[i])\n",
"\n",
"print(duration_dict)\n",
"# cums_A = row[[f'dura_A{j}' for j in range(1,9)]].cumsum()\n",
"# cums_B = row[[f'dura_B{j}' for j in range(1,9)]].cumsum()\n",
"# print(cums_A)\n",
"# print(cums_B)\n",
"# detailed_cums = []\n",
"# combined_row = np.unique(np.concatenate((cums_A,cums_B)))\n",
"# print(combined_row)\n",
"# detailed_durations = np.concatenate(([combined_row[0]], np.diff(combined_row)))\n",
"# print(detailed_durations)"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"start_time = time.time()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGfCAYAAAD/BbCUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAbFUlEQVR4nO3de5DVdf348dcK7gGRXQJll81F8UrjBRtKXC3T3BHI0TQnRypv43gptDEqhUZFqxk0mySTdGpUcsooZ7xMYjYKiamAQRCRyaADCemuE87uAl9dlH3//ijPr5WLLp7z3j3weMx8Rs/nvM857897dnaf8zmfw6lKKaUAAMhkr96eAACwZxEfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVv17MnjGjBnx4IMPxosvvhgDBw6ME044IW655ZY44ogjimNOPvnkWLBgQbfHXX755XHXXXd9oNfo6uqKV199NQYPHhxVVVU9mR4A0EtSSrFx48ZoaGiIvfba+bmNqp58t8uECRPivPPOi09+8pPxzjvvxHe+851YuXJlvPDCCzFo0KCI+E98HH744fHd7363+Lh99tknampqPtBrrF+/PhobGz/olACAPmTdunVxwAEH7HRMj858PP74491uz549O4YPHx5Lly6Nk046qbh/n332ifr6+p48ddHgwYMj4j+T/6DBAgD0ro6OjmhsbCz+Hd+ZHsXHe7W3t0dExNChQ7vt/9WvfhW//OUvo76+Ps4444y4/vrrY5999tnuc3R2dkZnZ2fx9saNGyMioqamRnwAQIX5IJdM7HJ8dHV1xdVXXx0nnnhiHHXUUcX9X/rSl+LAAw+MhoaGWLFiRVx77bWxatWqePDBB7f7PDNmzIibbrppV6cBAFSYHl3z8b+++tWvxu9///t45plndvrezvz58+PUU0+Nl156KQ455JBt7n/vmY93T9u0t7c78wEAFaKjoyNqa2s/0N/vXTrzceWVV8ajjz4aTz/99PteVDJu3LiIiB3GR6FQiEKhsCvTAAAqUI/iI6UUV111VTz00EPx1FNPxahRo973McuXL4+IiBEjRuzSBAGA3UuP4mPy5Mlx//33xyOPPBKDBw+OlpaWiIiora2NgQMHxssvvxz3339/fO5zn4thw4bFihUr4hvf+EacdNJJccwxx5TlAACAytKjaz52dAXrvffeGxdddFGsW7cuvvKVr8TKlStj8+bN0djYGGeffXZcd911H/j6jZ68ZwQA9A1lu+bj/TqlsbFxm3/dFADgf/luFwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyGqXv9WWfA6aOre3p9Bja28+vbenAEAf5cwHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AIKsexceMGTPik5/8ZAwePDiGDx8eZ511VqxatarbmLfeeismT54cw4YNi3333TfOOeecaG1tLemkAYDK1aP4WLBgQUyePDkWLVoUTzzxRLz99ttx2mmnxebNm4tjvvGNb8Tvfve7eOCBB2LBggXx6quvxhe+8IWSTxwAqEz9ezL48ccf73Z79uzZMXz48Fi6dGmcdNJJ0d7eHnfffXfcf//98dnPfjYiIu6999742Mc+FosWLYrjjz++dDMHACrSh7rmo729PSIihg4dGhERS5cujb
fffjuam5uLY0aPHh0jR46MhQsXbvc5Ojs7o6Ojo9sGAOy+djk+urq64uqrr44TTzwxjjrqqIiIaGlpierq6hgyZEi3sXV1ddHS0rLd55kxY0bU1tYWt8bGxl2dEgBQAXY5PiZPnhwrV66MOXPmfKgJTJs2Ldrb24vbunXrPtTzAQB9W4+u+XjXlVdeGY8++mg8/fTTccABBxT319fXx5YtW6Ktra3b2Y/W1taor6/f7nMVCoUoFAq7Mg0AoAL16MxHSimuvPLKeOihh2L+/PkxatSobvePHTs29t5775g3b15x36pVq+KVV16Jpqam0swYAKhoPTrzMXny5Lj//vvjkUceicGDBxev46itrY2BAwdGbW1tXHLJJTFlypQYOnRo1NTUxFVXXRVNTU0+6QIAREQP4+POO++MiIiTTz652/577703LrroooiIuO2222KvvfaKc845Jzo7O2P8+PHx05/+tCSTBQAqX4/iI6X0vmMGDBgQs2bNilmzZu3ypACA3ZfvdgEAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsehwfTz/9dJxxxhnR0NAQVVVV8fDDD3e7/6KLLoqqqqpu24QJE0o1XwCgwvU4PjZv3hxjxoyJWbNm7XDMhAkT4rXXXituv/71rz/UJAGA3Uf/nj5g4sSJMXHixJ2OKRQKUV9f/4Ger7OzMzo7O4u3Ozo6ejolAKCClOWaj6eeeiqGDx8eRxxxRHz1q1+NDRs27HDsjBkzora2trg1NjaWY0oAQB9R8viYMGFC3HfffTFv3ry45ZZbYsGCBTFx4sTYunXrdsdPmzYt2tvbi9u6detKPSUAoA/p8dsu7+e8884r/v/RRx8dxxxzTBxyyCHx1FNPxamnnrrN+EKhEIVCodTTAAD6qLJ/1Pbggw+O/fbbL1566aVyvxQAUAHKHh/r16+PDRs2xIgRI8r9UgBABejx2y6bNm3qdhZjzZo1sXz58hg6dGgMHTo0brrppjjnnHOivr4+Xn755bjmmmvi0EMPjfHjx5d04gBAZepxfCxZsiROOeWU4u0pU6ZERMSFF14Yd955Z6xYsSJ+8YtfRFtbWzQ0NMRpp50W3/ve91zXAQBExC7Ex8knnxwppR3e/4c//OFDTQgA2L35bhcAICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKz69/YE2D0dNHVub0+hx9befHpvTwFgj+DMBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACy6nF8PP3003HGGWdEQ0NDVFVVxcMPP9zt/pRS3HDDDTFixIgYOHBgNDc3x+rVq0s1XwCgwvU4PjZv3hxjxoyJWbNmbff+H/
zgB3H77bfHXXfdFYsXL45BgwbF+PHj46233vrQkwUAKl+Pv1hu4sSJMXHixO3el1KKmTNnxnXXXRef//znIyLivvvui7q6unj44YfjvPPO+3CzBQAqXkmv+VizZk20tLREc3NzcV9tbW2MGzcuFi5cuN3HdHZ2RkdHR7cNANh9lTQ+WlpaIiKirq6u2/66urrife81Y8aMqK2tLW6NjY2lnBIA0Mf0+qddpk2bFu3t7cVt3bp1vT0lAKCMShof9fX1ERHR2trabX9ra2vxvvcqFApRU1PTbQMAdl8ljY9Ro0ZFfX19zJs3r7ivo6MjFi9eHE1NTaV8KQCgQvX40y6bNm2Kl156qXh7zZo1sXz58hg6dGiMHDkyrr766vj+978fhx12WIwaNSquv/76aGhoiLPOOquU8wYAKlSP42PJkiVxyimnFG9PmTIlIiIuvPDCmD17dlxzzTWxefPmuOyyy6KtrS0+9alPxeOPPx4DBgwo3awBgIpVlVJKvT2J/9XR0RG1tbXR3t7u+o//Omjq3N6ewh5h7c2n9/YUACpWT/5+9/qnXQCAPYv4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACCr/r09AWDPctDUub09hR5be/PpvT0F2K048wEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZlTw+brzxxqiqquq2jR49utQvAwBUqP7leNIjjzwynnzyyf//Iv3L8jIAQAUqSxX0798/6uvry/HUAECFK8s1H6tXr46GhoY4+OCD48tf/nK88sorOxzb2dkZHR0d3TYAYPdV8vgYN25czJ49Ox5//PG48847Y82aNfHpT386Nm7cuN3xM2bMiNra2uLW2NhY6ikBAH1IyeNj4sSJ8cUvfjGOOeaYGD9+fDz22GPR1tYWv/3tb7c7ftq0adHe3l7c1q1bV+opAQB9SNmvBB0yZEgcfvjh8dJLL233/kKhEIVCodzTAAD6iLL/Ox+bNm2Kl19+OUaMGFHulwIAKkDJ4+Nb3/pWLFiwINauXRvPPfdcnH322dGvX7+YNGlSqV8KAKhAJX/bZf369TFp0qTYsGFD7L///vGpT30qFi1aFPvvv3+pXwoAqEAlj485c+aU+ikBgN2I73YBALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZiQ8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZCU+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgq/69PYHcDpo6t7enQB/lZwMgD2c+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJCV+AAAshIfAEBW4gMAyEp8AABZ9e/tCQBAJTto6tzenkKPrb359F59fWc+AICsxAcAkJX4AACyEh8AQFbiAwDISnwAAFmJDwAgK/EBAGQlPgCArMQHAJBV2eJj1qxZcdBBB8WAAQNi3Lhx8fzzz5frpQCAClKW+PjNb34TU6ZMienTp8df/vKXGDNmTIwfPz5ef/31crwcAFBByvLFcj/60Y/i0ksvjYsvvjgiIu66666YO3du3HPPPTF16tRuYzs7O6
Ozs7N4u729PSIiOjo6yjG16Or8v7I8L7D7KtfvI3YPlfh3pRw/0+8+Z0rp/QenEuvs7Ez9+vVLDz30ULf9F1xwQTrzzDO3GT99+vQUETabzWaz2XaDbd26de/bCiU/8/Hvf/87tm7dGnV1dd3219XVxYsvvrjN+GnTpsWUKVOKt7u6uuKNN96IYcOGRVVV1S7NoaOjIxobG2PdunVRU1OzS8/BB2Ot87DO+VjrPKxzPrnWOqUUGzdujIaGhvcdW5a3XXqiUChEoVDotm/IkCElee6amho/1JlY6zyscz7WOg/rnE+Ota6trf1A40p+wel+++0X/fr1i9bW1m77W1tbo76+vtQvBwBUmJLHR3V1dYwdOzbmzZtX3NfV1RXz5s2LpqamUr8cAFBhyvK2y5QpU+LCCy+MT3ziE3HcccfFzJkzY/PmzcVPv5RboVCI6dOnb/N2DqVnrfOwzvlY6zyscz59ca2rUvogn4npuTvuuCNuvfXWaGlpiWOPPTZuv/32GDduXDleCgCoIGWLDwCA7fHdLgBAVuIDAMhKfAAAWYkPACCrioyPN954I7785S9HTU1NDBkyJC655JLYtGnTTh/z1ltvxeTJk2PYsGGx7777xjnnnLPNP4T29a9/PcaOHRuFQiGOPfbYMh5B3zVr1qw46KCDYsCAATFu3Lh4/vnndzr+gQceiNGjR8eAAQPi6KOPjscee6zb/SmluOGGG2LEiBExcODAaG5ujtWrV5fzECpGqdf6wQcfjNNOO6341QTLly8v4+wrRynX+e23345rr702jj766Bg0aFA0NDTEBRdcEK+++mq5D6MilPpn+sYbb4zRo0fHoEGD4iMf+Ug0NzfH4sWLy3kIFaHU6/y/rrjiiqiqqoqZM2eWeNbv8eG/Si6/CRMmpDFjxqRFixalP/3pT+nQQw9NkyZN2uljrrjiitTY2JjmzZuXlixZko4//vh0wgkndBtz1VVXpTvuuCOdf/75acyYMWU8gr5pzpw5qbq6Ot1zzz3p73//e7r00kvTkCFDUmtr63bHP/vss6lfv37pBz/4QXrhhRfSddddl/bee+/0t7/9rTjm5ptvTrW1tenhhx9Of/3rX9OZZ56ZRo0ald58881ch9UnlWOt77vvvnTTTTeln//85yki0rJlyzIdTd9V6nVua2tLzc3N6Te/+U168cUX08KFC9Nxxx2Xxo4dm/Ow+qRy/Ez/6le/Sk888UR6+eWX08qVK9Mll1ySampq0uuvv57rsPqccqzzux588ME0ZsyY1NDQkG677bayHkfFxccLL7yQIiL9+c9/Lu77/e9/n6qqqtK//vWv7T6mra0t7b333umBBx4o7vvHP/6RIiItXLhwm/HTp0/fI+PjuOOOS5MnTy7e3rp1a2poaEgzZszY7vhzzz03nX766d32jRs3Ll1++eUppZS6urpSfX19uvXWW4v3t7W1pUKhkH7961+X4QgqR6nX+n+tWbNGfPxXOdf5Xc8//3yKiPTPf/6zNJOuUDnWur29PUVEevLJJ0sz6QpUrnVev359+uhHP5pWrlyZDjzwwLLHR8W97bJw4cIYMmRIfOITnyjua25ujr322muHp+OWLl0ab7/9djQ3Nxf3jR49OkaOHBkLFy4s+5wrwZYtW2Lp0qXd1mivvfaK5ubmHa7RwoULu42PiBg/fnxx/Jo1a6KlpaXbmNra2hg3btweve7lWGu2lWud29vbo6qqqmRfiFmJcqz1li1b4mc/+1nU1tbGmDFjSjf5ClKude7q6orzzz8/vv3tb8eRRx5Znsm/R8XFR0tLSwwfPrzbvv79+8fQoUOjpaVlh4+prq7e5pdDXV3dDh+zp/n3v/8dW7dujbq6um77d7ZGLS0tOx3/7n978px7gnKsNdvKsc5vvfVWXHvttTFp0qQ9+ptZy7nWjz76aOy7774xYMCAuO222+KJJ56I/fbbr7QHUCHKtc633HJL9O/fP77+9a+XftI70GfiY+
rUqVFVVbXT7cUXX+ztaQJExH8uPj333HMjpRR33nlnb09nt3XKKafE8uXL47nnnosJEybEueeeG6+//npvT2u3sXTp0vjxj38cs2fPjqqqqmyvW5YvltsV3/zmN+Oiiy7a6ZiDDz446uvrt/nBe+edd+KNN96I+vr67T6uvr4+tmzZEm1tbd3OfrS2tu7wMXua/fbbL/r167fNJ4B2tkb19fU7Hf/uf1tbW2PEiBHdxuypnyaKKM9as61yrvO74fHPf/4z5s+fv0ef9Ygo71oPGjQoDj300Dj00EPj+OOPj8MOOyzuvvvumDZtWmkPogKUY53/9Kc/xeuvvx4jR44s3r9169b45je/GTNnzoy1a9eW9iD+q8+c+dh///1j9OjRO92qq6ujqakp2traYunSpcXHzp8/P7q6unb4xXVjx46NvffeO+bNm1fct2rVqnjllVeiqamp7MdWCaqrq2Ps2LHd1qirqyvmzZu3wzVqamrqNj4i4oknniiOHzVqVNTX13cb09HREYsXL96j170ca822yrXO74bH6tWr48knn4xhw4aV5wAqSM6f6a6urujs7Pzwk65A5Vjn888/P1asWBHLly8vbg0NDfHtb387/vCHP5TvYMp6OWuZTJgwIX384x9PixcvTs8880w67LDDun3Udv369emII45IixcvLu674oor0siRI9P8+fPTkiVLUlNTU2pqaur2vKtXr07Lli1Ll19+eTr88MPTsmXL0rJly1JnZ2e2Y+tNc+bMSYVCIc2ePTu98MIL6bLLLktDhgxJLS0tKaWUzj///DR16tTi+GeffTb1798//fCHP0z/+Mc/0vTp07f7UdshQ4akRx55JK1YsSJ9/vOf91HbVJ613rBhQ1q2bFmaO3duiog0Z86ctGzZsvTaa69lP76+otTrvGXLlnTmmWemAw44IC1fvjy99tprxW1P+T2xI6Ve602bNqVp06alhQsXprVr16YlS5akiy++OBUKhbRy5cpeOca+oBy/O94rx6ddKjI+NmzYkCZNmpT23XffVFNTky6++OK0cePG4v3vftTwj3/8Y3Hfm2++mb72ta+lj3zkI2mfffZJZ5999ja/lD/zmc+kiNhmW7NmTaYj630/+clP0siRI1N1dXU67rjj0qJFi4r3feYzn0kXXnhht/G//e1v0+GHH56qq6vTkUcemebOndvt/q6urnT99denurq6VCgU0qmnnppWrVqV41D6vFKv9b333rvdn9/p06dnOJq+q5Tr/O7vlu1t//v7Zk9VyrV+880309lnn50aGhpSdXV1GjFiRDrzzDPT888/n+tw+qxS/+54rxzxUZVSSuU7rwIA0F2fueYDANgziA8AICvxAQBkJT4AgKzEBwCQlfgAALISHwBAVuIDAMhKfAAAWYkPACAr8QEAZPX/AEcZxtyoFDu3AAAAAElFTkSuQmCC",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"\n",
"n_nums = 1000\n",
"n_rows = 100000\n",
"# Creating a large DataFrame with thousands of rows\n",
"np.random.seed(0) # Seed for reproducibility\n",
"df = pd.DataFrame({\n",
" 'foo': np.random.choice(n_nums, n_rows), # 1000 unique values repeated over 10000 rows\n",
" 'bar': np.random.rand(n_rows),\n",
" 'baz': np.random.choice(['bonjour','merci','bon nuit', 'enchante'], n_rows),\n",
" 'fruit': np.random.choice(['fraise','banana','orange', 'raisin'], n_rows)\n",
"})\n",
"# display(df)\n",
"\n",
"n_iterations = 50\n",
"improved_times = []\n",
"for i in range(n_iterations):\n",
" # print(i)\n",
" selected_number = np.random.choice(n_nums)\n",
" # Original method: filtering without setting an index\n",
" time0 = time.time()\n",
" df1 = df[df.foo == selected_number]\n",
" # display(df1)\n",
" time1 = time.time()\n",
"\n",
" # Improved method: setting 'foo' as index and then using .loc[]\n",
" df2 = df.set_index('foo').loc[selected_number]\n",
" # display(df2.reset_index(drop=True))\n",
" time2 = time.time()\n",
"\n",
" # Times\n",
" elapsed_time_1 = time1 - time0 # 기존 방법의 소요시간\n",
" elapsed_time_2 = time2 - time1 # 새 방법의 소요시간\n",
" improved_time = elapsed_time_2 - elapsed_time_1 # 이 값이 작을수록 개선된다.\n",
" \n",
" improved_times.append(improved_time)\n",
"# print(improved_times)\n",
"plt.hist(improved_times)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHHCAYAAABZbpmkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/OQEPoAAAACXBIWXMAAA9hAAAPYQGoP6dpAABOSUlEQVR4nO3dd3yNZ/8H8M/JONlblmw7MZsYjVFUKlZLUaNGqMcoqdVS+hhF1SatFV1Baa1arS1ip0aUFhFqRciwkggyz/X7wy/340hCHGfEnc/79cqrPdd93df5Xvc5cj6511EIIQSIiIiIZMrI0AUQERER6RLDDhEREckaww4RERHJGsMOERERyRrDDhEREckaww4RERHJGsMOERERyRrDDhEREckaww4RERHJGsMOlSlz5sxBpUqVYGxsjHr16hm6HCLZ2r9/PxQKBfbv32/oUgzmyy+/hEKhKPNj0qtj2KHnWr58ORQKhfRjbm6OatWqITw8HKmpqVp9rt27d2Ps2LFo0qQJoqKi8PXXX2t1/PKmX79+sLa2NnQZ9Bznz5/Hl19+iWvXrhm6lNfW0aNH8eWXXyI9Pd3QpVAZZmLoAuj1MHXqVPj5+SE7OxuHDx/G0qVLsX37dpw9exaWlpZaeY59+/bByMgIP/74I5RKpVbGJCrLzp8/jylTpqBFixbw9fXV63O/9dZbePz48Wv/b+3o0aOYMmUK+vXrB3t7+5dad8KECRg3bpxuCqMyhWGHSqVt27aoX78+AOA///kPnJycMH/+fGzZsgU9e/Z8pbEfPXoES0tLpKWlwcLCQmu/fIUQyM7OhoWFhVbGo1eTn58PlUr12n+4yoWRkRHMzc0NXYZBmZiYwMSEH4PlAQ9jkUbefvttAMDVq1eltlWrViEoKAgWFhZwdHREjx49cOPGDbX1WrRogVq1aiEuLg5vvfUWLC0t8cUXX0ChUCAqKgoPHz6UDpktX74cwJMPyWnTpqFy5cowMzODr68vvvjiC+Tk5KiN7evriw4dOmDXrl2oX78+LCwssGzZMunchHXr1mHKlCnw8PCAjY0NunbtioyMDOTk5GDkyJFwcXGBtbU1+vfvX2TsqKgovP3223BxcYGZmRkCAgKwdOnSItulsIbDhw+jYcOGMDc3R6VKlbBy5coifdPT0zFq1Cj4+vrCzMwMnp6e6Nu3L+7cuSP1ycnJweTJk1GlShWYmZnBy8sLY8eOLVJfaRXWt3//fmkb1a5dWzpvY+PGjahduzbMzc0RFBSEv/76S239wkNjV65cQWhoKKysrFCxYkVMnToVQgip37Vr16BQKDB37lxERERIr9358+cBPNmL16xZM1hZWcHe3h4dO3ZEfHy8tP6GDRugUChw4MCBInNYtmwZFAoFzp49K7VduHABXbt2haOjI8zNzVG/fn1s3bpVbb3CQ7KHDx/G8OHD4ezsDHt7ewwePBi5ublIT09H37594eDgAAcHB4wdO1ZtTgCgUqkQERGBmjVrwtzcHK6urhg8eDDu379f7HZ+3vtg+fLl+OCDDwAALVu2lN73zzuHpkWLFmjRokWR9n79+hXZM7RmzRoEBQXBxsYGtra2qF27Nr755htpeXHn7BT++zx//jxatmwJS0tLeHh4YPbs2UWe8/r163jvvfdgZWUFFxcXjBo1Crt27SrVeUAPHjzAyJEjpfe+i4sL3nnnHZw6dUqt37Fjx9CmTRvY2dnB0tISzZs3x5EjR6TlX375JcaMGQMA8PPzk7ZhaQ8LFnd+jUKhQHh4ODZv3oxatWrBzMwMNWvWxM6dO4usf/jwYTRo0ADm5uaoXLkyli1bVuJzvej3Y1RUFBQKBX766Se19b7++msoFAps3769VHOiEgii54iKihIAxIkTJ9Tav/nmGwFAREZGCiGE+Oqrr4RCoRDdu3cXS5YsEVOmTBEVKlQQvr6+4v79+9J6zZs3F25ubsLZ2Vl88sknYtmyZWLz5s3i55
9/Fs2aNRNmZmbi559/Fj///LO4fPmyEEKIsLAwAUB07dpVLF68WPTt21cAEJ06dVKrycfHR1SpUkU4ODiIcePGicjISBETEyNiYmIEAFGvXj0RHBwsvv32WzF8+HChUChEjx49xIcffijatm0rFi9eLPr06SMAiClTpqiN3aBBA9GvXz+xYMECsXDhQtG6dWsBQCxatKhIDdWrVxeurq7iiy++EIsWLRKBgYFCoVCIs2fPSv0ePHggatWqJYyNjcXAgQPF0qVLxbRp00SDBg3EX3/9JYQQoqCgQLRu3VpYWlqKkSNHimXLlonw8HBhYmIiOnbs+MLXLiwsTFhZWRVbn7u7u/jyyy/FggULhIeHh7C2tharVq0S3t7eYubMmWLmzJnCzs5OVKlSRRQUFKiNaW5uLqpWrSr69OkjFi1aJDp06CAAiIkTJ0r9rl69KgCIgIAAUalSJTFz5kyxYMECcf36dbFnzx5hYmIiqlWrJmbPni29VxwcHMTVq1eFEEI8evRIWFtbi6FDhxaZV8uWLUXNmjWlx2fPnhV2dnYiICBAzJo1SyxatEi89dZbQqFQiI0bN0r9Ct/L9erVE23atFF7vceOHSuaNm0qPvzwQ7FkyRJpTitWrFB77v/85z/CxMREDBw4UERGRorPP/9cWFlZiQYNGojc3NyXeh9cvnxZDB8+XAAQX3zxhfS+T0lJKfE1bd68uWjevHmxr7WPj4/0ePfu3QKAaNWqlVi8eLFYvHixCA8PFx988IHUp/DfRUxMjNr4FStWFF5eXmLEiBFiyZIl4u233xYAxPbt26V+WVlZolKlSsLCwkKMGzdOREREiIYNG4q6desWGbM4H374oVAqlWL06NHihx9+ELNmzRLvvvuuWLVqldQnOjpaKJVKERwcLObNmycWLFgg6tSpI5RKpTh27JgQQogzZ86Inj17CgBiwYIF0jbMysp67vMXmjx5snj2YxCAqFu3rnB3dxfTpk0TERERolKlSsLS0lLcuXNH6vf3338LCwsL4e3tLWbMmCGmTZsmXF1dRZ06dYqMWdrfjx06dBB2dnYiMTFReg6lUikGDBhQqvlQyRh26LkKPyD27t0rbt++LW7cuCHWrFkjnJychIWFhUhKShLXrl0TxsbGYvr06Wrr/vPPP8LExEStvXnz5moh6WnFfTifPn1aABD/+c9/1No/++wzAUDs27dPavPx8REAxM6dO9X6Fv5Sr1WrltoHUs+ePYVCoRBt27ZV6x8cHKz2wSHEkw/fZ4WGhopKlSqptRXWcPDgQaktLS1NmJmZiU8//VRqmzRpkgCg9mFcSKVSCSGE+Pnnn4WRkZE4dOiQ2vLIyEgBQBw5cqTIuk8rKewAEEePHpXadu3aJQAICwsLcf36dal92bJlRT64CoPnJ598olZv+/bthVKpFLdv3xZC/C/s2NrairS0NLUa6tWrJ1xcXMTdu3eltjNnzggjIyPRt29fqa1nz57CxcVF5OfnS23JycnCyMhITJ06VWpr1aqVqF27tsjOzlarqXHjxqJq1apSW+F7OTQ0VNrGQjx5vRUKhRgyZIjUlp+fLzw9PdWCxaFDhwQAsXr1arX57Ny5s0h7ad8H69evL1U4KFTasDNixAhha2urtu2eVVLYASBWrlwpteXk5Ag3NzfRpUsXqW3evHkCgNi8ebPU9vjxY1GjRo1SzcfOzk4MGzasxOUqlUpUrVq1yGv16NEj4efnJ9555x2pbc6cOQKAFJRfRklhR6lUin///VdqO3PmjAAgFi5cKLV16tRJmJubq/2bOX/+vDA2NlYb82V+PyYnJwtHR0fxzjvviJycHPHGG28Ib29vkZGR8dJzI3U8jEWlEhISAmdnZ3h5eaFHjx6wtrbGpk2b4OHhgY0bN0KlUqFbt264c+eO9OPm5oaqVasiJiZGbSwzMzP079+/VM9buOt29OjRau2ffvopAGDbtm1q7X5+fggNDS12rL59+8LU1FR63KhRIw
gh8NFHH6n1a9SoEW7cuIH8/Hyp7enzfjIyMnDnzh00b94cV65cQUZGhtr6AQEBaNasmfTY2dkZ1atXx5UrV6S23377DXXr1sX7779fpM7C3err16+Hv78/atSoobZdCw8hPrtdSysgIADBwcFq8wWeHJr09vYu0v503YXCw8PV6g0PD0dubi727t2r1q9Lly5wdnaWHicnJ+P06dPo168fHB0dpfY6dergnXfeUdtV3717d6SlpakdEtmwYQNUKhW6d+8OALh37x727duHbt264cGDB9I2unv3LkJDQ3Hp0iXcvHlTraYBAwaoHboofB8MGDBAajM2Nkb9+vXV5r5+/XrY2dnhnXfeUXs9goKCYG1tXeT1KM37QFfs7e3x8OFD7Nmz56XXtba2Ru/evaXHSqUSDRs2VKt7586d8PDwwHvvvSe1mZubY+DAgaWu79ixY7h161axy0+fPo1Lly7hww8/xN27d6Vt/fDhQ7Rq1QoHDx6ESqV66bmVVkhICCpXriw9rlOnDmxtbaVtUFBQgF27dqFTp05q/2b8/f2L/P55md+Pbm5uWLx4Mfbs2YNmzZrh9OnT+Omnn2Bra6uzuZYXPDOLSmXx4sWoVq0aTExM4OrqiurVq8PI6ElWvnTpEoQQqFq1arHrPh0wAMDDw6PUJ6lev34dRkZGqFKlilq7m5sb7O3tcf36dbV2Pz+/Esd6+pcSANjZ2QEAvLy8irSrVCpkZGTAyckJAHDkyBFMnjwZsbGxePTokVr/jIwMaazingcAHBwc1M7ruHz5Mrp06VJircCT7RofH68WFp6Wlpb23PVL8jLbAUCR81GMjIxQqVIltbZq1aoBQJFzJZ59PQpfr+rVqxepy9/fH7t27cLDhw9hZWUlnauxdu1atGrVCgCwdu1a1KtXT3q+f//9F0IITJw4ERMnTix2vmlpafDw8NBo/k/P/dKlS8jIyICLi0uJz/O00rwPdGXo0KFYt24d2rZtCw8PD7Ru3RrdunVDmzZtXriup6dnkfNYHBwc8Pfff0uPr1+/jsqVKxfp9+y/05LMnj0bYWFh8PLyQlBQENq1a4e+fftK76tLly4BAMLCwkocIyMjAw4ODqV6vpf1otfu9u3bePz4cbG/86pXr64W2l/292OPHj2watUqbNu2DYMGDZLe+/RqGHaoVBo2bChdjfUslUoFhUKBHTt2wNjYuMjyZ+/1osnVUaW9Sdfzxi6utue1i/8/OfXy5cto1aoVatSogfnz58PLywtKpRLbt2/HggULivyF+aLxSkulUqF27dqYP39+scuf/XAuLU23gyZe5Uo4MzMzdOrUCZs2bcKSJUuQmpqKI0eOqN1/qXDbf/bZZyXu0Xv2A/hl5v/03FUqFVxcXLB69epi1382lOpieyoUimLXLygoUHvs4uKC06dPY9euXdixYwd27NiBqKgo9O3bFytWrHjuc+ii7md169YNzZo1w6ZNm7B7927MmTMHs2bNwsaNG9G2bVvpdZ0zZ06JNxfV5T2ktLkNXvb34927d3Hy5EkAT25NoFKppD8sSXMMO/TKKleuDCEE/Pz8pL+4tcXHxwcqlQqXLl2Cv7+/1J6amor09HT4+Pho9fmK8/vvvyMnJwdbt25V+4tP08NIwJNt9vTVRCX1OXPmDFq1alWm7siqUqlw5coVtdf64sWLAPDCe8UUvl4JCQlFll24cAEVKlSAlZWV1Na9e3esWLEC0dHRiI+PhxBCOoQFQNoTYGpqipCQEI3nVBqVK1fG3r170aRJE63dzuBlX1cHB4diD4M9u4cTeHL46d1338W7774LlUqFoUOHYtmyZZg4cWKp98CUxMfHB+fPn4cQQm0O//77b6nHcHd3x9ChQzF06FCkpaUhMDAQ06dPR9u2baVDSLa2ti98XQ3xb8PZ2RkWFhbSHqinPfveftnfj8OGDcODBw8wY8YMjB8/HhEREUUO49PLY1ykV9a5c2cYGxtjypQpRf7yEULg7t
27Go/drl07AEBERIRae+Hejvbt22s8dmkV/jX29NwyMjIQFRWl8ZhdunTBmTNnsGnTpiLLCp+nW7duuHnzJr7//vsifR4/foyHDx9q/PyvatGiRdL/CyGwaNEimJqavnCXu7u7O+rVq4cVK1ao3fH27Nmz2L17t/R6FwoJCYGjoyPWrl2LtWvXomHDhmqHxlxcXNCiRQssW7YMycnJRZ7v9u3bGs6wqG7duqGgoADTpk0rsiw/P1+jO/gWBrvSrlu5cmVcuHBBbV5nzpxRuxwbQJF/c0ZGRqhTpw4AaHzbgqeFhobi5s2bapf3Z2dnF/tefVZBQUGR89xcXFxQsWJFqbagoCBUrlwZc+fORVZWVpExnp7/y25DbTA2NkZoaCg2b96MxMREqT0+Ph67du1S6/syvx83bNiAtWvXYubMmRg3bhx69OiBCRMmSH9MkOa4Z4deWeXKlfHVV19h/PjxuHbtGjp16gQbGxtcvXoVmzZtwqBBg/DZZ59pNHbdunURFhaG7777Dunp6WjevDmOHz+OFStWoFOnTmjZsqWWZ1NU69atpb+SBw8ejKysLHz//fdwcXEp9gO2NMaMGYMNGzbggw8+wEcffYSgoCDcu3cPW7duRWRkJOrWrYs+ffpg3bp1GDJkCGJiYtCkSRMUFBTgwoULWLdunXQ/IX0zNzfHzp07ERYWhkaNGmHHjh3Ytm0bvvjiixLPL3ranDlz0LZtWwQHB2PAgAF4/PgxFi5cCDs7O3z55ZdqfU1NTdG5c2esWbMGDx8+xNy5c4uMt3jxYjRt2hS1a9fGwIEDUalSJaSmpiI2NhZJSUk4c+aMVubdvHlzDB48GDNmzMDp06fRunVrmJqa4tKlS1i/fj2++eYbdO3a9aXGrFevHoyNjTFr1ixkZGTAzMxMup9TcT766CPMnz8foaGhGDBgANLS0hAZGYmaNWsiMzNT6vef//wH9+7dw9tvvw1PT09cv34dCxcuRL169dT2kGpq8ODBWLRoEXr27IkRI0bA3d0dq1evlm5S+Ly9LQ8ePICnpye6du2KunXrwtraGnv37sWJEycwb948AE/C2Q8//IC2bduiZs2a6N+/Pzw8PHDz5k3ExMTA1tYWv//+O4AnwQgA/vvf/6JHjx4wNTXFu+++q7aHUBemTJmCnTt3olmzZhg6dCjy8/OxcOFC1KxZU+38ptL+fkxLS8PHH3+Mli1bShcALFq0CDExMejXrx8OHz7Mw1mvQn8XftHrqKT77BTnt99+E02bNhVWVlbCyspK1KhRQwwbNkwkJCRIfZo3b652j5SnFXeptBBC5OXliSlTpgg/Pz9hamoqvLy8xPjx49UuNRbiyeW+7du3L7J+4SW269evL9XcCi9HLbyMWgghtm7dKurUqSPMzc2Fr6+vmDVrlvjpp5+KXPJaUg3FXTJ89+5dER4eLjw8PIRSqRSenp4iLCxM7V4eubm5YtasWaJmzZrCzMxMODg4iKCgIDFlypQXXo5a0qXnxdUHoMilwIWXj8+ZM6fImJcvX5buAeTq6iomT56sdj+e4tZ92t69e0WTJk2EhYWFsLW1Fe+++644f/58sX337NkjAAiFQiFu3LhRbJ/Lly+Lvn37Cjc3N2Fqaio8PDxEhw4dxIYNG6Q+L/N6Pz3XZ3333XciKChIWFhYCBsbG1G7dm0xduxYcevWLanPy7wPvv/+e1GpUiXpkuUXXba9atUqUalSJaFUKkW9evXErl27ilx6vmHDBtG6dWvh4uIilEql8Pb2FoMHDxbJyclSn5IuPS/u3+ez4wshxJUrV0T79u2FhYWFcHZ2Fp9++qn47bffBADx559/llh/Tk6OGDNmjKhbt66wsbERVlZWom7dumLJkiVF+v7111+ic+fOwsnJSZiZmQkfHx/RrVs3ER0drdZv2rRpwsPDQxgZGb3UZeglXXpe3GXxPj4+IiwsTK3twIEDIigoSCiVSlGpUiURGRlZ7JhCvPj3Y+fOnY
WNjY24du2a2npbtmwRAMSsWbNKNScqnkIILZ51RkSy1q9fP2zYsKHYQwtEERERGDVqFJKSktSugCMyNO4TIyKil/b48WO1x9nZ2Vi2bBmqVq3KoENlDs/ZISKil9a5c2d4e3ujXr16yMjIwKpVq3DhwoUSL83Xp4yMjCJh7Flubm56qobKAoYdIiJ6aaGhofjhhx+wevVqFBQUICAgAGvWrFG7NYChjBgx4oX3E+IZHOULz9khIiJZOX/+fIlfRVFI1/dlorKFYYeIiIhkjScoExERkazxnB08uf39rVu3YGNjU6Zuy09EREQlE0LgwYMHqFix4nNvusiwA+DWrVsaf6kiERERGdaNGzfg6elZ4nKGHQA2NjYAnmwsW1tbA1dTDtSoASQnA+7uwIULhq6GiIheU5mZmfDy8pI+x0vCsIP/fY+Lra0tw44+fPklkJUFWFsD3N5ERPSKXnQKCsMO6d+gQYaugIiIyhFejUVERESyxrBDREREssbDWKR/yclAQQFgbPzkJGUiIiId4p4d0r8GDQAvryf/JSIi0jGGHSIiIpI1hh0iIiKSNYYdIiIikjWGHSIiIpI1hh0iIiKSNYYdIiIikjWGHSIiIpI1hh0iIiKSNYYdIiIikjV+XQTpVWJiIh58+y0UBQUQxsbIOXVKK+NWqFAB3t7eWhmLiIjkhWGH9CYxMRHVq/sjO/uR1sc2N7dEQkI8Aw8RERXBsEN6c+fOnf8POqsA+Gtx5HhkZ/fGnTt3GHaIiKgIhh0yAH8AgYYugoiIygmGHdK7ntgBS5zCI1jiV3xo6HKIiEjmGHZI72bjW3giDUnwYNghIiKd46XnREREJGsMO0RERCRrDDtEREQkaww7REREJGsMO0RERCRrDDtEREQkaww7REREJGsMO0RERCRrvKkg6V0KnACYIgVuhi6FiIjKAYYd0rsGWAV+NxYREekLD2MRERGRrDHsEBERkawx7BAREZGs8Zwd0rtITIcjjHAPjhiCZYYuh4iIZI5hh/SuPQ7DE2lIgoehSyEionKAh7GIiIhI1hh2iIiISNYYdoiIiEjWGHaIiIhI1hh2iIiISNYYdoiIiEjWGHaIiIhI1hh2iIiISNZ4U0HSu18RCgcocR8Ohi6FiIjKAYYd0ruxGAkg0NBlEBFROcHDWERERCRrDDtEREQkaww7REREJGsMO6R38eiMDNgiHjUMXQoREZUDDDukd9Z4DFs8gDWyDF0KERGVAww7REREJGsMO0RERCRrDDtEREQkaww7REREJGsMO0RERCRrDDtEREQkaww7REREJGsMO0RERCRr/NZz0rshGA8LeOAxLAxdChERlQMMO6R32/AWgEBDl0FEROUED2MRERGRrDHsEBERkazxMBbpXSDioUQOcqHEKQQZuhwiIpI5hh3Suy0YDU+kIQke8EKSocshIiKZM+hhrIKCAkycOBF+fn6wsLBA5cqVMW3aNAghpD5CCEyaNAnu7u6wsLBASEgILl26pDbOvXv30KtXL9ja2sLe3h4DBgxAVlaWvqdDREREZZBBw86sWbOwdOlSLFq0CPHx8Zg1axZmz56NhQsXSn1mz56Nb7/9FpGRkTh27BisrKwQGhqK7OxsqU+vXr1w7tw57NmzB3/88QcOHjyIQYMGGWJKREREVMYY9DDW0aNH0bFjR7Rv3x4A4Ovri19//RXHjx8H8GSvTkREBCZMmICOHTsCAFauXAlXV1ds3rwZPXr0QHx8PHbu3IkTJ06gfv36AICFCxeiXbt2mDt3LipWrGiYyREREVGZYNA9O40bN0Z0dDQuXrwIADhz5gwOHz6Mtm3bAgCuXr2KlJQUhISESOvY2dmhUaNGiI2NBQDExsbC3t5eCjoAEBISAiMjIxw7dqzY583JyUFmZqbaDxEREcmTQffsjBs3DpmZmahRowaMjY1RUFCA6dOno1evXgCAlJQUAICrq6vaeq6urtKylJQUuLi4qC03MTGBo6Oj1OdZM2
bMwJQpU7Q9HSIiIiqDDLpnZ926dVi9ejV++eUXnDp1CitWrMDcuXOxYsUKnT7v+PHjkZGRIf3cuHFDp89HREREhmPQPTtjxozBuHHj0KNHDwBA7dq1cf36dcyYMQNhYWFwc3MDAKSmpsLd3V1aLzU1FfXq1QMAuLm5IS0tTW3c/Px83Lt3T1r/WWZmZjAzM9PBjIiIiKisMeienUePHsHISL0EY2NjqFQqAICfnx/c3NwQHR0tLc/MzMSxY8cQHBwMAAgODkZ6ejri4uKkPvv27YNKpUKjRo30MAsiIiIqywy6Z+fdd9/F9OnT4e3tjZo1a+Kvv/7C/Pnz8dFHHwEAFAoFRo4cia+++gpVq1aFn58fJk6ciIoVK6JTp04AAH9/f7Rp0wYDBw5EZGQk8vLyEB4ejh49evBKLCIiIjJs2Fm4cCEmTpyIoUOHIi0tDRUrVsTgwYMxadIkqc/YsWPx8OFDDBo0COnp6WjatCl27twJc3Nzqc/q1asRHh6OVq1awcjICF26dMG3335riClRKfhjAxSoCwGFoUshIqJyQCGevl1xOZWZmQk7OztkZGTA1tbW0OXI1qlTpxAUFAQgDkCgNkcGEIS4uDgEBmpzXCIiKstK+/nNbz0nIiIiWWPYISIiIlnjt56T3o3CKthiKzJhiwUYbehyiIhI5hh2SO9GYzU8kYYkeDDsEBGRzvEwFhEREckaww4RERHJGsMOERERyRrDDhEREckaww4RERHJGsMOERERyRrDDhEREckaww4RERHJGm8qSHp3CjVwA5VwG86GLoWIiMoBhh3Su45YAO1+6zkREVHJeBiLiIiIZI1hh4iIiGSNYYeIiIhkjefskN5twSg4Ixe34YyO2GrocoiISOYYdkjvAnEBnkhDEjwMXQoREZUDPIxFREREssawQ0RERLLGsENERESyxrBDREREssawQ0RERLLGsENERESyxrBDREREssawQ0RERLLGmwqS3s1HL9jCFpmwNXQpRERUDjDskN4tQG8AgYYug4iIygkexiIiIiJZY9ghIiIiWeNhLNI7azyEApkQUCALNoYuh4iIZI57dkjv4tEVmbBDPPwNXQoREZUDDDtEREQkaww7REREJGsMO0RERCRrDDtEREQkaww7REREJGsMO0RERCRrDDtEREQkaww7REREJGsMO0RERCRr/LoI0ruOmA8lKiEXSkOXQkRE5QDDDundKfgDCDR0GUREVE7wMBYRERHJGsMOERERyRoPY5HetcdBWOAyHsMC29DB0OUQEZHMMeyQ3kViBjyRhiR4wAtJhi6HiIhkjoexiIiISNYYdoiIiEjWGHaIiIhI1hh2iIiISNYYdoiIiEjWGHaIiIhI1hh2iIiISNYYdoiIiEjWGHZI77JggUzYIAvWhi6FiIjKAd5BmfTOHxvBbz0nIiJ94Z4dIiIikjWGHSIiIpI1hh0iIiKSNZ6zQ3o3GxFwgBL34YCxmGPocoiISOYYdkjvemIXPJGGJHgw7BARkc4Z/DDWzZs30bt3bzg5OcHCwgK1a9fGyZMnpeVCCEyaNAnu7u6wsLBASEgILl26pDbGvXv30KtXL9ja2sLe3h4DBgxAVlaWvqdCREREZZBBw879+/fRpEkTmJqaYseOHTh//jzmzZsHBwcHqc/s2bPx7bffIjIyEseOHYOVlRVCQ0ORnZ0t9enVqxfOnTuHPXv24I8//sDBgwcxaNAgQ0yJiIiIyhiDHsaaNWsWvLy8EBUVJbX5+flJ/y+EQEREBCZMmICOHTsCAFauXAlXV1ds3rwZPXr0QHx8PHbu3IkTJ06gfv36AICFCxeiXbt2mDt3LipWrKjfSREREVGZYtA9O1u3bkX9+vXxwQcfwMXFBW+88Qa+//57afnVq1eRkpKCkJAQqc3Ozg6NGjVCbGwsACA2Nhb29vZS0AGAkJAQGBkZ4dixY/qbDBEREZVJBg07V65cwdKlS1G1alXs2rULH3/8MYYPH44VK1
YAAFJSUgAArq6uauu5urpKy1JSUuDi4qK23MTEBI6OjlKfZ+Xk5CAzM1Pth4iIiOTJoIexVCoV6tevj6+//hoA8MYbb+Ds2bOIjIxEWFiYzp53xowZmDJlis7GJyIiorLDoHt23N3dERAQoNbm7++PxMREAICbmxsAIDU1Va1PamqqtMzNzQ1paWlqy/Pz83Hv3j2pz7PGjx+PjIwM6efGjRtamQ8RERGVPQYNO02aNEFCQoJa28WLF+Hj4wPgycnKbm5uiI6OlpZnZmbi2LFjCA4OBgAEBwcjPT0dcXFxUp99+/ZBpVKhUaNGxT6vmZkZbG1t1X6IiIhIngx6GGvUqFFo3Lgxvv76a3Tr1g3Hjx/Hd999h++++w4AoFAoMHLkSHz11VeoWrUq/Pz8MHHiRFSsWBGdOnUC8GRPUJs2bTBw4EBERkYiLy8P4eHh6NGjB6/EKqO2oSkcYYR7cDR0KUREVA4YNOw0aNAAmzZtwvjx4zF16lT4+fkhIiICvXr1kvqMHTsWDx8+xKBBg5Ceno6mTZti586dMDc3l/qsXr0a4eHhaNWqFYyMjNClSxd8++23hpgSlcIQ/BdAoKHLICKickIhhBCGLsLQMjMzYWdnh4yMDB7S0qFTp04hKCgIQBy0G3ZOAQhCXFwcAgMZooiIyovSfn5rdM7OlStXNC6MiIiISJ80CjtVqlRBy5YtsWrVKrWvbSAiIiIqazQKO6dOnUKdOnUwevRouLm5YfDgwTh+/Li2ayOZOoHeuAFPnED9F3cmIiJ6RRqFnXr16uGbb77BrVu38NNPPyE5ORlNmzZFrVq1MH/+fNy+fVvbdZKMuOEuPHETbij+DtdERETa9Er32TExMUHnzp2xfv16zJo1C//++y8+++wzeHl5oW/fvkhOTtZWnUREREQaeaWwc/LkSQwdOhTu7u6YP38+PvvsM1y+fBl79uzBrVu3pG8qJyIiIjIUje6zM3/+fERFRSEhIQHt2rXDypUr0a5dOxgZPclOfn5+WL58OXx9fbVZKxEREdFL0yjsLF26FB999BH69esHd3f3Yvu4uLjgxx9/fKXiiIiIiF6VRmHn0qVLL+yjVCp1+s3lRERERKWh0Tk7UVFRWL9+fZH29evXY8WKFa9cFBEREZG2aBR2ZsyYgQoVKhRpd3Fxwddff/3KRRERERFpi0ZhJzExEX5+fkXafXx8kJiY+MpFEREREWmLRufsuLi44O+//y5ytdWZM2fg5OSkjbpIxsZiOCzhikewNHQpRERUDmgUdnr27Inhw4fDxsYGb731FgDgwIEDGDFiBHr06KHVAkl+fkVbaPdbz4mIiEqmUdiZNm0arl27hlatWsHE5MkQKpUKffv25Tk7REREVKZoFHaUSiXWrl2LadOm4cyZM7CwsEDt2rXh4+Oj7fqIiIiIXolGYadQtWrVUK1aNW3VQuVENVyDCcyQDxNcRHVDl0NERDKnUdgpKCjA8uXLER0djbS0NKhUKrXl+/bt00pxJE/R+BieSEMSPOCFJEOXQ0REMqdR2BkxYgSWL1+O9u3bo1atWlAoFNqui4iIiEgrNAo7a9aswbp169CuXTtt10NERESkVRrdVFCpVKJKlSraroWIiIhI6zQKO59++im++eYbCCG0XQ8RERGRVml0GOvw4cOIiYnBjh07ULNmTZiamqot37hxo1aKIyIiInpVGoUde3t7vP/++9quhYiIiEjrNAo7UVFR2q6DiIiISCc0OmcHAPLz87F3714sW7YMDx48AADcunULWVlZWiuOiIiI6FVptGfn+vXraNOmDRITE5GTk4N33nkHNjY2mDVrFnJychAZGantOomIiIg0otGenREjRqB+/fq4f/8+LCwspPb3338f0dHRWiuO5KkBVsITN9AAJwxdChERlQMa7dk5dOgQjh49CqVSqdbu6+uLmzdvaqUwkq8UOAPwNHQZRERUTmi0Z0elUqGgoKBIe1JSEmxsbF65KCIiIiJt0S
jstG7dGhEREdJjhUKBrKwsTJ48mV8hQURERGWKRoex5s2bh9DQUAQEBCA7OxsffvghLl26hAoVKuDXX3/Vdo0kMwOxEdbYjyxY43sMMnQ5REQkcxqFHU9PT5w5cwZr1qzB33//jaysLAwYMAC9evVSO2GZqDiT8D08kYYkeDDsEBGRzmkUdgDAxMQEvXv31mYtRERERFqnUdhZuXLlc5f37dtXo2KIiIiItE2jsDNixAi1x3l5eXj06BGUSiUsLS0ZdoiIiKjM0OhqrPv376v9ZGVlISEhAU2bNuUJykRERFSmaPzdWM+qWrUqZs6cWWSvDxEREZEhaS3sAE9OWr5165Y2hyQiIiJ6JRqds7N161a1x0IIJCcnY9GiRWjSpIlWCiMiIiLSBo3CTqdOndQeKxQKODs74+2338a8efO0URcRERGRVmgUdlQqlbbroHLkIryRgQpIhauhSyEionJA45sKEmmqFZYBCDR0GUREVE5oFHZGjx5d6r7z58/X5CmIiIiItEKjsPPXX3/hr7/+Ql5eHqpXrw4AuHjxIoyNjREY+L+/2BUKhXaqJCIiItKQRmHn3XffhY2NDVasWAEHBwcAT2402L9/fzRr1gyffvqpVoskIiIi0pRGYWfevHnYvXu3FHQAwMHBAV999RVat27NsEPPtQr/RQWocAcV0BurDV0OERHJnEZhJzMzE7dv3y7Sfvv2bTx48OCViyJ5a45T8EQakuBh6FKIiKgc0OgOyu+//z769++PjRs3IikpCUlJSfjtt98wYMAAdO7cWds1EhEREWlMoz07kZGR+Oyzz/Dhhx8iLy/vyUAmJhgwYADmzJmj1QKJiIiIXoVGYcfS0hJLlizBnDlzcPnyZQBA5cqVYWVlpdXiiIiIiF7VK30RaHJyMpKTk1G1alVYWVlBCKGtuoiIiIi0QqOwc/fuXbRq1QrVqlVDu3btkJycDAAYMGAAr8QiIiKiMkWjsDNq1CiYmpoiMTERlpaWUnv37t2xc+dOrRVHRERE9Ko0Omdn9+7d2LVrFzw9PdXaq1atiuvXr2ulMCIiIiJt0GjPzsOHD9X26BS6d+8ezMzMXrkoIiIiIm3RKOw0a9YMK1eulB4rFAqoVCrMnj0bLVu21FpxJE/f433Mxyh8j4GGLoWIiMoBjQ5jzZ49G61atcLJkyeRm5uLsWPH4ty5c7h37x6OHDmi7RpJZqZiEIDAF/YjIiLSBo327NSqVQsXL15E06ZN0bFjRzx8+BCdO3fGX3/9hcqVK2u7RiIiIiKNvfSenby8PLRp0waRkZH473//q4uaiIiIiLTmpffsmJqa4u+//9ZFLURERERap9FhrN69e+PHH3/Udi1UTtxAWwgocAOeL+5MRET0ijQ6QTk/Px8//fQT9u7di6CgoCLfiTV//nytFEdERET0ql4q7Fy5cgW+vr44e/YsAgOfXE1z8eJFtT4KhUJ71RERERG9opc6jFW1alXcuXMHMTExiImJgYuLC9asWSM9jomJwb59+zQqZObMmVAoFBg5cqTUlp2djWHDhsHJyQnW1tbo0qULUlNT1dZLTExE+/btYWlpCRcXF4wZMwb5+fka1UBERETy81Jh59lvNd+xYwcePnz4ykWcOHECy5YtQ506ddTaR40ahd9//x3r16/HgQMHcOvWLXTu3FlaXlBQgPbt2yM3NxdHjx7FihUrsHz5ckyaNOmVayIiIiJ50OgE5ULPhh9NZGVloVevXvj+++/h4OAgtWdkZODHH3/E/Pnz8fbbbyMoKAhRUVE4evQo/vzzTwBPvqPr/PnzWLVqFerVq4e2bdti2rRpWLx4MXJzc1+5NiIiInr9vVTYUSgURc7JedVzdIYNG4b27dsjJCRErT0uLg55eXlq7TVq1IC3tzdiY2MBALGxsahduzZcXV2lPqGhocjMzMS5c+dKfM6cnBxkZmaq/RAREZE8vdQJykII9OvXT/qyz+zsbAwZMqTI1VgbN24s1Xhr1q
zBqVOncOLEiSLLUlJSoFQqYW9vr9bu6uqKlJQUqc/TQadweeGyksyYMQNTpkwpVY1ERET0enupsBMWFqb2uHfv3ho/8Y0bNzBixAjs2bMH5ubmGo+jifHjx2P06NHS48zMTHh5eem1BiIiItKPlwo7UVFRWnviuLg4pKWlSZewA09OOD548CAWLVqEXbt2ITc3F+np6Wp7d1JTU+Hm5gYAcHNzw/Hjx9XGLbxaq7BPcczMzKS9U0RERCRvGt1UUBtatWqFf/75R62tf//+qFGjBj7//HN4eXnB1NQU0dHR6NKlCwAgISEBiYmJCA4OBgAEBwdj+vTpSEtLg4uLCwBgz549sLW1RUBAgH4nRKXWG9NgBh/kgIGTiIh0z2Bhx8bGBrVq1VJrs7KygpOTk9Q+YMAAjB49Go6OjrC1tcUnn3yC4OBgvPnmmwCA1q1bIyAgAH369MHs2bORkpKCCRMmYNiwYdxzU4YdQH0AgS/sR0REpA0GCzulsWDBAhgZGaFLly7IyclBaGgolixZIi03NjbGH3/8gY8//hjBwcGwsrJCWFgYpk6dasCqiYiIqCxRCG3cLOc1l5mZCTs7O2RkZMDW1tbQ5cjWqVOnEBQUBCAO2t2zcwpAEOLi4tTOASMiInkr7ed3md6zQ/LUHCdhhtvIgRkOoIWhyyEiIplj2CG9W4WJ8EQakuABLyQZuhwiIpK5V/q6CCIiIqKyjmGHiIiIZI1hh4iIiGSNYYeIiIhkjWGHiIiIZI1hh4iIiGSNYYeIiIhkjWGHiIiIZI1hh4iIiGSNd1AmvfPCDvBbz4mISF+4Z4eIiIhkjWGHiIiIZI1hh4iIiGSN5+yQ3k3Cd7CDJTJgh6mYbOhyiIhI5hh2SO8GYhM8kYYkeDDsEBGRzvEwFhEREckaww4RERHJGsMOERERyRrDDhEREckaww4RERHJGsMOERERyRrDDhEREckaww4RERHJGm8qSHp3AIGoABXuoIKhSyEionKAYYf0rjemAwg0dBlERFRO8DAWERERyRrDDhEREckaww4RERHJGs/ZIb2LxmC44hFS4YpW2GfocoiISOYYdkjvqiERnkiDHTIMXQoREZUDPIxFREREssawQ0RERLLGsENERESyxrBDREREssawQ0RERLLGsENERESyxrBDREREssawQ0RERLLGmwqS3k3FQFjDEVmwNnQpRERUDjDskN59j84AAg1dBhERlRM8jEVERESyxrBDREREssbDWKR3brgNYyShAMZIgbuhyyEiIplj2CG9O4G+8EQakuABLyQZuhwiIpI5HsYiIiIiWWPYISIiIllj2CEiIiJZY9ghIiIiWWPYISIiIllj2CEiIiJZY9ghIiIiWWPYISIiIllj2CEiIiJZ4x2USe9aYSlMUB35fPsREZEe8NOG9O4ifAHUNHQZRERUTvAwFhEREckaww4RERHJGg9jkd71xA5Y4hQewRK/4kNDl0NERDLHsEN6NxvfwhNpSIIHww4REekcD2MRERGRrDHsEBERkawx7BAREZGsGTTszJgxAw0aNICNjQ1cXFzQqVMnJCQkqPXJzs7GsGHD4OTkBGtra3Tp0gWpqalqfRITE9G+fXtYWlrCxcUFY8aMQX5+vj6nQkRERGWUQcPOgQMHMGzYMPz555/Ys2cP8vLy0Lp1azx8+FDqM2rUKPz+++9Yv349Dhw4gFu3bqFz587S8oKCArRv3x65ubk4evQoVqxYgeXLl2PSpEmGmBIRERGVMQohhDB0EYVu374NFxcXHDhwAG+99RYyMjLg7OyMX375BV27dgUAXLhwAf7+/oiNjcWbb76JHTt2oEOHDrh16xZcXV0BAJGRkfj8889x+/ZtKJXKFz5vZmYm7OzskJGRAVtbW53OsTw7deoUgoKCcAMu0tVYXkjSxsgAghAXF4fAwEAtjEdERK+D0n5+l6lzdjIyMgAAjo6OAIC4uDjk5eUhJCRE6lOjRg14e3sjNjYWABAbG4
vatWtLQQcAQkNDkZmZiXPnzhX7PDk5OcjMzFT7ISIiInkqM2FHpVJh5MiRaNKkCWrVqgUASElJgVKphL29vVpfV1dXpKSkSH2eDjqFywuXFWfGjBmws7OTfry8vLQ8GyIiIiorykzYGTZsGM6ePYs1a9bo/LnGjx+PjIwM6efGjRs6f076nxQ4IQkeSIGboUshIqJyoEzcQTk8PBx//PEHDh48CE9PT6ndzc0Nubm5SE9PV9u7k5qaCjc3N6nP8ePH1cYrvFqrsM+zzMzMYGZmpuVZUGk1wCoAPLeGiIj0w6B7doQQCA8Px6ZNm7Bv3z74+fmpLQ8KCoKpqSmio6OltoSEBCQmJiI4OBgAEBwcjH/++QdpaWlSnz179sDW1hYBAQH6mQgRERGVWQbdszNs2DD88ssv2LJlC2xsbKRzbOzs7GBhYQE7OzsMGDAAo0ePhqOjI2xtbfHJJ58gODgYb775JgCgdevWCAgIQJ8+fTB79mykpKRgwoQJGDZsGPfeEBERkWHDztKlSwEALVq0UGuPiopCv379AAALFiyAkZERunTpgpycHISGhmLJkiVSX2NjY/zxxx/4+OOPERwcDCsrK4SFhWHq1Kn6mgYRERGVYQYNO6W5xY+5uTkWL16MxYsXl9jHx8cH27dv12ZppEORmA5HGOEeHDEEywxdDhERyVyZOEGZypf2OCzdVJCIiEjXysyl50RERES6wLBDREREssawQ0RERLLGsENERESyxrBDREREssawQ0RERLLGsENERESyxrBDREREssabCpLe/YpQOECJ+3AwdClERFQOMOyQ3o3FSACBhi6DiIjKCR7GIiIiIllj2CEiIiJZY9ghIiIiWWPYIb2LR2dkwBbxqGHoUoiIqBxg2CG9s8Zj2OIBrJFl6FKIiKgcYNghIiIiWWPYISIiIllj2CEiIiJZY9ghIiIiWWPYISIiIllj2CEiIiJZY9ghIiIiWWPYISIiIlnjt56T3g3BeFjAA49hYehSiIioHGDYIb3bhrcABBq6DCIiKid4GIuIiIhkjWGHiIiIZI2HsUjvAhEPJXKQCyVOIcjQ5RARkcwx7JDebcFoeCINSfCAF5IMXQ4REckcD2MRERGRrDHsEBERkawx7BAREZGsMewQERGRrDHsEBERkawx7BAREZGsMewQERGRrDHsEBERkawx7BAREZGs8Q7KpHf+2AAF6kJAYehSiIioHGDYIb3LghUAW0OXQURE5QQPYxEREZGsMewQERGRrPEwFundKKyCLbYiE7ZYgNGGLoeIiGSOYYf0bjRWwxNpSIIHww4REekcD2MRERGRrDHsEBERkawx7BAREZGsMewQERGRrDHsEBERkawx7BAREZGsMewQERGRrDHsEBERkazxpoKkd6dQAzdQCbfhbOhSiIioHGDYIb3riAUAAg1dBhERlRM8jEVERESyxrBDREREssawQ0RERLLGc3ZI77ZgFJyRi9twRkdsNXQ5BpOYmIg7d+7oZOwKFSrA29tbJ2MTEb1uGHZI7wJxAZ5IQxI8DF2KwSQmJqJ6dX9kZz/Syfjm5pZISIhn4CEiAsMOFUNXexzi4+O1Pubr6s6dO/8fdFYB8Nfy6PHIzu6NO3fuMOwQEYFhh56h6z0O9Cx/8DJ8IiLdYtghNbrd47AdwEQtj0lERPR8sgk7ixcvxpw5c5CSkoK6deti4cKFaNiwoaHLeo3pYo8DD2MREZH+ySLsrF27FqNHj0ZkZCQaNWqEiIgIhIaGIiEhAS4uLoYuj15zujiHiecvERHpjyzCzvz58zFw4ED0798fABAZGYlt27bhp59+wrhx4wxaGy8vfr3xHCYiotffax92cnNzERcXh/Hjx0ttRkZGCAkJQWxsrAEr0/0HpZmZOX77bQPc3d21NubrvMdBF7XHx8fr6Bwmnr9ERGWPrv5AN/Qf56992Llz5w4KCgrg6uqq1u7q6ooLFy4Uu05OTg5ycnKkxxkZGQCAzMxMrd
Z27dq1//+gHAPAS6tjA+eQk/MdOnTooOVxC8UByNLymE/CyAPkIhPAA+QAOKiFcWMBKNC7d28tjFWSR9Du9sj+///qYjsnPBk5Lg5ZWdoe+8kfEyqV6rUZV5djs2b9jM2a9TN2amoqevcOQ27uY62PbWZmgbi4E/Dy0u5nYeHnthDiuf1e+7CjiRkzZmDKlClF2rX9IvzPHB2Nq0uDdDZyANL////uAGius+fRLl1tD91t50GDdDc2EdHLyMl5jFq1auls/AcPHsDOzq7E5a992KlQoQKMjY2Rmpqq1p6amgo3N7di1xk/fjxGjx4tPVapVLh37x6cnJzw4MEDeHl54caNG7C1tdVp7eVVZmYmt7GOcRvrHrex7nEb697rvo2FEHjw4AEqVqz43H6vfdhRKpUICgpCdHQ0OnXqBOBJeImOjkZ4eHix65iZmcHMzEytzd7eHgCgUCgAALa2tq/lC/864TbWPW5j3eM21j1uY917nbfx8/boFHrtww4AjB49GmFhYahfvz4aNmyIiIgIPHz4ULo6i4iIiMovWYSd7t274/bt25g0aRJSUlJQr1497Ny5s8hJy0RERFT+yCLsAEB4eHiJh61ehpmZGSZPnlzkMBdpD7ex7nEb6x63se5xG+teednGCvGi67WIiIiIXmNGhi6AiIiISJcYdoiIiEjWGHaIiIhI1hh2iIiISNbKXdi5d+8eevXqBVtbW9jb22PAgAEv/P6g7OxsDBs2DE5OTrC2tkaXLl3U7th85swZ9OzZE15eXrCwsIC/vz+++eYbXU+lzFi8eDF8fX1hbm6ORo0a4fjx48/tv379etSoUQPm5uaoXbs2tm/frrZcCIFJkybB3d0dFhYWCAkJwaVLl3Q5hTJPm9s4Ly8Pn3/+OWrXrg0rKytUrFgRffv2xa1bt3Q9jTJN2+/jpw0ZMgQKhQIRERFarvr1oottHB8fj/feew92dnawsrJCgwYNkJiYqKsplHna3sZZWVkIDw+Hp6cnLCwsEBAQgMjISF1OQTdEOdOmTRtRt25d8eeff4pDhw6JKlWqiJ49ez53nSFDhggvLy8RHR0tTp48Kd58803RuHFjafmPP/4ohg8fLvbv3y8uX74sfv75Z2FhYSEWLlyo6+kY3Jo1a4RSqRQ//fSTOHfunBg4cKCwt7cXqampxfY/cuSIMDY2FrNnzxbnz58XEyZMEKampuKff/6R+sycOVPY2dmJzZs3izNnzoj33ntP+Pn5icePH+trWmWKtrdxenq6CAkJEWvXrhUXLlwQsbGxomHDhiIoKEif0ypTdPE+LrRx40ZRt25dUbFiRbFgwQIdz6Ts0sU2/vfff4Wjo6MYM2aMOHXqlPj333/Fli1bShxT7nSxjQcOHCgqV64sYmJixNWrV8WyZcuEsbGx2LJli76mpRXlKuycP39eABAnTpyQ2nbs2CEUCoW4efNmseukp6cLU1NTsX79eqktPj5eABCxsbElPtfQoUNFy5YttVd8GdWwYUMxbNgw6XFBQYGoWLGimDFjRrH9u3XrJtq3b6/W1qhRIzF48GAhhBAqlUq4ubmJOXPmSMvT09OFmZmZ+PXXX3Uwg7JP29u4OMePHxcAxPXr17VT9GtGV9s4KSlJeHh4iLNnzwofH59yHXZ0sY27d+8uevfurZuCX0O62MY1a9YUU6dOVesTGBgo/vvf/2qxct0rV4exYmNjYW9vj/r160ttISEhMDIywrFjx4pdJy4uDnl5eQgJCZHaatSoAW9vb8TGxpb4XBkZGXB0dNRe8WVQbm4u4uLi1LaNkZERQkJCStw2sbGxav0BIDQ0VOp/9epVpKSkqPWxs7NDo0aNnru95UoX27g4GRkZUCgU0nfElSe62sYqlQp9+vTBmDFjULNmTd0U/5rQxTZWqVTYtm0bqlWrhtDQULi4uKBRo0bYvHmzzuZRlunqfdy4cWNs3boVN2/ehBACMTExuHjxIlq3bq2biehIuQo7KSkpcH
FxUWszMTGBo6MjUlJSSlxHqVQW+RBwdXUtcZ2jR49i7dq1GDRokFbqLqvu3LmDgoKCIl/L8bxtk5KS8tz+hf99mTHlTBfb+FnZ2dn4/PPP0bNnz9f2iwBfha628axZs2BiYoLhw4drv+jXjC62cVpaGrKysjBz5ky0adMGu3fvxvvvv4/OnTvjwIEDuplIGaar9/HChQsREBAAT09PKJVKtGnTBosXL8Zbb72l/UnokCy+LmLcuHGYNWvWc/vEx8frpZazZ8+iY8eOmDx58muXfKn8ycvLQ7du3SCEwNKlSw1djmzExcXhm2++walTp6BQKAxdjiypVCoAQMeOHTFq1CgAQL169XD06FFERkaiefPmhixPNhYuXIg///wTW7duhY+PDw4ePIhhw4ahYsWKRfYKlWWyCDuffvop+vXr99w+lSpVgpubG9LS0tTa8/Pzce/ePbi5uRW7npubG3Jzc5Genq62dyc1NbXIOufPn0erVq0waNAgTJgwQaO5vE4qVKgAY2NjtSvTgOK3TSE3N7fn9i/8b2pqKtzd3dX61KtXT4vVvx50sY0LFQad69evY9++feVyrw6gm2186NAhpKWlwdvbW1peUFCATz/9FBEREbh27Zp2J1HG6WIbV6hQASYmJggICFDr4+/vj8OHD2ux+teDLrbx48eP8cUXX2DTpk1o3749AKBOnTo4ffo05s6d+1qFHVkcxnJ2dkaNGjWe+6NUKhEcHIz09HTExcVJ6+7btw8qlQqNGjUqduygoCCYmpoiOjpaaktISEBiYiKCg4OltnPnzqFly5YICwvD9OnTdTfZMkSpVCIoKEht26hUKkRHR6ttm6cFBwer9QeAPXv2SP39/Pzg5uam1iczMxPHjh0rcUw508U2Bv4XdC5duoS9e/fCyclJNxN4DehiG/fp0wd///03Tp8+Lf1UrFgRY8aMwa5du3Q3mTJKF9tYqVSiQYMGSEhIUOtz8eJF+Pj4aHkGZZ8utnFeXh7y8vJgZKQeFYyNjaU9a68NQ58hrW9t2rQRb7zxhjh27Jg4fPiwqFq1qtql50lJSaJ69eri2LFjUtuQIUOEt7e32Ldvnzh58qQIDg4WwcHB0vJ//vlHODs7i969e4vk5GTpJy0tTa9zM4Q1a9YIMzMzsXz5cnH+/HkxaNAgYW9vL1JSUoQQQvTp00eMGzdO6n/kyBFhYmIi5s6dK+Lj48XkyZOLvfTc3t5ebNmyRfz999+iY8eO5f7Sc21u49zcXPHee+8JT09Pcfr0abX3bE5OjkHmaGi6eB8/q7xfjaWLbbxx40ZhamoqvvvuO3Hp0iWxcOFCYWxsLA4dOqT3+ZUFutjGzZs3FzVr1hQxMTHiypUrIioqSpibm4slS5bofX6votyFnbt374qePXsKa2trYWtrK/r37y8ePHggLb969aoAIGJiYqS2x48fi6FDhwoHBwdhaWkp3n//fZGcnCwtnzx5sgBQ5MfHx0ePMzOchQsXCm9vb6FUKkXDhg3Fn3/+KS1r3ry5CAsLU+u/bt06Ua1aNaFUKkXNmjXFtm3b1JarVCoxceJE4erqKszMzESrVq1EQkKCPqZSZmlzGxe+x4v7efp9X95o+338rPIedoTQzTb+8ccfRZUqVYS5ubmoW7eu2Lx5s66nUaZpexsnJyeLfv36iYoVKwpzc3NRvXp1MW/ePKFSqfQxHa1RCCGEYfYpEREREemeLM7ZISIiIioJww4RERHJGsMOERERyRrDDhEREckaww4RERHJGsMOERERyRrDDhEREckaww4RkY74+voiIiLCIM+dm5uLKlWq4OjRo6Xq6+vri5MnT+qhMiL9Y9gheg3169cPnTp1MnQZsvWikLJ//34oFIrn/uzfvx8nTpzAoEGD9Ff4UyIjI+Hn54fGjRu/sK9SqcRnn32Gzz//XA+VEekfww4RlVpBQcHr9wWAOtC4cWMkJydLP926dUObNm3U2ho3bgxnZ2dYWlrqvT4hBB
YtWoQBAwaUep1evXrh8OHDOHfunA4rIzIMhh0iGWjRogU++eQTjBw5Eg4ODnB1dcX333+Phw8fon///rCxsUGVKlWwY8cOaZ3CvRPbtm1DnTp1YG5ujjfffBNnz56V+ixfvhz29vbYunUrAgICYGZmhsTERNy/fx99+/aFg4MDLC0t0bZtW1y6dAnAk2+pt7CwUHsuANi0aRNsbGzw6NEjAMCNGzfQrVs32Nvbw9HRER07dsS1a9ek/oV7r77++mu4urrC3t4eU6dORX5+PsaMGQNHR0d4enoiKipK7XlKO+7cuXPh7u4OJycnDBs2DHl5edK2vH79OkaNGiXtpXmWUqmEm5ub9GNhYQEzMzO1NqVSWWQPkUKhwLJly9ChQwdYWlrC398fsbGx+Pfff9GiRQtYWVmhcePGuHz5strzbdmyBYGBgTA3N0elSpUwZcoU5Ofnl/h+iIuLw+XLl9G+fXupLTc3F+Hh4XB3d4e5uTl8fHwwY8YMabmDgwOaNGmCNWvWlDgu0euKYYdIJlasWIEKFSrg+PHj+OSTT/Dxxx/jgw8+QOPGjXHq1Cm0bt0affr0kcJGoTFjxmDevHk4ceIEnJ2d8e6770of/ADw6NEjzJo1Cz/88APOnTsHFxcX9OvXDydPnsTWrVsRGxsLIQTatWuHvLw82NraokOHDvjll1/Unmf16tXo1KkTLC0tkZeXh9DQUNjY2ODQoUM4cuQIrK2t0aZNG+Tm5krr7Nu3D7du3cLBgwcxf/58TJ48GR06dICDgwOOHTuGIUOGYPDgwUhKSgKAUo8bExODy5cvIyYmBitWrMDy5cuxfPlyAMDGjRvh6emJqVOnSntptGnatGno27cvTp8+jRo1auDDDz/E4MGDMX78eJw8eRJCCISHh0v9Dx06hL59+2LEiBE4f/48li1bhuXLl2P69OklPsehQ4dQrVo12NjYSG3ffvsttm7dinXr1iEhIQGrV6+Gr6+v2noNGzbEoUOHtDpfojLBoF9DSkQaCQsLEx07dpQeN2/eXDRt2lR6nJ+fL6ysrESfPn2ktuTkZAFAxMbGCiGEiImJEQDEmjVrpD53794VFhYWYu3atUIIIaKiogQAcfr0aanPxYsXBQBx5MgRqe3OnTvCwsJCrFu3TgghxKZNm4S1tbV4+PChEEKIjIwMYW5uLnbs2CGEEOLnn38W1atXV/vm5JycHGFhYSF27dolzdHHx0cUFBRIfapXry6aNWtWZJ6//vrrS4+bn58v9fnggw9E9+7dpccv+w3lz74eJY0DQEyYMEF6HBsbKwCIH3/8UWr79ddfhbm5ufS4VatW4uuvv1Yb9+effxbu7u4l1jNixAjx9ttvq7V98skn4u23337ut1V/8803wtfXt8TlRK8r7tkhkok6depI/29sbAwnJyfUrl1banN1dQUApKWlqa0XHBws/b+joyOqV6+O+Ph4qU2pVKqNHR8fDxMTEzRq1Ehqc3JyUluvXbt2MDU1xdatWwEAv/32G2xtbRESEgIAOHPmDP7991/Y2NjA2toa1tbWcHR0RHZ2ttohnJo1a8LI6H+/plxdXdXmVDjPwjm9zLjGxsbSY3d39yLbRVee3paFr8mzr1N2djYyMzMBPJnT1KlTpflYW1tj4MCBSE5OLrKXrtDjx49hbm6u1tavXz+cPn0a1atXx/Dhw7F79+4i61lYWJQ4JtHrzMTQBRCRdpiamqo9VigUam2F55687AnGFhYWxZ638jxKpRJdu3bFL7/8gh49euCXX35B9+7dYWLy5FdOVlYWgoKCsHr16iLrOjs7S///ojkVthXO6VXG1deJ18W9Js97nbKysjBlyhR07ty5yFjPBppCFSpUwD///KPWFhgYiKtXr2LHjh3Yu3cvunXrhpCQEGzYsEHqc+/ePbXtRCQXDDtE5dyff/4Jb29vAMD9+/dx8eJF+Pv7l9jf398f+fn5OHbsmHRZ8927d5GQkICAgACpX69evfDOO+
/g3Llz2LdvH7766itpWWBgINauXQsXFxfY2tpqbS7aGlepVKKgoEBrdb2KwMBAJCQkoEqVKqVe54033sDSpUshhFALqra2tujevTu6d++Orl27ok2bNrh37x4cHR0BAGfPnsUbb7yh9TkQGRoPYxGVc1OnTkV0dDTOnj2Lfv36oUKFCs+9h0/VqlXRsWNHDBw4EIcPH8aZM2fQu3dveHh4oGPHjlK/t956C25ubujVqxf8/PzUDnv16tULFSpUQMeOHXHo0CFcvXoV+/fvx/Dhw6WTjTWhrXF9fX1x8OBB3Lx5E3fu3NG4Hm2YNGkSVq5ciSlTpuDcuXOIj4/HmjVrMGHChBLXadmyJbKystQuI58/fz5+/fVXXLhwARcvXsT69evh5uYGe3t7qc+hQ4fQunVrXU6HyCAYdojKuZkzZ2LEiBEICgpCSkoKfv/9dyiVyueuExUVhaCgIHTo0AHBwcEQQmD79u1FDsf07NkTZ86cQa9evdTWt7S0xMGDB+Ht7Y3OnTvD398fAwYMQHZ29ivtkdHWuFOnTsW1a9dQuXJlgx/WCQ0NxR9//IHdu3ejQYMGePPNN7FgwQL4+PiUuI6TkxPef/99tcN5NjY2mD17NurXr48GDRrg2rVr2L59u3ROVGxsLDIyMtC1a1edz4lI3xRCCGHoIohI//bv34+WLVvi/v37an/dkzz8/fffeOedd3D58mVYW1u/sH/37t1Rt25dfPHFF3qojki/uGeHiEiG6tSpg1mzZuHq1asv7Jubm4vatWtj1KhReqiMSP+4Z4eonOKeHSIqLxh2iIiISNZ4GIuIiIhkjWGHiIiIZI1hh4iIiGSNYYeIiIhkjWGHiIiIZI1hh4iIiGSNYYeIiIhkjWGHiIiIZI1hh4iIiGTt/wCt2AhW8u02CAAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"\n",
"n_nums = 1000\n",
"n_rows = 100000\n",
"# Creating a large DataFrame with thousands of rows\n",
"np.random.seed(0) # Seed for reproducibility\n",
"df = pd.DataFrame({\n",
" 'foo': np.random.choice(n_nums, n_rows), # 1000 unique values repeated over 100000 rows\n",
" 'bar': np.random.rand(n_rows),\n",
" 'baz': np.random.choice(['bonjour','merci','bon nuit', 'enchante'], n_rows),\n",
" 'fruit': np.random.choice(['fraise','banana','orange', 'raisin'], n_rows)\n",
"})\n",
"\n",
"# Set 'foo' as index once before the loop\n",
"df.set_index('foo', inplace=True)\n",
"\n",
"n_iterations = 1000\n",
"improved_times = []\n",
"for i in range(n_iterations):\n",
" selected_number = np.random.choice(n_nums)\n",
"\n",
" # Original method: filtering without setting an index\n",
" # Since we have already set the index, we need to reset it without inplace modification\n",
" df_reset = df.reset_index()\n",
" time0 = time.time()\n",
" df1 = df_reset[df_reset.foo == selected_number]\n",
" time1 = time.time()\n",
"\n",
" # Improved method: using .loc[] with 'foo' already set as index\n",
" time2 = time.time()\n",
" df2 = df.loc[selected_number]\n",
" time3 = time.time()\n",
"\n",
" # Times\n",
" elapsed_time_1 = time1 - time0 # Original method time\n",
" elapsed_time_2 = time3 - time2 # Improved method time\n",
" improved_time = elapsed_time_1 - elapsed_time_2 # The lower this value, the better the improvement\n",
" \n",
" improved_times.append(improved_time)\n",
"\n",
"# Plotting the histogram of improved times\n",
"plt.hist(improved_times, bins=20, color='blue', edgecolor='black')\n",
"plt.axvline(x=0, color='red', linestyle='dashed', linewidth=2) # Add a vertical line at x=0 for reference\n",
"plt.xlabel('Improvement Time (s)')\n",
"plt.ylabel('Frequency')\n",
"plt.title('Performance Improvement using set_index')\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.0010004043579101562, 0.0010085105895996094, False)"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Let's create a new MWE where `set_index` and using `.loc[]` shows better performance compared to the basic slicing method.\n",
"\n",
"# Seed for reproducibility\n",
"np.random.seed(0)\n",
"\n",
"# Constants for the DataFrame\n",
"n_nums = 10 # Reduce the number of unique values to increase the likelihood of matches and make the index more effective\n",
"n_rows = 100000\n",
"\n",
"# Create a large DataFrame with thousands of rows\n",
"df = pd.DataFrame({\n",
" 'foo': np.random.choice(n_nums, n_rows), # 10 unique values repeated over 100000 rows\n",
" 'bar': np.random.rand(n_rows),\n",
"})\n",
"\n",
"# Original method: Measure the time taken for filtering without setting an index\n",
"time0 = time.time()\n",
"df1 = df[df.foo == 5] # Choose a number that is guaranteed to be in 'foo'\n",
"time1 = time.time()\n",
"original_time = time1 - time0\n",
"\n",
"# Improved method: Set 'foo' as index and measure the time taken for filtering using .loc[]\n",
"df.set_index('foo', inplace=True)\n",
"time2 = time.time()\n",
"df2 = df.loc[5] # Using .loc[] on the indexed DataFrame\n",
"time3 = time.time()\n",
"improved_time = time3 - time2\n",
"\n",
"# Results\n",
"original_time, improved_time, original_time > improved_time\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.06691646575927734, 0.08325934410095215, False)"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To make the operations take longer, we can increase the size of the data and the complexity of the operation.\n",
"\n",
"# Constants for the DataFrame\n",
"n_nums = 10\n",
"n_rows = 10000000 # Increase the number of rows to 10 million\n",
"\n",
"# Create a large DataFrame with millions of rows\n",
"df = pd.DataFrame({\n",
" 'foo': np.random.choice(n_nums, n_rows), # 10 unique values repeated over 10 million rows\n",
" 'bar': np.random.rand(n_rows),\n",
"})\n",
"\n",
"# Measure the time for the original method without setting an index\n",
"time0 = time.time()\n",
"df1 = df[df.foo == 5]\n",
"time1 = time.time()\n",
"original_time = time1 - time0\n",
"\n",
"# Measure the time for the improved method using set_index and .loc[]\n",
"df_indexed = df.set_index('foo')\n",
"time2 = time.time()\n",
"df2 = df_indexed.loc[5]\n",
"time3 = time.time()\n",
"improved_time = time3 - time2\n",
"\n",
"# Results\n",
"original_time, improved_time, original_time > improved_time\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"기본 슬라이싱 시간: 0.003000초\n",
"set_index 및 .loc[] 사용 시간: 0.007908초\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import time\n",
"\n",
"# 데이터 생성\n",
"n = 1000000 # 데이터 포인트 수\n",
"df = pd.DataFrame({\n",
" 'foo': np.random.randint(0, 1000, size=n), # 0에서 999 사이의 임의의 정수\n",
" 'bar': np.random.rand(n) # 0과 1 사이의 임의의 부동 소수점 수\n",
"})\n",
"\n",
"selected_number = 500 # 선택할 값\n",
"\n",
"# 기본 슬라이싱\n",
"start_time = time.time()\n",
"df1 = df[df.foo == selected_number]\n",
"basic_slicing_time = time.time() - start_time\n",
"\n",
"# set_index 및 .loc[] 사용\n",
"start_time = time.time()\n",
"df.set_index('foo', inplace=True)\n",
"df2 = df.loc[selected_number]\n",
"set_index_time = time.time() - start_time\n",
"\n",
"# 성능 비교\n",
"print(f\"기본 슬라이싱 시간: {basic_slicing_time:.6f}초\")\n",
"print(f\"set_index 및 .loc[] 사용 시간: {set_index_time:.6f}초\")\n"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1. 데이터를 준비합니다.\n",
"1-1. 네트워크가 로드되었습니다.\n",
"1-2. 테이블들이 로드되었습니다.\n",
"2. 신호이력 테이블을 변환합니다.\n",
"3. 이동류정보 테이블을 변환합니다.\n",
"4. 통합 테이블을 생성합니다.\n",
" node_id start_unix phase_sumo duration state \\\n",
"98 c30 1704436790 0 38 rrrrrr \n",
"99 c30 1704436790 1 39 GGGGGG \n",
"100 c30 1704436790 2 43 GGGGGG \n",
"101 c30 1704436790 3 50 GGGGGG \n",
"164 c30 1704436960 0 38 rrrrrr \n",
"165 c30 1704436960 1 39 GGGGGG \n",
"166 c30 1704436960 2 43 GGGGGG \n",
"167 c30 1704436960 3 50 GGGGGG \n",
"228 c30 1704437130 0 38 rrrrrr \n",
"229 c30 1704437130 1 39 GGGGGG \n",
"230 c30 1704437130 2 43 GGGGGG \n",
"231 c30 1704437130 3 50 GGGGGG \n",
"280 c30 1704437300 0 38 rrrrrr \n",
"281 c30 1704437300 1 39 GGGGGG \n",
"282 c30 1704437300 2 43 GGGGGG \n",
"283 c30 1704437300 3 50 GGGGGG \n",
"65 i0 1704436701 0 43 gGGrgrrrgGGGGrgrr \n",
"66 i0 1704436701 1 45 grrGgrrrgrrrrGgrr \n",
"67 i0 1704436701 2 56 grrrgGGGgrrrrrgrr \n",
"68 i0 1704436701 3 0 grrrgGGrgrrrrrgGr \n",
"69 i0 1704436701 4 37 grrrgrrrgrrrrrgGG \n",
"124 i0 1704436880 0 43 gGGrgrrrgGGGGrgrr \n",
"125 i0 1704436880 1 45 grrGgrrrgrrrrGgrr \n",
"126 i0 1704436880 2 54 grrrgGGGgrrrrrgrr \n",
"127 i0 1704436880 3 0 grrrgGGrgrrrrrgGr \n",
"128 i0 1704436880 4 37 grrrgrrrgrrrrrgGG \n",
"191 i0 1704437060 0 43 gGGrgrrrgGGGGrgrr \n",
"192 i0 1704437060 1 45 grrGgrrrgrrrrGgrr \n",
"193 i0 1704437060 2 55 grrrgGGGgrrrrrgrr \n",
"194 i0 1704437060 3 0 grrrgGGrgrrrrrgGr \n",
"195 i0 1704437060 4 37 grrrgrrrgrrrrrgGG \n",
"62 i1 1704436700 0 37 gGGGGGrgrr \n",
"63 i1 1704436700 1 102 grrGGGGgrr \n",
"64 i1 1704436700 2 40 grrrrrrgGG \n",
"129 i1 1704436880 0 37 gGGGGGrgrr \n",
"130 i1 1704436880 1 103 grrGGGGgrr \n",
"131 i1 1704436880 2 40 grrrrrrgGG \n",
"196 i1 1704437060 0 37 gGGGGGrgrr \n",
"197 i1 1704437060 1 103 grrGGGGgrr \n",
"198 i1 1704437060 2 40 grrrrrrgGG \n",
"258 i1 1704437240 0 37 gGGGGGrgrr \n",
"259 i1 1704437240 1 103 grrGGGGgrr \n",
"260 i1 1704437240 2 40 grrrrrrgGG \n",
"75 i2 1704436709 0 43 GGggGGG \n",
"76 i2 1704436709 1 27 rrggrrr \n",
"77 i2 1704436709 2 69 rrggrrr \n",
"78 i2 1704436709 3 40 rrggGGG \n",
"142 i2 1704436890 0 43 GGggGGG \n",
"143 i2 1704436890 1 27 rrggrrr \n",
"144 i2 1704436890 2 71 rrggrrr \n",
"145 i2 1704436890 3 40 rrggGGG \n",
"204 i2 1704437070 0 43 GGggGGG \n",
"205 i2 1704437070 1 27 rrggrrr \n",
"206 i2 1704437070 2 70 rrggrrr \n",
"207 i2 1704437070 3 40 rrggGGG \n",
"266 i2 1704437250 0 43 GGggGGG \n",
"267 i2 1704437250 1 27 rrggrrr \n",
"268 i2 1704437250 2 70 rrggrrr \n",
"269 i2 1704437250 3 40 rrggGGG \n",
"102 i3 1704436790 0 38 gGGGrgrrrrgGGGrgrrrr \n",
"103 i3 1704436790 1 39 grrrGgrrrrgrrrGgrrrr \n",
"104 i3 1704436790 2 43 grrrrgrrrrgrrrrgGGGG \n",
"105 i3 1704436790 3 50 grrrrgGGGGgrrrrgrrrr \n",
"168 i3 1704436960 0 38 gGGGrgrrrrgGGGrgrrrr \n",
"169 i3 1704436960 1 39 grrrGgrrrrgrrrGgrrrr \n",
"170 i3 1704436960 2 43 grrrrgrrrrgrrrrgGGGG \n",
"171 i3 1704436960 3 50 grrrrgGGGGgrrrrgrrrr \n",
"232 i3 1704437130 0 38 gGGGrgrrrrgGGGrgrrrr \n",
"233 i3 1704437130 1 39 grrrGgrrrrgrrrGgrrrr \n",
"234 i3 1704437130 2 43 grrrrgrrrrgrrrrgGGGG \n",
"235 i3 1704437130 3 50 grrrrgGGGGgrrrrgrrrr \n",
"284 i3 1704437300 0 38 gGGGrgrrrrgGGGrgrrrr \n",
"285 i3 1704437300 1 39 grrrGgrrrrgrrrGgrrrr \n",
"286 i3 1704437300 2 43 grrrrgrrrrgrrrrgGGGG \n",
"287 i3 1704437300 3 50 grrrrgGGGGgrrrrgrrrr \n",
"88 i6 1704436760 0 43 grrrgGGGrgrrgrrr \n",
"89 i6 1704436760 1 0 grrrgGGGrgrrgGGr \n",
"90 i6 1704436760 2 43 grrrgrrrrgrrgGGG \n",
"91 i6 1704436760 3 70 gGGGgrrrrgrrgrrr \n",
"92 i6 1704436760 4 24 grrrgrrrrgGGgrrr \n",
"150 i6 1704436940 0 43 grrrgGGGrgrrgrrr \n",
"151 i6 1704436940 1 0 grrrgGGGrgrrgGGr \n",
"152 i6 1704436940 2 43 grrrgrrrrgrrgGGG \n",
"153 i6 1704436940 3 70 gGGGgrrrrgrrgrrr \n",
"154 i6 1704436940 4 24 grrrgrrrrgGGgrrr \n",
"216 i6 1704437120 0 43 grrrgGGGrgrrgrrr \n",
"217 i6 1704437120 1 0 grrrgGGGrgrrgGGr \n",
"218 i6 1704437120 2 43 grrrgrrrrgrrgGGG \n",
"219 i6 1704437120 3 70 gGGGgrrrrgrrgrrr \n",
"220 i6 1704437120 4 24 grrrgrrrrgGGgrrr \n",
"288 i6 1704437300 0 43 grrrgGGGrgrrgrrr \n",
"289 i6 1704437300 1 0 grrrgGGGrgrrgGGr \n",
"290 i6 1704437300 2 43 grrrgrrrrgrrgGGG \n",
"291 i6 1704437300 3 70 gGGGgrrrrgrrgrrr \n",
"292 i6 1704437300 4 24 grrrgrrrrgGGgrrr \n",
"58 i7 1704436650 0 45 GGrggGG \n",
"59 i7 1704436650 1 53 rrrggrr \n",
"60 i7 1704436650 2 26 GGrggGG \n",
"61 i7 1704436650 3 26 rrrggrr \n",
"118 i7 1704436800 0 45 GGrggGG \n",
"119 i7 1704436800 1 53 rrrggrr \n",
"120 i7 1704436800 2 26 GGrggGG \n",
"121 i7 1704436800 3 26 rrrggrr \n",
"160 i7 1704436950 0 45 GGrggGG \n",
"161 i7 1704436950 1 53 rrrggrr \n",
"162 i7 1704436950 2 26 GGrggGG \n",
"163 i7 1704436950 3 26 rrrggrr \n",
"212 i7 1704437100 0 45 GGrggGG \n",
"213 i7 1704437100 1 53 rrrggrr \n",
"214 i7 1704437100 2 26 GGrggGG \n",
"215 i7 1704437100 3 26 rrrggrr \n",
"83 i8 1704436710 0 33 grrrrrrrgGGGgrrr \n",
"84 i8 1704436710 1 36 grrrrrrrgrrrgGGG \n",
"85 i8 1704436710 2 25 grrrGGGrgrrrgGGr \n",
"86 i8 1704436710 3 58 grrrGGGGgrrrgrrr \n",
"87 i8 1704436710 4 18 gGGGrrrrgrrrgrrr \n",
"132 i8 1704436880 0 33 grrrrrrrgGGGgrrr \n",
"133 i8 1704436880 1 36 grrrrrrrgrrrgGGG \n",
"134 i8 1704436880 2 25 grrrGGGrgrrrgGGr \n",
"135 i8 1704436880 3 58 grrrGGGGgrrrgrrr \n",
"136 i8 1704436880 4 18 gGGGrrrrgrrrgrrr \n",
"186 i8 1704437050 0 33 grrrrrrrgGGGgrrr \n",
"187 i8 1704437050 1 36 grrrrrrrgrrrgGGG \n",
"188 i8 1704437050 2 25 grrrGGGrgrrrgGGr \n",
"189 i8 1704437050 3 58 grrrGGGGgrrrgrrr \n",
"190 i8 1704437050 4 18 gGGGrrrrgrrrgrrr \n",
"248 i8 1704437220 0 33 grrrrrrrgGGGgrrr \n",
"249 i8 1704437220 1 36 grrrrrrrgrrrgGGG \n",
"250 i8 1704437220 2 25 grrrGGGrgrrrgGGr \n",
"251 i8 1704437220 3 58 grrrGGGGgrrrgrrr \n",
"252 i8 1704437220 4 18 gGGGrrrrgrrrgrrr \n",
"56 i9 1704436640 0 46 GGGG \n",
"57 i9 1704436640 1 115 rrrr \n",
"122 i9 1704436800 0 46 GGGG \n",
"123 i9 1704436800 1 114 rrrr \n",
"172 i9 1704436960 0 46 GGGG \n",
"173 i9 1704436960 1 114 rrrr \n",
"221 i9 1704437120 0 46 GGGG \n",
"222 i9 1704437120 1 114 rrrr \n",
"70 u00 1704436701 0 43 ggggrgggg \n",
"71 u00 1704436701 1 45 ggggGgggg \n",
"72 u00 1704436701 2 56 ggggrgggg \n",
"73 u00 1704436701 3 0 ggggrgggg \n",
"74 u00 1704436701 4 37 ggggrgggg \n",
"137 u00 1704436880 0 43 ggggrgggg \n",
"138 u00 1704436880 1 45 ggggGgggg \n",
"139 u00 1704436880 2 54 ggggrgggg \n",
"140 u00 1704436880 3 0 ggggrgggg \n",
"141 u00 1704436880 4 37 ggggrgggg \n",
"199 u00 1704437060 0 43 ggggrgggg \n",
"200 u00 1704437060 1 45 ggggGgggg \n",
"201 u00 1704437060 2 55 ggggrgggg \n",
"202 u00 1704437060 3 0 ggggrgggg \n",
"203 u00 1704437060 4 37 ggggrgggg \n",
"79 u20 1704436709 0 43 ggrggg \n",
"80 u20 1704436709 1 27 ggrggg \n",
"81 u20 1704436709 2 69 ggGggg \n",
"82 u20 1704436709 3 40 ggrggg \n",
"146 u20 1704436890 0 43 ggrggg \n",
"147 u20 1704436890 1 27 ggrggg \n",
"148 u20 1704436890 2 71 ggGggg \n",
"149 u20 1704436890 3 40 ggrggg \n",
"208 u20 1704437070 0 43 ggrggg \n",
"209 u20 1704437070 1 27 ggrggg \n",
"210 u20 1704437070 2 70 ggGggg \n",
"211 u20 1704437070 3 40 ggrggg \n",
"274 u20 1704437250 0 43 ggrggg \n",
"275 u20 1704437250 1 27 ggrggg \n",
"276 u20 1704437250 2 70 ggGggg \n",
"277 u20 1704437250 3 40 ggrggg \n",
"106 u30 1704436790 0 38 ggggrggg \n",
"107 u30 1704436790 1 39 ggggrggg \n",
"108 u30 1704436790 2 43 ggggrggg \n",
"109 u30 1704436790 3 50 ggggGggg \n",
"174 u30 1704436960 0 38 ggggrggg \n",
"175 u30 1704436960 1 39 ggggrggg \n",
"176 u30 1704436960 2 43 ggggrggg \n",
"177 u30 1704436960 3 50 ggggGggg \n",
"236 u30 1704437130 0 38 ggggrggg \n",
"237 u30 1704437130 1 39 ggggrggg \n",
"238 u30 1704437130 2 43 ggggrggg \n",
"239 u30 1704437130 3 50 ggggGggg \n",
"293 u30 1704437300 0 38 ggggrggg \n",
"294 u30 1704437300 1 39 ggggrggg \n",
"295 u30 1704437300 2 43 ggggrggg \n",
"296 u30 1704437300 3 50 ggggGggg \n",
"110 u31 1704436790 0 38 ggggGggg \n",
"111 u31 1704436790 1 39 ggggrggg \n",
"112 u31 1704436790 2 43 ggggrggg \n",
"113 u31 1704436790 3 50 ggggrggg \n",
"178 u31 1704436960 0 38 ggggGggg \n",
"179 u31 1704436960 1 39 ggggrggg \n",
"180 u31 1704436960 2 43 ggggrggg \n",
"181 u31 1704436960 3 50 ggggrggg \n",
"240 u31 1704437130 0 38 ggggGggg \n",
"241 u31 1704437130 1 39 ggggrggg \n",
"242 u31 1704437130 2 43 ggggrggg \n",
"243 u31 1704437130 3 50 ggggrggg \n",
"297 u31 1704437300 0 38 ggggGggg \n",
"298 u31 1704437300 1 39 ggggrggg \n",
"299 u31 1704437300 2 43 ggggrggg \n",
"300 u31 1704437300 3 50 ggggrggg \n",
"114 u32 1704436790 0 38 gggggggG \n",
"115 u32 1704436790 1 39 gggggggr \n",
"116 u32 1704436790 2 43 gggggggr \n",
"117 u32 1704436790 3 50 gggggggr \n",
"182 u32 1704436960 0 38 gggggggG \n",
"183 u32 1704436960 1 39 gggggggr \n",
"184 u32 1704436960 2 43 gggggggr \n",
"185 u32 1704436960 3 50 gggggggr \n",
"244 u32 1704437130 0 38 gggggggG \n",
"245 u32 1704437130 1 39 gggggggr \n",
"246 u32 1704437130 2 43 gggggggr \n",
"247 u32 1704437130 3 50 gggggggr \n",
"301 u32 1704437300 0 38 gggggggG \n",
"302 u32 1704437300 1 39 gggggggr \n",
"303 u32 1704437300 2 43 gggggggr \n",
"304 u32 1704437300 3 50 gggggggr \n",
"93 u60 1704436760 0 43 ggggggggr \n",
"94 u60 1704436760 1 0 ggggggggr \n",
"95 u60 1704436760 2 43 ggggggggG \n",
"96 u60 1704436760 3 70 ggggggggr \n",
"97 u60 1704436760 4 24 ggggggggr \n",
"155 u60 1704436940 0 43 ggggggggr \n",
"156 u60 1704436940 1 0 ggggggggr \n",
"157 u60 1704436940 2 43 ggggggggG \n",
"158 u60 1704436940 3 70 ggggggggr \n",
"159 u60 1704436940 4 24 ggggggggr \n",
"223 u60 1704437120 0 43 ggggggggr \n",
"224 u60 1704437120 1 0 ggggggggr \n",
"225 u60 1704437120 2 43 ggggggggG \n",
"226 u60 1704437120 3 70 ggggggggr \n",
"227 u60 1704437120 4 24 ggggggggr \n",
"305 u60 1704437300 0 43 ggggggggr \n",
"306 u60 1704437300 1 0 ggggggggr \n",
"307 u60 1704437300 2 43 ggggggggG \n",
"308 u60 1704437300 3 70 ggggggggr \n",
"309 u60 1704437300 4 24 ggggggggr \n",
"\n",
" start_dt \n",
"98 2024-01-05 15:39:50 \n",
"99 2024-01-05 15:39:50 \n",
"100 2024-01-05 15:39:50 \n",
"101 2024-01-05 15:39:50 \n",
"164 2024-01-05 15:42:40 \n",
"165 2024-01-05 15:42:40 \n",
"166 2024-01-05 15:42:40 \n",
"167 2024-01-05 15:42:40 \n",
"228 2024-01-05 15:45:30 \n",
"229 2024-01-05 15:45:30 \n",
"230 2024-01-05 15:45:30 \n",
"231 2024-01-05 15:45:30 \n",
"280 2024-01-05 15:48:20 \n",
"281 2024-01-05 15:48:20 \n",
"282 2024-01-05 15:48:20 \n",
"283 2024-01-05 15:48:20 \n",
"65 2024-01-05 15:38:21 \n",
"66 2024-01-05 15:38:21 \n",
"67 2024-01-05 15:38:21 \n",
"68 2024-01-05 15:38:21 \n",
"69 2024-01-05 15:38:21 \n",
"124 2024-01-05 15:41:20 \n",
"125 2024-01-05 15:41:20 \n",
"126 2024-01-05 15:41:20 \n",
"127 2024-01-05 15:41:20 \n",
"128 2024-01-05 15:41:20 \n",
"191 2024-01-05 15:44:20 \n",
"192 2024-01-05 15:44:20 \n",
"193 2024-01-05 15:44:20 \n",
"194 2024-01-05 15:44:20 \n",
"195 2024-01-05 15:44:20 \n",
"62 2024-01-05 15:38:20 \n",
"63 2024-01-05 15:38:20 \n",
"64 2024-01-05 15:38:20 \n",
"129 2024-01-05 15:41:20 \n",
"130 2024-01-05 15:41:20 \n",
"131 2024-01-05 15:41:20 \n",
"196 2024-01-05 15:44:20 \n",
"197 2024-01-05 15:44:20 \n",
"198 2024-01-05 15:44:20 \n",
"258 2024-01-05 15:47:20 \n",
"259 2024-01-05 15:47:20 \n",
"260 2024-01-05 15:47:20 \n",
"75 2024-01-05 15:38:29 \n",
"76 2024-01-05 15:38:29 \n",
"77 2024-01-05 15:38:29 \n",
"78 2024-01-05 15:38:29 \n",
"142 2024-01-05 15:41:30 \n",
"143 2024-01-05 15:41:30 \n",
"144 2024-01-05 15:41:30 \n",
"145 2024-01-05 15:41:30 \n",
"204 2024-01-05 15:44:30 \n",
"205 2024-01-05 15:44:30 \n",
"206 2024-01-05 15:44:30 \n",
"207 2024-01-05 15:44:30 \n",
"266 2024-01-05 15:47:30 \n",
"267 2024-01-05 15:47:30 \n",
"268 2024-01-05 15:47:30 \n",
"269 2024-01-05 15:47:30 \n",
"102 2024-01-05 15:39:50 \n",
"103 2024-01-05 15:39:50 \n",
"104 2024-01-05 15:39:50 \n",
"105 2024-01-05 15:39:50 \n",
"168 2024-01-05 15:42:40 \n",
"169 2024-01-05 15:42:40 \n",
"170 2024-01-05 15:42:40 \n",
"171 2024-01-05 15:42:40 \n",
"232 2024-01-05 15:45:30 \n",
"233 2024-01-05 15:45:30 \n",
"234 2024-01-05 15:45:30 \n",
"235 2024-01-05 15:45:30 \n",
"284 2024-01-05 15:48:20 \n",
"285 2024-01-05 15:48:20 \n",
"286 2024-01-05 15:48:20 \n",
"287 2024-01-05 15:48:20 \n",
"88 2024-01-05 15:39:20 \n",
"89 2024-01-05 15:39:20 \n",
"90 2024-01-05 15:39:20 \n",
"91 2024-01-05 15:39:20 \n",
"92 2024-01-05 15:39:20 \n",
"150 2024-01-05 15:42:20 \n",
"151 2024-01-05 15:42:20 \n",
"152 2024-01-05 15:42:20 \n",
"153 2024-01-05 15:42:20 \n",
"154 2024-01-05 15:42:20 \n",
"216 2024-01-05 15:45:20 \n",
"217 2024-01-05 15:45:20 \n",
"218 2024-01-05 15:45:20 \n",
"219 2024-01-05 15:45:20 \n",
"220 2024-01-05 15:45:20 \n",
"288 2024-01-05 15:48:20 \n",
"289 2024-01-05 15:48:20 \n",
"290 2024-01-05 15:48:20 \n",
"291 2024-01-05 15:48:20 \n",
"292 2024-01-05 15:48:20 \n",
"58 2024-01-05 15:37:30 \n",
"59 2024-01-05 15:37:30 \n",
"60 2024-01-05 15:37:30 \n",
"61 2024-01-05 15:37:30 \n",
"118 2024-01-05 15:40:00 \n",
"119 2024-01-05 15:40:00 \n",
"120 2024-01-05 15:40:00 \n",
"121 2024-01-05 15:40:00 \n",
"160 2024-01-05 15:42:30 \n",
"161 2024-01-05 15:42:30 \n",
"162 2024-01-05 15:42:30 \n",
"163 2024-01-05 15:42:30 \n",
"212 2024-01-05 15:45:00 \n",
"213 2024-01-05 15:45:00 \n",
"214 2024-01-05 15:45:00 \n",
"215 2024-01-05 15:45:00 \n",
"83 2024-01-05 15:38:30 \n",
"84 2024-01-05 15:38:30 \n",
"85 2024-01-05 15:38:30 \n",
"86 2024-01-05 15:38:30 \n",
"87 2024-01-05 15:38:30 \n",
"132 2024-01-05 15:41:20 \n",
"133 2024-01-05 15:41:20 \n",
"134 2024-01-05 15:41:20 \n",
"135 2024-01-05 15:41:20 \n",
"136 2024-01-05 15:41:20 \n",
"186 2024-01-05 15:44:10 \n",
"187 2024-01-05 15:44:10 \n",
"188 2024-01-05 15:44:10 \n",
"189 2024-01-05 15:44:10 \n",
"190 2024-01-05 15:44:10 \n",
"248 2024-01-05 15:47:00 \n",
"249 2024-01-05 15:47:00 \n",
"250 2024-01-05 15:47:00 \n",
"251 2024-01-05 15:47:00 \n",
"252 2024-01-05 15:47:00 \n",
"56 2024-01-05 15:37:20 \n",
"57 2024-01-05 15:37:20 \n",
"122 2024-01-05 15:40:00 \n",
"123 2024-01-05 15:40:00 \n",
"172 2024-01-05 15:42:40 \n",
"173 2024-01-05 15:42:40 \n",
"221 2024-01-05 15:45:20 \n",
"222 2024-01-05 15:45:20 \n",
"70 2024-01-05 15:38:21 \n",
"71 2024-01-05 15:38:21 \n",
"72 2024-01-05 15:38:21 \n",
"73 2024-01-05 15:38:21 \n",
"74 2024-01-05 15:38:21 \n",
"137 2024-01-05 15:41:20 \n",
"138 2024-01-05 15:41:20 \n",
"139 2024-01-05 15:41:20 \n",
"140 2024-01-05 15:41:20 \n",
"141 2024-01-05 15:41:20 \n",
"199 2024-01-05 15:44:20 \n",
"200 2024-01-05 15:44:20 \n",
"201 2024-01-05 15:44:20 \n",
"202 2024-01-05 15:44:20 \n",
"203 2024-01-05 15:44:20 \n",
"79 2024-01-05 15:38:29 \n",
"80 2024-01-05 15:38:29 \n",
"81 2024-01-05 15:38:29 \n",
"82 2024-01-05 15:38:29 \n",
"146 2024-01-05 15:41:30 \n",
"147 2024-01-05 15:41:30 \n",
"148 2024-01-05 15:41:30 \n",
"149 2024-01-05 15:41:30 \n",
"208 2024-01-05 15:44:30 \n",
"209 2024-01-05 15:44:30 \n",
"210 2024-01-05 15:44:30 \n",
"211 2024-01-05 15:44:30 \n",
"274 2024-01-05 15:47:30 \n",
"275 2024-01-05 15:47:30 \n",
"276 2024-01-05 15:47:30 \n",
"277 2024-01-05 15:47:30 \n",
"106 2024-01-05 15:39:50 \n",
"107 2024-01-05 15:39:50 \n",
"108 2024-01-05 15:39:50 \n",
"109 2024-01-05 15:39:50 \n",
"174 2024-01-05 15:42:40 \n",
"175 2024-01-05 15:42:40 \n",
"176 2024-01-05 15:42:40 \n",
"177 2024-01-05 15:42:40 \n",
"236 2024-01-05 15:45:30 \n",
"237 2024-01-05 15:45:30 \n",
"238 2024-01-05 15:45:30 \n",
"239 2024-01-05 15:45:30 \n",
"293 2024-01-05 15:48:20 \n",
"294 2024-01-05 15:48:20 \n",
"295 2024-01-05 15:48:20 \n",
"296 2024-01-05 15:48:20 \n",
"110 2024-01-05 15:39:50 \n",
"111 2024-01-05 15:39:50 \n",
"112 2024-01-05 15:39:50 \n",
"113 2024-01-05 15:39:50 \n",
"178 2024-01-05 15:42:40 \n",
"179 2024-01-05 15:42:40 \n",
"180 2024-01-05 15:42:40 \n",
"181 2024-01-05 15:42:40 \n",
"240 2024-01-05 15:45:30 \n",
"241 2024-01-05 15:45:30 \n",
"242 2024-01-05 15:45:30 \n",
"243 2024-01-05 15:45:30 \n",
"297 2024-01-05 15:48:20 \n",
"298 2024-01-05 15:48:20 \n",
"299 2024-01-05 15:48:20 \n",
"300 2024-01-05 15:48:20 \n",
"114 2024-01-05 15:39:50 \n",
"115 2024-01-05 15:39:50 \n",
"116 2024-01-05 15:39:50 \n",
"117 2024-01-05 15:39:50 \n",
"182 2024-01-05 15:42:40 \n",
"183 2024-01-05 15:42:40 \n",
"184 2024-01-05 15:42:40 \n",
"185 2024-01-05 15:42:40 \n",
"244 2024-01-05 15:45:30 \n",
"245 2024-01-05 15:45:30 \n",
"246 2024-01-05 15:45:30 \n",
"247 2024-01-05 15:45:30 \n",
"301 2024-01-05 15:48:20 \n",
"302 2024-01-05 15:48:20 \n",
"303 2024-01-05 15:48:20 \n",
"304 2024-01-05 15:48:20 \n",
"93 2024-01-05 15:39:20 \n",
"94 2024-01-05 15:39:20 \n",
"95 2024-01-05 15:39:20 \n",
"96 2024-01-05 15:39:20 \n",
"97 2024-01-05 15:39:20 \n",
"155 2024-01-05 15:42:20 \n",
"156 2024-01-05 15:42:20 \n",
"157 2024-01-05 15:42:20 \n",
"158 2024-01-05 15:42:20 \n",
"159 2024-01-05 15:42:20 \n",
"223 2024-01-05 15:45:20 \n",
"224 2024-01-05 15:45:20 \n",
"225 2024-01-05 15:45:20 \n",
"226 2024-01-05 15:45:20 \n",
"227 2024-01-05 15:45:20 \n",
"305 2024-01-05 15:48:20 \n",
"306 2024-01-05 15:48:20 \n",
"307 2024-01-05 15:48:20 \n",
"308 2024-01-05 15:48:20 \n",
"309 2024-01-05 15:48:20 \n"
]
}
],
"source": [
"from generate_signals import SignalGenerator\n",
"\n",
"# Instantiate the generator under the notebook-level name `self`.\n",
"self = SignalGenerator()\n",
"\n",
"# Run the stages one after another; each method is resolved by name on\n",
"# `self` immediately before it is called.\n",
"for stage in (\n",
"    'prepare_data',\n",
"    'process_history',\n",
"    'process_movement',\n",
"    'make_histids',\n",
"    'initialize_states',\n",
"    'assign_signals',\n",
"    'set_timepoints',\n",
"):\n",
"    getattr(self, stage)()\n",
"\n",
"# Print the whole signal table with pandas' row/column truncation disabled.\n",
"# NOTE(review): `pd` is not imported in this cell; it has to come from an earlier cell.\n",
"display_all = ('display.max_rows', None, 'display.max_columns', None)\n",
"with pd.option_context(*display_all):\n",
"    print(self.Sigtable)"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" node_id start_unix phase_sumo duration state start_dt\n",
"0 True True True True True True\n",
"1 True True True True True True\n",
"2 True True False False False True\n",
"3 True True False False False True\n",
"4 True True True True True True\n",
".. ... ... ... ... ... ...\n",
"679 True True False False True True\n",
"680 True True False False True True\n",
"681 True True True True True True\n",
"682 True True False False True True\n",
"683 True True False False True True\n",
"\n",
"[684 rows x 6 columns]\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Expand each signal phase in self.Sigtable into green / yellow / red steps.\n",
"# For every pair of consecutive phases (prev, next) of a node this emits:\n",
"#   '<prev>_g'  - prev row with its duration reduced by 5\n",
"#   '<prev>_y'  - prev row, duration 4, every 'G' that becomes 'r' in next shown as 'y'\n",
"#   '<next>__r' - next row, duration 1, those same positions shown as 'r'\n",
"# In the final sort on phase_sumo, '__r' orders ahead of '_g' for the same\n",
"# start_unix ('_' < 'g'), so the red step lands before the next green step.\n",
"self.SIGTABLE = []\n",
"for _, group in self.Sigtable.groupby('node_id'):\n",
"    new_rows_list = []\n",
"    for i in range(1, len(group)):\n",
"        prev_row = group.iloc[i-1:i].copy()\n",
"        next_row = group.iloc[i:i+1].copy()\n",
"        prev_phase = str(prev_row.phase_sumo.iloc[0])\n",
"        next_phase = str(next_row.phase_sumo.iloc[0])\n",
"\n",
"        # Only positions switching from 'G' to 'r' pass through yellow and red;\n",
"        # every other position keeps the previous phase's character.\n",
"        pairs = list(zip(prev_row.state.iloc[0], next_row.state.iloc[0]))\n",
"        yellow_state = ''.join('y' if (a, b) == ('G', 'r') else a for a, b in pairs)\n",
"        red_state = ''.join('r' if (a, b) == ('G', 'r') else a for a, b in pairs)\n",
"\n",
"        new_rows = pd.concat([prev_row, prev_row, next_row]).reset_index(drop=True)\n",
"        new_rows.loc[0, 'phase_sumo'] = prev_phase + '_g'\n",
"        new_rows.loc[0, 'duration'] = new_rows.loc[0, 'duration'] - 5\n",
"        new_rows.loc[1, 'phase_sumo'] = prev_phase + '_y'\n",
"        new_rows.loc[1, 'duration'] = 4\n",
"        new_rows.loc[1, 'state'] = yellow_state\n",
"        new_rows.loc[2, 'phase_sumo'] = next_phase + '__r'\n",
"        new_rows.loc[2, 'duration'] = 1\n",
"        new_rows.loc[2, 'state'] = red_state\n",
"        new_rows_list.append(new_rows)\n",
"\n",
"    # Take the closing green step from this group's own last row. Reusing the\n",
"    # inner-loop variable would fail (or pick up another node's row) when the\n",
"    # group has a single phase and the loop body never ran.\n",
"    last_row = group.iloc[-1:].copy()\n",
"    last_row['phase_sumo'] = str(last_row.phase_sumo.iloc[0]) + '_g'\n",
"    last_row['duration'] -= 5\n",
"    new_rows_list.append(last_row)\n",
"    self.SIGTABLE.append(pd.concat(new_rows_list))\n",
"\n",
"self.SIGTABLE = (\n",
"    pd.concat(self.SIGTABLE)\n",
"    .sort_values(by=['node_id', 'start_unix', 'phase_sumo'])\n",
"    .reset_index(drop=True)\n",
")\n",
"df1 = self.SIGTABLE\n",
"\n",
"# 원본 데이터프레임: self.Sigtable\n",
"# 예제에서는 self.Sigtable이 이미 정의되어 있다고 가정합니다.\n",
"\n",
"# 변환 과정을 최적화하기 위해 먼저 필요한 새로운 행을 계산하는 함수를 정의합니다.\n",
"def create_transformed_rows(group):\n",
"    \"\"\"Expand one node's signal phases into green / yellow / red steps.\n",
"\n",
"    Args:\n",
"        group: DataFrame holding the rows of one node (one group of\n",
"            ``self.Sigtable.groupby('node_id')``) with at least the columns\n",
"            ``phase_sumo``, ``duration`` and ``state``.\n",
"\n",
"    Returns:\n",
"        DataFrame with, for each consecutive pair (prev, next) of rows:\n",
"        ``<prev>_g`` (prev duration - 5), ``<prev>_y`` (duration 4) and\n",
"        ``<next>__r`` (duration 1), followed by ``<last>_g`` (duration - 5)\n",
"        for the final row. Remaining columns are copied from the row each\n",
"        step is derived from. An empty group is returned as an empty copy.\n",
"    \"\"\"\n",
"    if len(group) == 0:\n",
"        return group.copy()\n",
"\n",
"    records = []\n",
"    for i in range(1, len(group)):\n",
"        prev_row = group.iloc[i-1]\n",
"        next_row = group.iloc[i]\n",
"        prev_phase = prev_row['phase_sumo']\n",
"        next_phase = next_row['phase_sumo']\n",
"\n",
"        # Only positions switching from 'G' to 'r' pass through yellow and red;\n",
"        # every other position keeps the previous phase's character.\n",
"        pairs = list(zip(prev_row['state'], next_row['state']))\n",
"        yellow_state = ''.join('y' if (a, b) == ('G', 'r') else a for a, b in pairs)\n",
"        red_state = ''.join('r' if (a, b) == ('G', 'r') else a for a, b in pairs)\n",
"\n",
"        records.append({**prev_row, 'phase_sumo': f'{prev_phase}_g', 'duration': prev_row['duration'] - 5})\n",
"        records.append({**prev_row, 'phase_sumo': f'{prev_phase}_y', 'duration': 4, 'state': yellow_state})\n",
"        # Double underscore on purpose: it matches the loop-based version and,\n",
"        # when sorted by phase_sumo, places the red step before '<next>_g'\n",
"        # ('_' < 'g'); a single '_r' would sort after the green step.\n",
"        records.append({**next_row, 'phase_sumo': f'{next_phase}__r', 'duration': 1, 'state': red_state})\n",
"\n",
"    # The final phase has no successor, so it only gets its green step.\n",
"    last_row = group.iloc[-1]\n",
"    last_phase = last_row['phase_sumo']\n",
"    records.append({**last_row, 'phase_sumo': f'{last_phase}_g', 'duration': last_row['duration'] - 5})\n",
"\n",
"    return pd.DataFrame(records)\n",
"\n",
"# Apply the expansion to every node's rows.\n",
"transformed_groups = []\n",
"for _, node_rows in self.Sigtable.groupby('node_id'):\n",
"    transformed_groups.append(create_transformed_rows(node_rows))\n",
"\n",
"# Stitch the per-node results together and put them in the same order as df1.\n",
"self.SIGTABLE = (\n",
"    pd.concat(transformed_groups)\n",
"    .reset_index(drop=True)\n",
"    .sort_values(by=['node_id', 'start_unix', 'phase_sumo'])\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"df2 = self.SIGTABLE\n",
"\n",
"# Element-wise comparison of the loop-based result (df1) with the function-based one (df2).\n",
"print(df1==df2)"
]
},
{
"cell_type": "code",
"execution_count": 216,
"metadata": {},
"outputs": [],
"source": [
"# from generate_signals import SignalGenerator\n",
"# self = SignalGenerator()\n",
"\n",
"# self.prepare_data()\n",
"# self.process_history()\n",
"# self.process_movement()\n",
"# self.merge_dfs()\n",
"\n",
"# with pd.option_context('display.max_rows', None, 'display.max_columns', None):\n",
"# display(self.histid)\n",
"# display(self.match6)\n",
"# print(self.parent_ids)\n",
"# print(self.pa2ch)\n",
"\n",
"# For every child node, copy its parent's rows of self.histid, blank the four\n",
"# edge columns and refill them from the child's own rows in self.match6.\n",
"# NOTE(review): `np` and `pd` are not imported in this cell; they have to come from an earlier cell.\n",
"new_histids = []\n",
"for parent_id in self.parent_ids:\n",
"    # Filter first, copy afterwards: avoids copying the whole frame per child\n",
"    # and avoids assigning columns on a slice of a temporary copy.\n",
"    parent_histid = self.histid[self.histid.node_id == parent_id]\n",
"    for child_id in self.pa2ch[parent_id]:\n",
"        new_histid = parent_histid.copy()\n",
"        new_histid[['inc_edge_A', 'out_edge_A', 'inc_edge_B', 'out_edge_B']] = np.nan\n",
"        # The child's match rows do not depend on the history row, so select them once.\n",
"        child_match = self.match6[self.match6.node_id == child_id]\n",
"        for row in new_histid.itertuples(index=True):\n",
"            for ring, phase_no in (('A', row.phas_A), ('B', row.phas_B)):\n",
"                matched = child_match.loc[\n",
"                    (child_match.phase_no == phase_no) & (child_match.ring_type == ring),\n",
"                    ['inc_edge', 'out_edge'],\n",
"                ]\n",
"                # No matching row, or only NaN edges: leave the columns blank.\n",
"                if matched.isna().all().all():\n",
"                    continue\n",
"                new_histid.loc[row.Index, [f'inc_edge_{ring}', f'out_edge_{ring}']] = matched.iloc[0].tolist()\n",
"        # Every copied row now belongs to the child node.\n",
"        new_histid['node_id'] = child_id\n",
"        new_histids.append(new_histid)\n",
"new_histids = pd.concat(new_histids)\n",
"self.histids = pd.concat([self.histid.copy(), new_histids])\n",
"self.histids = self.histids.sort_values(by=['start_unix', 'node_id', 'phas_A', 'phas_B']).reset_index(drop=True)\n",
"\n",
"# df1 = self.histids\n",
"\n",
"# # self.match6에 대해 set_index를 사용해 인덱스 설정\n",
"# new_match_indexed = new_match.set_index(['phase_no', 'ring_type'])\n",
"\n",
"# new_histids = []\n",
"# for parent_id in self.parent_ids:\n",
"# for child_id in self.pa2ch[parent_id]:\n",
"# new_histid = self.histid.copy()[self.histid.node_id==parent_id]\n",
"# new_histid[['inc_edge_A', 'out_edge_A', 'inc_edge_B', 'out_edge_B']] = np.nan\n",
"# for row in new_histid.itertuples(index=True):\n",
"# phas_A = row.phas_A\n",
"# phas_B = row.phas_B\n",
"# new_match = self.match6[self.match6.node_id==child_id]\n",
"# Arow = new_match_indexed.loc[(phas_A, 'A')]\n",
"# if len(Arow.shape) == 1:\n",
"# Arow.inc_edge\n",
"# else:\n",
"# Arow = Arow.reset_index(\n",
"# Arow.at[idx,'inc_edge'] \n",
"# )\n",
"# print(Arow)\n",
"# if not Arow[['inc_edge', 'out_edge']].isna().all().all():\n",
"# inc_edge = Arow.inc_edge Arow['inc_edge']\n",
"# out_edge = Arow.out_edge\n",
"# new_histid.loc[row.Index, ['inc_edge_A', 'out_edge_A']] = [inc_edge, out_edge]\n",
"# Brow = new_match_indexed.loc[(phas_B, 'B')]\n",
"# if not Brow[['inc_edge', 'out_edge']].isna().all().all():\n",
"# inc_edge = Brow.inc_edge\n",
"# out_edge = Brow.out_edge\n",
"# new_histid.loc[row.Index, ['inc_edge_B', 'out_edge_B']] = [inc_edge, out_edge]\n",
"# new_histid.loc[row.Index, 'node_id'] = child_id\n",
"# new_histids.append(new_histid)\n",
"# new_histids = pd.concat(new_histids)\n",
"# self.histids = pd.concat([self.histid.copy(), new_histids])\n",
"# self.histids = self.histids.sort_values(by=['start_unix', 'node_id', 'phas_A', 'phas_B']).reset_index(drop=True)\n",
"# df2 = self.histids\n",
"# # 인덱스 재설정 전에 원래 상태로 되돌림\n",
"# self.match6.reset_index(inplace=True)\n",
"# display(df1)\n",
"# display(df2)\n",
"# df1 == df2"
]
},
{
"cell_type": "code",
"execution_count": 221,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"nan\n"
]
}
],
"source": [
"# Same expansion as the mask-based cell above, but with match6 grouped once so\n",
"# each (phase_no, ring_type, node_id) lookup is a dictionary access.\n",
"# NOTE(review): `np` and `pd` are not imported in this cell; they have to come from an earlier cell.\n",
"new_histids = []\n",
"# Rows keep their original relative order inside each group, so `.iloc[0]`\n",
"# below is the first matching row of match6.\n",
"match_set = {\n",
"    key: rows[['inc_edge', 'out_edge']]\n",
"    for key, rows in self.match6.groupby(['phase_no', 'ring_type', 'node_id'])\n",
"}\n",
"for parent_id in self.parent_ids:\n",
"    parent_histid = self.histid[self.histid.node_id == parent_id]\n",
"    for child_id in self.pa2ch[parent_id]:\n",
"        new_histid = parent_histid.copy()\n",
"        new_histid[['inc_edge_A', 'out_edge_A', 'inc_edge_B', 'out_edge_B']] = np.nan\n",
"        for row in new_histid.itertuples(index=True):\n",
"            for ring, phase_no in (('A', row.phas_A), ('B', row.phas_B)):\n",
"                matched = match_set.get((phase_no, ring, child_id))\n",
"                # NaN never compares equal to anything (not even itself), so\n",
"                # missing edges are detected with isna() instead of `== np.NaN`.\n",
"                # Edges are copied only when at least one of them is present.\n",
"                if matched is None or matched.isna().all().all():\n",
"                    continue\n",
"                new_histid.loc[row.Index, [f'inc_edge_{ring}', f'out_edge_{ring}']] = matched.iloc[0].tolist()\n",
"        # Every copied row now belongs to the child node.\n",
"        new_histid['node_id'] = child_id\n",
"        new_histids.append(new_histid)\n",
"new_histids = pd.concat(new_histids)\n",
"self.histids = pd.concat([self.histid.copy(), new_histids])\n",
"self.histids = self.histids.sort_values(by=['start_unix', 'node_id', 'phas_A', 'phas_B']).reset_index(drop=True)\n",
"\n",
"df1 = self.histids"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "rts",
"language": "python",
"name": "rts"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}