deleted archive folder. Maybe git can restore all the past information and archive folder is unnecessary

1 year ago · bbd6096015
--- a/Archives/Scripts/fetch_tables_1.py
+++ b/Archives/Scripts/fetch_tables_1.py
@ -1,40 +0,0 @@
 # (siggen) PS C:\Github\siggen> python .\Scripts\fetch_tables.py
 import pyodbc
 import os, json, csv
 from tqdm import tqdm
 from datetime import datetime

 # Dump each table of the `snits_siggen` schema to Data/fetched_tables/<table>.csv.
 # Paths are resolved against the current working directory, so the script is
 # expected to be launched from the repository root.
 starting_time = datetime.now()

 credentials_path = os.path.join(os.getcwd(), 'Scripts', 'credentials.json')
 with open(credentials_path, 'r') as file:
    credentials = json.load(file)

 DSNNAME = credentials["DSNNAME"]
 DBUSER = credentials["DBUSER"]
 DBPWD = credentials["DBPWD"]

 # Connect to the database.
 cnxn = pyodbc.connect(f'DSN={DSNNAME};UID={DBUSER};PWD={DBPWD};charset=utf-8')

 schema = 'snits_siggen'
 tables = ['inter_info', 'plan']

 base_dir = os.path.join(os.getcwd(), 'Data', 'fetched_tables')

 # The connection must stay open for every table; it used to be closed at the
 # end of the first loop iteration, which made the second query fail. It is now
 # closed exactly once, even when a query or a file write raises.
 try:
    cursor = cnxn.cursor()
    for table in tables:
        # Download the table contents. `schema` and `table` are hard-coded
        # constants above, so building the statement with an f-string is safe.
        cursor.execute(f"SELECT * FROM {schema}.{table}")

        csv_file_path = os.path.join(base_dir, f"{table}.csv")
        # utf-8-sig writes a BOM at the start of the file.
        with open(csv_file_path, 'w', newline='', encoding='utf-8-sig') as csvfile:
            csv_writer = csv.writer(csvfile)
            # Header row: the first field of each cursor.description entry.
            columns = [column[0] for column in cursor.description]
            csv_writer.writerow(columns)
            csv_writer.writerows(cursor.fetchall())
 finally:
    cnxn.close()

 print("elapsed time :", datetime.now() - starting_time)
--- a/Archives/Scripts/generate_signals.ipynb
+++ b/Archives/Scripts/generate_signals.ipynb
--- a/Archives/Scripts/generate_signals_2.py
+++ b/Archives/Scripts/generate_signals_2.py
@ -1,826 +0,0 @@
 # (siggen) PS C:\Github\snits_siggen> python .\Scripts\generate_signals.py
 import pandas as pd
 import numpy as np
 import os, sys
 import json
 import copy
 from tqdm import tqdm
 import sumolib, traci
 from datetime import datetime
 import time

 class SignalGenerator():
    def __init__(self):
        """Read Scripts/config.json and set up paths and the simulated time grid.

        The simulated day is fixed to 2024-01-05. ``present_time`` is the
        current wall-clock time of day projected onto that day and floored to
        a five-minute boundary (unix seconds).
        """
        # Root folder: the parent of the directory that contains this file.
        self.path_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        with open(os.path.join(self.path_root, 'Scripts', 'config.json'), 'r') as config_file:
            config = json.load(config_file)
        # Main folder paths; each config entry is a list of path components.
        self.paths = config['paths']
        self.path_data = os.path.join(self.path_root, *self.paths['data'])
        self.path_intermediates = os.path.join(self.path_root, *self.paths['intermediates'])
        self.path_results = os.path.join(self.path_root, *self.paths['results'])
        self.path_tables = os.path.join(self.path_root, *self.paths['tables'])
        self.path_networks = os.path.join(self.path_root, *self.paths['networks'])
        self.path_scripts = os.path.join(self.path_root, *self.paths['scripts'])
        # List of issues collected by the integrity checks.
        self.issues = []

        self.midnight = int(datetime(2024, 1, 5, 0, 0, 0).timestamp())
        self.next_day = int(datetime(2024, 1, 6, 0, 0, 0).timestamp())
        self.fsecs = range(self.midnight, self.next_day, 5) # fsecs : unix time by Five SECondS
        self.fmins = range(self.midnight, self.next_day, 300) # fmins : unix time by Five MINuteS

        # Project "now" onto the simulated day. The year has to be pinned as
        # well: with only month/day replaced, any run outside 2024 produced a
        # timestamp outside [midnight, next_day), so present_time was either
        # stuck at the last five-minute slot or max() received no candidates.
        now_on_sim_day = datetime.now().replace(year=2024, month=1, day=5).timestamp()
        # Floor to the latest five-minute mark that is not in the future.
        self.present_time = max(fmin for fmin in self.fmins if fmin <= now_on_sim_day)

        # Seconds added to present_time when make_rhistory appends a planned row.
        self.adder = 600

    # 1. 데이터 준비
    def prepare_data(self):
        print("1. 데이터를 준비합니다.")
        self.load_networks()
        self.load_tables()
        # self.check_networks()
        # self.check_tables()
        self.prepare_auxiliaries()

    # 1-1. 네트워크 불러오기
    def load_networks(self):
        """Step 1-1: read ``sn.net.xml`` from the networks folder into ``self.net``."""
        net_file = os.path.join(self.path_networks, 'sn.net.xml')
        self.net = sumolib.net.readNet(net_file)
        print("1-1. 네트워크가 로드되었습니다.")

    # 1-2. 테이블 불러오기
    def load_tables(self):
        """Step 1-2: read every input CSV with one shared dtype mapping.

        Sets ``inter_info``, ``plan``, ``history``, ``inter_node`` (from the
        tables folder), ``matching``, ``match1``, ``match6`` (from the
        intermediates folder) and ``inter_nos``.
        """
        # One dtype per known column name, grouped by type.
        int_columns = ['inter_no', 'start_hour', 'start_minute', 'cycle', 'offset',
                       'end_unix', 'group_no', 'main_phase_no', 'phase_no']
        str_columns = ['node_id', 'inter_type', 'parent_id', 'child_id', 'direction',
                       'condition', 'inc_edge', 'out_edge', 'inter_name', 'ring_type']
        float_columns = ['inter_lat', 'inter_lon']
        column_types = {
            **dict.fromkeys(int_columns, 'int'),
            **dict.fromkeys(str_columns, 'str'),
            **dict.fromkeys(float_columns, 'float'),
        }
        # Per-ring, per-phase angle and duration columns (rings A/B, phases 1..8).
        for ring in ('A', 'B'):
            for phase in range(1, 9):
                column_types[f'angle_{ring}{phase}'] = 'str'
                column_types[f'dura_{ring}{phase}'] = 'int'

        def read_table(folder, stem):
            return pd.read_csv(os.path.join(folder, f'{stem}.csv'), dtype=column_types)

        # Same reading order as before: tables first, then intermediates.
        for stem in ('inter_info', 'plan', 'history', 'inter_node'):
            setattr(self, stem, read_table(self.path_tables, stem))
        for stem in ('matching', 'match1', 'match6'):
            setattr(self, stem, read_table(self.path_intermediates, stem))
        kept_columns = ['node_id', 'phase_no', 'ring_type', 'inc_edge', 'out_edge']
        self.match6 = self.match6[kept_columns].reset_index(drop=True)

        # Sorted list of intersection numbers.
        self.inter_nos = sorted(self.inter_info.inter_no.unique())
        print("1-2. 테이블들이 로드되었습니다.")
    
    # 1-3. 네트워크 무결성 검사
    def check_networks(self):
        # https://sumo.dlr.de/docs/Netedit/neteditUsageExamples.html#simplify_tls_program_state_after_changing_connections
        if 'SUMO_HOME' in os.environ:
            tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
            if tools not in sys.path:
                sys.path.append(tools)
        else:
            raise EnvironmentError("please declare environment variable 'SUMO_HOME'")
        traci.start([sumolib.checkBinary('sumo'), "-n", os.path.join(self.path_networks, 'sn.net.xml')])
        nodes = [node for node in self.net.getNodes() if node.getType()=='traffic_light']
        for node in nodes:
            node_id = node.getID()
            from_xml   = len([c for c in node.getConnections() if c.getTLLinkIndex() >= 0])
            from_traci = len(traci.trafficlight.getRedYellowGreenState(node_id))
            if from_xml != from_traci:
                sub = {'id': node_id, 'type': 'node', 'note': '유효하지 않은 연결이있음. netedit에서 clean states 필요.'}
                self.issues.append(sub)
        traci.close()
        print("1-3. 네트워크의 모든 clean state requirement들을 체크했습니다.")

    # 1-4. 테이블 무결성 검사
    def check_tables(self):
        self.check_history()
        # 교차로정보, 방위각정보, 신호계획에 대해서는 preprocess_daily.py에서
        # 무결성검사를 완료했으므로 여기에서는 따로 검사하지 않음.
        # self.check_moves() # 이동류번호에 대한 무결성검사 필요하나 아직 작성하지 않음. (24. 2. 5 화)
        print("1-4. 테이블들의 무결성 검사를 완료했습니다.")

    # 1-4-1. 신호이력(history) 검사
    def check_history(self):
        # 1-4-1-1. inter_no 검사
        # self.history.loc[0, 'inter_no'] = '4' # 에러 발생을 위한 코드
        missing_inter_nos = set(self.history.inter_no) - set(self.inter_nos)
        if missing_inter_nos:
            msg = f"1-4-1-1. history의 inter_no 중 교차로 목록(inter_nos)에 포함되지 않는 항목이 있습니다: {missing_inter_nos}"
            self.issues.append(msg)

        # 1-4-1-2. 종료유닉스 검사
        # self.history.loc[0, 'end_unix'] = 38.0 # 에러 발생을 위한 코드
        self.min_unix, self.max_unix = int(datetime(2020, 1, 1).timestamp()), int(datetime(2038, 1, 1).timestamp())
        for _, row in self.history.iterrows():
            unixbool = self.min_unix <= row['end_unix'] <= self.max_unix
            if not unixbool:
                msg = f"1-4-1-2. 적정 범위를 벗어난 유닉스시각(end_unix)이 존재합니다 : inter_no : {row['inter_no']}"
                self.issues.append(msg)

        # 1-4-1-3. 현시시간 검사
        # self.history.loc[0, 'dura_A1'] = -2 # 에러 발생을 위한 코드
        durations = self.history[[f'dura_{alph}{j}' for alph in ['A','B'] for j in range(1, 9)]]
        valid_indices = ((durations >= 0) & (durations <= 200)).all(axis=1)
        invalid_inter_nos = sorted(self.history[~ valid_indices].inter_no.unique())
        if invalid_inter_nos:
            msg = f"1-4-1-3. 음수이거나 200보다 큰 현시시간이 존재합니다. : {invalid_inter_nos}"

    # 1-5. 보조 딕셔너리, 데이터프레임, 리스트 등 만들기
    def prepare_auxiliaries(self):
        """Step 1-5: build the helper dictionaries, frames and lists.

        Reads ``self.inter_node``, ``self.plan``, ``self.net`` and the
        ``node2num_cycles.json`` file in the intermediates folder. Sets
        ``inter2node``, ``node2inter``, ``hours``, ``splits``, ``isplits``,
        ``timetable``, ``pa2ch``, ``node_ids``, ``parent_ids``, ``nodes`` and
        ``node2num_cycles``.
        """
        # inter2node : a dictionary that maps inter_no to the node_id
        # (built from the rows whose inter_type is 'parent' only)
        inter_node_p = self.inter_node[self.inter_node.inter_type=='parent']
        self.inter2node = dict(zip(inter_node_p['inter_no'], inter_node_p['node_id']))
        self.node2inter = dict(zip(self.inter_node['node_id'], self.inter_node['inter_no']))

        # hours : hourly timestamps from two hours before midnight through next_day
        self.hours = np.array(range(self.midnight - 7200, self.next_day + 1, 3600))

        # split, isplit : dictionaries used to separate or merge rings A and B
        self.splits = {} # splits maps (inter_no, start_hour, start_minute) to split
        for i, row in self.plan.iterrows():
            inter_no = row.inter_no
            start_hour = row.start_hour
            start_minute = row.start_minute
            cycle = row.cycle
            # Cumulative phase end times within the cycle, per ring.
            cums_A = row[[f'dura_A{j}' for j in range(1,9)]].cumsum()
            cums_B = row[[f'dura_B{j}' for j in range(1,9)]].cumsum()
            self.splits[(inter_no, start_hour, start_minute)] = {} # split maps (phas_A, phas_B) to k
            k = 0
            # Walk the cycle second by second; k increases each time the
            # (phas_A, phas_B) pair changes.
            for t in range(cycle):
                # Phase number at second t = count of cumulative ends below t, plus one.
                new_phas_A = len(cums_A[cums_A < t]) + 1
                new_phas_B = len(cums_B[cums_B < t]) + 1
                # `k == 0` short-circuits on the first second, before phas_A/phas_B exist.
                if k == 0 or ((new_phas_A, new_phas_B) != (phas_A, phas_B)):
                    k += 1
                phas_A = new_phas_A
                phas_B = new_phas_B
                self.splits[(inter_no, start_hour, start_minute)][(phas_A, phas_B)] = k
        self.isplits = {} # the inverse of splits
        for i in self.splits:
            self.isplits[i] = {self.splits[i][k]:k for k in self.splits[i]} # isplit maps k to (phas_A, phas_B)

        # timetable : the distinct program start times found in the plan
        self.timetable = self.plan[['start_hour', 'start_minute']].drop_duplicates()
        self.timetable['start_seconds'] = self.midnight + self.timetable['start_hour'] * 3600 + self.timetable['start_minute'] * 60

        # A dictionary that maps parent_id to a list of child_ids
        # NOTE(review): hard-coded for one specific network — confirm it matches sn.net.xml.
        self.pa2ch = {'i0':['u00'], 'i1':[], 'i2':['u20'], 'i3':['c30', 'u30', 'u31', 'u32'], 'i6':['u60'], 'i7':[], 'i8':[], 'i9':[]}
        self.node_ids = sorted(self.inter_node.node_id.unique())
        self.parent_ids = sorted(self.inter_node[self.inter_node.inter_type=='parent'].node_id.unique())
        self.nodes = [self.net.getNode(node_id) for node_id in self.node_ids]

        # node2num_cycles : A dictionary that maps a node_id to the number of cycles
        with open(os.path.join(self.path_intermediates, 'node2num_cycles.json'), 'r') as file:
            # json.load() parses the file content into a Python dictionary.
            self.node2num_cycles = json.load(file)

    # 2. 신호이력 전처리
    def process_history(self):
        print("2. 신호이력 테이블을 변환합니다.")
        self.make_rhistory()
        self.time21 = datetime.now()
        self.make_rhists()
        self.time22 = datetime.now()
        self.make_hrhists()
        self.time23 = datetime.now()
    
    # 2-1. rhistory
    def make_rhistory(self):
        """Step 2-1: build ``self.rhistory`` (recent history) from ``self.history``.

        Keeps the history rows that ended within the last 9000 seconds, adds
        planned rows for intersections with no recent history and one planned
        row per intersection at ``present_time + adder``, then derives a
        ``start_unix`` column. The result keeps only ``inter_no``,
        ``start_unix``, the 16 ``dura_*`` columns and ``cycle``.
        """
        # 1. Collect the signal history up to the unix time of the query.
        self.rhistory = self.history.copy() # recent history
        self.rhistory = self.rhistory[(self.rhistory.end_unix <= self.present_time) & (self.rhistory.end_unix > self.present_time - 9000)] # history from two and a half hours ago until now. 9000 = 3600 * 2.5

        # If some intersection numbers are absent from rhistory, add planned rows for them (at the most recent program start time).
        whole_inter_nos = sorted(self.history.inter_no.unique())
        recent_inter_nos = sorted(self.rhistory.inter_no.unique())
        if not whole_inter_nos==recent_inter_nos:
            for inter_no in set(whole_inter_nos) - set(recent_inter_nos):
                program_start, prow = self.load_prow(inter_no, self.present_time - 9000)
                cycle = prow.cycle.iloc[0]
                row1 = prow.drop(['start_hour', 'start_minute'], axis=1).copy()
                row2 = prow.drop(['start_hour', 'start_minute'], axis=1).copy()
                # Append the needed part of prow to rhistory: two rows, one cycle apart.
                row1['end_unix'] = program_start
                row2['end_unix'] = program_start + cycle
                self.rhistory = pd.concat([self.rhistory, row1, row2]).reset_index(drop=True)
        # Add one cycle of planned signal at time present_time + adder.
        for inter_no in set(whole_inter_nos):
            program_start, prow = self.load_prow(inter_no, self.present_time)
            cycle = prow.cycle.iloc[0]
            row3 = prow.drop(['start_hour', 'start_minute'], axis=1).copy()
            # Append the needed part of prow to rhistory.
            row3['end_unix'] = self.present_time + self.adder
            self.rhistory = pd.concat([self.rhistory, row3]).reset_index(drop=True)

        # 2. Create the start-unix column: end unix time minus the sum of the phase durations.
        # - If a previous cycle record exists within 10 seconds of that value, use its unix time
        #   as the start; otherwise use end unix minus the duration sum.
        for i, row in self.rhistory.iterrows():
            inter_no = row.inter_no
            end_unix = row.end_unix
            elapsed_time = row[[f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]].sum() // 2 # sum of phase durations (both rings summed, then halved)
            # Default when no previous unix exists: end minus duration sum.
            start_unix = end_unix - elapsed_time
            # NOTE(review): `i` is an index label of self.rhistory (reset by the concat
            # calls above), but it is used to slice self.history positionally — confirm
            # that self.history (not self.rhistory) is the intended frame here.
            pre_rows = self.history[:i] # previous rows
            if inter_no in pre_rows.inter_no.unique(): # a previous unix exists
                pre_unix = pre_rows[pre_rows.inter_no == inter_no]['end_unix'].iloc[-1] # previous unix time
                # Previous unix exists, abs < 10 : use the previous unix.
                if abs(pre_unix - start_unix) < 10:
                    start_unix = pre_unix
                # Previous unix exists, abs >= 10 : keep end minus duration sum.
                else:
                    pass
            self.rhistory.loc[i, 'start_unix'] = start_unix
        # Replace every NaN in the frame with 0.
        self.rhistory[self.rhistory.isna()] = 0
        self.rhistory['start_unix'] = self.rhistory['start_unix'].astype(int)
        self.rhistory = self.rhistory[['inter_no', 'start_unix'] + [f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)] + ['cycle']]

    def load_prow(self, inter_no, time):
        '''
        load planned row
        '''
        # 프로그램 시작시각
        program_starts = np.array(self.timetable.start_seconds)
        idx = (program_starts <= time).sum() - 1
        program_start = program_starts[idx]

        # 최근 프로그램 시작시각에 대한 신호계획
        start_hour = self.timetable.iloc[idx].start_hour
        start_minute = self.timetable.iloc[idx].start_minute
        prow = self.plan[(self.plan.inter_no==inter_no) & (self.plan.start_hour==start_hour) & (self.plan.start_minute==start_minute)] # planned row
        return program_start, prow

    # 2-2. rhists
    def make_rhists(self):
        """Step 2-2: clean each intersection's recent history into ``self.rhists``.

        For every intersection in ``self.rhistory`` the rows are walked in
        order; rows whose D_n and S_n (see ``calculate_DS``) differ by more
        than 5 are treated as untrustworthy and are filled in from the plan,
        deleted, or rescaled. The per-intersection frames are concatenated,
        restricted to ``start_unix >= present_time - 3600`` and stored without
        the D_n / S_n helper columns.

        NOTE(review): the walk reads rows 0 and 1 unconditionally, so each
        intersection is assumed to have at least two distinct start_unix
        values — confirm upstream guarantees this.
        """
        self.rhists = []
        for inter_no in sorted(self.rhistory.inter_no.unique()):
            filtered_rhist = self.rhistory[self.rhistory.inter_no == inter_no].drop_duplicates(subset=['start_unix']).reset_index(drop=True)
            self.rhist = filtered_rhist

            # Define the D_n and S_n values.
            self.rhist['D_n'] = 0 # D_n : time difference
            self.rhist['S_n'] = 0 # S_n : sum of phase durations
            for n in range(len(self.rhist)):
                curr_unix = self.rhist.iloc[n].start_unix # current start_unix
                self.rhist.loc[n, ['D_n', 'S_n']] = self.calculate_DS(self.rhist, curr_unix)

            # Previous time, current time.
            prev_unix = self.rhist.loc[0, 'start_unix'] # previous start_unix
            curr_unix = self.rhist.loc[1, 'start_unix'] # current start_unix

            # Repeat until the last row of rhist is reached.
            while True:
                n = self.rhist[self.rhist.start_unix==curr_unix].index[0]
                cycle = self.rhist.loc[n, 'cycle']
                D_n = self.rhist.loc[n, 'D_n']
                S_n = self.rhist.loc[n, 'S_n']
                # The row is considered a true value.
                if (abs(D_n - S_n) <= 5):
                    pass
                # The row is not a true value.
                else:
                    # 2-1-1. Missing data : if two adjacent start_unix values differ by at least twice the
                    # planned cycle, data is considered missing and is "replaced" by the planned durations.
                    if curr_unix - prev_unix >= 2 * cycle:
                        # Advance prev_unix by the planned cycle, filling in one row at a time
                        # (until the gap to curr_unix is at most one planned cycle).
                        while curr_unix - prev_unix > cycle:
                            prev_unix += cycle
                            # Load the signal plan (prow) in force at prev_unix.
                            start_seconds = np.array(self.timetable.start_seconds)
                            idx = (start_seconds <= prev_unix).sum() - 1
                            start_hour = self.timetable.iloc[idx].start_hour
                            start_minute = self.timetable.iloc[idx].start_minute
                            prow = self.plan.copy()[(self.plan.inter_no==inter_no) & (self.plan.start_hour==start_hour) & (self.plan.start_minute==start_minute)] # planned row
                            # Append the needed part of prow to rhist.
                            prow['start_unix'] = prev_unix
                            prow = prow.drop(['start_hour', 'start_minute', 'offset'], axis=1)
                            cycle = prow.iloc[0].cycle
                            self.rhist = pd.concat([self.rhist, prow])
                            self.rhist = self.rhist.sort_values(by='start_unix').reset_index(drop=True)
                            # The inserted row sorts before the current one, shifting it down by one.
                            n += 1

                    # 2-1-2. Outliers : depending on the ratio, "delete" the row (R_n <= 0.5) or "adjust" it (R_n > 0.5).
                    R_n = (curr_unix - prev_unix) / cycle # R_n : ratio
                    # If R_n is at most 0.5, delete the row.
                    if R_n <= 0.5:
                        self.rhist = self.rhist.drop(index=n).reset_index(drop=True)
                        if n >= self.rhist.index[-1]:
                            break
                        # Redefine curr_unix and R_n after the deletion.
                        curr_unix = self.rhist.loc[n, 'start_unix']
                        R_n = (curr_unix - prev_unix) / cycle # R_n : ratio

                    # If R_n exceeds 0.5, adjust the row (replace the durations, scaled by the ratio).
                    if R_n > 0.5:
                        # Load the signal plan (prow) in force at curr_unix.
                        start_seconds = np.array(self.timetable.start_seconds)
                        idx = (start_seconds <= curr_unix).sum() - 1
                        start_hour = self.timetable.iloc[idx].start_hour
                        start_minute = self.timetable.iloc[idx].start_minute
                        prow = self.plan[(self.plan.inter_no==inter_no) & (self.plan.start_hour==start_hour) & (self.plan.start_minute==start_minute)] # planned row
                        # Adjusted durations (prow multiplied by R_n, then converted to integers).
                        adjusted_dur = prow.copy()[[f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]] * R_n
                        int_parts = adjusted_dur.iloc[0].apply(lambda x: int(x))
                        frac_parts = adjusted_dur.iloc[0] - int_parts
                        difference = round(adjusted_dur.iloc[0].sum()) - int_parts.sum()
                        for _ in range(difference): # round up the 'difference' entries with the largest fractional parts
                            max_frac_index = frac_parts.idxmax()
                            int_parts[max_frac_index] += 1
                            frac_parts[max_frac_index] = 0  # entries already handled are set to 0
                        # Write the adjusted durations back into rhist.
                        self.rhist.loc[n, [f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]] = int_parts.values
                        self.rhist.loc[n, 'cycle'] = int_parts.sum().sum() // 2

                if n >= self.rhist.index[-1]:
                    break
                prev_unix = curr_unix
                curr_unix = self.rhist.loc[n+1, 'start_unix']

            self.rhists.append(self.rhist)
        self.rhists = pd.concat(self.rhists).sort_values(by=['start_unix','inter_no'])
        # Keep only the last hour before present_time (and anything later).
        self.rhists = self.rhists[self.rhists.start_unix >= self.present_time - 3600]
        self.rhists = self.rhists.drop(columns=['D_n', 'S_n'])

    def calculate_DS(self, rhist, curr_unix):
        program_starts = np.array(self.timetable.start_seconds)
        idx = (program_starts <= self.present_time).sum() - 1
        program_start = program_starts[idx]
        if list(self.hours[self.hours <= curr_unix]):
            ghour_lt_curr_unix = self.hours[self.hours <= curr_unix].max() # the greatest hour less than or equal to curr_unix
        else:
            ghour_lt_curr_unix = program_start
        start_unixes = rhist.start_unix.unique()
        start_unixes_lt_ghour = np.sort(start_unixes[start_unixes < ghour_lt_curr_unix]) # start unixes less than ghour_lt_curr_unix
        # 기준유닉스(base_unix) : curr_unix보다 작은 hour 중에서 가장 큰 값으로부터 다섯 번째로 작은 start_unix
        if len(start_unixes_lt_ghour) > 5:
            base_unix = start_unixes_lt_ghour[-5]
        # start_unixes_lt_ghour의 길이가 5 미만일 경우에는 맨 앞 start_unix로 base_unix를 지정
        else:
            base_unix = rhist.start_unix.min()
        D_n = curr_unix - base_unix
        S_n_durs = rhist[(rhist.start_unix > base_unix) & (rhist.start_unix <= curr_unix)] \
            [[f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]]
        S_n = S_n_durs.values.sum() // 2
        return D_n, S_n

    # 2-2. hrhists
    def make_hrhists(self):
        # 계층화된 형태로 변환
        self.hrhists = [] # hierarchied recent history
        for i, row in self.rhists.iterrows():
            inter_no = row.inter_no
            start_unix = row.start_unix

            ind = (self.timetable['start_seconds'] <= row.start_unix).sum() - 1
            start_hour = self.timetable.iloc[ind].start_hour
            start_minute = self.timetable.iloc[ind].start_minute
            self.isplit = self.isplits[(inter_no, start_hour, start_minute)]
            phas_As = [self.isplit[j][0] for j in self.isplit.keys()]
            phas_Bs = [self.isplit[j][1] for j in self.isplit.keys()]
            durs_A = row[[f'dura_A{j}' for j in range(1,9)]]
            durs_B = row[[f'dura_B{j}' for j in range(1,9)]]
            durations = []
            for j in range(1, len(self.isplit)+1):
                ja = self.isplit[j][0]
                jb = self.isplit[j][1]
                if ja == jb:
                    durations.append(min(durs_A[ja-1], durs_B[jb-1]))
                else:
                    durations.append(abs(durs_A[ja-1] - durs_B[ja-1]))
            new_rows = pd.DataFrame({'inter_no':[inter_no] * len(durations), 'start_unix':[start_unix] * len(durations),
                                    'phas_A':phas_As, 'phas_B':phas_Bs, 'duration':durations})
            self.hrhists.append(new_rows)
        self.hrhists = pd.concat(self.hrhists)
        self.hrhists = self.hrhists.sort_values(by = ['start_unix', 'inter_no', 'phas_A', 'phas_B']).reset_index(drop=True)

    # 3. 이동류정보 전처리
    def process_movement(self):
        print("3. 이동류정보 테이블을 변환합니다.")
        self.make_movement()
        self.update_movement()

    # 3-1. movement
    def make_movement(self):
        """Step 3-1: replay the last five minutes of movement snapshots.

        For every five-second mark in ``[present_time - 300, present_time]``
        the snapshot ``move_<fsec>.csv`` is read from the ``move`` folder
        under the tables path and merged into a running movement table.
        ``self.movement`` is then loaded from
        ``movement/movement_<present_time>.csv`` under the intermediates path.

        NOTE(review): the table accumulated in the loop is never stored; only
        the CSV read at the end reaches ``self.movement``. The loop is kept so
        that a missing snapshot file still raises as before — confirm whether
        the accumulated table was meant to be used instead.
        """
        # 5. Movement history (frame D). It starts out empty; this replaces the
        #    previous trick of catching NameError on the first iteration.
        movement = pd.DataFrame()
        # - Repeat the procedure below every five seconds.
        for fsec in range(self.present_time - 300, self.present_time + 1, 5): # fsec : unix time by Five SECond
            # 1. Read the state table snapshot for this time mark : A
            move = pd.read_csv(os.path.join(self.path_tables, 'move', f'move_{fsec}.csv'), index_col=0)
            # 2. From the history table, take per intersection the last row that ended before fsec : B
            recent_histories = [group.iloc[-1:] for _, group in self.history[self.history['end_unix'] < fsec].groupby('inter_no')]
            if not recent_histories:
                rhistory = pd.DataFrame({'inter_no':[], 'end_unix':[]}) # recent history
            else:
                rhistory = pd.concat(recent_histories)
            recent_unix = rhistory[['inter_no', 'end_unix']]
            # 3. Left-join A with B on the intersection number : C
            move = pd.merge(move, recent_unix, how='left', on='inter_no')
            move['end_unix'] = move['end_unix'].fillna(0).astype(int)
            move = move.drop_duplicates()
            # 4. The last end time becomes the start time of the current cycle.
            move = move.rename(columns = {'end_unix':'start_unix'})
            # 6. Add C to the movement history (D).
            movement = pd.concat([movement, move])
            # 7. Drop rows of D that share (inter_no, phas_A, phas_B, start_unix).
            movement = movement.drop_duplicates(['inter_no','phas_A','phas_B','start_unix'])
            # 8. Keep only rows whose start_unix is later than fsec - 3600.
            movement = movement[movement.start_unix > fsec - 3600]
            movement = movement.sort_values(by=['start_unix','inter_no','phas_A','phas_B']).reset_index(drop=True)
        self.movement = pd.read_csv(os.path.join(self.path_intermediates, 'movement', f'movement_{self.present_time}.csv'), index_col=0)

    # 3-2. movement_updated
    def update_movement(self):
        # 중복을 제거하고 (inter_no, start_unix) 쌍을 만듭니다.
        hrhists_inter_unix = set(self.hrhists[['inter_no', 'start_unix']].drop_duplicates().itertuples(index=False, name=None))
        movement_inter_unix = set(self.movement[['inter_no', 'start_unix']].drop_duplicates().itertuples(index=False, name=None))

        # hrhists에는 있지만 movement에는 없는 (inter_no, start_unix) 쌍을 찾습니다.
        missing_in_movement = hrhists_inter_unix - movement_inter_unix

        # 새로운 행들을 생성합니다.
        new_rows = []
        if missing_in_movement:
            for inter_no, start_unix in missing_in_movement:
                # match1에서 해당 inter_no의 데이터를 찾습니다.
                new_row = self.match1[self.match1['inter_no'] == inter_no].copy()
                # start_unix 값을 설정합니다.
                new_row['start_unix'] = start_unix
                new_rows.append(new_row)

            # 새로운 데이터프레임을 생성하고 기존 movement 데이터프레임과 합칩니다.
            new_movement = pd.concat(new_rows, ignore_index=True)
            self.movement_updated = pd.concat([self.movement, new_movement], ignore_index=True)
        else:
            self.movement_updated = self.movement

    # 4. 통합테이블 생성
    def make_histids(self):
        print("4. 통합 테이블을 생성합니다.")
        self.merge_dfs()
        self.time41 = datetime.now()
        self.attach_children()
        self.time42 = datetime.now()

    # 4-1. histid
    def merge_dfs(self):
        """Step 4-1: join durations with movements and edge ids into ``self.histid``.

        ``self.hrhists`` and ``self.movement_updated`` are inner-joined on
        (inter_no, start_unix, phas_A, phas_B); each movement number is then
        translated into incoming/outgoing edge ids through ``self.matching``.
        The result keeps only rows with ``start_unix > present_time - 600``.

        Side effect: ``self.matching`` is re-indexed in place by
        (inter_no, move_no), so this method cannot be run twice on the same
        ``matching`` frame.
        """
        # movements and durations
        movedur = pd.merge(self.hrhists, self.movement_updated, how='inner', on=['inter_no', 'start_unix', 'phas_A', 'phas_B'])
        movedur = movedur.sort_values(by=['start_unix', 'inter_no', 'phas_A','phas_B'])
        movedur = movedur[['inter_no', 'start_unix', 'phas_A', 'phas_B', 'move_A', 'move_B', 'duration']]

        # Set a multi-index on the matching DataFrame.
        self.matching.set_index(['inter_no', 'move_no'], inplace=True)
        self.matching.sort_index(inplace=True)

        for row in movedur.itertuples(index=True):
            inter_no = row.inter_no
            start_unix = row.start_unix
            move_A = row.move_A
            move_B = row.move_B
            
            # incoming and outgoing edges A
            # Movement numbers 17 and 18 get no edges (NaN).
            # NOTE(review): presumably these codes mean "no vehicle movement" — confirm.
            if move_A in [17, 18]:
                inc_edge_A = np.nan
                out_edge_A = np.nan
            else:
                match_A = self.matching.loc[(inter_no, move_A)]
                inc_edge_A = match_A.inc_edge.values[0]
                out_edge_A = match_A.out_edge.values[0]
            movedur.at[row.Index, 'inc_edge_A'] = inc_edge_A
            movedur.at[row.Index, 'out_edge_A'] = out_edge_A
            
            # incoming and outgoing edges B
            if move_B in [17, 18]:
                inc_edge_B = np.nan
                out_edge_B = np.nan
            else:
                match_B = self.matching.loc[(inter_no, move_B)]
                inc_edge_B = match_B.inc_edge.values[0]
                out_edge_B = match_B.out_edge.values[0]
            movedur.at[row.Index, 'inc_edge_B'] = inc_edge_B
            movedur.at[row.Index, 'out_edge_B'] = out_edge_B

        # Drop the movement columns.
        movedur = movedur.drop(['move_A', 'move_B'], axis=1)

        self.histid = movedur.copy() # history with edge ids (incoming and outgoing edge ids)
        self.histid['node_id'] = self.histid['inter_no'].map(self.inter2node)
        self.histid = self.histid[['inter_no', 'node_id', 'start_unix', 'phas_A', 'phas_B', 'duration', 'inc_edge_A', 'out_edge_A', 'inc_edge_B', 'out_edge_B']]
        # Keep only the cycles that started within the last 600 seconds (or later).
        histid_start = self.present_time - 600
        self.histid = self.histid[self.histid.start_unix > histid_start]
    
    # 4-2. histids
    def attach_children(self):
        '''
        Attach incoming/outgoing edge information for child intersections.

        input :
        (1) histid
        - phase duration and incoming/outgoing edges per
          (start unix, phase A, phase B) for each intersection
        - values are set for parent (main) intersections only
        (2) match6
        - incoming/outgoing edges per (phase, ring)
        - values are also set for child intersections (U-turn and coordinated intersections)
        (3) parent_ids : list of parent intersections
        (4) pa2ch : dictionary mapping each parent intersection id to the list of
            ids of the child intersections it contains

        output : histids
        - phase duration and incoming/outgoing edges per
          (start unix, phase A, phase B) for all (parent and child) intersections
        '''
        new_histids = []
        for parent_id in self.parent_ids:
            for child_id in self.pa2ch[parent_id]:
                # Start from the parent's rows and blank out the edge columns.
                new_histid = self.histid.copy()[self.histid.node_id==parent_id]
                new_histid[['inc_edge_A', 'out_edge_A', 'inc_edge_B', 'out_edge_B']] = np.nan
                for row in new_histid.itertuples(index=True):
                    phas_A = row.phas_A
                    phas_B = row.phas_B
                    new_match = self.match6[self.match6.node_id==child_id]
                    # Ring A: take the child's edges for this phase, unless both are NaN.
                    Arow = new_match[(new_match.phase_no==phas_A) & (new_match.ring_type=='A')]
                    if not Arow[['inc_edge', 'out_edge']].isna().all().all():
                        inc_edge = Arow.iloc[0].inc_edge
                        out_edge = Arow.iloc[0].out_edge
                        new_histid.loc[row.Index, ['inc_edge_A', 'out_edge_A']] = [inc_edge, out_edge]
                    # Ring B: same lookup with the B phase.
                    Brow = new_match[(new_match.phase_no==phas_B) & (new_match.ring_type=='B')]
                    if not Brow[['inc_edge', 'out_edge']].isna().all().all():
                        inc_edge = Brow.iloc[0].inc_edge
                        out_edge = Brow.iloc[0].out_edge
                        new_histid.loc[row.Index, ['inc_edge_B', 'out_edge_B']] = [inc_edge, out_edge]
                    # Relabel the copied row as belonging to the child node.
                    new_histid.loc[row.Index, 'node_id'] = child_id
                new_histids.append(new_histid)
        new_histids = pd.concat(new_histids)
        # Parent rows first, then the generated child rows; sorted afterwards.
        self.histids = pd.concat([self.histid.copy(), new_histids])
        self.histids = self.histids.sort_values(by=['start_unix', 'node_id', 'phas_A', 'phas_B']).reset_index(drop=True)

    # 5. 신호 생성
    def get_signals(self):
        print("5. 신호를 생성합니다.")
        self.initialize_states()
        self.assign_signals()
        self.set_timepoints()
        self.assign_red_yellow()
        self.make_tl_file()

    # 5-1. 신호초기화
    def initialize_states(self):
        '''
        Initialise the signal states.

        input :
        (1) net : the network
        (2) nodes : list of nodes
        (3) histids : phase duration and incoming/outgoing edges per
            (start unix, phase A, phase B) for all intersections

        output : node2init
        - dictionary mapping each node id to its initialised signal state
        - in the initialised state a link is 'r' if it has a foe connection
          and 'g' otherwise (the original note describes this as right turns
          being g and everything else r)
        '''
        self.node2init = {}
        for node in self.nodes:
            node_id = node.getID()
            # Connections with a valid junction index, ordered by that index.
            conns = [(c.getJunctionIndex(), c) for c in node.getConnections()]
            conns = [c for c in conns if c[0] >= 0]
            conns = sorted(conns, key=lambda x: x[0])
            state = []
            for i, ci in conns:
                # Connections without a traffic-light link index get no state entry.
                if ci.getTLLinkIndex() < 0:
                    continue
                are_foes = False
                for j, cj in conns:
                    # Connections ending at the same target are not counted as foes.
                    if ci.getTo() == cj.getTo():
                        continue
                    if node.areFoes(i, j):
                        are_foes = True
                        break
                state.append('r' if are_foes else 'g')
            self.node2init[node_id] = state

        # A link that conflicts with no other connection but does have a signal assigned gets r.
        for _, row in self.histids.iterrows():
            node_id = row['node_id']
            inc_edge_A = row.inc_edge_A
            inc_edge_B = row.inc_edge_B
            out_edge_A = row.out_edge_A
            out_edge_B = row.out_edge_B

            # Ring A: skip when either edge is missing.
            if pd.isna(inc_edge_A) or pd.isna(out_edge_A):
                pass
            else:
                inc_edge_A = self.net.getEdge(inc_edge_A)
                out_edge_A = self.net.getEdge(out_edge_A)
                for conn in inc_edge_A.getConnections(out_edge_A):
                    index = conn.getTLLinkIndex()
                    if index >= 0:
                        self.node2init[node_id][index] = 'r'

            # Ring B: same treatment.
            if pd.isna(inc_edge_B) or pd.isna(out_edge_B):
                pass
            else:
                inc_edge_B = self.net.getEdge(inc_edge_B)
                out_edge_B = self.net.getEdge(out_edge_B)
                for conn in inc_edge_B.getConnections(out_edge_B):
                    index = conn.getTLLinkIndex()
                    if index >= 0:
                        self.node2init[node_id][index] = 'r'

    # 5-2. 녹색신호 부여
    def assign_signals(self):
        '''
        Turn the entry/exit edges of every phase row into a signal-state string.

        Reads:
        (1) self.histids   : one row per (node_id, start_unix, phas_A, phas_B)
                             with duration and the inc_edge_A/out_edge_A/
                             inc_edge_B/out_edge_B columns
        (2) self.node2init : dict mapping node id -> list of initial state
                             characters
        (3) self.net       : network object (getEdge is used)

        Writes self.sigtable with the columns
        node_id, start_unix, phase_sumo, duration, state, start_dt.
        - state starts from the node's initial state; every signal-controlled
          connection from the ring A / ring B entry edge to its exit edge is
          set to 'G'.
        - phase_sumo is the running row number inside each
          (node_id, start_unix) group.
        - Yellow and red transitions are not reflected yet.

        Rows whose node has no entry in node2init get no state and are removed
        by the dropna step.
        '''
        sigtable = self.histids.copy()
        states = []
        for row in sigtable.itertuples(index=False):
            init_state = self.node2init.get(row.node_id)
            if init_state is None:
                # No initial state known for this node: leave the state
                # missing so the row is dropped below.
                states.append(None)
                continue
            # Copy only this node's list; node2init itself must stay untouched.
            state = list(init_state)
            ring_edges = (
                (row.inc_edge_A, row.out_edge_A),
                (row.inc_edge_B, row.out_edge_B),
            )
            for inc_name, out_name in ring_edges:
                if pd.isna(inc_name) or pd.isna(out_name):
                    continue
                inc_edge = self.net.getEdge(inc_name)
                out_edge = self.net.getEdge(out_name)
                for conn in inc_edge.getConnections(out_edge):
                    index = conn.getTLLinkIndex()
                    if index >= 0:
                        state[index] = 'G'
            states.append(''.join(state))
        sigtable['state'] = states

        sigtable = sigtable.dropna(subset=['state']).reset_index(drop=True)
        sigtable['phase_sumo'] = sigtable.groupby(['node_id', 'start_unix']).cumcount()
        sigtable = sigtable[['node_id', 'start_unix', 'phase_sumo', 'duration', 'state']]
        sigtable = sigtable.sort_values(by=['start_unix', 'node_id'])
        sigtable['start_dt'] = sigtable['start_unix'].apply(lambda x: datetime.fromtimestamp(x))
        self.sigtable = sigtable

    # 5-3. 신호 파일의 시작 및 종료시각 설정
    def set_timepoints(self):
        '''
        Pick, per node, the cycles that go into the signal file and the offset.

        Reads self.sigtable, self.present_time and self.node2num_cycles.
        Writes:
        - self.offsets  : node id -> (last start_unix before sim_start) - sim_start
        - self.Sigtable : rows of sigtable restricted, per node, to the first
                          node2num_cycles[node_id] distinct start_unix values
                          at or after that last start_unix
        sim_start is present_time - 300.

        NOTE(review): when a node has no start_unix before sim_start, max() of
        the empty selection is NaN, so the offset is NaN and no row is kept
        for that node - confirm this cannot happen upstream.
        '''
        self.offsets = {}
        self.Sigtable = []
        sim_start = self.present_time - 300
        for node_id, node_rows in self.sigtable.groupby('node_id'):
            earlier = node_rows.loc[node_rows['start_unix'] < sim_start, 'start_unix']
            last_before = earlier.max()  # the last start_unix before sim_start
            self.offsets[node_id] = last_before - sim_start
            kept = node_rows[node_rows.start_unix >= last_before]
            # np.unique returns the distinct values already sorted ascending
            cycle_starts = np.unique(kept.start_unix.to_numpy())
            cycle_starts = cycle_starts[:self.node2num_cycles[node_id]]
            self.Sigtable.append(kept[kept.start_unix.isin(cycle_starts)])
        self.Sigtable = pd.concat(self.Sigtable)

    # 5-4. 적색 및 황색신호 부여
    def assign_red_yellow(self):
        '''
        Expand every phase into green / yellow / all-red sub-phases.

        Reads self.Sigtable: one row per (node_id, start_unix, phase_sumo)
        with duration and state (state strings without yellow/red handling).

        Writes self.SIGTABLE, sorted by node_id, start_unix, phase_sumo and
        re-indexed. For each pair of consecutive rows (prev, next) of a node:
        - "<prev phase>_g"  : prev state, prev duration minus 5
        - "<prev phase>_y"  : duration 4; links that go from 'G' in prev to
                              'r' in next show 'y', all others keep prev
        - "<next phase>__r" : duration 1, carries next's start_unix; the same
                              links show 'r', all others keep prev
        The last row of each node only yields its "_g" row (duration minus 5).

        The 5 seconds taken from the green are the 4 s yellow plus the 1 s red.
        The double underscore in "__r" makes that label sort before the "_g"
        label of the same phase number in the final string sort.
        '''
        expanded = []
        for _, group in self.Sigtable.groupby('node_id'):
            phases = group.to_dict('records')
            rows = []
            for prev, nxt in zip(phases, phases[1:]):
                yellow_state = ''
                red_state = ''
                for a, b in zip(prev['state'], nxt['state']):
                    if a == 'G' and b == 'r':
                        yellow_state += 'y'
                        red_state += 'r'
                    else:
                        yellow_state += a
                        red_state += a
                rows.append({**prev,
                             'phase_sumo': f"{prev['phase_sumo']}_g",
                             'duration': prev['duration'] - 5})
                rows.append({**prev,
                             'phase_sumo': f"{prev['phase_sumo']}_y",
                             'duration': 4,
                             'state': yellow_state})
                rows.append({**nxt,
                             'phase_sumo': f"{nxt['phase_sumo']}__r",
                             'duration': 1,
                             'state': red_state})
            # Take the final phase from this group itself, so a node with a
            # single row is handled and never picks up another node's row.
            last = phases[-1]
            rows.append({**last,
                         'phase_sumo': f"{last['phase_sumo']}_g",
                         'duration': last['duration'] - 5})
            # Building the frame from records avoids writing string labels
            # into an integer phase_sumo column.
            expanded.append(pd.DataFrame(rows, columns=group.columns))

        if not expanded:
            # Nothing to expand: keep an empty table with the same columns.
            self.SIGTABLE = self.Sigtable.iloc[0:0].copy()
            return
        self.SIGTABLE = (
            pd.concat(expanded)
            .sort_values(by=['node_id', 'start_unix', 'phase_sumo'])
            .reset_index(drop=True)
        )

    # 5-5. 신호파일 생성
    def make_tl_file(self):
        '''
        Write the traffic-light programs as an <additional> XML file.

        Reads self.SIGTABLE (node_id, duration, state columns are used),
        self.offsets, self.path_results and self.present_time.
        One <tlLogic> element is written per node, with one <phase> element
        per row in table order. The file path is stored in self.path_output
        as "<path_results>/sn_<present_time>.add.xml".
        '''
        chunks = ['<additional>\n']
        for node_id, phases in self.SIGTABLE.groupby('node_id'):
            chunks.append(f'    <tlLogic id="{node_id}" type="static" programID="{node_id}_prog" offset="{self.offsets[node_id]}">\n')
            chunks.extend(
                f'      <phase duration="{duration}" state="{state}"/>\n'
                for duration, state in zip(phases['duration'], phases['state'])
            )
            chunks.append('    </tlLogic>\n')
        chunks.append('</additional>')
        document = ''.join(chunks)
        # save
        self.path_output = os.path.join(self.path_results, f'sn_{self.present_time}.add.xml')
        with open(self.path_output, 'w') as f:
            f.write(document)

    # 6. 이슈사항 저장
    def write_issues(self):
        '''
        Step 6: save the collected issues and echo them to the console.

        Every entry of self.issues is written on its own line to
        "issues_generate_signals.txt" inside self.path_results (the file is
        written even when there are no issues). When there is at least one
        issue, a header line and every issue are also printed.
        '''
        print('6. 이슈사항을 저장합니다.')
        issues_path = os.path.join(self.path_results, "issues_generate_signals.txt")
        with open(issues_path, "w", encoding="utf-8") as out:
            out.writelines(issue + "\n" for issue in self.issues)
        if not self.issues:
            return
        print("데이터 처리 중 발생한 특이사항은 다음과 같습니다. :")
        for issue in self.issues:
            print(issue)

    def main(self):
        '''
        Run the whole pipeline and print how long each stage took.

        Stages, in order: prepare_data, process_history, process_movement,
        make_histids, get_signals, write_issues. A timestamp is stored in
        self.time0 before the first stage and in self.time1 .. self.time6
        after each stage. The report also reads self.time21, self.time22,
        self.time23, self.time41 and self.time42, which are not set here and
        must be set elsewhere before the report runs.
        '''
        stages = (
            'prepare_data',      # 1. prepare data
            'process_history',   # 2. preprocess signal history
            'process_movement',  # 3. preprocess movement information
            'make_histids',      # 4. build the integrated table
            'get_signals',       # 5. generate signals
            'write_issues',      # 6. save issues
        )
        self.time0 = datetime.now()
        for number, stage in enumerate(stages, start=1):
            getattr(self, stage)()
            setattr(self, f'time{number}', datetime.now())

        # (label, end timestamp attribute, start timestamp attribute)
        report = (
            ('(1)', 'time1', 'time0'),
            ('(2-1)', 'time21', 'time1'),
            ('(2-2)', 'time22', 'time21'),
            ('(2-3)', 'time23', 'time22'),
            ('(2)', 'time2', 'time1'),
            ('(3)', 'time3', 'time2'),
            ('(4)', 'time4', 'time3'),
            ('(4-1)', 'time41', 'time3'),
            ('(4-2)', 'time42', 'time41'),
            ('(5)', 'time5', 'time4'),
            ('(6)', 'time6', 'time5'),
            ('total time :', 'time6', 'time0'),
        )
        for label, end_attr, start_attr in report:
            print(label, getattr(self, end_attr) - getattr(self, start_attr))

 if __name__ == '__main__':
    # Run the full pipeline, then dump the intermediate tables as CSV files
    # into <path_root>/Analysis/0207_unit_test.
    generator = SignalGenerator()
    generator.main()
    generator.path_unit = os.path.join(generator.path_root, 'Analysis', '0207_unit_test')
    dumps = (
        ('hrhists', 'hrhists.csv'),
        ('histids', 'histids.csv'),
        ('sigtable', 'sigtable.csv'),
        ('Sigtable', 'ssigtable.csv'),
    )
    for attr_name, file_name in dumps:
        getattr(generator, attr_name).to_csv(os.path.join(generator.path_unit, file_name))
--- a/Archives/Scripts/preprocess.ipynb
+++ b/Archives/Scripts/preprocess.ipynb
--- a/Archives/Scripts/preprocess_5min.ipynb
+++ b/Archives/Scripts/preprocess_5min.ipynb
@ -1,985 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from tqdm import tqdm\n",
    "from datetime import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "datetime.datetime(2024, 1, 5, 11, 55, 13, 99135)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "now = datetime.now()\n",
    "now = now.replace(month=1, day=5)\n",
    "now"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "midnight = int(datetime(2024, 1, 5, 0, 0, 0).timestamp())\n",
    "next_day = int(datetime(2024, 1, 6, 0, 0, 0).timestamp())\n",
    "fsecs = range(midnight, next_day, 5) # fsecs : unix time by Five SECondS\n",
    "fmins = range(midnight, next_day, 300) # fmins : unix time by Five MINuteS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>inter_no</th>\n",
       "      <th>phas_A</th>\n",
       "      <th>phas_B</th>\n",
       "      <th>move_A</th>\n",
       "      <th>move_B</th>\n",
       "      <th>start_unix</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>177</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "      <td>1704408330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>177</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>1704408330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>177</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>1704408330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>177</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1704408330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>1704408330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>700</th>\n",
       "      <td>178</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>1704411830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>701</th>\n",
       "      <td>201</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>1704411850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>702</th>\n",
       "      <td>201</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>1704411850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>703</th>\n",
       "      <td>201</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>1704411850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>704</th>\n",
       "      <td>206</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>17</td>\n",
       "      <td>18</td>\n",
       "      <td>1704411880</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>705 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     inter_no  phas_A  phas_B  move_A  move_B  start_unix\n",
       "0         177       1       1       8       4  1704408330\n",
       "1         177       2       2       7       3  1704408330\n",
       "2         177       3       3      17      18  1704408330\n",
       "3         177       4       4       5       1  1704408330\n",
       "4         201       1       1       8       3  1704408330\n",
       "..        ...     ...     ...     ...     ...         ...\n",
       "700       178       4       4       6       1  1704411830\n",
       "701       201       1       1       8       3  1704411850\n",
       "702       201       4       4       6       1  1704411850\n",
       "703       201       5       5       7       4  1704411850\n",
       "704       206       2       2      17      18  1704411880\n",
       "\n",
       "[705 rows x 6 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "history = pd.read_csv('../Data/tables/history.csv', index_col=0)\n",
    "m = 105\n",
    "present_time = fmins[m]\n",
    "\n",
    "# - 아래 절차를 5초마다 반복\n",
    "for fsec in range(midnight, present_time + 1, 5): # fsec : unix time by Five SECond\n",
    "    # 1. 상태 테이블 조회해서 전체 데이터중 필요데이터(교차로번호, A링 현시번호, A링 이동류번호, B링 현시번호, B링 이동류번호)만 수집 : A\n",
    "    # move = time2move[fsec]\n",
    "    move = pd.read_csv(f'../Data/tables/move/move_{fsec}.csv', index_col=0)\n",
    "    # 2. 이력 테이블 조회해서 교차로별로 유닉스시간 최대인 데이터(교차로변호, 종료유닉스타임)만 수집 : B\n",
    "    recent_histories = [group.iloc[-1:] for _, group in history[history['end_unix'] < fsec].groupby('inter_no')] # 교차로별로 유닉스시간이 최대인 행들\n",
    "    if not recent_histories:\n",
    "        rhistory = pd.DataFrame({'inter_no':[], 'end_unix':[]}) # recent history\n",
    "    else:\n",
    "        rhistory = pd.concat(recent_histories)\n",
    "    recent_unix = rhistory[['inter_no', 'end_unix']]\n",
    "    # 3. 상태 테이블 조회정보(A)와 이력 테이블 조회정보(B) 조인(키값 : 교차로번호) : C\n",
    "    move = pd.merge(move, recent_unix, how='left', on='inter_no')\n",
    "    move['end_unix'] = move['end_unix'].fillna(0).astype(int)\n",
    "    move = move.drop_duplicates()\n",
    "    # 4. C데이터 프레임에 신규 컬럼(시작 유닉스타임) 생성 후 종료유닉스 타임 값 입력, 종료 유닉스 타임 컬럼 제거\n",
    "    move = move.rename(columns = {'end_unix':'start_unix'})\n",
    "    # 5. 이동류 이력정보 READ\n",
    "    #     - CSV 파일로 서버에 저장된 이동류정보를 읽어옴(파일이 없는 경우에는 데이터가 없는 프레임 D 생성)\n",
    "    try:\n",
    "        if isinstance(movement, pd.DataFrame): # movement가 존재할 경우 그걸 그대로 씀.\n",
    "            pass\n",
    "        else: \n",
    "            movement = pd.DataFrame()\n",
    "    except NameError: # movement가 존재하지 않는 경우 생성\n",
    "        movement = pd.DataFrame()\n",
    "    # 6. 이동류 이력정보 데이터테이블(D)에 C데이터 add\n",
    "    movement = pd.concat([movement, move])\n",
    "    # 7. D데이터 프레임에서 중복데이터 제거(교차로번호, 시작 유닉스타임, A링 현시번호, B링 현시번호 같은 행은 제거)\n",
    "    movement = movement.drop_duplicates(['inter_no','phas_A','phas_B','start_unix'])\n",
    "    # 8. D데이터 보관 시간 기준시간을 시작 유닉스 타임의 최대값 - 3600을 값으로 산출하고, 보관 시간 기준시간보다 작은 시작 유닉스 타임을 가진 행은 모두 제거(1시간 데이터만 보관)\n",
    "    movement = movement[movement.start_unix > fsec - 3600]\n",
    "    movement = movement.sort_values(by=['start_unix','inter_no','phas_A','phas_B']).reset_index(drop=True)\n",
    "\n",
    "display(movement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_splits(plan):\n",
    "    # split, isplit : A,B 분리 혹은 통합시 사용될 수 있는 딕셔너리 \n",
    "    splits = {} # splits maps (inter_no, start_hour, start_minute) to split \n",
    "    for i, row in plan.iterrows():\n",
    "        inter_no = row.inter_no\n",
    "        start_hour = row.start_hour\n",
    "        start_minute = row.start_minute\n",
    "        cycle = row.cycle\n",
    "        cums_A = row[[f'dura_A{j}' for j in range(1,9)]].cumsum()\n",
    "        cums_B = row[[f'dura_B{j}' for j in range(1,9)]].cumsum()\n",
    "        splits[(inter_no, start_hour, start_minute)] = {} # split maps (phas_A, phas_B) to k\n",
    "        k = 0\n",
    "        for t in range(cycle):\n",
    "            new_phas_A = len(cums_A[cums_A < t]) + 1\n",
    "            new_phas_B = len(cums_B[cums_B < t]) + 1\n",
    "            if k == 0 or ((new_phas_A, new_phas_B) != (phas_A, phas_B)):\n",
    "                k += 1\n",
    "            phas_A = new_phas_A\n",
    "            phas_B = new_phas_B\n",
    "            splits[(inter_no, start_hour, start_minute)][(phas_A, phas_B)] = k\n",
    "\n",
    "    isplits = {} # the inverse of splits\n",
    "    for i in splits:\n",
    "        isplits[i] = {splits[i][k]:k for k in splits[i]} # isplit maps k to (phas_A, phas_B)\n",
    "    return splits, isplits\n",
    "\n",
    "def make_timetable(plan):\n",
    "    # timetable\n",
    "    timetable = plan[['start_hour', 'start_minute']].drop_duplicates()\n",
    "    timetable['start_seconds'] = midnight + timetable['start_hour'] * 3600 + timetable['start_minute'] * 60\n",
    "    return timetable\n",
    "\n",
    "# inter2node\n",
    "inter_node = pd.read_csv('../Data/tables/inter_node.csv', index_col=0)\n",
    "inter_node = inter_node[inter_node.inter_type=='parent']\n",
    "inter2node = dict(zip(inter_node['inter_no'], inter_node['node_id']))\n",
    "\n",
    "hours = np.array(range(midnight - 7200, next_day + 1, 3600)) # 정각에 해당하는 시각들 목록"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_DS(rhist, curr_unix, hours, timetable):\n",
    "    program_starts = np.array(timetable.start_seconds)\n",
    "    idx = (program_starts <= present_time).sum() - 1\n",
    "    program_start = program_starts[idx]\n",
    "    if list(hours[hours <= curr_unix]):\n",
    "        ghour_lt_curr_unix = hours[hours <= curr_unix].max() # the greatest hour less than or equal to curr_unix\n",
    "    else:\n",
    "        ghour_lt_curr_unix = program_start\n",
    "    start_unixes = rhist.start_unix.unique()\n",
    "    start_unixes_lt_ghour = np.sort(start_unixes[start_unixes < ghour_lt_curr_unix]) # start unixes less than ghour_lt_curr_unix\n",
    "    # 기준유닉스(base_unix) : curr_unix보다 작은 hour 중에서 가장 큰 값으로부터 다섯 번째로 작은 start_unix\n",
    "    if len(start_unixes_lt_ghour) > 5:\n",
    "        base_unix = start_unixes_lt_ghour[-5]\n",
    "    # start_unixes_lt_ghour의 길이가 5 미만일 경우에는 맨 앞 start_unix로 base_unix를 지정\n",
    "    else:\n",
    "        base_unix = rhist.start_unix.min()\n",
    "    D_n = curr_unix - base_unix\n",
    "    S_n_durs = rhist[(rhist.start_unix > base_unix) & (rhist.start_unix <= curr_unix)] \\\n",
    "        [[f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]]\n",
    "    S_n = S_n_durs.values.sum() // 2\n",
    "    return D_n, S_n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_prow(plan, timetable, inter_no, time):\n",
    "    '''\n",
    "    load planned row\n",
    "    '''\n",
    "    # 프로그램 시작시각\n",
    "    program_starts = np.array(timetable.start_seconds)\n",
    "    idx = (program_starts <= time).sum() - 1\n",
    "    program_start = program_starts[idx]\n",
    "\n",
    "    # 최근 프로그램 시작시각에 대한 신호계획\n",
    "    start_hour = timetable.iloc[idx].start_hour\n",
    "    start_minute = timetable.iloc[idx].start_minute\n",
    "    prow = plan[(plan.inter_no==inter_no) & (plan.start_hour==start_hour) & (plan.start_minute==start_minute)] # planned row\n",
    "    return program_start, prow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_rhistory(plan, timetable, history, present_time, adder):\n",
    "    # 1. 조회시점의 유닉스 타임 이전의 신호이력 수집\n",
    "    rhistory = history.copy() # recent history\n",
    "    rhistory = rhistory[(rhistory.end_unix <= present_time) & (rhistory.end_unix > present_time - 9000)] # 두 시간 반 전부터 현재까지의 신호이력을 가져옴. 9000 = 3600 * 2.5\n",
    "\n",
    "    # rhistory에 모든 교차로번호가 존재하지 않으면 해당 교차로번호에 대한 신호이력을 추가함 (at 최근 프로그램 시작시각)\n",
    "    whole_inter_nos = sorted(history.inter_no.unique())\n",
    "    recent_inter_nos = sorted(rhistory.inter_no.unique())\n",
    "    if not whole_inter_nos==recent_inter_nos:\n",
    "        for inter_no in set(whole_inter_nos) - set(recent_inter_nos):\n",
    "            program_start, prow = load_prow(plan, timetable, inter_no, present_time - 9000)\n",
    "            cycle = prow.cycle.iloc[0]\n",
    "            row1 = prow.drop(['start_hour', 'start_minute'], axis=1).copy()\n",
    "            row2 = prow.drop(['start_hour', 'start_minute'], axis=1).copy()\n",
    "            # prow에서 필요한 부분을 rhistory에 추가\n",
    "            row1['end_unix'] = program_start\n",
    "            row2['end_unix'] = program_start + cycle\n",
    "            rhistory = pd.concat([rhistory, row1, row2]).reset_index(drop=True)\n",
    "    # present_time + adder 의 시각에 한 주기의 신호 추가\n",
    "    for inter_no in set(whole_inter_nos):\n",
    "        program_start, prow = load_prow(plan, timetable, inter_no, present_time)\n",
    "        cycle = prow.cycle.iloc[0]\n",
    "        row3 = prow.drop(['start_hour', 'start_minute'], axis=1).copy()\n",
    "        # prow에서 필요한 부분을 rhistory에 추가\n",
    "        row3['end_unix'] = present_time + adder\n",
    "        rhistory = pd.concat([rhistory, row3]).reset_index(drop=True)\n",
    "\n",
    "    # 2. 시작 유닉스 타임컬럼 생성 후 종류 유닉스 타임에서 현시별 현시기간 컬럼의 합을 뺀 값으로 입력\n",
    "    # - 현시시간의 합을 뺀 시간의 +- 10초 이내에 이전 주기정보가 존재하면 그 유닉스 시간을 시작 유닉스시간 값으로 하고, 존재하지 않으면 현시시간의 합을 뺀 유닉스 시간을 시작 유닉스 시간으로 지정\n",
    "    for i, row in rhistory.iterrows():\n",
    "        inter_no = row.inter_no\n",
    "        end_unix = row.end_unix\n",
    "        elapsed_time = row[[f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]].sum() // 2 # 현시시간 합\n",
    "        # 이전 유닉스 존재하지 않음 : 현시시간 합의 차\n",
    "        start_unix = end_unix - elapsed_time\n",
    "        pre_rows = history[:i] # previous rows\n",
    "        if inter_no in pre_rows.inter_no.unique(): # 이전 유닉스 존재\n",
    "            pre_unix = pre_rows[pre_rows.inter_no == inter_no]['end_unix'].iloc[-1] # previous unix time\n",
    "            # 이전 유닉스 존재, abs < 10 : 이전 유닉스\n",
    "            if abs(pre_unix - start_unix) < 10:\n",
    "                start_unix = pre_unix\n",
    "            # 이전 유닉스 존재, abs >=10 : 현시시간 합의 차\n",
    "            else:\n",
    "                pass\n",
    "        rhistory.loc[i, 'start_unix'] = start_unix \n",
    "    rhistory[rhistory.isna()] = 0\n",
    "    rhistory['start_unix'] = rhistory['start_unix'].astype(int)\n",
    "    rhistory = rhistory[['inter_no', 'start_unix'] + [f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)] + ['cycle']]\n",
    "    return rhistory\n",
    "adder = 600"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def processing(plan, rhistory, timetable, hours):\n",
    "    rhists = []\n",
    "    for inter_no in sorted(rhistory.inter_no.unique()):\n",
    "        rhist = rhistory.copy()[rhistory.inter_no==inter_no]\n",
    "        rhist = rhist.drop_duplicates(subset=['start_unix']).reset_index(drop=True)\n",
    "\n",
    "        # D_n 및 S_n 값 정의\n",
    "        rhist['D_n'] = 0 # D_n : 시간차이\n",
    "        rhist['S_n'] = 0 # S_n : 현시시간합\n",
    "        for n in range(len(rhist)):\n",
    "            curr_unix = rhist.iloc[n].start_unix # current start_unix\n",
    "            rhist.loc[n, ['D_n', 'S_n']] = calculate_DS(rhist, curr_unix, hours, timetable)\n",
    "\n",
    "        # 이전시각, 현재시각\n",
    "        prev_unix = rhist.loc[0, 'start_unix'] # previous start_unix\n",
    "        curr_unix = rhist.loc[1, 'start_unix'] # current start_unix\n",
    "\n",
    "        # rhist의 마지막 행에 도달할 때까지 반복\n",
    "        while True:\n",
    "            n = rhist[rhist.start_unix==curr_unix].index[0]\n",
    "            cycle = rhist.loc[n, 'cycle']\n",
    "            D_n = rhist.loc[n, 'D_n']\n",
    "            S_n = rhist.loc[n, 'S_n']\n",
    "            # 참값인 경우\n",
    "            if (abs(D_n - S_n) <= 5):\n",
    "                pass\n",
    "            # 참값이 아닌 경우\n",
    "            else:\n",
    "                # 2-1-1. 결측치 처리 : 인접한 두 start_unix의 차이가 계획된 주기의 두 배보다 크면 결측이 일어났다고 판단, 신호계획의 현시시간으로 \"대체\"\n",
    "                if curr_unix - prev_unix >= 2 * cycle:\n",
    "                    # prev_unix를 계획된 주기만큼 늘려가면서 한 행씩 채워나간다.\n",
    "                    # (curr_unix와의 차이가 계획된 주기보다 작거나 같아질 때까지)\n",
    "                    while curr_unix - prev_unix > cycle:\n",
    "                        prev_unix += cycle\n",
    "                        # 신호 계획(prow) 불러오기\n",
    "                        start_seconds = np.array(timetable.start_seconds)\n",
    "                        idx = (start_seconds <= prev_unix).sum() - 1\n",
    "                        start_hour = timetable.iloc[idx].start_hour\n",
    "                        start_minute = timetable.iloc[idx].start_minute\n",
    "                        prow = plan.copy()[(plan.inter_no==inter_no) & (plan.start_hour==start_hour) & (plan.start_minute==start_minute)] # planned row\n",
    "                        # prow에서 필요한 부분을 rhist에 추가\n",
    "                        prow['start_unix'] = prev_unix\n",
    "                        prow = prow.drop(['start_hour', 'start_minute', 'offset'], axis=1)\n",
    "                        cycle = prow.iloc[0].cycle\n",
    "                        rhist = pd.concat([rhist, prow])\n",
    "                        rhist = rhist.sort_values(by='start_unix').reset_index(drop=True)\n",
    "                        n += 1\n",
    "\n",
    "                # 2-1-2. 이상치 처리 : 비율에 따라 해당 행을 \"삭제\"(R_n <= 0.5) 또는 \"조정\"(R_n > 0.5)한다\n",
    "                R_n = (curr_unix - prev_unix) / cycle # R_n : 비율\n",
    "                # R_n이 0.5보다 작거나 같으면 해당 행을 삭제\n",
    "                if R_n <= 0.5:\n",
    "                    rhist = rhist.drop(index=n).reset_index(drop=True)\n",
    "                    if n >= rhist.index[-1]:\n",
    "                        break\n",
    "                    # 행삭제에 따른 curr_unix, R_n 재정의\n",
    "                    curr_unix = rhist.loc[n, 'start_unix']\n",
    "                    R_n = (curr_unix - prev_unix) / cycle # R_n : 비율\n",
    "\n",
    "                # R_n이 0.5보다 크면 해당 행 조정 (비율을 유지한 채로 현시시간 대체)\n",
    "                if R_n > 0.5:\n",
    "                    # 신호 계획(prow) 불러오기\n",
    "                    start_seconds = np.array(timetable.start_seconds)\n",
    "                    idx = (start_seconds <= curr_unix).sum() - 1\n",
    "                    start_hour = timetable.iloc[idx].start_hour\n",
    "                    start_minute = timetable.iloc[idx].start_minute\n",
    "                    prow = plan[(plan.inter_no==inter_no) & (plan.start_hour==start_hour) & (plan.start_minute==start_minute)] # planned row\n",
    "                    # 조정된 현시시간 (prow에 R_n을 곱하고 정수로 바꿈)\n",
    "                    adjusted_dur = prow.copy()[[f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]] * R_n\n",
    "                    int_parts = adjusted_dur.iloc[0].apply(lambda x: int(x))\n",
    "                    frac_parts = adjusted_dur.iloc[0] - int_parts\n",
    "                    difference = round(adjusted_dur.iloc[0].sum()) - int_parts.sum()\n",
    "                    for _ in range(difference): # 소수 부분이 가장 큰 상위 'difference'개의 값에 대해 올림 처리\n",
    "                        max_frac_index = frac_parts.idxmax()\n",
    "                        int_parts[max_frac_index] += 1\n",
    "                        frac_parts[max_frac_index] = 0  # 이미 처리된 항목은 0으로 설정\n",
    "                    # rhist에 조정된 현시시간을 반영\n",
    "                    rhist.loc[n, [f'dura_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]] = int_parts.values\n",
    "                    rhist.loc[n, 'cycle'] = int_parts.sum().sum() // 2\n",
    "\n",
    "            if n >= rhist.index[-1]:\n",
    "                break\n",
    "            prev_unix = curr_unix\n",
    "            curr_unix = rhist.loc[n+1, 'start_unix']\n",
    "\n",
    "        # 생략해도 무방할 코드\n",
    "        rhist = rhist.reset_index(drop=True)\n",
    "        rhist = rhist.sort_values(by=['start_unix'])\n",
    "\n",
    "        # D_n 및 S_n 값 재정의\n",
    "        for n in range(len(rhist)):\n",
    "            curr_unix = rhist.iloc[n].start_unix # current start_unix\n",
    "            rhist.loc[n, ['D_n', 'S_n']] = calculate_DS(rhist, curr_unix, hours, timetable)\n",
    "        rhists.append(rhist)\n",
    "    rhists = pd.concat(rhists).sort_values(by=['start_unix','inter_no'])\n",
    "    rhists = rhists[rhists.start_unix >= present_time - 3600]\n",
    "    rhists = rhists.drop(columns=['D_n', 'S_n'])\n",
    "    return rhists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_hrhists(rhists, isplits, timetable):\n",
    "    # 계층화된 형태로 변환\n",
    "    hrhists = [] # hierarchied recent history\n",
    "    for i, row in rhists.iterrows():\n",
    "        inter_no = row.inter_no\n",
    "        start_unix = row.start_unix\n",
    "\n",
    "        ind = (timetable['start_seconds'] <= row.start_unix).sum() - 1\n",
    "        start_hour = timetable.iloc[ind].start_hour\n",
    "        start_minute = timetable.iloc[ind].start_minute\n",
    "        isplit = isplits[(inter_no, start_hour, start_minute)]\n",
    "        phas_As = [isplit[j][0] for j in isplit.keys()]\n",
    "        phas_Bs = [isplit[j][1] for j in isplit.keys()]\n",
    "        durs_A = row[[f'dura_A{j}' for j in range(1,9)]]\n",
    "        durs_B = row[[f'dura_B{j}' for j in range(1,9)]]\n",
    "        durations = []\n",
    "        for j in range(1, len(isplit)+1):\n",
    "            ja = isplit[j][0]\n",
    "            jb = isplit[j][1]\n",
    "            if ja == jb:\n",
    "                durations.append(min(durs_A[ja-1], durs_B[jb-1]))\n",
    "            else:\n",
    "                durations.append(abs(durs_A[ja-1] - durs_B[ja-1]))\n",
    "        new_rows = pd.DataFrame({'inter_no':[inter_no] * len(durations), 'start_unix':[start_unix] * len(durations),\n",
    "                                'phas_A':phas_As, 'phas_B':phas_Bs, 'duration':durations})\n",
    "        hrhists.append(new_rows)\n",
    "    hrhists = pd.concat(hrhists)\n",
    "    hrhists = hrhists.sort_values(by = ['start_unix', 'inter_no', 'phas_A', 'phas_B']).reset_index(drop=True)\n",
    "    return hrhists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def update_movement(hrhists, movement, movements):\n",
    "    # 중복을 제거하고 (inter_no, start_unix) 쌍을 만듭니다.\n",
    "    hrhists_inter_unix = set(hrhists[['inter_no', 'start_unix']].drop_duplicates().itertuples(index=False, name=None))\n",
    "    movement_inter_unix = set(movement[['inter_no', 'start_unix']].drop_duplicates().itertuples(index=False, name=None))\n",
    "\n",
    "    # hrhists에는 있지만 movement에는 없는 (inter_no, start_unix) 쌍을 찾습니다.\n",
    "    missing_in_movement = hrhists_inter_unix - movement_inter_unix\n",
    "\n",
    "    # 새로운 행들을 생성합니다.\n",
    "    new_rows = []\n",
    "    if missing_in_movement:\n",
    "        for inter_no, start_unix in missing_in_movement:\n",
    "            # movements에서 해당 inter_no의 데이터를 찾습니다.\n",
    "            new_row = movements[movements['inter_no'] == inter_no].copy()\n",
    "            # start_unix 값을 설정합니다.\n",
    "            new_row['start_unix'] = start_unix\n",
    "            new_rows.append(new_row)\n",
    "\n",
    "        # 새로운 데이터프레임을 생성하고 기존 movement 데이터프레임과 합칩니다.\n",
    "        new_movement = pd.concat(new_rows, ignore_index=True)\n",
    "        movement_updated = pd.concat([movement, new_movement], ignore_index=True)\n",
    "    else:\n",
    "        movement_updated = movement\n",
    "    return movement_updated"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_histid(present_time, hrhists, movement_updated, inter2node, matching):\n",
    "    # movements and durations\n",
    "    movedur = pd.merge(hrhists, movement_updated, how='inner', on=['inter_no', 'start_unix', 'phas_A', 'phas_B'])\n",
    "    movedur = movedur.sort_values(by=['start_unix', 'inter_no', 'phas_A','phas_B'])\n",
    "    movedur = movedur[['inter_no', 'start_unix', 'phas_A', 'phas_B', 'move_A', 'move_B', 'duration']]\n",
    "\n",
    "    # 이동류 매칭 테이블에서 진입id, 진출id를 가져와서 붙임.\n",
    "    for i, row in movedur.iterrows():\n",
    "        inter_no = row.inter_no\n",
    "        start_unix = row.start_unix\n",
    "        # incoming and outgoing edges A\n",
    "        move_A = row.move_A\n",
    "        if move_A in [17, 18]:\n",
    "            inc_edge_A = np.nan\n",
    "            outhedge_A = np.nan\n",
    "        else:\n",
    "            match_A = matching[(matching.inter_no == inter_no) & (matching.move_no == move_A)].iloc[0]\n",
    "            inc_edge_A = match_A.inc_edge\n",
    "            out_edge_A = match_A.out_edge\n",
    "        movedur.loc[i, ['inc_edge_A', 'out_edge_A']] = [inc_edge_A, out_edge_A]\n",
    "        # incoming and outgoing edges B\n",
    "        move_B = row.move_B\n",
    "        if move_B in [17, 18]:\n",
    "            inc_edge_B = np.nan\n",
    "            out_edge_B = np.nan\n",
    "        else:\n",
    "            match_B = matching[(matching.inter_no == inter_no) & (matching.move_no == move_B)].iloc[0]\n",
    "            inc_edge_B = match_B.inc_edge\n",
    "            out_edge_B = match_B.out_edge\n",
    "        movedur.loc[i, ['inc_edge_B', 'out_edge_B']] = [inc_edge_B, out_edge_B]\n",
    "\n",
    "    # 이동류 컬럼 제거\n",
    "    movedur = movedur.drop(['move_A', 'move_B'], axis=1)\n",
    "\n",
    "    histid = movedur.copy() # history with edge ids (incoming and outgoing edge ids)\n",
    "    histid['node_id'] = histid['inter_no'].map(inter2node)\n",
    "    histid = histid[['inter_no', 'node_id', 'start_unix', 'phas_A', 'phas_B', 'duration', 'inc_edge_A', 'out_edge_A', 'inc_edge_B', 'out_edge_B']]\n",
    "    histid_start = present_time - 600\n",
    "    histid = histid[histid.start_unix > histid_start]\n",
    "    return histid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocess(m):\n",
    "   '''\n",
    "   통합테이블(histid)를 만드는 함수\n",
    "\n",
    "   input : m\n",
    "    - m ranges from 0 to 287, but 0 makes an error where 288 = 86400//300\n",
    "    - present_time = fmins[m] : 현재시점\n",
    "\n",
    "   output : histid (통합테이블, HISTory with edge_IDs)\n",
    "    - 컬럼 : inter_no, node_id, start_unix, phas_A, phas_B, duration, inc_edge_A, out_edge_A, inc_edge_B, out_edge_B\n",
    "\n",
    "   주요 데이터, 중간산출물 및 결과물 :\n",
    "   # 데이터\n",
    "    - history : 신호이력 (inter_no, end_unix, dura_Aj, dura_Bj, cycle, offset)\n",
    "    - plan : 신호계획 (inter_no, start_hour, start_minute, dura_Aj, dura_Bj cycle, offset)\n",
    "   # 중간산출물\n",
    "    - rhists (recent history)\n",
    "       - history에서 현재 시각 이전의 데이터를 가져옴.\n",
    "       - end_unix를 start_unix로 변환\n",
    "       - 참값판단 프로세스(결측·이상치 처리)\n",
    "       - 컬럼 : inter_no, start_unix, dura_Aj, dura_Bj, cycle\n",
    "    - hrhists (hierarchized recent history)\n",
    "       - rhists를 계층화\n",
    "       - 컬럼 : inter_no, start_unix, phas_A, phas_B, duration\n",
    "    - movements\n",
    "       - 각 교차로에 대하여 현시별로 이동류를 정해놓음.\n",
    "       - join시 사용하기 위함.\n",
    "       - 한 번 만들어놓고 두고두고 사용함.\n",
    "       - 컬럼 : inter_no, phas_A, phas_B, move_A, move_B\n",
    "    - movement\n",
    "       - 현재 시점에서의 이동류정보\n",
    "       - 컬럼 : inter_no, phas_A, phas_B, move_A, move_B, start_unix\n",
    "    - movement_updated\n",
    "       - movement와 hrhists를 join하기 전에, movement에는 없지만 hrhists에는 있는 start_unix에 대한 이동류 정보를 가져와 movement에 붙임\n",
    "       - 이동류정보는 앞서 정의한 movements에서 가져옴.\n",
    "       - 컬럼 : inter_no, phas_A, phas_B, move_A, move_B, start_unix\n",
    "    - movedur\n",
    "       - hrhists와 movement_updated를 join\n",
    "       - 컬럼 : inter_no, phas_A, phas_B, move_A, move_B, start_unix, duration\n",
    "   # 결과 : histid\n",
    "       - 신호생성에 직접적으로 사용되는 데이터프레임\n",
    "       - 컬럼 : inter_no, node_id, start_unix, phas_A, phas_B, duration, inc_edge_A, out_edge_A, inc_edge_B, out_edge_B\n",
    "       - 한글컬럼 : 교차로번호, 노드id, 시작유닉스, A현시번호, B현시번호, 현시시간, 진입엣지(A), 진출엣지(A), 진입엣지(B), 진출엣지(B)\n",
    "   '''\n",
    "   midnight = int(datetime(2024, 1, 5, 0, 0, 0).timestamp())\n",
    "   next_day = int(datetime(2024, 1, 6, 0, 0, 0).timestamp())\n",
    "   fmins = range(midnight, next_day, 300) # fmins : unix time by Five MINuteS\n",
    "   # 현재시각\n",
    "   present_time = fmins[m]\n",
    "   print(datetime.fromtimestamp(present_time))\n",
    "   # 사용할 표준 테이블 목록\n",
    "   plan = pd.read_csv('../Data/tables/plan.csv', index_col=0)\n",
    "   history = pd.read_csv('../Data/tables/history.csv', index_col=0)\n",
    "   matching = pd.read_csv('../Intermediates/matching.csv', index_col=0)\n",
    "   # 참고할 딕셔너리, 데이터프레임, 리스트 등 목록\n",
    "   splits, isplits = make_splits(plan)\n",
    "   timetable = make_timetable(plan)\n",
    "   inter_node = pd.read_csv('../Data/tables/inter_node.csv', index_col=0)\n",
    "   inter_node = inter_node[inter_node.inter_type=='parent']\n",
    "   inter2node = dict(zip(inter_node['inter_no'], inter_node['node_id']))\n",
    "   hours = np.array(range(midnight - 7200, next_day + 1, 3600)) # 정각에 해당하는 시각들 목록\n",
    "   # rhistory, rhists, hrhists\n",
    "   adder = 600\n",
    "   rhistory = make_rhistory(plan, timetable, history, present_time, adder)\n",
    "   rhists = processing(plan, rhistory, timetable, hours)\n",
    "   hrhists = make_hrhists(rhists, isplits, timetable)\n",
    "   # movements, movement, movement_updated\n",
    "   movements = pd.read_csv('../Intermediates/movements.csv')\n",
    "   movement = pd.read_csv(f'../Intermediates/movement/movement_{present_time}.csv', index_col=0)\n",
    "   movement_updated = update_movement(hrhists, movement, movements)\n",
    "   # movedur\n",
    "   movedur = pd.merge(movement_updated, hrhists, how='inner', on=['inter_no', 'start_unix', 'phas_A', 'phas_B']) # movements and durations\n",
    "   movedur = movedur.sort_values(by=['start_unix', 'inter_no', 'phas_A','phas_B'])\n",
    "   movedur = movedur[['inter_no', 'start_unix', 'phas_A', 'phas_B', 'move_A', 'move_B', 'duration']]\n",
    "   # histid\n",
    "   histid = make_histid(present_time, hrhists, movement_updated, inter2node, matching)\n",
    "   histid.to_csv(f'../Intermediates/histid/histid_{fmins[m]}.csv')\n",
    "   return histid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-01-05 08:45:00\n",
      "2024-01-05 08:50:00\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>inter_no</th>\n",
       "      <th>node_id</th>\n",
       "      <th>start_unix</th>\n",
       "      <th>phas_A</th>\n",
       "      <th>phas_B</th>\n",
       "      <th>duration</th>\n",
       "      <th>inc_edge_A</th>\n",
       "      <th>out_edge_A</th>\n",
       "      <th>inc_edge_B</th>\n",
       "      <th>out_edge_B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>655</th>\n",
       "      <td>202</td>\n",
       "      <td>i9</td>\n",
       "      <td>1704411610</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>46</td>\n",
       "      <td>571510152_02</td>\n",
       "      <td>-571510152_01</td>\n",
       "      <td>571510152_01</td>\n",
       "      <td>571510152_01.65</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>656</th>\n",
       "      <td>202</td>\n",
       "      <td>i9</td>\n",
       "      <td>1704411610</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>114</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-571510152_01</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>657</th>\n",
       "      <td>175</td>\n",
       "      <td>i0</td>\n",
       "      <td>1704411629</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>-571542797_02</td>\n",
       "      <td>571500487_01</td>\n",
       "      <td>-571500487_01</td>\n",
       "      <td>571542797_02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>658</th>\n",
       "      <td>175</td>\n",
       "      <td>i0</td>\n",
       "      <td>1704411629</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>42</td>\n",
       "      <td>-571500487_01</td>\n",
       "      <td>571545870_01</td>\n",
       "      <td>-571542797_02</td>\n",
       "      <td>571510153_01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>659</th>\n",
       "      <td>175</td>\n",
       "      <td>i0</td>\n",
       "      <td>1704411629</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>29</td>\n",
       "      <td>571545870_02</td>\n",
       "      <td>571510153_01</td>\n",
       "      <td>571545870_02</td>\n",
       "      <td>571542797_02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>871</th>\n",
       "      <td>201</td>\n",
       "      <td>i8</td>\n",
       "      <td>1704412640</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>17</td>\n",
       "      <td>571500583_01</td>\n",
       "      <td>571500617_01</td>\n",
       "      <td>571500583_01</td>\n",
       "      <td>571500569_01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>872</th>\n",
       "      <td>206</td>\n",
       "      <td>i7</td>\n",
       "      <td>1704412660</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>25</td>\n",
       "      <td>-571511538_02</td>\n",
       "      <td>571542073_02</td>\n",
       "      <td>571542073_01</td>\n",
       "      <td>571511538_02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>873</th>\n",
       "      <td>206</td>\n",
       "      <td>i7</td>\n",
       "      <td>1704412660</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>571542073_02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>874</th>\n",
       "      <td>206</td>\n",
       "      <td>i7</td>\n",
       "      <td>1704412660</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>15</td>\n",
       "      <td>-571511538_02</td>\n",
       "      <td>571542073_02</td>\n",
       "      <td>571542073_01</td>\n",
       "      <td>571511538_02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>875</th>\n",
       "      <td>206</td>\n",
       "      <td>i7</td>\n",
       "      <td>1704412660</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>571542073_02</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>221 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     inter_no node_id  start_unix  phas_A  phas_B  duration     inc_edge_A  \\\n",
       "655       202      i9  1704411610       1       1        46   571510152_02   \n",
       "656       202      i9  1704411610       2       2       114            NaN   \n",
       "657       175      i0  1704411629       1       1        40  -571542797_02   \n",
       "658       175      i0  1704411629       2       2        42  -571500487_01   \n",
       "659       175      i0  1704411629       3       3        29   571545870_02   \n",
       "..        ...     ...         ...     ...     ...       ...            ...   \n",
       "871       201      i8  1704412640       5       5        17   571500583_01   \n",
       "872       206      i7  1704412660       1       1        25  -571511538_02   \n",
       "873       206      i7  1704412660       2       2        25            NaN   \n",
       "874       206      i7  1704412660       3       3        15  -571511538_02   \n",
       "875       206      i7  1704412660       4       4        15            NaN   \n",
       "\n",
       "        out_edge_A     inc_edge_B       out_edge_B  \n",
       "655  -571510152_01   571510152_01  571510152_01.65  \n",
       "656  -571510152_01            NaN              NaN  \n",
       "657   571500487_01  -571500487_01     571542797_02  \n",
       "658   571545870_01  -571542797_02     571510153_01  \n",
       "659   571510153_01   571545870_02     571542797_02  \n",
       "..             ...            ...              ...  \n",
       "871   571500617_01   571500583_01     571500569_01  \n",
       "872   571542073_02   571542073_01     571511538_02  \n",
       "873   571542073_02            NaN              NaN  \n",
       "874   571542073_02   571542073_01     571511538_02  \n",
       "875   571542073_02            NaN              NaN  \n",
       "\n",
       "[221 rows x 10 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preprocess(105)\n",
    "preprocess(106)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for m in range(30, 288):\n",
    "#     print(m)\n",
    "#     histid = preprocess(m)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "rts",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/Archives/Scripts/preprocess_daily.ipynb
+++ b/Archives/Scripts/preprocess_daily.ipynb
--- a/Archives/Scripts/preprocess_daily_0.py
+++ b/Archives/Scripts/preprocess_daily_0.py
@ -1,431 +0,0 @@
 import pandas as pd
 import numpy as np
 import os
 import json
 import sumolib
 from tqdm import tqdm

 def check_inter_info(inter_info):
    '''Debug helper: print inter_info, then the literal marker 'check'.'''
    for item in (inter_info, 'check'):
        print(item)

 def make_match1(path_root):
    '''
    Merge the stored movement snapshots into one table (match1) and save it.

    Background (from the original author): the signal DB updates the movement
    information every second, and it is loaded every 5 seconds.
    '../Data/tables/move/' holds those 5-second movement snapshots.

    path_root : project root; snapshots are read from <path_root>/Data/tables/move
                and the result is written to <path_root>/Intermediates/match1.csv

    return : the merged movement information
     - per-phase movement information of rings A and B for every inter_no

    Building match1 takes a while (about one minute according to the original
    author), so it is built once, saved, and the saved copy is reused.
    '''
    path_move = os.path.join(path_root, 'Data', 'tables', 'move')
    # Only *.csv entries are snapshots; stray files in the folder must not reach read_csv.
    # Sorting fixes the read order, which os.listdir leaves unspecified, so the order of
    # rows that tie on the sort keys below is reproducible.
    csv_moves = sorted(name for name in os.listdir(path_move) if name.lower().endswith('.csv'))
    moves = [pd.read_csv(os.path.join(path_move, csv_move), index_col=0) for csv_move in tqdm(csv_moves, desc='이동류정보 불러오는 중 : match1')]
    match1 = pd.concat(moves).drop_duplicates().sort_values(by=['inter_no','phas_A','phas_B']).reset_index(drop=True)
    match1.to_csv(os.path.join(path_root, 'Intermediates', 'match1.csv'))
    return match1

 def make_match2(match1):
    '''
    Hierarchize match1 : one row per ring instead of one row per (A, B) pair.
     - match1 columns : inter_no, phas_A, phas_B, move_A, move_B
     - match2 columns : inter_no, phase_no, ring_type, move_no
    '''
    ordered_cols = ['inter_no', 'phase_no', 'ring_type', 'move_no']
    per_ring = []
    for ring in ('A', 'B'):
        # Take this ring's phase and movement columns and give them ring-neutral names.
        part = match1[['inter_no', f'phas_{ring}', f'move_{ring}']].copy()
        part.columns = ['inter_no', 'phase_no', 'move_no']
        part['ring_type'] = ring
        per_ring.append(part)
    stacked = pd.concat(per_ring).drop_duplicates()
    stacked = stacked[ordered_cols]
    return stacked.sort_values(by=ordered_cols)

 def make_match3(match2, nema):
    '''
    Add the incoming and outgoing direction of every movement.
     - match2 columns : inter_no, phase_no, ring_type, move_no
     - match3 columns : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir

    nema :
     - columns : move_no, inc_dir, out_dir
     - lookup table giving the incoming and outgoing direction of every movement number
     - movement numbers : 1 ~ 16, 17, 18, 21
     - directions (8 compass points) : east, west, south, north, north-east, north-west, south-east, south-west
    '''
    # Left join keeps every match2 row even when nema has no entry for its move_no.
    with_dirs = match2.merge(nema, how='left', on='move_no')
    return with_dirs.drop_duplicates()

 def make_match4(match3, angle):
    '''
    Add the incoming and outgoing angle of every (inter_no, phase_no, ring_type).
     - match3 columns : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir
     - match4 columns : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir, inc_angle, out_angle

    angle :
     - columns : inter_no, angle_Aj, angle_Bj (j : 1 ~ 8)
     - non-null angle codes must support len() and slicing (presumably strings - verify
       against how the table is loaded); a 6-character code is split into its first
       three characters (inc_angle) and its last three characters (out_angle)
    '''
    code_cols = [f'angle_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]
    phase_numbers = list(range(1, 9)) * 2
    ring_labels = ['A'] * 8 + ['B'] * 8

    # Hierarchize : 16 rows (8 phases x 2 rings) per intersection, empty slots dropped afterwards.
    per_inter = [
        pd.DataFrame({'inter_no': [rec.inter_no] * 16, 'phase_no': phase_numbers,
                      'ring_type': ring_labels, 'angle_code': rec[code_cols].to_list()})
        for _, rec in angle.iterrows()
    ]
    angles = pd.concat(per_inter)
    angles = angles.dropna().reset_index(drop=True)

    # Only rows with a 6-character code receive angles; the other rows stay NaN.
    has_six = angles.angle_code.apply(lambda code: len(code) == 6)
    leading = angles.angle_code.apply(lambda code: code[:3])
    trailing = angles.angle_code.apply(lambda code: code[3:])
    angles.loc[has_six, 'inc_angle'] = leading
    angles.loc[has_six, 'out_angle'] = trailing
    angles = angles.drop('angle_code', axis=1)

    # Merge
    keys = ['inter_no', 'phase_no', 'ring_type']
    merged = pd.merge(match3, angles, how='left', left_on=keys, right_on=keys)
    return merged.drop_duplicates()

 def make_match5(match4, net, inter_node, inter_info):
    '''
    Add the incoming edge id, outgoing edge id and node id (parent intersections).
     - match4 columns : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir, inc_angle, out_angle
     - match5 columns : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir, inc_angle, out_angle, inc_edge, out_edge, node_id

    Data used :
    (1) net
     - network object; must provide getNode(node_id), whose result provides
       getIncoming() / getOutgoing() edges with getFunction(), getShape() and getID()
       (presumably a sumolib net - verify against the caller)
    (2) inter_node
     - table matching intersection numbers with node ids
     - also carries the parent/child information
     - columns : inter_no, node_id, inter_type
    (3) inter_info
     - intersection information; only inter_no, inter_lat and inter_lon are selected here
     - columns : inter_no, inter_name, inter_lat, inter_lon, group_no, main_phase_no

    How the edge ids are obtained (described for the incoming side; the outgoing side is analogous) :
     - every row of match5 = match4.copy() is visited
     - the node id of the row's intersection is looked up and all incoming edges of that
       node whose function is '' are collected (inc_edges)
     - for every such edge the unit direction of its last shape segment is computed
       (for outgoing edges : the first shape segment)
     - the row's incoming angle is converted to a unit vector
     - the edge whose direction has the LARGEST dot product with that vector is chosen
     - rows without an incoming angle keep NaN in inc_edge / out_edge

    The edge lists and directions depend only on the node, so they are computed once per
    node and reused for all rows of that node.
    '''

    # Parent nodes only.
    inter_node1 = inter_node[inter_node.inter_type == 'parent'].drop('inter_type', axis=1)
    inter_info1 = inter_info[['inter_no', 'inter_lat', 'inter_lon']]
    inter = pd.merge(inter_node1, inter_info1, how='left', left_on=['inter_no'],
                    right_on=['inter_no']).drop_duplicates()

    inter2node = dict(zip(inter['inter_no'], inter['node_id']))

    def unit_vector(start, end):
        # Unit vector pointing from start to end.
        vec = np.array(end) - np.array(start)
        return vec / (vec ** 2).sum() ** 0.5

    node_cache = {} # node_id -> (inc_edges, inc_dirs, out_edges, out_dirs)
    def edges_and_dirs(node_id):
        # Edge lists and their unit directions for one node, computed on first use.
        if node_id not in node_cache:
            node = net.getNode(node_id)
            inc_edges = [edge for edge in node.getIncoming() if edge.getFunction() == ''] # incoming edges
            out_edges = [edge for edge in node.getOutgoing() if edge.getFunction() == ''] # outgoing edges
            # Incoming edges : last shape segment; outgoing edges : first shape segment.
            inc_dirs = [unit_vector(edge.getShape()[-2], edge.getShape()[-1]) for edge in inc_edges]
            out_dirs = [unit_vector(edge.getShape()[0], edge.getShape()[1]) for edge in out_edges]
            node_cache[node_id] = (inc_edges, inc_dirs, out_edges, out_dirs)
        return node_cache[node_id]

    match5 = match4.copy()
    # Create the edge columns up front (object dtype, all NaN) so they exist even when no
    # row carries an angle.
    for col in ('inc_edge', 'out_edge'):
        if col not in match5.columns:
            match5[col] = np.nan
            match5[col] = match5[col].astype(object)

    # Match incoming / outgoing edge ids.
    for index, row in match5.iterrows():
        inc_edges, inc_dirs, out_edges, out_dirs = edges_and_dirs(inter2node[row.inter_no])
        if pd.isna(row.inc_angle):
            continue
        inc_angle = int(row.inc_angle)
        out_angle = int(row.out_angle)
        # Convert each angle from the angle table to radians and then to a unit vector.
        # (-90 - a) is (90 - a) turned by 180 degrees, so the incoming vector is reversed
        # relative to the outgoing convention.
        inc_angle = (-90 - inc_angle) % 360
        inc_angle = inc_angle * np.pi / 180.
        inc_dir_true = np.array([np.cos(inc_angle), np.sin(inc_angle)])
        out_angle = (90 - out_angle) % 360
        out_angle = out_angle * np.pi / 180.
        out_dir_true = np.array([np.cos(out_angle), np.sin(out_angle)])
        # Pick the best-aligned edge on each side.
        inc_index = np.array([np.dot(inc_dir, inc_dir_true) for inc_dir in inc_dirs]).argmax()
        out_index = np.array([np.dot(out_dir, out_dir_true) for out_dir in out_dirs]).argmax()
        match5.at[index, 'inc_edge'] = inc_edges[inc_index].getID()
        match5.at[index, 'out_edge'] = out_edges[out_index].getID()
    match5['node_id'] = match5['inter_no'].map(inter2node)
    match5 = match5.sort_values(by=['inter_no','phase_no','ring_type']).reset_index(drop=True)
    return match5

 def make_match6(match5, inter_node, uturn, coord, path_root):
    '''
    Add the incoming edge id, outgoing edge id and node id for child intersections.
     - match6 columns : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir, inc_angle, out_angle, inc_edge, out_edge, node_id

    Data used :
    (1) inter_node
     - table matching intersection numbers with node ids
     - also carries the parent/child information
     - columns : inter_no, node_id, inter_type
    (2) uturn (u-turn information)
     - columns : parent_id, child_id, direction, condition, inc_edge, out_edge
     - parent_id, child_id : parent intersection id, u-turn intersection id
     - direction : position (direction) of the u-turn node relative to the parent intersection
     - condition : one of 좌회전시 (during left turn), 직진시 (during straight),
                   직좌시 (during straight + left), 보행신호시 (during pedestrian signal)
     - inc_edge, out_edge : incoming and outgoing edge of the u-turn
    (3) coord (coordinated intersection information)
     - columns : parent_id, child_id, phase_no, ring_type, inc_edge, out_edge
     - parent_id, child_id : parent intersection id, coordinated intersection id
     - remaining columns : incoming / outgoing edge per (phase, ring)

    Explanation :
     - match5 carries edge ids and node ids for parent intersections only. Here uturn and
       coord are used to provide them for the child intersections as well.
    U-turn intersections :
     - directions lists the 8 compass points clockwise starting at north, so for a given
       direction the other directions follow from index arithmetic :
         - left-turn exit  : directions[(ind + 2) % len(directions)]
         - straight exit   : directions[(ind + 4) % len(directions)]
     - for every uturn row :
         - the match5 rows of parent_id are copied (cmatch), assigned to child_id and
           their inc_edge / out_edge are cleared
         - inc_dire, out_dire_A and out_dire_B are chosen from the condition
         - rows with inc_dir == inc_dire and out_dir in (out_dire_A, out_dire_B) receive
           the u-turn edges and movement number 19
         - for 보행신호시, rows with movement number 17 whose out_dir differs from
           direction additionally receive the u-turn edges
     - an unknown condition raises ValueError instead of silently reusing the
       directions of the previous row

    Coordinated intersections :
     - coord already states the edges per (phase, ring); inc_dir, out_dir, inc_angle and
       out_angle are filled with NaN and the movement number is set to 20

    match6 :
     - match5, all cmatch frames and coord concatenated, de-duplicated and sorted; it is
       also written to <path_root>/Intermediates/match6.csv

    The caller's coord frame is not modified.
    '''

    node2inter = dict(zip(inter_node['node_id'], inter_node['inter_no']))
    directions = ['북', '북동', '동', '남동', '남', '남서', '서', '북서'] # 8 compass points, clockwise from north
    edge_cols = ['inc_edge', 'out_edge']

    # Assign (inc_edge_id, out_edge_id) for every u-turn node.
    cmatches = []
    for _, row in uturn.iterrows():
        child_id = row.child_id
        parent_id = row.parent_id
        direction = row.direction
        condition = row.condition
        inc_edge_id = row.inc_edge
        out_edge_id = row.out_edge
        # Rows of the parent intersection, re-labelled for the child node.
        cmatch = match5[match5.node_id==parent_id].copy() # match dataframe for a child node
        cmatch = cmatch.sort_values(by=['phase_no', 'ring_type']).reset_index(drop=True)
        cmatch['node_id'] = child_id
        # Object dtype so that edge id strings can be written without a dtype change.
        cmatch[edge_cols] = np.nan
        cmatch[edge_cols] = cmatch[edge_cols].astype(object)

        # Choose inc_dire, out_dire_A, out_dire_B from the condition.
        ind = directions.index(direction)
        left_exit = directions[(ind + 2) % len(directions)]
        straight_exit = directions[(ind + 4) % len(directions)]
        if condition == "좌회전시":
            inc_dire = direction
            out_dire_A = out_dire_B = left_exit
        elif condition == "직진시":
            inc_dire = direction
            out_dire_A = out_dire_B = straight_exit
        elif condition == "직좌시":
            # Straight + left : the u-turn runs with either movement from this approach.
            # NOTE(review): inferred from the two-exit design and the documented condition list - confirm.
            inc_dire = direction
            out_dire_A = left_exit
            out_dire_B = straight_exit
        elif condition == "보행신호시":
            # NOTE(review): the original notes give this pair in the opposite order;
            # the code's order is kept unchanged - confirm which is intended.
            inc_dire = directions[(ind + 2) % len(directions)]
            out_dire_A = directions[(ind - 2) % len(directions)]
            out_dire_B = directions[(ind - 2) % len(directions)]
        else:
            raise ValueError(f'unknown u-turn condition {condition!r} for child node {child_id!r}')

        # Rows whose (incoming, outgoing) direction pair matches get the u-turn edges.
        hits = (cmatch.inc_dir==inc_dire) & ((cmatch.out_dir==out_dire_A) | (cmatch.out_dir==out_dire_B))
        cmatch.loc[hits, edge_cols] = [inc_edge_id, out_edge_id]
        if condition == '보행신호시':
            # Movement number 17 (pedestrian signal) rows that do not head toward the
            # u-turn node also get (inc_edge_id, out_edge_id).
            cmatch.loc[(cmatch.move_no==17) & (cmatch.out_dir!=direction), edge_cols] = [inc_edge_id, out_edge_id]
        # U-turn signals get movement number 19.
        cmatch.loc[hits, 'move_no'] = 19
        cmatches.append(cmatch)

    # Assign (inc_edge_id, out_edge_id) for every coordination node.
    coord = coord.copy() # work on a copy so the caller's frame keeps its columns
    coord['inter_no'] = coord['parent_id'].map(node2inter)
    coord = coord.rename(columns={'child_id':'node_id'})
    coord[['inc_dir', 'out_dir', 'inc_angle','out_angle']] = np.nan
    coord['move_no'] = 20
    coord = coord[['inter_no', 'phase_no', 'ring_type', 'move_no', 'inc_dir', 'out_dir', 'inc_angle','out_angle', 'inc_edge', 'out_edge', 'node_id']]

    # One concat call also covers the case of an empty uturn table.
    match6 = pd.concat([match5, *cmatches, coord]).drop_duplicates().sort_values(by=['inter_no', 'node_id', 'phase_no', 'ring_type'])
    match6.to_csv(os.path.join(path_root, 'Intermediates', 'match6.csv'))
    return match6

 def make_matching(match6, inter_node, nema, path_root):
    '''
    Movement matching: for every parent node, enumerate the movements that are possible there
    and attach an incoming/outgoing edge ID to each, then append the child-node rows of match6.
    Because the table is built from the directions present at a node rather than from the
    movements observed so far, a movement that was not seen before can still be resolved to edges.
     - columns of matching : inter_no, move_no, inc_dir, out_dir, inc_edge, out_edge, node_id

    Steps :
     - Build the helper lists / dictionaries
        (1) flows  : every (incoming direction, outgoing direction) pair defined in nema [list]
        (2) pdires : possible directions per parent node [dict]
        (3) inc2id : (node_id, incoming direction) -> incoming edge id [dict]
        (4) out2id : (node_id, outgoing direction) -> outgoing edge id [dict]
        (5) pflow  : possible flows per parent node [dict]
     - Start with an empty list (matching).
     - For every parent node and every possible (incoming direction, outgoing direction) pair,
       look up the incoming and outgoing edge ids, build a one-row DataFrame and append it.
     - Concatenate everything, drop rows containing NaN/None, sort, and write
       <path_root>/Intermediates/matching.csv.

    Returns the resulting DataFrame.
    '''

    match7 = match6.copy()
    match7 = match7[['inter_no', 'move_no', 'inc_dir', 'out_dir', 'inc_edge', 'out_edge', 'node_id']]

    parent_ids = sorted(inter_node[inter_node.inter_type=='parent'].node_id.unique())
    child_ids = sorted(inter_node[inter_node.inter_type=='child'].node_id.unique())

    # (1) Every (incoming direction, outgoing direction) pair in nema; rows containing NaN are skipped
    flows = nema.dropna().apply(lambda row: (row['inc_dir'], row['out_dir']), axis=1).tolist()
    # (2) Directions per parent node : pdires (possible directions)
    #     Union of the string values found in inc_dir and out_dir (NaN entries are filtered out).
    pdires = {}
    for node_id in parent_ids:
        dires = match7[match7.node_id == node_id][['inc_dir','out_dir']].values.flatten()
        dires = {dire for dire in dires if type(dire)==str}
        pdires[node_id] = dires
    # (3) Incoming edge id per (node, incoming direction) : inc2id
    #     Takes the inc_edge of the first matching row.
    #     NOTE(review): pdires mixes incoming and outgoing directions, so a direction that only
    #     ever appears as out_dir yields an empty df here and .iloc[0] raises IndexError.
    inc2id = {}
    for node_id in parent_ids:
        for inc_dir in pdires[node_id]:
            df = match7[(match7.node_id==node_id) & (match7.inc_dir==inc_dir)]
            inc2id[(node_id, inc_dir)] = df.inc_edge.iloc[0]
    # (4) Outgoing edge id per (node, outgoing direction) : out2id
    #     Same first-row lookup (and the same empty-frame caveat) as inc2id.
    out2id = {}
    for node_id in parent_ids:
        for out_dir in pdires[node_id]:
            df = match7[(match7.node_id==node_id) & (match7.out_dir==out_dir)]
            out2id[(node_id, out_dir)] = df.out_edge.iloc[0]
    # (5) Possible (incoming direction, outgoing direction) pairs per parent node : pflow
    #     A flow is possible when both of its directions exist at the node.
    pflow = {}
    for node_id in parent_ids:
        pflow[node_id] = [flow for flow in flows if set(flow).issubset(pdires[node_id])]
    # (6) Assign incoming / outgoing edge ids to every possible movement : matching
    node2inter = dict(zip(match7['node_id'], match7['inter_no']))
    dires_right = ['북', '서', '남', '동', '북'] # consecutive pairs, e.g. (북, 서), (서, 남), are right-turn flows
    matching = []
    for node_id in parent_ids:
        inter_no = node2inter[node_id]
        # Left turns and through movements (1 ~ 16)
        for (inc_dir, out_dir) in pflow[node_id]:
            move_no = nema[(nema.inc_dir==inc_dir) & (nema.out_dir==out_dir)].move_no.iloc[0]
            inc_edge = inc2id[(node_id, inc_dir)]
            out_edge = out2id[(node_id, out_dir)]
            new_row = pd.DataFrame({'inter_no':[inter_no], 'move_no':[move_no],
                                    'inc_dir':[inc_dir], 'out_dir':[out_dir],
                                    'inc_edge':[inc_edge], 'out_edge':[out_edge], 'node_id':[node_id]})
            matching.append(new_row)
        # Pedestrian signal (17), all-red (18)
        # NOTE(review): these rows hold None in four columns, so the dropna() below removes them again.
        new_row = pd.DataFrame({'inter_no':[inter_no] * 2, 'move_no':[17, 18],
                                'inc_dir':[None]*2, 'out_dir':[None]*2,
                                'inc_edge':[None]*2, 'out_edge':[None]*2, 'node_id':[node_id]*2})
        matching.append(new_row)
        # Signalised right turn (21)
        for d in range(len(dires_right)-1):
            inc_dir = dires_right[d]
            out_dir = dires_right[d+1]
            if {inc_dir, out_dir}.issubset(pdires[node_id]):
                inc_edge = inc2id[(node_id, inc_dir)]
                out_edge = out2id[(node_id, out_dir)]
                new_row = pd.DataFrame({'inter_no':[inter_no], 'move_no':[21],
                                        'inc_dir':[inc_dir], 'out_dir':[out_dir],
                                        'inc_edge':[inc_edge], 'out_edge':[out_edge], 'node_id':[node_id]})
                matching.append(new_row)
    # Child-node rows are taken from match6 unchanged.
    matching.append(match7[match7.node_id.isin(child_ids)])
    matching = pd.concat(matching)
    # dropna() discards every row with a missing value in any column (including child rows
    # whose direction or edge columns are NaN).
    matching = matching.dropna().sort_values(by=['inter_no', 'node_id', 'move_no']).reset_index(drop=True)
    matching['move_no'] = matching['move_no'].astype(int)
    matching.to_csv(os.path.join(path_root, 'Intermediates', 'matching.csv'))
    return matching

 def make_movements(path_root):
    """Merge the per-file movement tables into one de-duplicated table.

    Reads every file in <path_root>/Intermediates/movement as CSV (first column
    as index), concatenates them, drops the 'start_unix' column and duplicate
    rows, sorts by (inter_no, phas_A, phas_B), writes the result to
    <path_root>/Intermediates/movements.csv and returns it.
    """
    intermediates_dir = os.path.join(path_root, 'Intermediates')
    source_dir = os.path.join(intermediates_dir, 'movement')

    frames = []
    for file_name in tqdm(os.listdir(source_dir), desc='이동류정보 불러오는 중 : movements'):
        frames.append(pd.read_csv(os.path.join(source_dir, file_name), index_col=0))

    movements = (
        pd.concat(frames)
        .drop(columns=['start_unix'])
        .drop_duplicates()
        .sort_values(by=['inter_no', 'phas_A', 'phas_B'])
        .reset_index(drop=True)
    )
    movements.to_csv(os.path.join(intermediates_dir, 'movements.csv'))
    return movements

 # node2num_cycles : A dictionary that maps a node_id to the number of cycles
 def get_node2num_cycles(plan, inter_node, path_root):
    """Map every node_id to a cycle count derived from its intersection's shortest cycle.

    Parameters:
        plan       : DataFrame with at least the columns 'inter_no' and 'cycle'.
        inter_node : DataFrame with at least the columns 'node_id' and 'inter_no'.
        path_root  : project root; the result is also written to
                     <path_root>/Intermediates/node2numcycles.json.

    Returns:
        dict {node_id: 300 // (minimum cycle of the node's intersection) + 2}

    Raises:
        KeyError if a node refers to an inter_no that does not occur in plan.
    """
    node2inter = dict(zip(inter_node['node_id'], inter_node['inter_no']))
    node_ids = sorted(inter_node.node_id.unique())

    # Shortest cycle per intersection -> number of cycles per intersection.
    min_cycle = plan.groupby('inter_no')['cycle'].min()
    num_cycle = 300 // min_cycle + 2
    inter2num_cycles = dict(zip(num_cycle.index, num_cycle))
    node2numcycles = {node_id : inter2num_cycles[node2inter[node_id]] for node_id in node_ids}

    # Write below path_root (like the other intermediates) instead of the current
    # working directory, and make sure the target directory exists.
    out_dir = os.path.join(path_root, 'Intermediates')
    os.makedirs(out_dir, exist_ok=True)
    with open(os.path.join(out_dir, 'node2numcycles.json'), 'w') as file:
        json.dump(node2numcycles, file, indent=4)
    return node2numcycles

 def main():
    """Load the input tables and the SUMO network, then run the whole matching pipeline.

    The project root is taken as the directory two levels above this file.
    Tables are read from <root>/Data/tables and the network from
    <root>/Data/networks/sn.net.xml.
    """
    path_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    tables_dir = os.path.join(path_root, 'Data', 'tables')

    def table_path(file_name):
        return os.path.join(tables_dir, file_name)

    inter_info = pd.read_csv(table_path('inter_info.csv'))
    check_inter_info(inter_info)

    # The angle codes are read as strings.
    angle_dtypes = {}
    for alph in ['A', 'B']:
        for j in range(1, 9):
            angle_dtypes[f'angle_{alph}{j}'] = 'str'
    angle = pd.read_csv(table_path('angle.csv'), dtype=angle_dtypes)

    plan = pd.read_csv(table_path('plan.csv'))
    inter_node = pd.read_csv(table_path('inter_node.csv'))
    uturn = pd.read_csv(table_path('child_uturn.csv'))
    coord = pd.read_csv(table_path('child_coord.csv'))
    nema = pd.read_csv(table_path('nema.csv'), encoding='cp949')

    net_file = os.path.join(path_root, 'Data', 'networks', 'sn.net.xml')
    net = sumolib.net.readNet(net_file)

    # Each stage consumes the output of the previous one.
    match1 = make_match1(path_root)
    match2 = make_match2(match1)
    match3 = make_match3(match2, nema)
    match4 = make_match4(match3, angle)
    match5 = make_match5(match4, net, inter_node, inter_info)
    match6 = make_match6(match5, inter_node, uturn, coord, path_root)
    make_matching(match6, inter_node, nema, path_root)
    make_movements(path_root)
    get_node2num_cycles(plan, inter_node, path_root)

 if __name__ == '__main__':
    main()
--- a/Archives/Scripts/preprocess_daily_1.py
+++ b/Archives/Scripts/preprocess_daily_1.py
@ -1,587 +0,0 @@
 import pandas as pd
 import numpy as np
 import os, sys, traci
 import json
 import sumolib
 from tqdm import tqdm

 class DailyPreprocessor():
    def __init__(self):
        self.path_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        self.issues = []
    
    # 1. 데이터 불러오기
    def load_data(self):
        self.load_networks()
        self.load_tables()
        self.check_networks()
        self.check_tables()
        print('1. 모든 데이터가 로드되었습니다.')

    # 1-1. 네트워크 불러오기
    def load_networks(self):
        """Read <path_root>/Data/networks/sn.net.xml with sumolib and keep it in self.net."""
        net_file = os.path.join(self.path_root, 'Data', 'networks', 'sn.net.xml')
        self.net = sumolib.net.readNet(net_file)
        print("1-1. 네트워크가 로드되었습니다.")

    # 1-2. 테이블 불러오기
    def load_tables(self):
        # 모든 컬럼에 대하여 데이터타입 지정
        loading_dtype = {
            'inter_no':'int', 'start_hour':'int', 'start_minute':'int', 'cycle':'int','offset':'int',
            'node_id':'str', 'inter_type':'str', 'parent_id':'str','child_id':'str',
            'direction':'str', 'condition':'str', 'inc_edge':'str', 'out_edge':'str',
            'end_unix':'int', 'inter_name':'str', 'inter_lat':'float', 'inter_lon':'float',
            'group_no':'int', 'main_phase_no':'int', 'phase_no':'int','ring_type':'str'
            }
        for alph in ['A', 'B']:
            for j in range(1,9):
                loading_dtype[f'angle_{alph}{j}'] = 'str'
                loading_dtype[f'dura_{alph}{j}'] = 'int'

        self.path_table = os.path.join(self.path_root, 'Data', 'tables')

        self.inter_info = pd.read_csv(os.path.join(self.path_table, 'inter_info.csv'), dtype=loading_dtype)
        self.angle      = pd.read_csv(os.path.join(self.path_table, 'angle.csv'), dtype=loading_dtype)
        self.plan       = pd.read_csv(os.path.join(self.path_table, 'plan.csv'), dtype=loading_dtype)
        self.inter_node = pd.read_csv(os.path.join(self.path_table, 'inter_node.csv'), dtype=loading_dtype)
        self.uturn      = pd.read_csv(os.path.join(self.path_table, 'child_uturn.csv'), dtype=loading_dtype)
        self.coord      = pd.read_csv(os.path.join(self.path_table, 'child_coord.csv'), dtype=loading_dtype)
        self.nema       = pd.read_csv(os.path.join(self.path_table, 'nema.csv'), encoding='cp949', dtype=loading_dtype)
        print("1-2. 테이블들이 로드되었습니다.")

    # 1-3. Check the network
    def check_networks(self):
        """Compare, per traffic-light node, the link count in the net file with the TraCI state length.

        For every node of type 'traffic_light' the number of connections with a
        TL link index >= 0 (from sumolib) is compared with the length of the
        red-yellow-green state string reported by TraCI. A mismatch is recorded
        in self.issues as a dict with the keys 'id', 'type' and 'note'.

        Raises:
            EnvironmentError if the environment variable SUMO_HOME is not set.
        """
        # https://sumo.dlr.de/docs/Netedit/neteditUsageExamples.html#simplify_tls_program_state_after_changing_connections
        if 'SUMO_HOME' not in os.environ:
            raise EnvironmentError("please declare environment variable 'SUMO_HOME'")
        tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
        if tools not in sys.path:
            sys.path.append(tools)

        net_file = os.path.join(self.path_root, 'Data', 'networks', 'sn.net.xml')
        traci.start([sumolib.checkBinary('sumo'), "-n", net_file])
        try:
            nodes = [node for node in self.net.getNodes() if node.getType()=='traffic_light']
            for node in nodes:
                node_id = node.getID()
                from_xml   = len([c for c in node.getConnections() if c.getTLLinkIndex() >= 0])
                from_traci = len(traci.trafficlight.getRedYellowGreenState(node_id))
                if from_xml != from_traci:
                    sub = {'id': node_id, 'type': 'node', 'note': '유효하지 않은 연결이있음. netedit에서 clean states 필요.'}
                    self.issues.append(sub)
        finally:
            # Always shut the simulation down, even when a lookup above raises.
            traci.close()
        print("1-3. 네트워크의 모든 clean state requirement들을 체크했습니다.")
    
    # 1-4. 테이블의 무결성 검사
    def check_tables(self):
        self.check_inter_info()
        self.check_angle()
        self.check_plan()
        print("1-4. 모든 테이블들의 무결성을 검사했고 이상 없습니다.")
        pass

    # 1-4-1. 교차로정보(inter_info) 검사
    def check_inter_info(self):
        # 1-4-1-1. inter_lat, inter_lon 적절성 검사
        self.inter_info.loc[0, 'inter_lat'] = 38.0 # 에러 발생을 위한 코드
        self.max_lon, self.min_lon = 127.207888, 127.012492
        self.max_lat, self.min_lat = 37.480693, 37.337112
        for _, row in self.inter_info.iterrows():
            latbool = self.min_lat <= row['inter_lat'] <= self.max_lat
            lonbool = self.min_lon <= row['inter_lon'] <= self.max_lon
            if not(latbool and lonbool):
                msg = f"1-4-1-1. 위도 또는 경도가 범위를 벗어난 교차로가 있습니다: inter_no : {row['inter_no']}"
                self.issues.append(msg)
        # 교차로목록 정의
        self.inter_nos = sorted(self.inter_info.inter_no.unique())

    # 1-4-2. Check the azimuth-angle table (angle)
    def check_angle(self):
        # 1-4-2-1. inter_no 검사
        # self.angle.loc[0, 'inter_no'] = '4' # 에러 발생을 위한 코드
        missing_inter_nos = set(self.angle.inter_no) - set(self.inter_nos)
        if missing_inter_nos:
            msg = f"1-4-2-1. angle의 inter_no 중 교차로 목록(inter_nos)에 포함되지 않는 항목이 있습니다: {missing_inter_nos}"
            self.issues.append(msg)

    # 1-4-3. 신호계획(plan) 검사
    def check_plan(self):
        # 1-4-3-1. inter_no 검사
        # self.plan.loc[0, 'inter_no'] = '4' # 에러 발생을 위한 코드
        missing_inter_nos = set(self.plan.inter_no) - set(self.inter_nos)
        if missing_inter_nos:
            msg = f"1-4-3-1. plan의 inter_no 중 교차로 목록(inter_nos)에 포함되지 않는 항목이 있습니다: {missing_inter_nos}"
            self.issues.append(msg)

        # 1-4-3-2. 시작시각 검사
        # self.plan.loc[0, 'start_hour'] = 27 # 에러 발생을 위한 코드
        for _, row in self.plan.iterrows():
            start_hour = row.start_hour
            start_minute = row.start_minute
            if not (0 <= start_hour <= 23) or not (0 <= start_minute <= 59):
                msg = f"1-4-3-2. plan에 잘못된 형식의 start_time이 존재합니다: {start_hour, start_minute}"
                self.issues.append(msg)

        # 1-4-3-3. 현시시간 검사
        # self.plan.loc[0, 'dura_A1'] = -2 # 에러 발생을 위한 코드
        durations = self.plan[[f'dura_{alph}{j}' for alph in ['A','B'] for j in range(1, 9)]]
        valid_indices = ((durations >= 0) & (durations <= 200)).all(axis=1)
        invalid_inter_nos = sorted(self.plan[~ valid_indices].inter_no.unique())
        if invalid_inter_nos:
            msg = f"1-4-3-3. 음수이거나 200보다 큰 현시시간이 존재합니다. : {invalid_inter_nos}"
        
        # 1-4-3-4. 주기 일관성 검사
        # self.plan.loc[0, 'cycle'] = 50 # 에러 발생을 위한 코드
        inconsistent_cycle = self.plan.groupby(['inter_no', 'start_hour', 'start_minute'])['cycle'].nunique().gt(1)
        if inconsistent_cycle.any():
            inc_inter_no, start_hour, start_minute = inconsistent_cycle[inconsistent_cycle].index[0]
            msg = f"1-4-3-4. inter_no:{inc_inter_no}, start_hour:{start_minute}, start_hour:{start_minute}일 때, cycle이 유일하게 결정되지 않습니다."
            self.issues.append(msg)

        # 1-4-3-5. 현시시간 / 주기 검사
        # self.plan.loc[0, 'duration'] = 10 # 에러 발생을 위한 코드
        right_duration = True
        for (inter_no, start_hour, start_minute), group in self.plan.groupby(['inter_no', 'start_hour', 'start_minute']):
            A_sum = group[[f'dura_A{j}' for j in range(1, 9)]].iloc[0].sum()
            B_sum = group[[f'dura_B{j}' for j in range(1, 9)]].iloc[0].sum()
            # A_sum = group[group['ring_type']=='A']['duration'].sum()
            # B_sum = group[group['ring_type']=='B']['duration'].sum()
            cycle = group['cycle'].unique()[0]
            if not (A_sum == B_sum == cycle):
                right_duration = False
                inc_inter_no = inter_no
        if not right_duration:
            msg = f"1-4-4-5. inter_no:{inc_inter_no}, A링현시시간의 합과 B링현시시간의 합이 일치하지 않거나, 현시시간의 합과 주기가 일치하지 않습니다."
            self.issues.append(msg)

    # 2. 중간산출물 만들기
    def get_intermediates(self):
        self.get_matches()
        # self.get_movements()
        self.get_node2num_cycles()

    # 2-1 매칭테이블들 생성
    def get_matches(self):
        self.make_match1()
        self.make_match2()
        self.make_match3()
        self.make_match4()
        self.make_match5()
        self.make_match6()
        self.make_matching()
        
    # 2-1-1
    def make_match1(self):
        '''
        Combine the movement snapshots into one table (self.match1).

        Every file in <path_root>/Data/tables/move is read as CSV (first column as index).
        The frames are concatenated, duplicate rows are dropped, the result is sorted by
        (inter_no, phas_A, phas_B) and re-indexed, then stored in self.match1 and written to
        <path_root>/Intermediates/match1.csv.

        Per the original author's notes, the signal DB updates the movement information every
        second, it is fetched every 5 seconds, and loading takes about one minute.
        '''
        path_move = os.path.join(self.path_root, 'Data', 'tables', 'move')

        # Load the movement snapshots one file at a time
        moves = []
        for csv_move in tqdm(os.listdir(path_move), desc='이동류정보 불러오는 중 : match1'):
            moves.append(pd.read_csv(os.path.join(path_move, csv_move), index_col=0))

        combined = pd.concat(moves).drop_duplicates()
        combined = combined.sort_values(by=['inter_no','phas_A','phas_B'])
        self.match1 = combined.reset_index(drop=True)
        self.match1.to_csv(os.path.join(self.path_root, 'Intermediates', 'match1.csv'))

    # 2-1-2
    def make_match2(self):
        '''
        match1을 계층화함.
        - match1의 컬럼 : inter_no, phas_A, phas_B, move_A, move_B
        - match2의 컬럼 : inter_no, phase_no, ring_type, move_no
        '''
        # 계층화 (inter_no, phas_A, phas_B, move_A, move_B) -> ('inter_no', 'phase_no', 'ring_type', 'move_no')
        matchA = self.match1[['inter_no', 'phas_A', 'move_A']].copy()
        matchA.columns = ['inter_no', 'phase_no', 'move_no']
        matchA['ring_type'] = 'A'
        matchB = self.match1[['inter_no', 'phas_B', 'move_B']].copy()
        matchB.columns = ['inter_no', 'phase_no', 'move_no']
        matchB['ring_type'] = 'B'
        self.match2 = pd.concat([matchA, matchB]).drop_duplicates()
        self.match2 = self.match2[['inter_no', 'phase_no', 'ring_type', 'move_no']]
        self.match2 = self.match2.sort_values(by=list(self.match2.columns))

    # 2-1-3
    def make_match3(self):
        '''
        각 movement들에 방향(진입방향, 진출방향)을 매칭시켜 추가함.
        - match2의 컬럼 : inter_no, phase_no, ring_type, move_no
        - match3의 컬럼 : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir

        nema : 
        - 컬럼 : move_no, inc_dir, out_dir
        - 모든 종류의 이동류번호에 대하여 진입방향과 진출방향을 매칭시키는 테이블
        - 이동류번호 : 1 ~ 16, 17, 18, 21
        - 진입, 진출방향(8방위) : 동, 서, 남, 북, 북동, 북서, 남동, 남서
        '''
        # nema 정보 불러오기 및 병합
        self.match3 = pd.merge(self.match2, self.nema, how='left', on='move_no').drop_duplicates()

    # 2-1-4
    def make_match4(self):
        '''
        방위각 정보를 매칭시켜 추가함.
        - match3의 컬럼 : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir
        - match4의 컬럼 : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir, inc_angle, out_angle

        angle_original : 
        - 컬럼 : inter_no, angle_Aj, angle_Bj (j : 1 ~ 8)
        - 모든 종류의 이동류번호에 대하여 진입방향과 진출방향을 매칭시키는 테이블
        - 이동류번호 : 1 ~ 16, 17, 18, 21
        - 진입, 진출방향(8방위) : 동, 서, 남, 북, 북동, 북서, 남동, 남서
        '''

        # 계층화
        angles = []
        for i, row in self.angle.iterrows():
            angle_codes = row[[f'angle_{alph}{j}' for alph in ['A', 'B'] for j in range(1,9)]]
            new = pd.DataFrame({'inter_no':[row.inter_no] * 16, 'phase_no':list(range(1, 9))*2, 'ring_type':['A'] * 8 + ['B'] * 8, 'angle_code':angle_codes.to_list()})
            angles.append(new)
        angles = pd.concat(angles)
        angles = angles.dropna().reset_index(drop=True)

        # 병합
        six_chars = angles.angle_code.apply(lambda x:len(x)==6)
        angles.loc[six_chars,'inc_angle'] = angles.angle_code.apply(lambda x:x[:3])
        angles.loc[six_chars,'out_angle'] = angles.angle_code.apply(lambda x:x[3:])
        angles = angles.drop('angle_code', axis=1)
        self.match4 = pd.merge(self.match3, angles, how='left', left_on=['inter_no', 'phase_no', 'ring_type'],
                        right_on=['inter_no', 'phase_no', 'ring_type']).drop_duplicates()

    # 2-1-5
    def make_match5(self):
        '''
        Add the incoming edge id, outgoing edge id and node id (parent intersections).
        - columns of match4 : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir, inc_angle, out_angle
        - columns of match5 : the match4 columns plus inc_edge, out_edge, node_id

        Data used :
        (1) net
        - the SUMO network held in self.net
        (2) inter_node
        - maps intersection numbers to node ids and carries the parent/child flag
        - columns : inter_no, node_id, inter_type
        (3) inter_info
        - intersection information; only inter_no, inter_lat and inter_lon are selected here
          NOTE(review): latitude / longitude are merged in but not used afterwards in this method.

        How the edge ids are obtained :
        - Iterate over the rows of match5 = match4.copy().
        * Only the incoming side is described; the outgoing side works the same way.
        - Look up the node of the row's intersection and collect its incoming edges in inc_edges.
        * inc_edge : incoming edge, out_edge : outgoing edge
        - For every incoming edge compute its direction as a 2-D unit vector (inc_dirs),
          taken from the last two shape points of the edge.
        - Convert the row's incoming angle into a unit vector as well.
        - The incoming edge whose direction has the LARGEST dot product with that vector
          (argmax) becomes inc_edge.
        Rows whose inc_angle is missing are left without inc_edge / out_edge.
        '''

        # Keep parent nodes only.
        inter_node1 = self.inter_node[self.inter_node.inter_type == 'parent'].drop('inter_type', axis=1)
        inter_info1 = self.inter_info[['inter_no', 'inter_lat', 'inter_lon']]
        inter = pd.merge(inter_node1, inter_info1, how='left', left_on=['inter_no'],
                        right_on=['inter_no']).drop_duplicates()

        self.inter2node = dict(zip(inter['inter_no'], inter['node_id']))

        self.match5 = self.match4.copy()
        # Match incoming / outgoing edge ids
        for index, row in self.match5.iterrows():
            node_id = self.inter2node[row.inter_no]
            node = self.net.getNode(node_id)
            # All (from / to) edges of the intersection whose function is the empty string
            # (presumably the regular, non-internal edges; confirm against sumolib)
            inc_edges = [edge for edge in node.getIncoming() if edge.getFunction() == ''] # incoming edges
            out_edges = [edge for edge in node.getOutgoing() if edge.getFunction() == ''] # outgoing edges
            # Unit direction vectors of all (from / to) edges
            inc_dirs = []
            for inc_edge in inc_edges:
                # last segment of the incoming edge shape
                start = inc_edge.getShape()[-2]
                end = inc_edge.getShape()[-1]
                inc_dir = np.array(end) - np.array(start)
                inc_dir = inc_dir / (inc_dir ** 2).sum() ** 0.5
                inc_dirs.append(inc_dir)
            out_dirs = []
            for out_edge in out_edges:
                # first segment of the outgoing edge shape
                start = out_edge.getShape()[0]
                end = out_edge.getShape()[1]
                out_dir = np.array(end) - np.array(start)
                out_dir = out_dir / (out_dir ** 2).sum() ** 0.5
                out_dirs.append(out_dir)
            # Read the incoming / outgoing angle
            if not pd.isna(row.inc_angle):
                inc_angle = int(row.inc_angle)
                out_angle = int(row.out_angle)
                # Convert the azimuth to a standard (counter-clockwise) angle, then to radians,
                # then to a unit vector. (-90 - a) equals (90 - a) rotated by 180 degrees, i.e. the
                # incoming vector points the opposite way of the azimuth (presumably because
                # inc_angle is the bearing of the approach as seen from the node; confirm).
                inc_angle = (-90 - inc_angle) % 360
                inc_angle = inc_angle * np.pi / 180.
                inc_dir_true = np.array([np.cos(inc_angle), np.sin(inc_angle)])
                out_angle = (90 - out_angle) % 360
                out_angle = out_angle * np.pi / 180.
                out_dir_true = np.array([np.cos(out_angle), np.sin(out_angle)])
                # Pick the best-aligned edges (largest dot product)
                inc_index = np.array([np.dot(inc_dir, inc_dir_true) for inc_dir in inc_dirs]).argmax()
                out_index = np.array([np.dot(out_dir, out_dir_true) for out_dir in out_dirs]).argmax()
                inc_edge_id = inc_edges[inc_index].getID()
                out_edge_id = out_edges[out_index].getID()
                self.match5.at[index, 'inc_edge'] = inc_edge_id
                self.match5.at[index, 'out_edge'] = out_edge_id
        self.match5['node_id'] = self.match5['inter_no'].map(self.inter2node)
        self.match5 = self.match5.sort_values(by=['inter_no','phase_no','ring_type']).reset_index(drop=True)

    # 2-1-6
    def make_match6(self):
        '''
        Add the incoming edge id, outgoing edge id and node id for child intersections.
        - columns of match6 : inter_no, phase_no, ring_type, move_no, inc_dir, out_dir, inc_angle, out_angle, inc_edge, out_edge, node_id

        Data used :
        (1) inter_node
        - maps intersection numbers to node ids and carries the parent/child flag
        - columns : inter_no, node_id, inter_type
        (2) uturn (u-turn information)
        - columns : parent_id, child_id, direction, condition, inc_edge, out_edge
        - parent_id, child_id : parent node id, u-turn node id
        - direction : position of the u-turn node relative to the parent intersection
        - condition : the code below handles 좌회전시, 직진시 and 보행신호시
        - inc_edge, out_edge : incoming / outgoing edge of the u-turn
        (3) coord (coordinated-intersection information)
        - columns : parent_id, child_id, phase_no, ring_type, inc_edge, out_edge
        - parent_id, child_id : parent node id, coordinated node id
        - remaining columns : incoming / outgoing edge per (phase, ring)

        Description :
        - match5 carries edge ids and node ids for parent intersections only.
          Here uturn and coord are used to add the child intersections (u-turn and coordinated nodes).
        U-turn nodes :
        - directions lists the 8 compass points clockwise starting at north, so offsets in that
          list give the related directions. With ind = index of the row's direction:
            - 좌회전시   : inc = directions[ind],     out = directions[(ind + 2) % 8]
            - 직진시     : inc = directions[ind],     out = directions[(ind + 4) % 8]
            - 보행신호시 : inc = directions[(ind + 2) % 8], out = directions[(ind - 2) % 8]
        - For every row of uturn:
            - copy the match5 rows of parent_id (cmatch), relabel them with the child node id
              and clear inc_edge / out_edge;
            - derive (inc_dire, out_dire_A, out_dire_B) from the condition;
            - write the u-turn's edges into the rows matching those directions and set their
              move_no to 19.
        - Each cmatch is collected in the list cmatches.

        Coordinated nodes :
        - coord already states the incoming / outgoing edge per (phase, ring).
        - inc_dir, out_dir, inc_angle and out_angle are set to np.nan, and move_no to 20.

        match6 :
        - match5, cmatches and coord are concatenated, de-duplicated, sorted, stored in self.match6
          and written to <path_root>/Intermediates/match6.csv.

        NOTE(review): self.coord is renamed and re-assigned in place, so the method relies on
        self.coord still having its original columns (parent_id, child_id) when it is called.
        '''

        self.node2inter = dict(zip(self.inter_node['node_id'], self.inter_node['inter_no']))

        child_ids = self.inter_node[self.inter_node.inter_type=='child'].node_id.unique()
        # NOTE(review): ch2pa is built here but not referenced again in this method.
        ch2pa = {} # child to parent
        for child_id in child_ids:
            parent_no = self.inter_node[self.inter_node.node_id==child_id].inter_no.iloc[0]
            sub_inter_node = self.inter_node[self.inter_node.inter_no==parent_no]
            ch2pa[child_id] = sub_inter_node[sub_inter_node.inter_type=='parent'].iloc[0].node_id
        directions = ['북', '북동', '동', '남동', '남', '남서', '서', '북서'] # 8 directions, clockwise from north

        # Assign (inc_edge_id, out_edge_id) for every u-turn node
        cmatches = []
        for _, row in self.uturn.iterrows():
            child_id = row.child_id
            parent_id = row.parent_id
            direction = row.direction
            condition = row.condition
            inc_edge_id = row.inc_edge
            out_edge_id = row.out_edge
            # Take the match5 rows that belong to parent_id
            cmatch = self.match5.copy()[self.match5.node_id==parent_id] # match dataframe for a child node
            cmatch = cmatch.sort_values(by=['phase_no', 'ring_type']).reset_index(drop=True)
            cmatch['node_id'] = child_id
            cmatch[['inc_edge', 'out_edge']] = np.nan

            # Derive inc_dire, out_dire_A, out_dire_B from the condition
            # NOTE(review): any other condition value leaves these names unassigned for this row
            # (NameError on the first row, stale values from the previous row afterwards).
            ind = directions.index(direction)
            if condition == "좌회전시":
                inc_dire = direction
                out_dire_A = out_dire_B = directions[(ind + 2) % len(directions)]
            elif condition == "직진시":
                inc_dire = direction
                out_dire_A = out_dire_B = directions[(ind + 4) % len(directions)]
            elif condition == "보행신호시":
                inc_dire = directions[(ind + 2) % len(directions)]
                out_dire_A = directions[(ind - 2) % len(directions)]
                out_dire_B = directions[(ind - 2) % len(directions)]

            # Assign inc_edge_id / out_edge_id to the rows matching (inc_dire, out_dire_A, out_dire_B).
            # out_dire_A and out_dire_B are always equal above, so the paired statements select the same rows.
            if condition == '보행신호시':
                cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_A), ['inc_edge', 'out_edge']] = [inc_edge_id, out_edge_id]
                cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_B), ['inc_edge', 'out_edge']] = [inc_edge_id, out_edge_id]
                # Rows with movement number 17 (pedestrian signal) whose out_dir differs from the
                # u-turn node's direction also receive (inc_edge_id, out_edge_id).
                cmatch.loc[(cmatch.move_no==17) & (cmatch.out_dir!=direction), ['inc_edge', 'out_edge']] = [inc_edge_id, out_edge_id]
            else: # '직진시', '좌회전시'
                cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_A), ['inc_edge', 'out_edge']] = [inc_edge_id, out_edge_id]
                cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_B), ['inc_edge', 'out_edge']] = [inc_edge_id, out_edge_id]
            # The u-turn signal gets movement number 19.
            cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_A), 'move_no'] = 19
            cmatch.loc[(cmatch.inc_dir==inc_dire) & (cmatch.out_dir==out_dire_B), 'move_no'] = 19
            cmatches.append(cmatch)

        # Assign (inc_edge_id, out_edge_id) for every coordination node
        self.coord['inter_no'] = self.coord['parent_id'].map(self.node2inter)
        self.coord = self.coord.rename(columns={'child_id':'node_id'})
        self.coord[['inc_dir', 'out_dir', 'inc_angle','out_angle']] = np.nan
        self.coord['move_no'] = 20
        self.coord = self.coord[['inter_no', 'phase_no', 'ring_type', 'move_no', 'inc_dir', 'out_dir', 'inc_angle','out_angle', 'inc_edge', 'out_edge', 'node_id']]
        
        # display(coord)
        cmatches = pd.concat(cmatches)
        self.match6 = pd.concat([self.match5, cmatches, self.coord]).drop_duplicates().sort_values(by=['inter_no', 'node_id', 'phase_no', 'ring_type'])
        self.match6.to_csv(os.path.join(self.path_root, 'Intermediates', 'match6.csv'))

    # 2-1-7
    def make_matching(self):
        '''
        이동류 매칭 : 각 교차로에 대하여, 가능한 모든 이동류 (1~18, 21)에 대한 진입·진출엣지ID를 지정한다.
        모든 이동류에 대해 지정하므로, 시차제시 이전과 다른 이동류가 등장하더라도 항상 진입·진출 엣지 ID를 지정할 수 있다.    
        - matching의 컬럼 : inter_no, move_no, inc_dir, out_dir, inc_edge, out_edge, node_id
        
        설명 : 
        - 필요한 리스트, 딕셔너리 등을 정의
            (1) 가능한 (진입방향, 진출방향) 목록 [리스트]
            (2) 각 교차로별 방향 목록 : pdires (possible directions) [딕셔너리]
            (3) 각 (교차로, 진입방향) 별 진입id 목록 : inc2id (incoming direction to incoming edge_id) [딕셔너리]
            (4) 각 (교차로, 진출방향) 별 진출id 목록 : out2id (outgoing direction to outgoing edge_id) [딕셔너리]
            (5) 각 교차로별 가능한 (진입방향, 진출방향) 목록 : pflow (possible flows) [딕셔너리]
        - matching은 빈 리스트로 지정.
        - 모든 노드id에 대하여 다음 과정을 반복
        - 해당 노드id에 대한 모든 가능한 (진입방향, 진출방향)에 대하여 다음 과정을 반복
        - (노드id, 진입방향)으로부터 진입엣지id를 얻어냄. 마찬가지로 진출엣지id도 얻어냄
        - 얻어낸 정보를 바탕으로 한 행(new_row)을 만들고 이것을 matching에 append
        '''

        self.match7 = self.match6.copy()
        self.match7 = self.match7[['inter_no', 'move_no', 'inc_dir', 'out_dir', 'inc_edge', 'out_edge', 'node_id']]

        parent_ids = sorted(self.inter_node[self.inter_node.inter_type=='parent'].node_id.unique())
        child_ids = sorted(self.inter_node[self.inter_node.inter_type=='child'].node_id.unique())

        # (1) 가능한 (진입방향, 진출방향) 목록 
        flows = self.nema.dropna().apply(lambda row: (row['inc_dir'], row['out_dir']), axis=1).tolist()
        # (2) 각 교차로별 방향 목록 : pdires (possible directions)
        pdires = {}
        for node_id in parent_ids:
            dires = self.match7[self.match7.node_id == node_id][['inc_dir','out_dir']].values.flatten()
            dires = {dire for dire in dires if type(dire)==str}
            pdires[node_id] = dires
        # (3) 각 (교차로, 진입방향) 별 진입id 목록 : inc2id (incoming direction to incoming edge_id)
        inc2id = {}
        for node_id in parent_ids:
            for inc_dir in pdires[node_id]:
                df = self.match7[(self.match7.node_id==node_id) & (self.match7.inc_dir==inc_dir)]
                inc2id[(node_id, inc_dir)] = df.inc_edge.iloc[0]
        # (4) 각 (교차로, 진출방향) 별 진출id 목록 : out2id (outgoing direction to outgoing edge_id)
        out2id = {}
        for node_id in parent_ids:
            for out_dir in pdires[node_id]:
                df = self.match7[(self.match7.node_id==node_id) & (self.match7.out_dir==out_dir)]
                out2id[(node_id, out_dir)] = df.out_edge.iloc[0]
        # (5) 각 교차로별 가능한 (진입방향, 진출방향) 목록 : pflow (possible flows)
        pflow = {}
        for node_id in parent_ids:
            pflow[node_id] = [flow for flow in flows if set(flow).issubset(pdires[node_id])]
        # (6) 가능한 이동류에 대하여 진입id, 진출id 배정 : matching
        # node2inter = dict(zip(self.match7['node_id'], self.match7['inter_no']))
        dires_right = ['북', '서', '남', '동', '북'] # ex (북, 서), (서, 남) 등은 우회전 flow
        self.matching = []
        for node_id in parent_ids:
            inter_no = self.node2inter[node_id]
            # 좌회전과 직진(1 ~ 16)
            for (inc_dir, out_dir) in pflow[node_id]:
                move_no = self.nema[(self.nema.inc_dir==inc_dir) & (self.nema.out_dir==out_dir)].move_no.iloc[0]
                inc_edge = inc2id[(node_id, inc_dir)]
                out_edge = out2id[(node_id, out_dir)]
                new_row = pd.DataFrame({'inter_no':[inter_no], 'move_no':[move_no],
                                        'inc_dir':[inc_dir], 'out_dir':[out_dir],
                                        'inc_edge':[inc_edge], 'out_edge':[out_edge], 'node_id':[node_id]})
                self.matching.append(new_row)
            # 보행신호(17), 전적색(18)
            new_row = pd.DataFrame({'inter_no':[inter_no] * 2, 'move_no':[17, 18],
                                    'inc_dir':[None]*2, 'out_dir':[None]*2,
                                    'inc_edge':[None]*2, 'out_edge':[None]*2, 'node_id':[node_id]*2})
            self.matching.append(new_row)
            # 신호우회전(21)
            for d in range(len(dires_right)-1):
                inc_dir = dires_right[d]
                out_dir = dires_right[d+1]
                if {inc_dir, out_dir}.issubset(pdires[node_id]):
                    inc_edge = inc2id[(node_id, inc_dir)]
                    out_edge = out2id[(node_id, out_dir)]
                    new_row = pd.DataFrame({'inter_no':[inter_no], 'move_no':[21],
                                            'inc_dir':[inc_dir], 'out_dir':[out_dir],
                                            'inc_edge':[inc_edge], 'out_edge':[out_edge], 'node_id':[node_id]})
                    self.matching.append(new_row)
        self.matching.append(self.match7[self.match7.node_id.isin(child_ids)])
        self.matching = pd.concat(self.matching)
        self.matching = self.matching.dropna().sort_values(by=['inter_no', 'node_id', 'move_no']).reset_index(drop=True)
        self.matching['move_no'] = self.matching['move_no'].astype(int)
        self.matching.to_csv(os.path.join(self.path_root, 'Intermediates', 'matching.csv'))

    # 2-2
    def get_movements(self):
        '''
        Merge the per-file movement tables into one de-duplicated table.

        Every entry of <path_root>/Intermediates/movement is read as a CSV
        (first column used as the index).  The frames are concatenated, the
        `start_unix` column is dropped, exact duplicate rows are removed and
        the rows are ordered by (inter_no, phas_A, phas_B) with a fresh
        0..n-1 index.  The result is written to
        <path_root>/Intermediates/movements.csv and returned.

        The files are read in sorted name order.  os.listdir() makes no
        promise about ordering, and the read order can influence how rows
        that tie on the sort keys are arranged, so sorting keeps the saved
        file reproducible between runs and machines.
        '''
        movements_path = os.path.join(self.path_root, 'Intermediates', 'movement')
        # Fix the read order up front so the concatenated frame is deterministic.
        file_names = sorted(os.listdir(movements_path))
        movements_list = [pd.read_csv(os.path.join(movements_path, file), index_col=0)
                          for file in tqdm(file_names, desc='이동류정보 불러오는 중 : movements')]
        movements = pd.concat(movements_list)
        # start_unix differs per snapshot; drop it so identical movements collapse.
        movements = movements.drop(columns=['start_unix'])
        movements = movements.drop_duplicates()
        movements = movements.sort_values(by=['inter_no', 'phas_A', 'phas_B'])
        movements = movements.reset_index(drop=True)
        movements.to_csv(os.path.join(self.path_root, 'Intermediates', 'movements.csv'))
        return movements

    # 2-3 node2num_cycles : A dictionary that maps a node_id to the number of cycles
    def get_node2num_cycles(self):
        '''
        Build, save and return the node_id -> number-of-cycles mapping.

        For every intersection the shortest `cycle` found in self.plan is
        taken and num_cycle = 300 // min_cycle + 2 is computed.  Each node_id
        in self.inter_node then receives the value of the intersection it
        maps to through self.node2inter.
        NOTE(review): 300 and +2 look like a 300-second window plus a safety
        margin of two cycles - confirm with the author.

        Side effects:
        - self.node_ids is set to the sorted unique node ids.
        - <path_root>/Intermediates/node2numcycles.json is (over)written.

        Returns:
            dict mapping node_id to its number of cycles.
        '''
        self.node_ids = sorted(self.inter_node.node_id.unique())

        # Shortest cycle length per intersection.
        cycles = self.plan[['inter_no', 'cycle']]
        df = cycles.groupby('inter_no').agg({'cycle': 'min'}).reset_index()
        df = df.rename(columns={'cycle': 'min_cycle'})
        df['num_cycle'] = 300 // df['min_cycle'] + 2
        inter2num_cycles = dict(zip(df['inter_no'], df['num_cycle']))
        node2numcycles = {node_id : inter2num_cycles[self.node2inter[node_id]] for node_id in self.node_ids}

        # Anchor the output at path_root like every other intermediate file,
        # so the result does not depend on the current working directory.
        json_path = os.path.join(self.path_root, 'Intermediates', 'node2numcycles.json')
        with open(json_path, 'w') as file:
            json.dump(node2numcycles, file, indent=4)
        return node2numcycles
    
    # 3. 이슈사항 저장
    def write_issues(self):
        '''
        Save the collected issue messages and echo them to the console.

        Each entry of self.issues is written on its own line to
        <path_root>/Results/issues_intermediates.txt (UTF-8, overwritten on
        every call, so an empty list leaves an empty file).  When at least
        one issue exists, a header line followed by every issue is printed.
        '''
        report_path = os.path.join(self.path_root, "Results", "issues_intermediates.txt")
        with open(report_path, "w", encoding="utf-8") as report:
            report.writelines(message + "\n" for message in self.issues)

        # Nothing to report on the console when no issue was recorded.
        if not self.issues:
            return
        print("데이터 처리 중 발생한 특이사항은 다음과 같습니다. :")
        for message in self.issues:
            print(message)

    def main(self):
        '''
        Run the whole pipeline, one stage after another:
        1. load the input data,
        2. build the intermediate outputs,
        3. save the issues noticed along the way.
        '''
        pipeline = (self.load_data, self.get_intermediates, self.write_issues)
        for stage in pipeline:
            stage()

 if __name__ == '__main__':
    # Script entry point: build the preprocessor and run the full pipeline.
    preprocessor = DailyPreprocessor()
    preprocessor.main()
--- a/Archives/Scripts/scheduler_example.py
+++ b/Archives/Scripts/scheduler_example.py
@ -1,20 +0,0 @@
 import sched
 import time
 from datetime import datetime

 # Create the scheduler: time.time is the clock, time.sleep the delay function.
 scheduler = sched.scheduler(time.time, time.sleep)

 def seconds_until_next_tick(now, interval=5):
    """Return the seconds from `now` to the next multiple of `interval` seconds.

    Only the second and microsecond fields of `now` are used, so boundaries
    line up with the wall clock when `interval` divides 60.  The sub-second
    part is included; ignoring it would make the tick land up to one second
    late.  When `now` is exactly on a boundary the full `interval` is
    returned.
    """
    elapsed = (now.second + now.microsecond / 1e6) % interval
    return interval - elapsed

 def print_current_time(sc):
    """Print the current local time and re-arm itself for the next boundary."""
    print("Current Time:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    # Re-schedule relative to the wall clock instead of a fixed 5 seconds,
    # so the time spent printing does not accumulate as drift.
    sc.enter(seconds_until_next_tick(datetime.now()), 1, print_current_time, (sc,))

 if __name__ == "__main__":
    # Delay until the next wall-clock time whose seconds are a multiple of 5.
    initial_delay = seconds_until_next_tick(datetime.now())
    # Schedule the first run.
    scheduler.enter(initial_delay, 1, print_current_time, (scheduler,))
    # Run the scheduler (blocks; the job keeps re-scheduling itself).
    scheduler.run()