# ********Pandas Merge********: import pandas as dp import numpy as np # merge by index named 'key'(may be used in database) df1 = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'], 'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3']}) df2 = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'], 'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}) result = pd.merge(df1, df2, on = 'key') # we have to make sure these two frames contain the same index named 'key' # consider two keys df1 = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'], 'key2': ['K0', 'K1', 'K0', 'K1'], 'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3']}) df2 = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'], 'key2': ['K0', 'K0', 'K0', 'K0'], 'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}) # default join = 'inner' result = pd.merge(df1, df2, on = ['key1', 'key2']) result2 = pd.merge(df1, df2, on = ['key1', 'key2'], how = 'outer') # how = {'left', 'right', 'outer', 'inner'} # consider indicator(detail of merge) df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']}) df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]}) result = pd.merge(df1, df2, on = 'col1', how = 'outer', indicator = True) result1 = pd.merge(df1, df2, on = 'col1', how = 'outer', indicator = 'indicator_column') # rename 'indicator' # merged by index df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']}, index = ['K0', 'K1', 'K2']) df2 = pd.DataFrame({'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']}, index = ['K0', 'K2', 'K3']) result = pd.merge(df1, df2, left_index = True, right_index = True, how = 'outer') result1 = pd.merge(df1, df2, left_index = True, right_index = True, how = 'outer') # handle overlapping df1 = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]}) df2 = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]}) result = pd.merge(df1, df2, on = 'k', suffixes = ['_boy', '_girl'], how = 'inner')
|