Python pandas 合并两个或多个DataFrame的方法代码

Python 中使用 pandas 处理数据时,合并两个或多个 DataFrame 是常见的操作。本文主要介绍Python pandas中,通过pd.concat或merge或append合并DataFrame的方法代码。

示例代码

import pandas as pd
df1 = pd.DataFrame({'depth': [0.500000, 0.600000, 1.300000],
'VAR1': [38.196202, 38.198002, 38.200001],
'profile': ['profile_1', 'profile_1','profile_1']})
df2 = pd.DataFrame({'depth': [0.600000, 1.100000, 1.200000],
'VAR2': [0.20440, 0.20442, 0.20446],
'profile': ['profile_1', 'profile_1','profile_1']})
df3 = pd.DataFrame({'depth': [1.200000, 1.300000, 1.400000],
'VAR3': [15.1880, 15.1820, 15.1820],
'profile': ['profile_1', 'profile_1','profile_1']})

要实现输出结果

name_profile depth VAR1 VAR2 VAR3
profile_1 0.500000 38.196202 NaN NaN
profile_1 0.600000 38.198002 0.20440 NaN
profile_1 1.100000 NaN 0.20442 NaN
profile_1 1.200000 NaN 0.20446 15.1880
profile_1 1.300000 38.200001 NaN 15.1820
profile_1 1.400000 NaN NaN 15.1820

1、使用concat合并

import pandas as pd
df1 = pd.DataFrame({'depth': [0.500000, 0.600000, 1.300000],
       'VAR1': [38.196202, 38.198002, 38.200001],
       'profile': ['profile_1', 'profile_1','profile_1']})
df2 = pd.DataFrame({'depth': [0.600000, 1.100000, 1.200000],
       'VAR2': [0.20440, 0.20442, 0.20446],
       'profile': ['profile_1', 'profile_1','profile_1']})
df3 = pd.DataFrame({'depth': [1.200000, 1.300000, 1.400000],
       'VAR3': [15.1880, 15.1820, 15.1820],
       'profile': ['profile_1', 'profile_1','profile_1']})
       
dfs = [df.set_index(['profile', 'depth']) for df in [df1, df2, df3]]
print(pd.concat(dfs, axis=1).reset_index())
#      profile  depth       VAR1     VAR2    VAR3
# 0  profile_1    0.5  38.198002      NaN     NaN
# 1  profile_1    0.6  38.198002  0.20440     NaN
# 2  profile_1    1.1        NaN  0.20442     NaN
# 3  profile_1    1.2        NaN  0.20446  15.188
# 4  profile_1    1.3  38.200001      NaN  15.182
# 5  profile_1    1.4        NaN      NaN  15.182

2、使用merge合并

import pandas as pd
df1 = pd.DataFrame({'depth': [0.500000, 0.600000, 1.300000],
       'VAR1': [38.196202, 38.198002, 38.200001],
       'profile': ['profile_1', 'profile_1','profile_1']})
df2 = pd.DataFrame({'depth': [0.600000, 1.100000, 1.200000],
       'VAR2': [0.20440, 0.20442, 0.20446],
       'profile': ['profile_1', 'profile_1','profile_1']})
df3 = pd.DataFrame({'depth': [1.200000, 1.300000, 1.400000],
       'VAR3': [15.1880, 15.1820, 15.1820],
       'profile': ['profile_1', 'profile_1','profile_1']})
       
from functools import partial, reduce
dfs = [df1,df2,df3]
df_final = pd.DataFrame(columns=df1.columns)
for df in dfs:
    df_final = df_final.merge(df, on=['depth','profile'], how='outer')
print(df_final)
#    depth       VAR1    profile     VAR2    VAR3
#0    0.6  38.198002  profile_1  0.20440     NaN
#1    0.6  38.198002  profile_1  0.20440     NaN
#2    1.3  38.200001  profile_1      NaN  15.182
#3    1.1        NaN  profile_1  0.20442     NaN
#4    1.2        NaN  profile_1  0.20446  15.188
#5    1.4        NaN  profile_1      NaN  15.182

3、使用append合并

import pandas as pd
df1 = pd.DataFrame({'depth': [0.500000, 0.600000, 1.300000],
       'VAR1': [38.196202, 38.198002, 38.200001],
       'profile': ['profile_1', 'profile_1','profile_1']})
df2 = pd.DataFrame({'depth': [0.600000, 1.100000, 1.200000],
       'VAR2': [0.20440, 0.20442, 0.20446],
       'profile': ['profile_1', 'profile_1','profile_1']})
df3 = pd.DataFrame({'depth': [1.200000, 1.300000, 1.400000],
       'VAR3': [15.1880, 15.1820, 15.1820],
       'profile': ['profile_1', 'profile_1','profile_1']})
       
df1.append(df2).append(df3).sort_values('depth')
#        VAR1     VAR2    VAR3  depth    profile
#0  38.196202      NaN     NaN    0.5  profile_1
#1  38.198002      NaN     NaN    0.6  profile_1
#0        NaN  0.20440     NaN    0.6  profile_1
#1        NaN  0.20442     NaN    1.1  profile_1
#2        NaN  0.20446     NaN    1.2  profile_1
#0        NaN      NaN  15.188    1.2  profile_1
#2  38.200001      NaN     NaN    1.3  profile_1
#1        NaN      NaN  15.182    1.3  profile_1
#2        NaN      NaN  15.182    1.4  profile_1
推荐阅读
cjavapy编程之路首页