Use:
# Read every CSV in shelldemo/ and stack them into one frame (df1) that is
# keyed by ID plus the name of the file each row came from.
files = glob.glob('shelldemo/*.csv')
dfs = []
for fp in files:
    # Alternative when the files carry several index columns whose names
    # contain no "(...)":
    # df = pd.read_csv(fp, index_col=['S.no', 'id', 'number'])
    df = pd.read_csv(fp, index_col=['ID'])
    # Tag the rows with the file name up to its first dot, then move that
    # tag into the index so only the value columns remain as columns.
    df['file'] = os.path.basename(fp).split('.')[0]
    df = df.set_index('file', append=True)
    # Each remaining header is expected to hold an integer in parentheses;
    # keep just that integer as the column label. The parentheses and \d
    # must be escaped, otherwise the pattern matches the letter "d".
    df.columns = df.columns.str.extract(r'\((\d+)\)', expand=False).astype(int)
    dfs.append(df)
# sort=False keeps the columns in first-seen order instead of sorting them.
df1 = pd.concat(dfs, sort=False).reset_index()
print(df1)
ID file 58 67 89 91 96 100
0 1 file1 NaN 56 78.0 98.0 NaN 101.0
1 2 file1 NaN 91 100.0 121.0 NaN NaN
2 3 file2 102.0 103 NaN NaN 101.0 104.0
3 4 file2 113.0 117 NaN NaN 112.0 119.0
# Display df2, the price table that is merged with df1 further down
# (df2 is created outside this snippet).
print (df2)
File Price1 Price2 Price3 Price4
0 File1 67 89 91 100
1 File2 96 58 105 99
# Lower-case df2's headers and its 'file' values so that both line up with
# the lower-case 'file' column of df1 before the two tables are combined.
lowercase_headers = df2.columns.str.lower()
df2.columns = lowercase_headers
df2['file'] = df2['file'].str.lower()
# Left join: every row of df1 is kept and receives the prices of its file.
df = pd.merge(df1, df2, how='left', on='file')
print(df)
ID file 58 67 89 91 96 100 price1 price2 price3
0 1 file1 NaN 56 78.0 98.0 NaN 101.0 67 89 91
1 2 file1 NaN 91 100.0 121.0 NaN NaN 67 89 91
2 3 file2 102.0 103 NaN NaN 101.0 104.0 96 58 105
3 4 file2 113.0 117 NaN NaN 112.0 119.0 96 58 105
price4
0 100
1 100
2 99
3 99
# Replace every price in the price* columns with the value found in the
# same row under the integer column whose label equals that price.

# Keep only the integer-labelled columns (the numbers taken from "(...)").
# .str.isnumeric() yields NaN for labels that are not strings, so isnull()
# is True exactly for the integer labels.
df1 = df.loc[:, df.columns.str.isnumeric().isnull()].copy()
# Keep only the price columns.
df2 = df.filter(regex='price').copy()
uniq_vals_df2 = df2.stack().dropna().drop_duplicates()
not_matched_vals = np.setdiff1d(uniq_vals_df2, df1.columns)
# Add an all-NaN column for every price that has no matching column, plus a
# placeholder column 'a' used for missing prices, so each lookup succeeds.
df1 = df1.join(pd.DataFrame(columns=not_matched_vals.tolist() + ['a']))
# Replace each price by the matching value from df1, row by row.
# DataFrame.lookup was removed in pandas 2.0, so use label-based .at access.
for c in df2.columns:
    wanted_labels = df2[c].fillna('a')
    df2[c] = [
        df1.at[row_label, col_label]
        for row_label, col_label in zip(df1.index, wanted_labels)
    ]
# Join the looked-up values back to the identifying columns.
df = df[['file', 'ID']].join(df2)
print(df)
file ID price1 price2 price3 price4
0 file1 1 56.0 78.0 98.0 101.0
1 file1 2 91.0 100.0 121.0 NaN
2 file2 3 101.0 102.0 NaN NaN
3 file2 4 112.0 113.0 NaN NaN
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…