# Import Packages
import pandas as pd
import numpy as np
import seaborn as sns
# Load seaborn's "titanic" example dataset and bind it to df
df = sns.load_dataset("titanic")
# Bare expression: shows the frame when run as a notebook cell (no effect in a plain script)
df
1. index ๋ฐ columns ๋ค๋ฃจ๊ธฐ
Q. 'survived' ์ปฌ๋ผ์ index๋ก ๋ง๋ค์ด ํ์ธํ๊ณ , ๋ค์ 'survived' ์ปฌ๋ผ์ ๋๋ ค๋์ ๋ค ์ธ๋ฑ์ค๋ฅผ ์ด๊ธฐํ์ํค์ธ์.
# Promote 'survived' to the row index and look at it, as the exercise asks.
df.set_index('survived', inplace=True)
df.index
# reset_index() without drop=True moves the index back into a regular
# 'survived' column and installs a fresh 0..n-1 index in one step, so no
# temporary copy of the index has to be kept and re-attached by hand.
# NOTE: 'survived' comes back as the first column rather than the last.
df.reset_index(inplace=True)
Q. DataFrame df์ ์ปฌ๋ผ๋ช
์ ๋ค์๊ณผ ๊ฐ์ด ๋ณ๊ฒฝํ์ธ์
plcass : passenger_class | sex : gender | age : old
# Rename three columns in place; every other column keeps its name.
column_aliases = {
    'pclass': 'passenger_class',
    'sex': 'gender',
    'age': 'old',
}
df.rename(columns=column_aliases, inplace=True)
2. groupby
Q. pclass 값에 따른 fare 값의 평균, 분산, 최대, 최소 값을 구하세요.
# The exercise asks for mean, variance, max and min of fare per class.
# describe() would report the standard deviation (not the variance) plus
# count and quartiles, so request exactly the four statistics instead.
df.groupby('passenger_class')['fare'].agg(['mean', 'var', 'max', 'min'])
Q. pclass 값과 sex 값에 따른 age 의 평균을 구하세요.
# 'age' was renamed to 'old' (and pclass/sex to passenger_class/gender) earlier in this file.
age_by_class_and_gender = df.groupby(['passenger_class', 'gender'])['old']
age_by_class_and_gender.mean()
3. apply
Q. 'class'의 카테고리를 apply 함수를 이용하여, 다음과 같이 변경하세요.
First : 01 | Second : 02 | Third : 03
def change(value):
    """Map a passenger-class label to a zero-padded two-digit code.

    Args:
        value: Class label; 'First' and 'Second' are matched explicitly.

    Returns:
        '01' for 'First', '02' for 'Second', and '03' for every other
        value (the catch-all branch also covers 'Third').
    """
    if value == 'First':
        return '01'
    if value == 'Second':
        return '02'
    # Anything that is not First/Second falls through to the Third code.
    return '03'
# Recode the 'class' column through change() and write the result back
# into the same column.
class_codes = df['class'].apply(change)
df['class'] = class_codes
Q. 'embark_town' 컬럼의 'Southampton'를 apply 함수를 이용하여, 'Manchester'로 변경하세요.
def change1(value):
    """Replace the town name 'Southampton' with 'Manchester'.

    Args:
        value: A single cell value from the column being recoded.

    Returns:
        'Manchester' when ``value`` equals 'Southampton'; otherwise
        ``value`` itself, unchanged.
    """
    return 'Manchester' if value == 'Southampton' else value
# Run every 'embark_town' value through change1() and store the result
# back in the same column.
renamed_towns = df['embark_town'].apply(change1)
df['embark_town'] = renamed_towns
4. concat ๋ฐ merge (๋ฐ์ดํฐ ํฉ์น๊ธฐ)
Q. ์๋์ DataFrame df_2์ df๋ฅผ ํ๋์ ํ๋ ์์ผ๋ก ํฉ์น์ธ์.
# df_2 (provided by the exercise): 891 rows, each column filled with one constant
data = {'col_1': 100, 'col_2': 200}
df_2 = pd.DataFrame(data=data, index=np.arange(891))
df_2
# Put the two frames side by side (column-wise concatenation).
pd.concat([df, df_2], axis='columns')
Q. ์๋์ DataFrame df_3์ df๋ฅผ ํ๋์ ํ๋ ์์ผ๋ก ํฉ์น์ธ์.
# df_3 (provided by the exercise)
data = [[2, 12.0, 'S'], [1, 45.0, 'Q'], [3, 33.0, 'S']]
df_3 = pd.DataFrame(
    data,
    columns=['passenger_class', 'old', 'embarked'],
)
df_3
# No `on=` is given, so the join keys are the columns the two frames share.
pd.merge(df, df_3, how='inner')
5. melt ๋ฐ pivot
Q. melt ํจ์๋ฅผ ์ฌ์ฉํ์ฌ DataFrame df๋ฅผ ์๋์ ๊ฐ์ tidy format์ผ๋ก ๋ฐ๊พธ์ธ์.
survived variable value
0 0 adult_male True
1 1 adult_male False
2 1 adult_male False
3 1 adult_male False
4 0 adult_male True
... ... ... ...
1777 0 alone True
1778 1 alone True
1779 0 alone False
1780 1 alone True
1781 0 alone True
# Long ("tidy") layout: keep 'survived' as the identifier and stack the
# two flag columns into variable/value pairs.
flag_columns = ['adult_male', 'alone']
df_tidy = pd.melt(df, id_vars='survived', value_vars=flag_columns)
Q. pivot_table ํจ์๋ฅผ ์ฌ์ฉํ์ฌ, ์ ๋ฌธ์ ์ tidy format์ ์๋์ ๊ฐ์ wide format์ผ๋ก ๋ฐ๊พธ์ธ์.
variable adult_male alone
survived
0 0.817851 0.681239
1 0.257310 0.476608
# Back to a wide layout: one row per 'survived' value, one column per
# former 'variable' entry, aggregated with pivot_table's default aggfunc.
df_wide = pd.pivot_table(
    df_tidy,
    index='survived',
    columns='variable',
    values='value',
)
'💿 Data > 이모저모' 카테고리의 다른 글
Feature Engineering_결측치 처리, apply 함수 적용 (0) | 2021.12.09 |
---|---|
Seaborn 'penguins' (0) | 2021.12.09 |
데이터 다루기 예제1 (0) | 2021.12.07 |
Cramer's rule(크레이머 소거법) (0) | 2021.12.07 |
벡터 내적 및 projection (0) | 2021.12.07 |