# coding: utf-8
'''
作业要求:
1、不同类型卡的持卡人的性别对比
2、不同类型卡的持卡人在办卡时的平均年龄对比
3、不同类型卡的持卡人在办卡前一年内的平均帐户余额对比
4、不同类型卡的持卡人在办卡前一年内的平均收入对比
'''
#########################################################
import os
import pandas as pd
# Load every CSV file in the homework directory into a module-level DataFrame
# named after the file stem (e.g. ``card.csv`` -> ``card``). The analysis
# sections below refer to those names directly.
os.chdir(r'D:\HW5')
loanfile = os.listdir()
# Writing through ``locals()`` is only reliable at module scope, where it is
# the same mapping as ``globals()``; use ``globals()`` to make that explicit.
createVar = globals()
for i in loanfile:
    # Require a real ``.csv`` extension (case-insensitively) so that entries
    # which merely end in the letters "csv" are not handed to ``read_csv``.
    if i.lower().endswith(".csv"):
        table_name = i.split('.')[0]
        # The files are read with the GBK encoding.
        createVar[table_name] = pd.read_csv(i, encoding='gbk')
        print(table_name)
#########################################################
# In[]
# 1. Gender comparison of cardholders across card types
from pylab import mpl

# Link each card to its disposition and then to the client behind it.
# After the first merge the card type is found under ``type_x`` (presumably
# because both tables carry a ``type`` column); rename it back to ``type``.
data1 = card.merge(disp, on='disp_id')
data_con1 = data1.merge(clients, on='client_id').rename(columns={'type_x': 'type'})

# Count cardholders per (card type, sex), then turn each row into shares
# by dividing by that row's total.
gender_cross_raw = pd.crosstab(data_con1['type'], data_con1['sex'])
gender_cross = gender_cross_raw.div(gender_cross_raw.sum(axis=1), axis=0)

mpl.rcParams.update({
    'font.sans-serif': ['SimHei'],  # default font, so Chinese labels render
    'axes.unicode_minus': False,    # keep '-' from rendering as a box in saved figures
})
gender_cross.plot.bar(stacked=True)
#########################################################
# In[]
# 2. Comparison of cardholder age at the time the card was issued, by card type
from datetime import datetime  # no longer used in this section; kept in case other code relies on the name
import seaborn as sns
from pylab import mpl

data_con1['birth_date'] = pd.to_datetime(data_con1['birth_date'])
# The task asks for the age when the card was issued, so measure from the
# card's issue year rather than from today's year (which would inflate every
# age by the time elapsed since issuance).
data_con1['age'] = (
    pd.to_datetime(data_con1['issued']).dt.year - data_con1['birth_date'].dt.year
)

mpl.rcParams['font.sans-serif'] = ['SimHei']  # default font, so Chinese labels render
mpl.rcParams['axes.unicode_minus'] = False    # keep '-' from rendering as a box
# Pass x/y by keyword: recent seaborn releases no longer accept them positionally.
sns.boxplot(x='type', y='age', data=data_con1)
#########################################################
# In[]
# 3. Comparison of account balance during the year before the card was issued,
#    by card type
import seaborn as sns
from pylab import mpl

# Attach to every transaction the card (issue date and type) reachable through
# the account's disposition.
data_con2 = (
    trans[['account_id', 'date', 'amount', 'balance']]
    .merge(disp[['disp_id', 'account_id']], on='account_id')
    .merge(card[['disp_id', 'issued', 'type']], on='disp_id')
)
# Balances are stored as text; drop the '$' and thousands separators before
# converting to integers.
data_con2['balance'] = (
    data_con2['balance']
    .str.strip('$')
    .str.replace(',', '', regex=False)
    .astype('int')
)

# Parse once as timestamps for the arithmetic, while still storing plain
# calendar dates in the frame as before.
issued_ts = pd.to_datetime(data_con2['issued'])
trans_ts = pd.to_datetime(data_con2['date'])
data_con2['issued'] = issued_ts.dt.date
data_con2['date'] = trans_ts.dt.date

# Whole days from the transaction to the card issue date; negative values are
# transactions that happened after the card was issued.
days_before_issue = (issued_ts.dt.normalize() - trans_ts.dt.normalize()).dt.days
# Keep only the year leading up to issuance. Without the lower bound every
# post-issuance transaction would be included as well. ``.copy()`` makes the
# result an independent frame that can be modified safely later.
data_con3 = data_con2[(days_before_issue >= 0) & (days_before_issue < 365)].copy()
#print(data_con3.head())

mpl.rcParams['font.sans-serif'] = ['SimHei']  # default font, so Chinese labels render
mpl.rcParams['axes.unicode_minus'] = False    # keep '-' from rendering as a box
# Pass x/y by keyword: recent seaborn releases no longer accept them positionally.
sns.boxplot(x='type', y='balance', data=data_con3)
#########################################################
# In[]
# 4. Comparison of income during the year before the card was issued, by card type
import seaborn as sns
from pylab import mpl

# data_con3 comes from boolean-filtering data_con2; take an explicit copy so
# the column assignment below acts on an independent frame instead of a slice
# (avoids pandas' SettingWithCopy warning and its ambiguity).
data_con3 = data_con3.copy()
# Amounts are stored as text; drop the '$' and thousands separators before
# converting to integers.
data_con3['amount'] = (
    data_con3['amount']
    .str.strip('$')
    .str.replace(',', '', regex=False)
    .astype('int')
)
# NOTE(review): every transaction amount is plotted, not only incoming ones.
# If "income" should mean credits only, filter on the transaction type here -
# confirm against the trans table's columns.
#print(data_con3.sample())

mpl.rcParams['font.sans-serif'] = ['SimHei']  # default font, so Chinese labels render
mpl.rcParams['axes.unicode_minus'] = False    # keep '-' from rendering as a box
# Pass x/y by keyword: recent seaborn releases no longer accept them positionally.
sns.boxplot(x='type', y='amount', data=data_con3)