#%%第一周作业
import pandas as pd
import os
os.chdir(r"E:\anaconda\learning\homework\week_one")
accounts=pd.read_csv("accounts.csv",encoding="gbk")
print(accounts.columns)
#account_id'帐户号(主键), 'district_id'开户行地区ID, 'frequency'结算频度,'date'开户日期
card=pd.read_csv("card.csv",encoding="gbk")
print(card.columns)
#'card_id'信用卡ID(主健), 'disp_id'帐记权限ID, 'issued'发卡日期, 'type'卡类型
clients=pd.read_csv("clients.csv",encoding="gbk")
print(clients.columns)
#'client_id'客户ID, 'sex'性别, 'birth_date'出生日期, 'district_id'开户地区ID
disp=pd.read_csv("disp.csv",encoding="gbk")
print(disp.columns)
#'disp_id'权限ID主健, 'client_id'客户ID, 'account_id'帐户ID, 'type'权限类型
district=pd.read_csv("district.csv",encoding="gbk")
print(district.columns)
#'A1'=district_id 地区ID主健, 'GDP'GDP总量, 'A4'居住人口, 'A10'城镇人口比例, 'A11'平均工资,
#'A12'95年失业率, 'A13'96年失业率, 'A14'10000人中有多少企业家, 'A15'95年犯罪率(千人),
#'a16'96年犯罪率(千人)
loans=pd.read_csv("loans.csv",encoding="gbk")
print(loans.columns)
#'loan_id'贷款ID主键, 'account_id'帐号ID, 'date'发放贷款日期, 'amount'贷款金额,
#'duration'还款期限, 'payments'每月归还金额,'status'状态
order=pd.read_csv("order.csv",encoding="gbk")
print(order.columns)
#'order_id'订单ID, 'account_id'发起订单的帐户ID, 'bank_to'收款银行, 'account_to'收款客户帐号,
#'amount'收款额,'k_symbol'支付方式
trans=pd.read_csv("trans.csv",encoding="gbk")
print(trans.columns)
#'trans_id'交易ID主健, 'account_id'发起交易的帐户ID, 'date'交易日期, 'type'借贷类型,
#'operation'交易类型, 'amount'金额,'balance'帐户余额, 'k_symbol交易特征', 'bank'对方银行, 'account'对方帐户号
#%%
import sqlite3
con=sqlite3.connect(':memory:') #CON相当于注册在内存里的一个数据库
accounts.to_sql('accounts',con) #将数据框accounts放到con里,
card.to_sql('card',con)
clients.to_sql('clients',con)
disp.to_sql('disp',con)
district.to_sql('district',con)
loans.to_sql('loans',con)
order.to_sql('order',con)
trans.to_sql('trans',con)
#%%不同类型卡的持卡人的性别对比
sex_card1=pd.read_sql_query("select * from clients as a left join disp as b on a.client_id=b.client_id ",con)
sex_card1=sex_card1.drop(['index','client_id','type'],axis=1)
sex_card1.to_sql('sex_card1',con)
sex_card2=pd.read_sql_query("select * from sex_card1 as a left join card as b on a.disp_id=b.disp_id",con)
sex_card2=sex_card2.drop(['index','disp_id'],axis=1)
from pylab import mpl
mpl.rcParams['font.sans-serif']=['SimHei']
mpl.rcParams['axes.unicode_minus']=False
t=pd.crosstab(sex_card2.sex,sex_card2.type)
t["sum1"]=t.sum(1)
t = t.div(t.sum(1),axis = 0)
t[['普通卡','金卡','青年卡']].plot(kind = 'bar',stacked= True)
#%%不同类型卡的持卡人在办卡时的平均年龄对比
import datetime as dt
now_year=dt.datetime.today().year
sex_card2['birth_year']=pd.to_datetime(sex_card2['birth_date'])
sex_card2['age']=now_year-sex_card2.birth_year.dt.year
sex_card2.age.hist(bins=30) #非右偏,故用均值
sex_card2.age.groupby(sex_card2.type).mean().plot(kind="bar")