python基础4-xgboost

浏览: 1736
import xgboost as xgb
from xgboost import plot_importance
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from datetime import date,datetime
import numpy as np
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6 #设置画布

# Root directory that holds the input CSV files.
dir_path = 'E://DATA//'

# Raw order export. It is loaded here but not referenced again in this script.
order_data = pd.read_csv(dir_path + '2018629.csv', sep=',')

# Weekly order table used as the modelling dataset.
data_df = pd.read_csv(dir_path + '/0629//week_order1_0629.csv')

# Feature matrix and label vector as plain numpy arrays.
# Alternative feature set kept for reference: ['qty_one', 'qty_two', 'test']
feature_columns = ['qty_one', 'qty_two', 'cx1', 'cx2', 'dq_cx']
X = data_df[feature_columns].values
y = data_df['target'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
# Booster hyper-parameters for a two-class gradient-boosted tree model.
# With the 'multi:softmax' objective, predict() yields class labels, which is
# what the metric code further down compares against 1.
# NOTE(review): 'silent' was superseded by 'verbosity' in newer xgboost
# releases; confirm against the installed version before changing it.
params = {
    'booster': 'gbtree',
    'objective': 'multi:softmax',
    'num_class': 2,
    'gamma': 0.1,
    'max_depth': 6,
    'lambda': 2,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': 3,
    'silent': 1,
    'eta': 0.1,
    'seed': 1000,
    'nthread': 4,
}
# Keep the (key, value) pairs available as a real list. On Python 3 a bare
# ``params.items()`` is only a view object, which current xgboost releases
# reject when it is handed to train().
plst = list(params.items())

dtrain = xgb.DMatrix(X_train, y_train)
num_rounds = 500
# Pass the parameter dict itself; that is the form train() documents.
model = xgb.train(params, dtrain, num_rounds)

# Predict on the held-out test set.
dtest = xgb.DMatrix(X_test)
ans = model.predict(dtest)

# Compute precision, recall and accuracy, treating class 1 as the positive
# class. The four confusion-matrix cells are counted with boolean masks
# instead of an index loop.
_predicted_positive = np.asarray(ans) == 1
_truth = np.asarray(y_test)

tp = int(np.count_nonzero(_predicted_positive & (_truth == 1)))
fp = int(np.count_nonzero(_predicted_positive & (_truth != 1)))
tn = int(np.count_nonzero(~_predicted_positive & (_truth == 0)))
fn = int(np.count_nonzero(~_predicted_positive & (_truth != 0)))


def _percent(numerator, denominator):
    """Return ``100 * numerator / denominator``, or 0.0 if the denominator is 0.

    The guard keeps the report from raising ZeroDivisionError when the model
    predicts no positives, the test set has no positives, or it is empty.
    """
    if not denominator:
        return 0.0
    return 100 * (numerator / denominator)


print("Precision: %.2f %% " % _percent(tp, tp + fp))
print("Recall: %.2f %% " % _percent(tp, tp + fn))
print("Accuracy: %.2f %% " % _percent(tp + tn, tp + fn + tn + fp))

输出结果:
Precision: 100.00 %
Recall: 85.71 %
Accuracy: 90.91 %

# Draw the feature-importance chart for the trained booster and display it.
xgb.plot_importance(model)
plt.show()

# Scatter the first two feature columns of the full dataset, coloured by
# whether the target equals 1.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font for the Chinese axis labels

# Select each group of rows once instead of re-filtering per axis.
target_one_rows = X[y == 1]
target_other_rows = X[y != 1]

plt.plot(target_one_rows[:, 0], target_one_rows[:, 1], 'g.')
plt.plot(target_other_rows[:, 0], target_other_rows[:, 1], 'r.')

# Vertical reference line at x = 40.
plt.axvline(x=40, ymin=0)

plt.xlabel('维度')
plt.ylabel('销量')
plt.legend(['>40', '<40'])
plt.grid(True)
plt.show()

# Split the training rows by whether the first feature exceeds 40, then plot
# both groups together with the test rows.
index_qty1 = X_train[:, 0] > 40
X_train1 = X_train[index_qty1, :]
X_train2 = X_train[~index_qty1, :]

# One plot call per series, in the same order as the legend entries below.
plt.plot(X_train1[:, 0], X_train1[:, 1], 'g.')
plt.plot(X_train2[:, 0], X_train2[:, 1], 'b*')
plt.plot(X_test[:, 0], X_test[:, 1], 'rs')

plt.title('Distribution')
plt.xlabel('axis1')
plt.ylabel('axis2')
plt.legend(['>40', '<40', 'test_data'])
plt.grid(True)
plt.show()

# Partition the test rows by predicted value (above 0.5 or not) and overlay
# them on the two training groups.
index1 = ans > 0.5
res1 = X_test[index1, :]
res2 = X_test[~index1, :]

# (rows, format string) pairs, drawn in the same order as the legend entries.
_series = (
    (X_train1, 'g.'),
    (X_train2, 'b*'),
    (res1, 'ro'),
    (res2, 'ys'),
)
for _rows, _fmt in _series:
    plt.plot(_rows[:, 0], _rows[:, 1], _fmt)

plt.title('predict_res')
plt.xlabel('axis1')
plt.ylabel('axis2')
plt.legend(['>40', '<40', 'res1', 'res2'])
plt.grid(True)
plt.show()

推荐 1
本文由 liliwu 创作,采用 知识共享署名-相同方式共享 3.0 中国大陆许可协议 进行许可。
转载、引用前需联系作者,并署名作者且注明文章出处。
本站文章版权归原作者及原出处所有 。内容为作者个人观点, 并不代表本站赞同其观点和对其真实性负责。本站是一个个人学习交流的平台,并不用于任何商业目的,如果有任何问题,请及时联系我们,我们将根据著作权人的要求,立即更正或者删除有关内容。本站拥有对此声明的最终解释权。

0 个评论

要回复文章请先登录注册