python基础4-xgboost

发表: 2018-07-03 浏览: 1736

Python

import xgboost as xgb
from xgboost import plot_importance
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from datetime import date,datetime
import numpy as np
from matplotlib.pylab import rcParams
# Default canvas size (width, height) for every figure drawn below.
rcParams['figure.figsize'] = (15, 6)

# Root folder holding the input CSV files.
dir_path = 'E://DATA//'

# Raw order export; loaded here but not used by the statements that follow.
order_data = pd.read_csv(dir_path + '2018629.csv', sep=',')

# Weekly order data set, loaded as a DataFrame.
data_df = pd.read_csv(dir_path + '/0629//week_order1_0629.csv')

# Feature matrix and label vector as plain numpy arrays.
# (An alternative feature set tried earlier: ['qty_one', 'qty_two', 'test'].)
X = data_df[['qty_one', 'qty_two', 'cx1', 'cx2', 'dq_cx']].values
y = data_df['target'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
# Booster configuration: gradient-boosted trees predicting one of two class
# labels directly (multi:softmax returns the class index, not a probability).
# NOTE(review): 'silent' is deprecated in newer xgboost releases in favour of
# 'verbosity' -- confirm against the installed version before changing it.
params = {
    'booster': 'gbtree',
    'objective': 'multi:softmax',
    'num_class': 2,
    'gamma': 0.1,
    'max_depth': 6,
    'lambda': 2,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': 3,
    'silent': 1,
    'eta': 0.1,
    'seed': 1000,
    'nthread': 4,
}

# xgboost accepts either a dict or a *list* of (name, value) pairs.  On
# Python 3 `dict.items()` is a read-only view, which newer xgboost versions
# reject when they try to extend the parameter container, so materialize it.
plst = list(params.items())

# Wrap the training split in xgboost's DMatrix container (features + labels).
dtrain = xgb.DMatrix(X_train, y_train)

# Number of boosting iterations.
num_rounds = 500
model = xgb.train(plst, dtrain, num_rounds)

# Predict on the held-out test split.
dtest = xgb.DMatrix(data=X_test)
ans = model.predict(data=dtest)

# Compute precision, recall and accuracy, treating label 1 as the positive
# class.
def _percent(numerator, denominator):
    """Return ``100 * numerator / denominator``, or NaN if the denominator is 0.

    A zero denominator means the metric is undefined (e.g. precision when
    nothing was predicted positive); NaN is reported instead of raising
    ``ZeroDivisionError``.
    """
    if denominator == 0:
        return float('nan')
    return 100 * (numerator / denominator)


# Boolean masks over the test rows.  A row that is neither predicted 1 nor
# labelled 0 counts as a false negative, matching the branch structure of the
# original element-by-element loop.
predicted_positive = np.asarray(ans) == 1
actual_positive = np.asarray(y_test) == 1
actual_negative = np.asarray(y_test) == 0

# Confusion-matrix counts.
tp = int(np.count_nonzero(predicted_positive & actual_positive))
fp = int(np.count_nonzero(predicted_positive & ~actual_positive))
tn = int(np.count_nonzero(~predicted_positive & actual_negative))
fn = int(np.count_nonzero(~predicted_positive & ~actual_negative))

print("Precision: %.2f %% " % _percent(tp, tp + fp))

print("Recall: %.2f %% " % _percent(tp, tp + fn))

print("Accuracy: %.2f %% " % _percent(tp + tn, tp + fn + tn + fp))

Precision: 100.00 %
Recall: 85.71 %

Accuracy: 90.91 %

# Plot the importance of each feature in the trained booster.
plot_importance(booster=model)
plt.show()



# Use a font with CJK glyphs so the Chinese axis labels render.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Scatter the first two feature columns: rows labelled 1 as green dots,
# all other rows as red dots.
is_target_one = y == 1
plt.plot(X[is_target_one, 0], X[is_target_one, 1], 'g.')
plt.plot(X[~is_target_one, 0], X[~is_target_one, 1], 'r.')

# Vertical reference line at x = 40.
plt.axvline(x=40, ymin=0)

plt.legend(['>40', '<40'])
plt.grid(True)
plt.xlabel('维度')
plt.ylabel('销量')
plt.show()



# Split the training rows by whether the first feature column exceeds 40.
index_qty1 = X_train[:, 0] > 40
X_train1 = X_train[index_qty1, :]
X_train2 = X_train[~index_qty1, :]

# Draw the two training groups and the test rows, one series per call, in the
# same order as the legend entries.
plt.plot(X_train1[:, 0], X_train1[:, 1], 'g.')
plt.plot(X_train2[:, 0], X_train2[:, 1], 'b*')
plt.plot(X_test[:, 0], X_test[:, 1], 'rs')

plt.legend(['>40', '<40', 'test_data'])
plt.title('Distribution')
plt.grid(True)
plt.xlabel('axis1')
plt.ylabel('axis2')
plt.show()



# Partition the test rows by predicted value (above 0.5 vs. not).
index1 = ans > 0.5
res1 = X_test[index1, :]
res2 = X_test[~index1, :]

# Overlay the two prediction groups on the training distribution, one series
# per call, in the same order as the legend entries.
plt.plot(X_train1[:, 0], X_train1[:, 1], 'g.')
plt.plot(X_train2[:, 0], X_train2[:, 1], 'b*')
plt.plot(res1[:, 0], res1[:, 1], 'ro')
plt.plot(res2[:, 0], res2[:, 1], 'ys')

plt.legend(['>40', '<40', 'res1', 'res2'])
plt.title('predict_res')
plt.grid(True)
plt.xlabel('axis1')
plt.ylabel('axis2')
plt.show()

0 个评论

要回复文章请先登录或注册