In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
warnings.filterwarnings('ignore')
In [2]:
pima = pd.read_csv("diabetes.csv")
In [3]:
X = pima.drop(["Outcome"], axis = 1)
In [4]:
y = pima.Outcome
In [5]:
pima.head()
Out[5]:
(first five rows: Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age, Outcome)
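Before tuning, it is worth a quick look at the shape and class balance of the data, since the imbalanced Outcome column (roughly 500 negative to 268 positive cases in the standard Pima file) is why stratified folds are used throughout. A minimal sanity check:

In [ ]:
# dataset shape and class balance
# (the imbalance motivates StratifiedKFold in the cross-validation below)
print(pima.shape)
print(y.value_counts())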
In [7]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
In [8]:
# split data into train and test sets
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size,
                                                    random_state=seed)
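Note that this hold-out split is not used by the grid searches below, which cross-validate on the full X and y; it is kept for checking models against unseen data. As a reference point, here is a minimal sketch of a default, untuned XGBClassifier scored on the test set:

In [ ]:
# baseline: default XGBoost on the hold-out split, for later comparison
baseline = XGBClassifier()
baseline.fit(X_train, y_train)
print("Baseline test accuracy: %.4f" % accuracy_score(y_test, baseline.predict(X_test)))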
Tuning the Number of Trees (n_estimators)
n_estimators sets how many boosting rounds (trees) are fit; more trees model the training data more closely but risk overfitting.
In [9]:
model = XGBClassifier()
In [10]:
n_estimators = range(50,350,50)
In [12]:
list(n_estimators)
Out[12]:
[50, 100, 150, 200, 250, 300]
In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
In [14]:
param_grid = dict(n_estimators = n_estimators)
In [17]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=8)
In [21]:
grid_search = GridSearchCV(model, param_grid, scoring="accuracy", n_jobs=-1, cv=kfold)
In [22]:
grid_result = grid_search.fit(X, y)
In [23]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
Tuning Max Depth (max_depth)
max_depth caps how deep each tree can grow; deeper trees capture more feature interactions but overfit more easily.
In [24]:
model = XGBClassifier()
In [27]:
max_depth = range(1,10,2)
In [28]:
list(max_depth)
Out[28]:
[1, 3, 5, 7, 9]
In [44]:
param_grid = dict(max_depth = max_depth)
In [45]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=8)
In [46]:
grid_search = GridSearchCV(model, param_grid, scoring="accuracy", n_jobs=-1, cv=kfold)
In [47]:
grid_result = grid_search.fit(X, y)
In [48]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
Tuning n_estimators and max_depth Together
The two parameters interact, so a joint grid can find combinations that one-at-a-time tuning misses.
In [50]:
n_estimators = [10,20,50,100]
max_depth = [1,3,6,9]
In [51]:
param_grid = dict(n_estimators = n_estimators, max_depth = max_depth)
In [52]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=8)
In [53]:
grid_search = GridSearchCV(model, param_grid, scoring="accuracy", n_jobs=-1, cv=kfold)
In [54]:
grid_result = grid_search.fit(X, y)
In [55]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
Tuning learning_rate and n_estimators
Smaller learning rates shrink each tree's contribution and typically need more trees to compensate, so the two are tuned jointly.
In [57]:
n_estimators = [100,200,300,500]
learning_rate = [0.1,0.01,0.001,1]
In [58]:
param_grid = dict(n_estimators = n_estimators, learning_rate = learning_rate)
In [59]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=8)
In [60]:
grid_search = GridSearchCV(model, param_grid, scoring="accuracy", n_jobs=-1, cv=kfold)
In [61]:
grid_result = grid_search.fit(X, y)
In [62]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
Tuning Row Subsampling (subsample)
subsample sets the fraction of training rows sampled (without replacement) for each tree, the "stochastic gradient boosting" idea.
In [73]:
subsample = np.arange(0.1,1.1,0.1)
list(subsample)
Out[73]:
[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
(values shown rounded; np.arange leaves small floating-point noise on some entries)
In [74]:
param_grid = dict(subsample = subsample)
In [75]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=8)
In [76]:
grid_search = GridSearchCV(model, param_grid, scoring="accuracy", n_jobs=-1, cv=kfold)
In [77]:
grid_result = grid_search.fit(X, y)
In [78]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
Tuning Column Subsampling per Tree (colsample_bytree)
colsample_bytree sets the fraction of features sampled once for each new tree.
In [84]:
colsample_bytree = np.arange(0.4,1.1,0.1)
list(colsample_bytree)
Out[84]:
[0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
(values shown rounded)
In [85]:
param_grid = dict(colsample_bytree = colsample_bytree)
In [86]:
kfold = StratifiedKFold(n_splits=10, shuffle = True, random_state = 8)
In [87]:
grid_search = GridSearchCV(model, param_grid, scoring="accuracy", n_jobs=-1, cv=kfold)
In [88]:
grid_result = grid_search.fit(X, y)
In [89]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
Tuning Column Subsampling per Level (colsample_bylevel)
colsample_bylevel sets the fraction of features resampled at each depth level within a tree.
In [90]:
colsample_bylevel = np.arange(0.3,1.1,0.1)
list(colsample_bylevel)
Out[90]:
[0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
(values shown rounded)
In [91]:
param_grid = dict(colsample_bylevel = colsample_bylevel)
In [92]:
kfold = StratifiedKFold(n_splits=10, shuffle = True, random_state = 8)
In [93]:
grid_search = GridSearchCV(model, param_grid, scoring="accuracy", n_jobs=-1, cv=kfold)
In [94]:
grid_result = grid_search.fit(X, y)
In [95]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
print("%f (%f) with: %r" % (mean, stdev, param))