1 Import data¶
In [32]:
import pandas as pd

dataset = pd.read_csv("datasets/Churn_Modelling.csv")
In [33]:
dataset.head()
Out[33]:
In [34]:
X = dataset.iloc[:, 3:13]   # predictors: drop RowNumber, CustomerId and Surname
y = dataset.iloc[:, 13]     # target: Exited
In [35]:
X.head()
Out[35]:
In [36]:
y.head()
Out[36]:
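For readability, the same split can also be written with column names instead of positions. A minimal sketch, assuming the standard Churn_Modelling.csv column names:

# Name-based equivalent of the positional slice above
feature_cols = ['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure',
                'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember',
                'EstimatedSalary']
X = dataset[feature_cols]
y = dataset['Exited']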
2 Encode categorical independent variables¶
In [37]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Encode the Geography column (index 1) as integers
label_encoder_geo = LabelEncoder()
X.iloc[:, 1] = label_encoder_geo.fit_transform(X.iloc[:, 1])
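To see what the encoder learned, its classes_ attribute holds the categories in the order they were mapped to integers; a quick sketch (for this dataset the mapping should be France/Germany/Spain -> 0/1/2, since LabelEncoder sorts the labels):

# Country-to-integer mapping learned by the label encoder
dict(zip(label_encoder_geo.classes_, range(len(label_encoder_geo.classes_))))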
In [38]:
X.head()
Out[38]:
In [39]:
# Encode the Gender column (index 2) as integers
label_encoder_sex = LabelEncoder()
X.iloc[:, 2] = label_encoder_sex.fit_transform(X.iloc[:, 2])
In [40]:
X.head()
Out[40]:
In [41]:
# Note: categorical_features was deprecated in scikit-learn 0.20 and removed
# in 0.22; see the ColumnTransformer alternative below for newer versions.
onehotencoder = OneHotEncoder(categorical_features=[1])
X = onehotencoder.fit_transform(X).toarray()
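On newer scikit-learn versions, an equivalent encoding can be sketched with ColumnTransformer (assuming Geography is still column index 1; the one-hot columns end up first, followed by the remaining columns, just as above):

# Alternative for scikit-learn >= 0.22, instead of the cell above
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer([('geo', OneHotEncoder(), [1])],
                       remainder='passthrough',  # keep the other columns unchanged
                       sparse_threshold=0)       # force a dense array, like .toarray()
X = ct.fit_transform(X)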
In [42]:
X
Out[42]:
In [43]:
X[0]
Out[43]:
Let's drop one dummy column to avoid the dummy variable trap¶
In [44]:
X = X[:, 1:]  # the dropped dummy is implied by the other two, so keeping all three adds perfect multicollinearity
In [45]:
from sklearn.model_selection import train_test_split
In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)
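Churned customers are the minority class (roughly one in five rows in this dataset), so a stratified split is a reasonable option to keep the class ratio identical in the train and test sets; a sketch:

# Alternative (sketch): stratify on y so both splits keep the same churn rate
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=10, stratify=y)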
Feature scaling is essential for ANNs¶
In [47]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)  # fit the scaler on the training data only, then transform it
In [48]:
X_test = sc.transform(X_test)  # reuse the scaler fitted on the training set; do not refit on the test data
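Standardisation rescales each feature to z = (x - mean) / std, with the mean and standard deviation estimated on the training set only. A quick sanity check:

import numpy as np

# After scaling, every training feature should have mean ~0 and std ~1
print(np.round(X_train.mean(axis=0), 3))
print(np.round(X_train.std(axis=0), 3))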
Let's build the ANN¶
In [49]:
import keras
In [50]:
from keras.models import Sequential
from keras.layers import Dense
Initialise ANN¶
In [51]:
classifier = Sequential()
Add the first hidden layer and input layer¶
In [54]:
# units and kernel_initializer are the current Keras names for output_dim and init;
# 11 inputs = 10 features, with Geography expanded to 3 dummies and 1 dropped
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
Adding one more hidden layer¶
In [56]:
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))
Adding output layer¶
We get a probability in the output layer by using the sigmoid activation function. Use 'softmax' instead if it is a multiclass problem.
In [57]:
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
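For comparison, a sketch of what the head of the network could look like for a hypothetical multiclass target (not used in this notebook, where churn is binary): one output unit per class, a softmax activation, and a categorical loss.

# Hypothetical multiclass variant, shown for illustration only
multiclass_model = Sequential()
multiclass_model.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))
multiclass_model.add(Dense(units=3, activation='softmax'))  # e.g. 3 classes
multiclass_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                         metrics=['accuracy'])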
Compiling ANN¶
Compiling attaches an optimiser (a variant of stochastic gradient descent) that will learn the weights; we use 'adam' here.
In [58]:
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
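'adam' is itself a variant of stochastic gradient descent with adaptive per-parameter learning rates. Passing the optimiser as an object instead of a string is equivalent and makes its settings explicit; a sketch (the learning-rate keyword name differs between older and newer Keras releases, so the defaults are used here):

# Equivalent compile call with an explicit optimiser object (default learning rate 0.001)
from keras.optimizers import Adam
classifier.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])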
Fitting the model¶
In [59]:
classifier.fit(X_train, y_train, batch_size=10, epochs=100)
Out[59]:
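Optionally, part of the training data can be held out during training to monitor generalisation from epoch to epoch, using Keras's validation_split argument; a sketch:

# Alternative fit call (sketch): hold out 10% of the training data for validation
# classifier.fit(X_train, y_train, batch_size=10, epochs=100, validation_split=0.1)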
In [60]:
y_pred = classifier.predict(X_test)
In [61]:
y_pred[0:5]
Out[61]:
In [62]:
y_pred = y_pred > 0.5  # threshold the predicted probabilities at 0.5 to get class labels
In [63]:
y_pred[0:5]
Out[63]:
Evaluate the results¶
In [64]:
from sklearn.metrics import confusion_matrix
In [65]:
confusion_matrix(y_test, y_pred)
Out[65]:
In [66]:
(1514 + 152) / 2000  # correct predictions (diagonal of the confusion matrix) / test-set size
Out[66]:
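The manual calculation above is (TN + TP) / total from the confusion matrix, i.e. the test-set accuracy (1666 / 2000 = 0.833). scikit-learn computes the same number directly:

from sklearn.metrics import accuracy_score

# Same quantity as (1514 + 152) / 2000 above
accuracy_score(y_test, y_pred)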
In [ ]: