Import the data¶
In [33]:
data = pd.read_csv("Data.csv")
In [34]:
data
Out[34]:
In [35]:
X = data.iloc[:,:-1]
In [36]:
y = data.iloc[:,-1]
In [37]:
X
Out[37]:
In [38]:
y
Out[38]:
Imputation for missing data¶
In [39]:
from sklearn.preprocessing import Imputer
In [40]:
imputer = Imputer(missing_values="NaN", strategy= "mean", axis=0)
In [41]:
imputer = imputer.fit(X.iloc[:,1:3])
In [42]:
X.iloc[:,1:3] = imputer.transform(X.iloc[:,1:3])
In [43]:
X
Out[43]:
Encoding categorical data¶
In [44]:
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
In [45]:
labelencoder = LabelEncoder()
In [46]:
X.iloc[:,0] = labelencoder.fit_transform(X.iloc[:,0])
In [47]:
X
Out[47]:
In [48]:
onehotencoder = OneHotEncoder(categorical_features=[0])
In [49]:
X = onehotencoder.fit_transform(X).toarray()
In [50]:
X
Out[50]:
In [51]:
# Separate encoder instance for the target, independent of the one used on X.
labelencoder_y = LabelEncoder()
In [52]:
# Replace the target labels with integer class codes.
y = labelencoder_y.fit_transform(y)
In [53]:
y
Out[53]:
Train test split¶
In [54]:
from sklearn.model_selection import train_test_split
In [57]:
# Hold out 20% of the rows as the test set; random_state pins the shuffle so
# the same split is produced on every run.
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state = 42)
Feature Scaling¶
In [58]:
from sklearn.preprocessing import StandardScaler
In [59]:
sc_X = StandardScaler()
In [60]:
# Fit the scaler on the training rows only and scale them.
# NOTE(review): every column of X_train is scaled, including the one-hot
# columns produced in the encoding step - confirm that is intended.
X_train = sc_X.fit_transform(X_train)
In [61]:
# Reuse the statistics learned from the training rows; the scaler is not
# re-fitted on the test rows.
X_test = sc_X.transform(X_test)
In [62]:
X_train
Out[62]:
No comments :
Post a Comment