kNN (k-Nearest Neighbors)
In [4]:
from sklearn.datasets import load_iris
In [5]:
iris = load_iris()
In [8]:
X = iris.data
In [10]:
y = iris.target
In [11]:
from sklearn.neighbors import KNeighborsClassifier
In [12]:
knn = KNeighborsClassifier(n_neighbors=1)
In [13]:
knn
Out[13]:
In [14]:
knn.fit(X,y)
Out[14]:
In [17]:
knn.predict([[1,2,3,4],[2,2,2,2]])
Out[17]:
In [20]:
from sklearn.model_selection import train_test_split
In [56]:
# Hold out 40% of the samples as a test split; the fixed random_state makes the split repeatable.
X_Train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=4
)
In [57]:
knn = KNeighborsClassifier(n_neighbors=1)
In [58]:
knn.fit(X_Train, y_train)
Out[58]:
In [59]:
y_predicted = knn.predict(X_test)
In [60]:
y_predicted
Out[60]:
In [61]:
from sklearn.metrics import accuracy_score
In [62]:
accuracy_score(y_test,y_predicted)
Out[62]:
In [69]:
# Candidate neighbour counts to evaluate: the integers 1 through 24 (the stop value 25 is excluded).
# NOTE(review): relies on `np` already being bound; no numpy import is visible in this part of the notebook.
k = np.arange(1, 25, 1)
In [70]:
# Test-split accuracy for every candidate neighbour count in `k`, stored in the same order as `k`.
k_result = []
for val in k:
    # Build and fit a fresh classifier for each candidate so nothing carries over between iterations.
    knn = KNeighborsClassifier(n_neighbors=val)
    knn.fit(X_Train, y_train)
    y_predict = knn.predict(X_test)
    k_result.append(accuracy_score(y_test, y_predict))
In [71]:
# Label the axes so the figure can be read on its own.
plt.xlabel("n_neighbors")
plt.ylabel("Accuracy on the test split")
# Kept as the last expression so the cell still displays the plot's return value.
plt.plot(k, k_result)
Out[71]:
Linear Regression
In [73]:
# Load the Advertising CSV; its first column is used as the row index.
# NOTE(review): the file is fetched over HTTP on every run — confirm the URL is still reachable.
data = pd.read_csv(
    filepath_or_buffer='http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv',
    index_col=0,
)
# Preview the first rows of the frame.
data.head()
Out[73]:
In [77]:
# Scatter plots of Sales against each of the TV, Radio and Newspaper columns.
# `height` is the current name of the per-facet size argument; the older `size` keyword is no longer accepted.
sns.pairplot(data, x_vars=["TV", "Radio", "Newspaper"], y_vars="Sales", height=7, aspect=0.8)
Out[77]:
In [78]:
# Same Sales-vs-column grid, with a fitted regression line drawn on each panel (kind="reg").
# `height` is the current name of the per-facet size argument; the older `size` keyword is no longer accepted.
sns.pairplot(data, x_vars=["TV", "Radio", "Newspaper"], y_vars="Sales", height=7, aspect=0.8, kind="reg")
Out[78]:
In [79]:
X = data[["TV","Radio","Newspaper"]]
In [80]:
X.head()
Out[80]:
In [81]:
y = data["Sales"]
In [82]:
y.head()
Out[82]:
In [84]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
In [85]:
# Hold out 30% of the rows as a test split; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=7
)
In [86]:
model = LinearRegression()
In [87]:
model.fit(X_train, y_train)
Out[87]:
In [89]:
predicted = model.predict(X_test)
In [91]:
# Root-mean-squared error: square root of the mean squared error between `y_test` and `predicted`.
np.sqrt(mean_squared_error(y_test, predicted))
Out[91]:
In [92]:
model.intercept_
Out[92]:
In [93]:
model.coef_
Out[93]:
K-Fold Cross-Validation
In [94]:
data = pd.read_csv('http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv', index_col=0)
In [95]:
# Feature matrix: the TV, Radio and Newspaper columns. Target: the Sales column.
X = data.loc[:, ["TV", "Radio", "Newspaper"]]
y = data.loc[:, "Sales"]
In [96]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
In [112]:
# 10-fold cross-validation of a linear regression on X / y.
model = LinearRegression()
kfold = KFold(n_splits=10)
# scikit-learn scorers follow a "greater is better" convention, so mean squared error is
# exposed as "neg_mean_squared_error"; the plain "mean_squared_error" name is rejected by
# current releases. The per-fold values returned here are therefore negated MSEs.
score = cross_val_score(model, X, y, cv=kfold, scoring="neg_mean_squared_error")
In [113]:
score
Out[113]:
In [114]:
score.mean()
Out[114]:
In [115]:
# Negate the mean of the cross-validation scores (presumably negated MSEs, so this yields a positive MSE).
# NOTE: this rebinds `score` from the per-fold results to a single number, so the cell is not
# safe to re-run on its own — re-run the cross-validation cell first.
score = -score.mean()
In [116]:
# Take the square root of the current `score` value (presumably a mean squared error, giving an RMSE).
# NOTE: rebinds `score` again, so re-running this cell alone applies the square root a second time.
score = np.sqrt(score)
In [117]:
score
Out[117]:
No comments :
Post a Comment