I have the following variables:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
np.random.seed(0)
n = 15
x = np.linspace(0,10,n) + np.random.randn(n)/5
y = np.sin(x)+x/6 + np.random.randn(n)/10
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=0)
def part1_scatter():
%matplotlib notebook
plt.figure()
plt.scatter(X_train, y_train, label='training data')
plt.scatter(X_test, y_test, label='test data')
plt.legend(loc=4);
And the following question:
Write a function that fits a polynomial LinearRegression model on the training data X_train for degrees 1, 3, 6, and 9. (Use PolynomialFeatures in sklearn.preprocessing to create the polynomial features and then fit a linear regression model) For each model, find 100 predicted values over the interval x = 0 to 10 (e.g. np.linspace(0,10,100)) and store this in a numpy array. The first row of this array should correspond to the output from the model trained on degree 1, the second row degree 3, the third row degree 6, and the fourth row degree 9.
This is my code, but it don't work out:
def answer_one():
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
np.random.seed(0)
n = 15
x = np.linspace(0,10,n) + np.random.randn(n)/5
y = np.sin(x)+x/6 + np.random.randn(n)/10
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=0)
results = []
pred_data = np.linspace(0,10,100)
degree = [1,3,6,9]
y_train1 = y_train.reshape(-1,1)
for i in degree:
poly = PolynomialFeatures(degree=i)
pred_poly1 = poly.fit_transform(pred_data[:,np.newaxis])
X_F1_poly = poly.fit_transform(X_train[:,np.newaxis])
linreg = LinearRegression().fit(X_F1_poly, y_train1)
pred = linreg.predict(pred_poly1)
results.append(pred)
dataArray = np.array(results).reshape(4, 100)
return dataArray
I receive this error:
line 58 for i
in degree: ^ IndentationError: unexpected
indent
Could you tell me where the problem is?