from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Train and evaluate a logistic-regression classifier on `df`.
# `df` and `np` are not defined in this section -- presumably a pandas
# DataFrame and numpy set up earlier in the file (TODO confirm).

# Every column except the last is a feature; the last column is the target.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# The model is trained on all features.
X_select = X

# Squares of feature columns 9 and 10, and the feature matrix with a leading
# column of ones. Neither is fed to the model below; they are kept only in
# case code outside this section reads them.
X_select_square = np.square(X[:, 9:11])
X_t = np.append(arr=np.ones((X_select.shape[0], 1)), values=X_select, axis=1)

# Split only after X and y exist (splitting before they are assigned raises
# NameError). Half of the rows are held out; the seed is fixed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_select, y, test_size=0.5, random_state=0
)

# Fit the scaler on the training rows only, then reuse its statistics on the
# test rows so no test-set information leaks into training.
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# NOTE: the name `regressor` is kept for compatibility, but
# LogisticRegression is a classifier.
regressor = LogisticRegression()
regressor.fit(X_train, y_train)

# Show each prediction next to its true target.
predictions = regressor.predict(X_test)
for prediction, target in zip(predictions, y_test):
    print('Predicted: %s, Target: %s' % (prediction, target))

# For a classifier, score() is mean accuracy on the given data, not R-squared.
score = regressor.score(X_test, y_test)
print('Accuracy: %.2f' % score)