import math
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Task 1: load the Boston housing data into a DataFrame and preview it.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script needs an older scikit-learn release to run.
print("Zad 1")
boston_dataset = load_boston()
# Feature columns come from the dataset; the target is appended as MEDV.
data_frame = pd.DataFrame(
    boston_dataset.data,
    columns=boston_dataset.feature_names,
).assign(MEDV=boston_dataset.target)
# Preview the first and the last ten rows.
print(data_frame.head(10))
print(data_frame.tail(10))
# Task 2: print the DataFrame summary produced by DataFrame.info().
print("Zad 2")
data_frame.info()
# Recorded answers (the questions themselves are not part of this file):
# a - 506      (presumably the number of rows - confirm against the task sheet)
# b - float64  (presumably the dtype of the columns - confirm)
# c - no       (presumably "are there missing values?" - confirm)
# Task 3: print the descriptive statistics returned by DataFrame.describe().
print("Zad 3")
describe = data_frame.describe()
print(describe)
# Recorded answers (the questions themselves are not part of this file;
# which columns the numbers refer to is not stated here):
# a - mean=3.593761 std=8.596783
# b - max=50.000000 min=5.000000
# c - 12.653063
# Task 4: plot the distribution of MEDV as a histogram with a KDE curve.
print("Zad 4")
if hasattr(sns, "histplot"):
    # distplot is deprecated since seaborn 0.11 and slated for removal;
    # this is the documented replacement for its default hist + KDE output.
    sns.histplot(data_frame.MEDV, kde=True, stat="density", kde_kws={"cut": 3})
else:
    # Older seaborn releases only provide distplot.
    sns.distplot(data_frame.MEDV)
plt.show()
# Task 5: draw the correlation heatmap, then regression plots of MEDV against
# RM and LSTAT.
print("Zad 5")
corr_matrix = data_frame.corr().round(2)
sns.heatmap(corr_matrix, annot=True)
# Recorded answers (the questions themselves are not part of this file):
# a - RM, ZN, B
# b - LSTAT
# c - TAX with RAD, and every variable with itself (correlation 1)
#
# The regression plots must use the observations (data_frame), not the
# correlation matrix: corr_matrix only holds one coefficient per column pair.
# x/y are passed by keyword because newer seaborn releases no longer accept
# them positionally.
sns.lmplot(x='MEDV', y='RM', data=data_frame)
sns.lmplot(x='MEDV', y='LSTAT', data=data_frame)
plt.show()
# Task 6: pick the feature columns and the target, then hold out 20% of the
# rows as a test set.
print("Zad 6")
x = data_frame.loc[:, ['RM', 'B', 'ZN']]
y = data_frame.loc[:, ['MEDV']]  # kept as a one-column DataFrame
# No random_state is passed, so the split is expected to differ between runs.
(X_train, X_test,
 Y_train, Y_test) = train_test_split(x, y, test_size=0.2)
# Task 7: fit a linear regression on the training split and plot actual vs.
# predicted values for the test and the training data.
print("Zad 7")
lin = LinearRegression().fit(X_train, Y_train)
Y_pred_ = lin.predict(X_train)  # predictions on the training data
Y_pred = lin.predict(X_test)    # predictions on the held-out data
# One figure per split: test first, then training.
for plot_title, actual, predicted in (
    ('testowy', Y_test, Y_pred),
    ('treningowy', Y_train, Y_pred_),
):
    plt.title(plot_title)
    plt.scatter(actual, predicted)
    plt.show()
# Task 8: report RMSE and MAE of the model, first on the training data and
# then on the test data.
print("Zad 8")
print('treningowy')
# RMSE is the square root of the mean squared error.
print('RMSE: {}'.format(math.sqrt(metrics.mean_squared_error(Y_train, Y_pred_))))
print('MAE: {}'.format(metrics.mean_absolute_error(Y_train, Y_pred_)))
print('testowy')
print('RMSE: {}'.format(math.sqrt(metrics.mean_squared_error(Y_test, Y_pred))))
print('MAE: {}'.format(metrics.mean_absolute_error(Y_test, Y_pred)))