%load_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.imports import *
from fastai.structured import *
import csv
from pandas_summary import DataFrameSummary
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from IPython.display import display
from sklearn import metrics
# Load the training data. The year columns are parsed as dates here, the same
# way the test file is read further down, so that add_datepart derives
# identical features for both data sets. (The previous 'saledate' column was
# requested for parsing but never used anywhere in this script.)
df_raw = pd.read_csv('Bulldozer_train.csv', low_memory=False,
                     parse_dates=["YearBuilt", "YearRemodAdd"])
# The model is fit on the log of the price, so predictions made with it are
# on the log scale as well.
df_raw.SalePrice = np.log(df_raw.SalePrice)
# Expand each date column into its derived date-part features.
add_datepart(df_raw, 'YearBuilt')
add_datepart(df_raw, 'YearRemodAdd')
# Convert string columns to pandas categoricals (in place).
train_cats(df_raw)
# Split off the target and get a numeric feature frame; `nas` records the
# fill values used for missing data so later proc_df calls can reuse them.
df, y, nas = proc_df(df_raw, 'SalePrice')
def split_vals(a, n):
    """Split *a* at position *n* and return independent copies of both parts.

    The first element holds ``a[:n]``, the second ``a[n:]``. Each part is
    produced with ``.copy()``, so modifying a part does not touch *a*.
    """
    head = a[:n].copy()
    tail = a[n:].copy()
    return head, tail
# Hold out the last n_valid rows for validation; all rows before them are
# used for training.
n_valid = 140 # same as Kaggle's test set size
n_trn = len(df)-n_valid
# Split the raw frame, the processed features and the target at the same row
# position so the three stay aligned.
raw_train, raw_valid = split_vals(df_raw, n_trn)
X_train, X_valid = split_vals(df, n_trn)
y_train, y_valid = split_vals(y, n_trn)
# Bare expression: it is only displayed when it is the last statement of a
# notebook cell, otherwise it has no effect.
X_train.shape, y_train.shape, X_valid.shape
# Re-run proc_df with subset=200, reusing the fill values from the first pass.
# NOTE(review): presumably `subset` draws a 200-row sample of df_raw -- confirm
# against the fastai version in use. `nas` is rebound to the returned dict.
df_trn, y_trn, nas = proc_df(df_raw, 'SalePrice', subset=200, na_dict=nas)
# Keep the first 150 rows of the subset as the training data. This rebinds
# X_train / y_train; X_valid / y_valid still come from the full split above.
X_train, _ = split_vals(df_trn, 150)
y_train, _ = split_vals(y_trn, 150)
# Fit a 40-tree random forest on the reduced training set, using all cores
# (n_jobs=-1) and computing the out-of-bag score (oob_score=True).
m = RandomForestRegressor(n_estimators=40, min_samples_leaf=3, max_features=0.5, n_jobs=-1, oob_score=True)
m.fit(X_train, y_train)
# Load the test set and run it through the same preprocessing as the training
# data, so the feature matrix matches what the model was fitted on.
df_test = pd.read_csv('Housing_test.csv', low_memory=False, parse_dates=["YearBuilt", "YearRemodAdd"])
add_datepart(df_test, 'YearBuilt')
add_datepart(df_test, 'YearRemodAdd')
# Reuse the categories learned from the training frame instead of deriving new
# ones from the test data; otherwise the same string could map to a different
# integer code than the one the model was trained with.
apply_cats(df_test, df_raw)
# proc_df returns (features, target, na_dict), so the feature frame has to be
# unpacked. Passing the training `nas` reuses the training fill values.
df_1, _, _ = proc_df(df_test, na_dict=nas)
# Select the training columns in training order; this raises a KeyError if the
# test set lacks a feature the model needs, rather than predicting on a
# misaligned matrix.
df_1 = df_1[X_train.columns]
# The model was fitted on log(SalePrice), so these predictions are on the log
# scale; apply np.exp to them to get prices.
y_test = m.predict(df_1)
# NOTE(review): this dict literal is a bare expression -- it is evaluated and
# discarded, and nothing in the visible script refers to it. It looks like a
# language-name -> editor-mode lookup table pasted in from elsewhere; confirm
# and remove it if it is not needed.
{"html5":"htmlmixed","css":"css","javascript":"javascript","php":"php","python":"python","ruby":"ruby","lua":"text\/x-lua","bash":"text\/x-sh","go":"go","c":"text\/x-csrc","cpp":"text\/x-c++src","diff":"diff","latex":"stex","sql":"sql","xml":"xml","apl":"apl","asterisk":"asterisk","c_loadrunner":"text\/x-csrc","c_mac":"text\/x-csrc","coffeescript":"text\/x-coffeescript","csharp":"text\/x-csharp","d":"d","ecmascript":"javascript","erlang":"erlang","groovy":"text\/x-groovy","haskell":"text\/x-haskell","haxe":"text\/x-haxe","html4strict":"htmlmixed","java":"text\/x-java","java5":"text\/x-java","jquery":"javascript","mirc":"mirc","mysql":"sql","ocaml":"text\/x-ocaml","pascal":"text\/x-pascal","perl":"perl","perl6":"perl","plsql":"sql","properties":"text\/x-properties","q":"text\/x-q","scala":"scala","scheme":"text\/x-scheme","tcl":"text\/x-tcl","vb":"text\/x-vb","verilog":"text\/x-verilog","yaml":"text\/x-yaml","z80":"text\/x-z80"}