Facebook
From mendu_v, 5 Years ago, written in Python.
This paste is a reply to Untitled from mendu_v - view diff
Embed
Download Paste or View Raw
Hits: 328
  1. %load_ext autoreload
  2. %autoreload 2
  3. %matplotlib inline
  4.  
  5. from fastai.imports import *
  6. from fastai.structured import *
  7. import csv
  8.  
  9. from pandas_summary import DataFrameSummary
  10. from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
  11. from IPython.display import display
  12.  
  13. from sklearn import metrics
  14.  
  15. df_raw = pd.read_csv('Bulldozer_train.csv', low_memory=False, parse_dates=["saledate"])        
  16.  
  17. df_raw.SalePrice = np.log(df_raw.SalePrice)
  18.  
  19. add_datepart(df_raw, 'YearBuilt')
  20. add_datepart(df_raw, 'YearRemodAdd')
  21.  
  22. train_cats(df_raw)
  23.  
  24. df, y, nas = proc_df(df_raw, 'SalePrice')
  25.  
  26. def split_vals(a,n): return a[:n].copy(), a[n:].copy()
  27.  
  28. n_valid = 140  # same as Kaggle's test set size
  29. n_trn = len(df)-n_valid
  30. raw_train, raw_valid = split_vals(df_raw, n_trn)
  31. X_train, X_valid = split_vals(df, n_trn)
  32. y_train, y_valid = split_vals(y, n_trn)
  33.  
  34. X_train.shape, y_train.shape, X_valid.shape
  35.  
  36. df_trn, y_trn, nas = proc_df(df_raw, 'SalePrice', subset=200, na_dict=nas)
  37. X_train, _ = split_vals(df_trn, 150)
  38. y_train, _ = split_vals(y_trn, 150)
  39.  
  40. m = RandomForestRegressor(n_estimators=40, min_samples_leaf=3, max_features=0.5, n_jobs=-1, oob_score=True)
  41. m.fit(X_train, y_train)
  42.  
  43. df_test = pd.read_csv('Housing_test.csv', low_memory=False, parse_dates=["YearBuilt", "YearRemodAdd"])
  44.  
  45. add_datepart(df_test, 'YearBuilt')
  46. add_datepart(df_test, 'YearRemodAdd')
  47.  
  48. train_cats(df_test)
  49.  
  50. df_1 = proc_df(df_test)
  51.  
  52. y_test= m.predict(df_1)
  53.