import numpy as np
from scipy.special import expit
import sys
import struct
import os


class NeuralNetMLP(object):
    def __init__(self, n_output, n_features, n_hidden=30,
                 l1=0.0, l2=0.0, epochs=500, eta=0.001,
                 alpha=0.0, decrease_const=0.0, shuffle=True,
                 minibatches=1, random_state=None):
        np.random.seed(random_state)
        self.n_output = n_output
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.w1, self.w2 = self._initialize_weights()
        self.l1 = l1                          # L1 regularization strength
        self.l2 = l2                          # L2 regularization strength
        self.epochs = epochs
        self.eta = eta                        # learning rate
        self.alpha = alpha                    # momentum constant
        self.decrease_const = decrease_const  # learning-rate decay constant
        self.shuffle = shuffle
        self.minibatches = minibatches

    def _encode_labels(self, y, k):
        # One-hot encode integer class labels into a (k, n_samples) matrix.
        onehot = np.zeros((k, y.shape[0]))
        for idx, val in enumerate(y):
            onehot[val, idx] = 1.0
        return onehot

    def _initialize_weights(self):
        w1 = np.random.uniform(-1.0, 1.0,
                               size=self.n_hidden * (self.n_features + 1))
        w1 = w1.reshape(self.n_hidden, self.n_features + 1)
        w2 = np.random.uniform(-1.0, 1.0,
                               size=self.n_output * (self.n_hidden + 1))
        w2 = w2.reshape(self.n_output, self.n_hidden + 1)
        return w1, w2

    def _sigmoid(self, z):
        # expit is scipy's numerically stable logistic sigmoid 1 / (1 + exp(-z)).
        return expit(z)

    def _sigmoid_gradient(self, z):
        sg = self._sigmoid(z)
        return sg * (1 - sg)

    def _add_bias_unit(self, x, how='column'):
        if how == 'column':
            x_new = np.ones((x.shape[0], x.shape[1] + 1))
            x_new[:, 1:] = x
        elif how == 'row':
            x_new = np.ones((x.shape[0] + 1, x.shape[1]))
            x_new[1:, :] = x
        else:
            raise AttributeError("`how` must be 'column' or 'row'")
        return x_new

    def _feedforward(self, X, w1, w2):
        # a1: (n_samples, n_features + 1) input with bias column
        a1 = self._add_bias_unit(X, how='column')
        # z2/a2: (n_hidden, n_samples), then a bias row -> (n_hidden + 1, n_samples)
        z2 = w1.dot(a1.T)
        a2 = self._sigmoid(z2)
        a2 = self._add_bias_unit(a2, how='row')
        # z3/a3: (n_output, n_samples) activations of the output layer
        z3 = w2.dot(a2)
        a3 = self._sigmoid(z3)
        return a1, z2, a2, z3, a3

    def _L2_reg(self, lambda_, w1, w2):
        # Sum of squared weights, excluding the bias columns.
        return (lambda_ / 2.0) * (np.sum(w1[:, 1:] ** 2) + np.sum(w2[:, 1:] ** 2))

    def _L1_reg(self, lambda_, w1, w2):
        # Sum of absolute weights, excluding the bias columns.
        return (lambda_ / 2.0) * (np.abs(w1[:, 1:]).sum() + np.abs(w2[:, 1:]).sum())

    def _get_cost(self, y_enc, output, w1, w2):
        # Logistic (cross-entropy) cost summed over all output units and samples,
        # plus the L1/L2 regularization terms.
        term1 = -y_enc * (np.log(output))
        term2 = (1 - y_enc) * np.log(1 - output)
        cost = np.sum(term1 - term2)
        L1_term = self._L1_reg(self.l1, w1, w2)
        L2_term = self._L2_reg(self.l2, w1, w2)
        cost = cost + L1_term + L2_term
        return cost

    def _get_gradient(self, a1, a2, a3, z2, y_enc, w1, w2):
        # Backpropagation: output-layer error, then hidden-layer error.
        sigma3 = a3 - y_enc
        z2 = self._add_bias_unit(z2, how='row')
        sigma2 = w2.T.dot(sigma3) * self._sigmoid_gradient(z2)
        sigma2 = sigma2[1:, :]
        grad1 = sigma2.dot(a1)
        grad2 = sigma3.dot(a2.T)

        # Regularize all weights except the bias columns.
        grad1[:, 1:] += self.l2 * w1[:, 1:]
        grad1[:, 1:] += self.l1 * np.sign(w1[:, 1:])
        grad2[:, 1:] += self.l2 * w2[:, 1:]
        grad2[:, 1:] += self.l1 * np.sign(w2[:, 1:])

        return grad1, grad2

    def predict(self, X):
        a1, z2, a2, z3, a3 = self._feedforward(X, self.w1, self.w2)
        y_pred = np.argmax(z3, axis=0)
        return y_pred

    def fit(self, X, y, print_progress=False):
        self.cost_ = []
        X_data, y_data = X.copy(), y.copy()
        y_enc = self._encode_labels(y, self.n_output)

        delta_w1_prev = np.zeros(self.w1.shape)
        delta_w2_prev = np.zeros(self.w2.shape)

        for i in range(self.epochs):

            # Adaptive learning rate: shrink eta as the epochs progress.
            self.eta /= (1 + self.decrease_const * i)

            if print_progress:
                sys.stderr.write('\rEpoch: %d/%d' % (i + 1, self.epochs))
                sys.stderr.flush()

            if self.shuffle:
                idx = np.random.permutation(y_data.shape[0])
                X_data, y_enc = X_data[idx], y_enc[:, idx]

            mini = np.array_split(range(y_data.shape[0]), self.minibatches)
            for idx in mini:
                # Forward pass, cost, and gradients for the current minibatch.
                a1, z2, a2, z3, a3 = self._feedforward(X_data[idx],
                                                       self.w1, self.w2)
                cost = self._get_cost(y_enc=y_enc[:, idx],
                                      output=a3,
                                      w1=self.w1,
                                      w2=self.w2)
                self.cost_.append(cost)

                grad1, grad2 = self._get_gradient(a1=a1, a2=a2, a3=a3, z2=z2,
                                                  y_enc=y_enc[:, idx],
                                                  w1=self.w1, w2=self.w2)

                # Gradient-descent step with momentum.
                delta_w1, delta_w2 = self.eta * grad1, self.eta * grad2
                self.w1 -= (delta_w1 + (self.alpha * delta_w1_prev))
                self.w2 -= (delta_w2 + (self.alpha * delta_w2_prev))
                delta_w1_prev, delta_w2_prev = delta_w1, delta_w2

        return self

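# Usage sketch: one way to call the class, with small illustrative (hypothetical)
# shapes. X is a (n_samples, n_features) array and y holds integer class labels
# in [0, n_output):
#
#     net = NeuralNetMLP(n_output=3, n_features=4, n_hidden=10,
#                        epochs=20, minibatches=1, random_state=1)
#     X_toy = np.random.uniform(size=(6, 4))
#     y_toy = np.array([0, 1, 2, 0, 1, 2])
#     net.fit(X_toy, y_toy)
#     net.predict(X_toy)          # array of 6 predicted class indices
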
def load_mnist(path, kind='train'):
    labels_path = os.path.join(path, '%s-labels.idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images.idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # IDX label file: 8-byte header (magic number, item count), then the labels.
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # IDX image file: 16-byte header, then 28x28 images flattened to 784 pixels.
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)

    return images, labels

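# Quick sanity-check sketch (hypothetical path, assuming the standard MNIST
# files are present in a local `mnist` directory):
#
#     X, y = load_mnist('mnist', kind='train')
#     X.shape   # expected (60000, 784) for the standard training set
#     y.shape   # expected (60000,)
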
if __name__ == "__main__":

    X_train, y_train = load_mnist(r'C:\Users\domin\Desktop\Python\mnist', kind='train')
    X_test, y_test = load_mnist(r'C:\Users\domin\Desktop\Python\mnist', kind='t10k')

    nn = NeuralNetMLP(n_output=10,
                      n_features=X_train.shape[1],
                      n_hidden=50,
                      l2=0.1,
                      l1=0.0,
                      epochs=1000,
                      eta=0.001,
                      alpha=0.001,
                      decrease_const=0.00001,
                      shuffle=True,
                      minibatches=50,
                      random_state=1)
    nn.fit(X_train, y_train, print_progress=True)
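
    # Evaluation sketch: one way to check the fitted network, using predict()
    # on the training and test sets (accuracy as the fraction of exact matches).
    y_train_pred = nn.predict(X_train)
    train_acc = np.sum(y_train == y_train_pred) / float(X_train.shape[0])
    print('\nTraining accuracy: %.2f%%' % (train_acc * 100))

    y_test_pred = nn.predict(X_test)
    test_acc = np.sum(y_test == y_test_pred) / float(X_test.shape[0])
    print('Test accuracy: %.2f%%' % (test_acc * 100))

    # Optional: plot the per-minibatch cost collected in nn.cost_ during fit(),
    # assuming matplotlib is installed.
    # import matplotlib.pyplot as plt
    # plt.plot(range(len(nn.cost_)), nn.cost_)
    # plt.xlabel('Minibatch')
    # plt.ylabel('Cost J(w)')
    # plt.show()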