import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import multiprocessing as mp  # used by the optional parallel sketch below
import scipy.io

def displayData(X):
    """
    Displays 2D data stored in the design matrix in a 10x10 grid.
    """
    fig, ax = plt.subplots(10, 10, sharex=True, sharey=True)
    img_num = 0
    for i in range(10):
        for j in range(10):
            # Convert the row vector into a 20x20 pixel matrix. The raw
            # data are stored column-major, so transpose to display correctly.
            img = X[img_num, :].reshape(20, 20).T
            ax[i][j].imshow(img, cmap='gray')
            img_num += 1

    return (fig, ax)

def displayImage(im):
    """
    Displays a single image stored as a column vector
    """
    fig2, ax2 = plt.subplots()
    image = im.reshape(20, 20).T
    ax2.imshow(image, cmap='gray')

    return (fig2, ax2)

def sigmoid(z):
    """ Returns the element-wise sigmoid of z """

    return 1.0/(1 + np.exp(-z))
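
# Note: for large |z| the direct form above can overflow inside np.exp.
# A numerically stable drop-in alternative (an optional substitution,
# not used below) is scipy.special.expit:
#
# from scipy.special import expit
# assert np.allclose(sigmoid(np.array([-5.0, 0.0, 5.0])),
#                    expit(np.array([-5.0, 0.0, 5.0])))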

def lrCostFunction(theta, X, y, reg_param):
    """
    Computes the regularized cross-entropy (log) loss for logistic
    regression, using theta as the parameter vector to fit the data
    points in X and y with penalty reg_param. Returns the cost and its
    analytic gradient.
    """

    # Initialize some useful values
    m = len(y)  # number of training examples
    h = sigmoid(np.dot(X, theta))  # predicted probabilities

    # Cost function J(theta); theta_0 (the intercept) is not regularized
    J = (np.sum(-y*np.log(h) - (1 - y)*np.log(1 - h))/m +
         (reg_param/(2*m))*np.sum(theta[1:]**2))

    # Gradient

    # Non-regularized
    grad_0 = np.sum((h - y)[:, None]*X, axis=0)/m

    # Regularized
    grad_reg = grad_0 + (reg_param/m)*theta

    # Replace gradient for theta_0 with the non-regularized gradient
    grad_reg[0] = grad_0[0]

    # Returning (J, grad) lets minimize(..., jac=True) reuse the
    # analytic gradient instead of estimating it numerically

    return (J, grad_reg)
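
# Optional sanity check: compare the analytic gradient against scipy's
# finite-difference estimate on a tiny made-up problem (illustrative
# data only; uncomment to run):
#
# from scipy.optimize import check_grad
# rng = np.random.RandomState(0)
# X_chk = np.hstack((np.ones((5, 1)), rng.randn(5, 3)))
# y_chk = rng.randint(0, 2, 5)
# err = check_grad(lambda t: lrCostFunction(t, X_chk, y_chk, 1.0)[0],
#                  lambda t: lrCostFunction(t, X_chk, y_chk, 1.0)[1],
#                  rng.randn(4))
# print("gradient check error:", err)  # should be on the order of 1e-6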

def oneVsAll(X, y, num_labels, reg_param):
    """
    Calculates parameters for num_labels individual regularized logistic
    regression classifiers using training data X and labels y.
    """
    n = np.size(X, 1)
    theta = np.zeros((n, num_labels))

    # Function to find parameters for a single logit
    def findOptParam(p_num):
        outcome = np.array(y == p_num).astype(int)
        initial_theta = theta[:, p_num]
        results = minimize(lrCostFunction,
                           initial_theta,
                           method='Newton-CG',
                           args=(X, outcome, reg_param),
                           jac=True,
                           tol=1e-6,
                           options={'maxiter': 400,
                                    'disp': True})
        theta[:, p_num] = results.x

    for digit in range(num_labels):
        findOptParam(digit)

    return theta
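
# The per-digit fits above are independent, so they could run in
# parallel using the multiprocessing import at the top. A minimal
# sketch (hypothetical helper, not used by the script below; the
# closure inside oneVsAll cannot be pickled, so the worker must live at
# module level and return its fitted column):
#
# def _fitOneLabel(args):
#     X, y, reg_param, p_num, n = args
#     outcome = (y == p_num).astype(int)
#     res = minimize(lrCostFunction, np.zeros(n), method='Newton-CG',
#                    args=(X, outcome, reg_param), jac=True, tol=1e-6,
#                    options={'maxiter': 400})
#     return p_num, res.x
#
# def oneVsAllParallel(X, y, num_labels, reg_param):
#     n = np.size(X, 1)
#     theta = np.zeros((n, num_labels))
#     with mp.Pool() as pool:
#         jobs = [(X, y, reg_param, k, n) for k in range(num_labels)]
#         for p_num, col in pool.map(_fitOneLabel, jobs):
#             theta[:, p_num] = col
#     return theta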


def predictOneVsAllAccuracy(est_theta, X):
    """
    Given a full set of parameters theta and design matrix X, returns
    the predicted class for each observation, chosen as the class with
    the highest predicted probability.
    """

    # sigmoid is monotonic, so argmax over the linear scores X.theta
    # selects the same class as argmax over the probabilities
    probs = np.dot(X, est_theta)
    predict = np.argmax(probs, axis=1)

    return predict


def predict(theta1, theta2, X):
    """
    Feeds X forward through a two-layer network with parameters theta1
    and theta2 and returns the index of the most probable output unit
    for each example.
    """
    if np.ndim(X) == 1:
        X = X.reshape((1, -1))  # treat a single example as one row

    m = len(X)  # number of samples

    D1 = np.hstack((np.ones((m, 1)), X))  # add column of ones

    # Calculate hidden layer from theta1 parameters
    hidden_pred = np.dot(D1, theta1.T)  # (5000 x 401) x (401 x 25) = 5000 x 25

    # Add column of ones to new design matrix
    ones = np.ones((len(hidden_pred), 1))  # 5000 x 1
    hidden_pred = sigmoid(hidden_pred)
    hidden_pred = np.hstack((ones, hidden_pred))  # 5000 x 26

    # Calculate output layer from new design matrix
    output_pred = np.dot(hidden_pred, theta2.T)  # (5000 x 26) x (26 x 10)
    output_pred = sigmoid(output_pred)

    # Get predictions
    p = np.argmax(output_pred, axis=1)

    return p



# ============ Part 1: Loading and Visualizing Data ============

# Set up parameters
input_layer_size = 400
num_labels = 10


# Load training data
print("Loading training data...")

raw_mat = scipy.io.loadmat("ex3data1.mat")
X = raw_mat.get("X")
y = raw_mat.get("y").flatten()
y[y == 10] = 0  # the raw labels use 10 for the digit 0

X_d = np.hstack((np.ones((len(y), 1)), X))  # design matrix with a column of ones

# Randomly select 100 distinct datapoints to display
rand_indices = np.random.choice(len(X_d), 100, replace=False)
sel = X[rand_indices, :]


# Display the data
digit_grid, ax = displayData(sel)
digit_grid.show()

input("Program paused, press enter to continue...")


# ============ Part 2: Vectorize Logistic Regression ============
reg_param = 1.0
theta = oneVsAll(X_d, y, num_labels, reg_param)

predictions = predictOneVsAllAccuracy(theta, X_d)
accuracy = np.mean(y == predictions) * 100
print("Training Accuracy with logit: ", accuracy, "%")
input("Program paused, press enter to continue...")

# =================== Part 3: Neural Networks ===================

# Load pre-estimated weights
print("Loading saved neural network parameters...")
raw_params = scipy.io.loadmat("ex3weights.mat")
theta1 = raw_params.get("Theta1")  # 25 x 401
theta2 = raw_params.get("Theta2")  # 10 x 26


# Note: the parameters in theta1, theta2 are based on 1-indexed labels
# (1-10, with 10 standing for the digit 0), while y uses 0-9. To align
# the predictions, add 1 and take the result mod 10, so 10s become
# zeros and everything else gets bumped up one. For example, output
# index 9 means label 10, i.e. the digit 0: (9 + 1) % 10 == 0.

predictions = (predict(theta1, theta2, X) + 1) % 10
accuracy = np.mean(y == predictions) * 100
print("Training Accuracy with neural network: ", accuracy, "%")
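
# Optional: visualize one random training example alongside the
# network's prediction (a small illustrative addition, not part of the
# original exercise script; uncomment to run):
#
# idx = np.random.randint(len(X))
# fig, ax = displayImage(X[idx, :])
# print("Network predicts:", (predict(theta1, theta2, X[idx, :]) + 1) % 10)
# fig.show()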