# PartialLeastSquaresCodeOnly.py

__version__ = '1.0.3'
__author__ = "Avinash Kak (kak@purdue.edu)"
__date__ = '2015-May-20'
__url__ = 'https://engineering.purdue.edu/kak/distPLS/PartialLeastSquares-1.0.3.html'
__copyright__ = "(C) 2015 Avinash Kak. Python Software Foundation."

import functools
import glob
import os
import re
import sys

import numpy
import numpy.linalg

try:
    from PIL import Image
except ImportError:
    # Pillow is needed only by the image based methods.  The CSV based
    # regression workflow keeps working without it.
    Image = None

numpy.set_printoptions(precision=3)


def convert(value):
    """Return `value' as a float when possible, otherwise return it unchanged."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return value


def _prompt(message):
    """Read one line from the user with the builtin that matches the interpreter."""
    if sys.version_info[0] == 3:
        return input(message)
    return raw_input(message)  # noqa: F821 -- exists only on Python 2


def _running_sums(values):
    """Return the list of left-to-right cumulative sums of `values'."""
    totals = []
    running = None
    for value in values:
        running = value if running is None else running + value
        totals.append(running)
    return totals


def _numeric_name_key(filename):
    """Sort key: numeric file stems in integer order, everything else after them."""
    stem = filename.partition('.')[0]
    try:
        return (0, int(stem), '')
    except ValueError:
        return (1, 0, stem)


#----------------------------- PartialLeastSquares Class Definition --------------------------------

class PartialLeastSquares(object):
    """
    Partial Least Squares regression on matrices read from CSV files or built
    from directories of images.

    The constructor accepts only keyword arguments:

        XMatrix_file                  CSV file holding the X matrix
        YMatrix_file                  CSV file holding the Y matrix
        epsilon                       convergence threshold for the inner
                                      iterations of PLS() and PLS2()
                                      (defaults to .0001)
        image_directory               root directory of the image data
        image_type                    file extension of the images, e.g. 'jpg'
        image_size_for_computations   (width, height) every image is fitted into
        debug                         truthy value turns on diagnostic printing
    """

    _OPTION_NAMES = ('XMatrix_file', 'YMatrix_file', 'epsilon', 'image_directory',
                     'image_type', 'image_size_for_computations', 'debug')

    def __init__(self, *args, **kwargs):
        if args:
            raise ValueError(
                '''constructor can only be called with keyword arguments for the following '''
                '''keywords: XMatrix_file, YMatrix_file, epsilon, image_directory, image_type, '''
                '''image_size_for_computations, debug''')
        options = dict((name, kwargs.pop(name, None)) for name in self._OPTION_NAMES)
        if len(kwargs) != 0:
            raise ValueError('''You have provided unrecognizable keyword args''')
        # Every option attribute always exists, so a missing option shows up
        # as None instead of as an AttributeError deep inside another method.
        self.XMatrix_file = options['XMatrix_file']
        self.YMatrix_file = options['YMatrix_file']
        self.epsilon = options['epsilon'] or .0001
        self.image_directory = options['image_directory']
        self.image_type = options['image_type']
        self.image_size_for_computations = options['image_size_for_computations']
        self.debug = options['debug'] or 0
        self.X = None                           # Each column of X stands for a predictor variable
        self.Y = None                           # Each column of Y stands for a predicted variable
        self.N = None                           # Number of rows (observations) in X
        self.num_predictor_vars = None          # Number of columns in X
        self.mean0X = None                      # Store column-wise mean for X
        self.mean0Y = None                      #    and for Y
        self.Xtest = None                       # X matrix for evaluating PLS regression
        self.Ytest = None                       # Y matrix for evaluating PLS regression
        self.B = None                           # regression coefficients
        self.training_positives = []            # list of row vectors
        self.training_negatives = []
        self.testing_positives = []
        self.testing_negatives = []
        self.testing_positives_filenames = []
        self.testing_negatives_filenames = []

    def get_XMatrix_from_csv(self):
        """
        If you wish to use your own X and Y matrices for PLS regression, you'd need
        to supply them in the form of CSV files.  This method extracts the X matrix
        from the file named for this purpose by the constructor option XMatrix_file.
        """
        self.X = self._get_matrix_from_csv_file(self.XMatrix_file)
        self.N = self.X.shape[0]
        self.num_predictor_vars = self.X.shape[1]
        print("\n\nThe X matrix: ")
        print(self.X)

    def get_YMatrix_from_csv(self):
        """
        If you wish to use your own X and Y matrices for PLS regression, you'd need
        to supply them in the form of CSV files.  This method extracts the Y matrix
        from the file named for this purpose by the constructor option YMatrix_file.

        The process exits if X has already been loaded and its number of rows
        differs from that of Y.
        """
        self.Y = self._get_matrix_from_csv_file(self.YMatrix_file)
        if self.X is not None and self.Y.shape[0] != self.X.shape[0]:
            sys.exit("The X and Y matrix data are not consistent")
        print("\n\nThe Y matrix: ")
        print(self.Y)

    def _get_matrix_from_csv_file(self, filename):
        """
        Read a comma separated file into a numpy matrix.  Entries that parse as
        numbers become floats; everything else is kept as text.  Blank lines are
        ignored.  Raises ValueError when the file holds no data at all.
        """
        if not filename.endswith('.csv'):
            sys.exit("Aborted. get_training_data_from_csv() is only for CSV files")
        # The "rU" mode used previously was removed in Python 3.11.
        # splitlines() copes with every newline convention on its own.
        with open(filename, "r") as handle:
            lines = handle.read().splitlines()
        all_data = [line.rstrip().split(',') for line in lines if line.strip()]
        if not all_data:
            raise ValueError("The file %s contains no data" % filename)
        num_rows = len(all_data)
        num_cols = len(all_data[0])
        if self.debug:
            print("num rows: " + str(num_rows) + " num columns: " + str(num_cols))
        all_data = [[convert(entry) for entry in row] for row in all_data]
        if self.debug:
            print(all_data)
        matrix = numpy.matrix(all_data)
        if self.debug:
            print(matrix)
        return matrix

    def apply_regression_matrix_interactively_to_one_row_of_X_to_get_one_row_of_Y(self):
        """
        Repeatedly ask the user for one row of predictor values and print the
        prediction made with the regression matrix B.  The process exits when the
        user answers `n', or when the input has the wrong number of values or
        contains something that is not a number.

        Raises ValueError if no regression matrix has been computed yet.
        """
        if self.B is None:
            raise ValueError("You must calculate the regression matrix B before applying it.")
        first_message = "\n\nEnter your values for the predictor variables.\n" + \
                        "The numbers you enter must be space separated.\n" + \
                        "You need to enter as many numbers as the number of\n" + \
                        "columns in the X matrix used for calculating B.\n\n" + \
                        "For starters, you could enter a row of the X used\n" + \
                        "for calculating B: "
        next_message = "\n\nEnter another set of values for the predictor\n" + \
                       "variables, or `n' to quit: "
        answer = _prompt("\n\nWould you like to apply this regression matrix to new\n" + \
                         "data. Enter `y' for yes or `n' for no: ")
        if answer == 'n':
            sys.exit(0)
        first_try = True
        while True:
            new_data = _prompt(first_message if first_try else next_message)
            if new_data == "n":
                sys.exit(0)
            try:
                # float() rather than int() so that decimal predictor values
                # are accepted.  split() ignores leading/trailing blanks.
                data_values = [float(token) for token in new_data.split()]
            except ValueError:
                print("Non-numeric value entered. Aborting.")
                sys.exit(1)
            if len(data_values) != self.X.shape[1]:
                print("Incorrect number of values entered. Aborting.")
                sys.exit(1)
            self.Xtest = numpy.matrix(data_values)
            self.Ytest = (self.Xtest - self.mean0X) * self.B + self.mean0Y
            print("\nHere is the tuple of predictions for the data you entered:\n")
            print(self.Ytest)
            first_try = False

    def PLS(self):
        """
        This implementation is based on the description of the algorithm by Herve
        Abdi in the article "Partial Least Squares Regression and Projection on
        Latent Structure Regression," Computational Statistics, 2010.  From my
        experiments with the different variants of PLS, this particular version
        generates the best regression results.  The Examples directory contains a
        script that carries out head-pose estimation using this version of PLS.

        Stores the column-wise means of X and Y in mean0X and mean0Y, stores the
        matrix of regression coefficients in self.B, and returns it.
        """
        X, Y = self.X, self.Y
        self.mean0X = X.mean(0)
        if self.debug:
            print("\nColumn-wise mean for X:")
            print(self.mean0X)
        X = X - self.mean0X
        if self.debug:
            print("\nZero-mean version of X:")
            print(X)
        self.mean0Y = Y.mean(0)
        if self.debug:
            print("\nColumn-wise mean for Y is:")
            print(self.mean0Y)
        Y = Y - self.mean0Y
        if self.debug:
            print("\nZero-mean version of Y:")
            print(Y)
        t_cols, u_cols, w_cols, c_cols, p_cols, Bdiag = [], [], [], [], [], []
        # One random starting value per observation.
        u = numpy.asmatrix(numpy.random.rand(1, X.shape[0])).T
        if self.debug:
            print("\nThe initial random guess for u: ")
            print(u)
        i = 0
        while True:
            j = 0
            # Inner iteration: refine the latent vector until u stops moving.
            while True:
                w = X.T * u
                w = w / numpy.linalg.norm(w)
                t = X * w
                t = t / numpy.linalg.norm(t)
                c = Y.T * t
                c = c / numpy.linalg.norm(c)
                u_old = u
                u = Y * c
                error = numpy.linalg.norm(u - u_old)
                if error < self.epsilon:
                    if self.debug:
                        print("Number of iterations for the %dth latent vector: %d" % (i, j + 1))
                    break
                j += 1
            b = (t.T * u)[0, 0]
            p = X.T * t / (numpy.linalg.norm(t) ** 2)
            t_cols.append(t)
            u_cols.append(u)
            w_cols.append(w)
            c_cols.append(c)
            p_cols.append(p)
            Bdiag.append(b)
            # Deflate X and Y by the component just extracted.
            X = X - t * p.T
            Y = Y - b * t * c.T
            i += 1
            if numpy.linalg.norm(X) < 0.001:
                break
        T = numpy.asmatrix(numpy.hstack(t_cols))
        U = numpy.asmatrix(numpy.hstack(u_cols))
        W = numpy.asmatrix(numpy.hstack(w_cols))
        C = numpy.asmatrix(numpy.hstack(c_cols))
        P = numpy.asmatrix(numpy.hstack(p_cols))
        if self.debug:
            print("\n\n\nThe T matrix:")
            print(T)
            print("\nThe U matrix:")
            print(U)
            print("\nThe W matrix:")
            print(W)
            print("\nThe C matrix:")
            print(C)
            print("\nThe P matrix:")
            print(P)
            print("\nThe b vector:")
            print(Bdiag)
            print("\nThe final deflated X matrix:")
            print(X)
            print("\nThe final deflated Y matrix:")
            print(Y)
        B = numpy.asmatrix(numpy.diag(Bdiag))
        if self.debug:
            print("\nThe diagonal matrix B of b values:")
            print(B)
        self.B = numpy.asmatrix(numpy.linalg.pinv(P.T)) * B * C.T
        if self.debug:
            print("\nThe matrix B of regression coefficients:")
            print(self.B)
        # For testing, make a prediction based on the original X:
        if self.debug:
            Y_predicted = (self.X - self.mean0X) * self.B
            print("\nY_predicted from the original X:")
            print(Y_predicted)
            Y_predicted_with_mean = Y_predicted + self.mean0Y
            print("\nThe predicted Y with the original Y's column-wise mean added:")
            print(Y_predicted_with_mean)
            print("\nThe original Y for comparison:")
            print(self.Y)
        return self.B

    def PLS1(self):
        """
        This implementation is based on the description of the algorithm in the
        article "Overview and Recent Advances in Partial Least Squares" by Roman
        Rosipal and Nicole Kramer, LNCS, 2006.  Note that PLS1 assumes that the Y
        matrix consists of just one column.  That makes it particularly appropriate
        for solving face recognition problems.  This module uses this method for a
        two-class discrimination between the faces.  We construct the X and Y
        matrices from the positive and the negative examples of the face to be
        recognized.  Each row of the X matrix consists of the vectorized
        representation of either a positive example of a face or a negative
        example.  The corresponding element in the one-column Y is +1 for the
        positive examples and -1 for the negative examples.

        Stores the regression coefficients in self.B and returns them.  Raises
        ValueError when Y has more than one column.

        NOTE(review): the score vectors t are not normalised here, while the closed
        form used for B appears to be stated for unit-norm scores in the reference;
        the computation is left exactly as it was -- confirm against the article.
        """
        X, Y = self.X, self.Y
        if Y.shape[1] != 1:
            raise ValueError("PLS1 can only be called when the Y has only one column")
        self.mean0X = X.mean(0)
        X = X - self.mean0X
        self.mean0Y = Y.mean(0)
        Y = Y - self.mean0Y
        t_cols, u_cols, w_cols, c_cols, p_cols = [], [], [], [], []
        u = Y
        while True:
            w = X.T * u
            w = w / numpy.linalg.norm(w)
            t = X * w
            c = Y.T * t
            c = c / numpy.linalg.norm(c)
            u = Y * c
            p = X.T * t / (numpy.linalg.norm(t) ** 2)
            t_cols.append(t)
            u_cols.append(u)
            w_cols.append(w)
            c_cols.append(c)
            p_cols.append(p)
            # Deflate X and Y by the component just extracted.
            X = X - t * p.T
            Y = Y - ((t * t.T) * Y) / (numpy.linalg.norm(t) ** 2)
            if numpy.linalg.norm(X) < 0.001:
                break
        T = numpy.asmatrix(numpy.hstack(t_cols))
        U = numpy.asmatrix(numpy.hstack(u_cols))
        W = numpy.asmatrix(numpy.hstack(w_cols))
        C = numpy.asmatrix(numpy.hstack(c_cols))
        P = numpy.asmatrix(numpy.hstack(p_cols))
        if self.debug:
            print("\n\n\nThe T matrix:")
            print(T)
            print("\nThe U matrix:")
            print(U)
            print("\nThe W matrix:")
            print(W)
            print("\nThe C matrix:")
            print(C)
            print("\nThe X matrix:")
            print(X)
            print("\nThe Y matrix:")
            print(Y)
        self.B = W * ((P.T * W).I) * T.T * self.Y
        if self.debug:
            print("\nThe matrix B of regression coefficients:")
            print(self.B)
        return self.B

    def PLS2(self):
        """
        This implementation is based on the description of the algorithm in the
        article "Overview and Recent Advances in Partial Least Squares" by Roman
        Rosipal and Nicole Kramer, LNCS, 2006.  Unlike PLS1, this implementation
        places no constraints on the number of columns in the Y matrix.

        Stores the regression coefficients in self.B and returns them.  With the
        debug option on and a multi-column Y, the predictions for the original X
        are printed next to the original Y.
        """
        X, Y = self.X, self.Y
        self.mean0X = X.mean(0)
        if self.debug:
            print("\ncolumn-wise mean for X:")
            print(self.mean0X)
        X = X - self.mean0X
        self.mean0Y = Y.mean(0)
        if self.debug:
            print("\ncolumn-wise mean for Y:")
            print(self.mean0Y)
        Y = Y - self.mean0Y
        t_cols, u_cols, w_cols, c_cols, p_cols = [], [], [], [], []
        # One random starting value per observation.
        u = numpy.asmatrix(numpy.random.rand(1, X.shape[0])).T
        if self.debug:
            print("\nu vector initialization: ")
            print(u)
        i = 0
        while True:
            j = 0
            # Inner iteration: refine the latent vector until u stops moving.
            while True:
                w = X.T * u
                w = w / numpy.linalg.norm(w)
                t = X * w
                c = Y.T * t
                c = c / numpy.linalg.norm(c)
                u_old = u
                u = Y * c
                error = numpy.linalg.norm(u - u_old)
                if error < self.epsilon:
                    if self.debug:
                        print("Number of iterations for the %dth latent vector: %d" % (i, j + 1))
                    break
                j += 1
            p = X.T * t / (numpy.linalg.norm(t) ** 2)
            t_cols.append(t)
            u_cols.append(u)
            w_cols.append(w)
            c_cols.append(c)
            p_cols.append(p)
            # Deflate X and Y by the component just extracted.
            X = X - t * p.T
            Y = Y - ((t * t.T) * Y) / (numpy.linalg.norm(t) ** 2)
            i += 1
            if numpy.linalg.norm(X) < 0.001:
                break
        T = numpy.asmatrix(numpy.hstack(t_cols))
        U = numpy.asmatrix(numpy.hstack(u_cols))
        W = numpy.asmatrix(numpy.hstack(w_cols))
        C = numpy.asmatrix(numpy.hstack(c_cols))
        P = numpy.asmatrix(numpy.hstack(p_cols))
        if self.debug:
            print("\n\n\nThe T matrix:")
            print(T)
            print("\nThe U matrix:")
            print(U)
            print("\nThe W matrix:")
            print(W)
            print("\nThe C matrix:")
            print(C)
            print("\nThe final deflated X matrix:")
            print(X)
            print("\nThe final deflated Y matrix:")
            print(Y)
        self.B = W * (P.T * W).I * C.T
        if self.debug:
            print("\nThe matrix B of regression coefficients:")
            print(self.B)
            if self.Y.shape[1] > 1:
                Y_predicted = (self.X - self.mean0X) * self.B
                print("\nY_predicted from the original X:")
                print(Y_predicted)
                Y_predicted_with_mean = Y_predicted + self.mean0Y
                print("\nThe predicted Y with the original Y's column-wise mean added:")
                print(Y_predicted_with_mean)
                print("\nThe original Y for comparison:")
                print(self.Y)
        return self.B

    def vectorize_images_and_construct_X_and_Y_matrices_for_face_recognition_with_PLS1(self):
        """
        This method assumes that the images to be used for training and testing are
        organized as follows in the image_directory option supplied to the
        constructor of the module:

                                image_directory
                                       |
                        -------------------------------
                        |                             |
                    training                       testing
                        |                             |
                ------------------            ------------------
                |                |            |                |
            positives        negatives    positives        negatives

        The module constructs the X and the Y matrices from the images in the
        `training/positives' and the `training/negatives' subdirectories.  The
        vectorized representation of each image constitutes a row of the X matrix.
        The corresponding element in the one-column Y matrix is +1 for the images in
        the `positives' directory and -1 for the images in the `negatives'
        directory.  In a similar manner, the method constructs Xtest and Ytest
        matrices from the images in the `testing/positives' and `testing/negatives'
        subdirectories.
        """
        self._vectorize_images_for_PLS1()
        self._construct_X_and_Y_matrices_from_image_vectors_for_PLS1('training')
        self._construct_X_and_Y_matrices_from_image_vectors_for_PLS1('testing')
        if self.debug:
            print("\nThe X matrix:")
            print(self.X)
            print("\nThe Y matrix:")
            print(self.Y)
            print("\nThe Xtest matrix:")
            print(self.Xtest)
            print("\nThe Ytest matrix:")
            print(self.Ytest)

    @staticmethod
    def _require_subdirectories(parent, names, message):
        """Raise AssertionError(message) unless every name is a subdirectory of parent."""
        for name in names:
            if not os.path.isdir(os.path.join(parent, name)):
                raise AssertionError(message)

    def _image_files(self, directory):
        """Return the names of the files of type image_type found in directory."""
        escape = getattr(glob, 'escape', lambda text: text)   # glob.escape is Python 3 only
        pattern = os.path.join(escape(directory), '*.' + self.image_type)
        names = [os.path.basename(path) for path in glob.glob(pattern)]
        return [name for name in names if name.endswith(self.image_type)]

    @staticmethod
    def _stack_rows(existing, new_rows):
        """Return existing (a matrix or None) with new_rows appended below it."""
        if not new_rows:
            return existing
        block = numpy.asmatrix(numpy.vstack(new_rows))
        if existing is None:
            return block
        return numpy.vstack((existing, block))

    def _vectorize_images_for_PLS1(self):
        """
        Vectorize every image under training/{positives,negatives} and
        testing/{positives,negatives} of image_directory and append the row
        vectors to the corresponding lists of this instance.  For the testing
        images the file names are recorded as well.

        The working directory of the process is left untouched.  Raises
        AssertionError when an expected subdirectory is missing.
        """
        root = self.image_directory
        self._require_subdirectories(
            root, ('training', 'testing'),
            '''The image directory must contain two subdirectories '''
            '''named `training' and `testing' ''')
        destinations = {
            'training': (self.training_positives, None,
                         self.training_negatives, None),
            'testing': (self.testing_positives, self.testing_positives_filenames,
                        self.testing_negatives, self.testing_negatives_filenames),
        }
        for phase in ('training', 'testing'):
            phase_dir = os.path.join(root, phase)
            self._require_subdirectories(
                phase_dir, ('positives', 'negatives'),
                '''The directory ''' + os.path.abspath(phase_dir) + ''' must contain two '''
                '''subdirectories named `positives' and `negatives' ''')
            pos_vectors, pos_names, neg_vectors, neg_names = destinations[phase]
            for label, vectors, names in (('positives', pos_vectors, pos_names),
                                          ('negatives', neg_vectors, neg_names)):
                label_dir = os.path.join(phase_dir, label)
                for image in self._image_files(label_dir):
                    pixel_list = self.extract_pixels_from_image(os.path.join(label_dir, image))
                    vectors.append(numpy.matrix(pixel_list))
                    if names is not None:
                        names.append(image)

    def vectorize_images_and_construct_X_and_Y_matrices_for_head_pose_estimation_with_PLS(self):
        """
        This method assumes that the image directory contains two subdirectories
        named:

            -- training
            -- testing

        Furthermore, the method assumes that the name of each image file in the two
        subdirectories named above is an encoding of the roll, pitch, and yaw values
        associated with the face image in that image.  For example, the name of the
        first image file in the directory `/head_pose_images/training/' is

                 y1p1r2.jpg

        This name implies that the pose of the head in this image corresponds to the
        following values for roll, pitch, and yaw:

                 yaw   = -30 degrees
                 pitch = -30 degrees
                 roll  = -20 degrees

        To understand why the name of the file translates into the values shown
        above, note that the pose of the head is varied with respect to each of the
        roll, pitch, and yaw parameters from -30 degrees to +30 degrees.  We use the
        following mapping between the integer indices associated with the parameters
        in the file names and their actual angles:

                 1  =>  -30 deg
                 2  =>  -20 deg
                 3  =>  -10 deg
                 4  =>    0 deg
                 5  =>  +10 deg
                 6  =>  +20 deg
                 7  =>  +30 deg

        This naming convention makes it easy to create the rows of the Y matrix for
        each row of the X matrix.  Each row of the X matrix is the vectorized
        representation of the pixels in the image and each corresponding row of the
        Y matrix consists of the three pose angles associated with that image.

        The working directory of the process is left untouched.  Raises
        AssertionError when a subdirectory is missing and ValueError when an image
        name does not follow the convention or no training image is found.
        """
        root = self.image_directory
        self._require_subdirectories(
            root, ('training', 'testing'),
            '''The image directory must contain two subdirectories '''
            '''names `training' and `testing' ''')
        pixel_rows, angle_rows = self._collect_head_pose_rows(os.path.join(root, 'training'))
        self.X = self._stack_rows(self.X, pixel_rows)
        self.Y = self._stack_rows(self.Y, angle_rows)
        if self.X is None:
            raise ValueError("No training images of type %s were found" % self.image_type)
        self.N = self.X.shape[0]
        if self.debug:
            print("\nThe X matrix:")
            print(self.X)
            print("\nThe Y matrix:")
            print(self.Y)
        pixel_rows, angle_rows = self._collect_head_pose_rows(os.path.join(root, 'testing'))
        self.Xtest = self._stack_rows(self.Xtest, pixel_rows)
        self.Ytest = self._stack_rows(self.Ytest, angle_rows)
        if self.debug:
            print("\nThe Xtest matrix:")
            print(self.Xtest)
            print("\nThe Ytest matrix:")
            print(self.Ytest)

    def _collect_head_pose_rows(self, directory):
        """Return (pixel rows, [yaw, pitch, roll] rows) for the images in directory."""
        pattern = r'y(.)p(.)r(.)'
        pixel_rows, angle_rows = [], []
        for imagename in self._image_files(directory):
            basename = os.path.splitext(imagename)[0]
            m = re.search(pattern, basename)
            if m is None:
                raise ValueError("The image name %s does not encode yaw, pitch and roll"
                                 % imagename)
            # Index 4 in the file name stands for 0 degrees; each step is 10 degrees.
            angles = [(int(code) - 4) * 10 for code in m.groups()]
            pixel_rows.append(self.extract_pixels_from_image(os.path.join(directory, imagename)))
            angle_rows.append(angles)
        return pixel_rows, angle_rows

    def _construct_X_and_Y_matrices_from_image_vectors_for_PLS1(self, training_or_testing):
        """
        Append the vectorized positive and negative images to X and Y (for
        'training') or to Xtest and Ytest (for 'testing'), labelling the positives
        with +1 and the negatives with -1.  Positives come before negatives.

        Raises ValueError for any other argument and when there is no training
        data at all.
        """
        # String equality, not identity: `is' works only by accident of interning.
        if training_or_testing == 'training':
            vectors = self.training_positives + self.training_negatives
            labels = [[1]] * len(self.training_positives) + \
                     [[-1]] * len(self.training_negatives)
            self.X = self._stack_rows(self.X, vectors)
            self.Y = self._stack_rows(self.Y, labels)
            if self.X is None:
                raise ValueError("There are no training images to construct X and Y from")
            self.N = self.X.shape[0]
        elif training_or_testing == 'testing':
            vectors = self.testing_positives + self.testing_negatives
            labels = [[1]] * len(self.testing_positives) + \
                     [[-1]] * len(self.testing_negatives)
            self.Xtest = self._stack_rows(self.Xtest, vectors)
            self.Ytest = self._stack_rows(self.Ytest, labels)
        else:
            raise ValueError("The argument must be either 'training' or 'testing'")
        if self.debug and self.X is not None:
            print("\nThe size of the X matrix:")
            print(self.X.shape)
            print("\nprinting out the Y matrix:")
            print(self.Y)

    def extract_pixels_from_image(self, imagename):
        """
        Return the gray levels of the image as a flat row-major list of exactly
        width * height entries, where (width, height) is the constructor option
        image_size_for_computations.  The image is shrunk to fit inside that size
        and centred; the uncovered border is filled with 0.0.

        Raises ImportError when Pillow is not installed and ValueError when
        image_size_for_computations was not supplied.
        """
        if Image is None:
            raise ImportError("The image based methods need the Pillow package (PIL)")
        if self.image_size_for_computations is None:
            raise ValueError("You must supply the image_size_for_computations option")
        target_width, target_height = self.image_size_for_computations[0], \
                                      self.image_size_for_computations[1]
        if self.debug:
            print("\nimage name is: ", os.path.abspath(imagename))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        resample = getattr(Image, 'LANCZOS', None)
        if resample is None:
            resample = Image.ANTIALIAS
        with Image.open(imagename) as source:
            im = source.convert('L')        ## convert to gray level
        im.thumbnail(self.image_size_for_computations, resample)
        width, height = im.size
        if self.debug:
            print("width: %d    height: %d" % (width, height))
        left = (target_width - width) // 2
        top = (target_height - height) // 2
        pixel_list = []
        for i in range(target_height):
            inside_rows = top <= i < top + height
            for j in range(target_width):
                if inside_rows and left <= j < left + width:
                    pixel_list.append(im.getpixel((j - left, i - top)))
                else:
                    pixel_list.append(0.0)
        if len(pixel_list) != target_width * target_height:
            print("Image resizing step is not correct. Aborting.")
            sys.exit(1)
        return pixel_list

    def run_evaluation_of_PLS_regression_for_head_pose_estimation(self):
        """
        The docstring associated with the method

          vectorize_images_and_construct_X_and_Y_matrices_for_head_pose_estimation_with_PLS()

        applies here also.  The method here uses the Xtest and Ytest matrices
        constructed by the `vectorize' method named above from the images in the
        `testing' directory for evaluating PLS regression for head pose estimation.

        Returns the matrix whose first three columns are the true pose parameters
        and whose last three columns are the estimated ones.  Raises ValueError
        when there is no test data or no regression matrix.
        """
        if self.Xtest is None:
            raise ValueError("There is no data in your Xtest and Ytest matrices. Aborting.")
        if self.B is None:
            raise ValueError("You must calculate the regression matrix B before evaluating it.")
        Y_predicted = (self.Xtest - self.mean0X) * self.B + self.mean0Y
        error = numpy.linalg.norm(Y_predicted - self.Ytest) / (self.Ytest.shape[0] * 3)
        print("\nAverage error in head pose estimation: " + str(error) + " degrees")
        print("\nThe error shown above was calculated by (1) taking the Frobenius norm of\n" +\
              "the difference between the true Y matrix (for just the data in the `testing'\n" +\
              "directory) and its value estimated by PLS regression; (2) Dividing the norm\n" +\
              "by the number of rows in Y to calculate the error per observation; and,\n" +\
              "finally, by (3) dividing the result by 3 to estimate the error per degree\n" +\
              "of freedom.")
        side_by_side_comparison = numpy.hstack((self.Ytest, Y_predicted))
        answer = _prompt("\n\nWould you like to see a side-by-side comparison of the\n" +\
                         "the true values for the pose parameters and the values\n" +\
                         "as computed by PLS regression? Answer `y' for yes and\n" +\
                         "`n' for no: ")
        if answer == 'y':
            print("\nDisplay of a side by side comparison. The first three columns show the\n" + \
                  "true values for the head pose parameters and the last three columns show\n" +\
                  "the values as estimated by PLS regression.\n")
            print(side_by_side_comparison)
        return side_by_side_comparison

    def run_evaluation_of_PLS_regression_for_face_recognition(self):
        """
        The docstring associated with the method

          vectorize_images_and_construct_X_and_Y_matrices_for_face_recognition_with_PLS1()

        applies here also.  The method here uses the Xtest and Ytest matrices
        constructed by the `vectorize' method named above from the images in the
        `testing/positives' and the /testing/negatives/ subdirectories for
        evaluating PLS regression for face recognition.

        The predicted values are turned into +1/-1 labels with a decision threshold
        chosen from their histogram by maximising the ratio of between-class to
        within-class variance.  The method prints the confusion matrix and, unless
        the user answers `n' (which exits the process), the names of the
        misclassified test images.

        Raises ValueError when there is no test data or no regression matrix.
        """
        if self.Xtest is None:
            raise ValueError("There is no data in your Xtest and Ytest matrices. Aborting.")
        if self.B is None:
            raise ValueError("You must calculate the regression matrix B before evaluating it.")
        Y_predicted = (self.Xtest - self.mean0X) * self.B + self.mean0Y
        if self.debug:
            print("\nPrinting the predicted Y:")
            print(Y_predicted)
        y_vals = Y_predicted[:, 0].flatten().tolist()[0]
        if self.debug:
            print("predicted values as a list: ", y_vals)
        minval, maxval = min(y_vals), max(y_vals)
        if self.debug:
            print("min value: ", minval, "  and max value: ", maxval)
        delta = (maxval - minval) / 100.0
        hist = [0] * 101
        if self.debug:
            print("\nThe bin structure of the histogram --- with empty bins")
            print(hist)
        for val in y_vals:
            # All predictions identical => delta is zero; everything goes in bin 0.
            bin_index = int((val - minval) / delta) if delta else 0
            hist[bin_index] += 1
        if self.debug:
            print("\nThe populated histogram:")
            print(hist)
        total_count = sum(hist)
        coarseness = 8
        # NOTE(review): 101 bins grouped by 8 gives 12 groups, so the last five
        # bins do not enter the statistics below -- kept as it was; confirm intent.
        probs = [sum(hist[coarseness * g:coarseness * g + coarseness]) / float(total_count)
                 for g in range(len(hist) // coarseness)]
        prob_times_graylevel = [coarseness * g * prob for g, prob in enumerate(probs)]
        m0 = _running_sums(probs)
        m1 = _running_sums(prob_times_graylevel)
        mu_T = m1[-1]                                          # mean for the image
        prob_times_graysquared = [(coarseness * g - mu_T) ** 2 * prob
                                  for g, prob in enumerate(probs)]
        sigma_squared_T = _running_sums(prob_times_graysquared)[-1]
        sigmaB_squared = [None] * len(m0)   # for between-class variance as a func of threshold
        sigmaW_squared = [None] * len(m0)   # for within-class variance as a func of threshold
        variance_ratio = [None] * len(m0)   # for the ratio of the two variances
        for k in range(len(m0)):
            if 0 < m0[k] < 1.0:
                sigmaB_squared[k] = (mu_T * m0[k] - m1[k]) ** 2 / (m0[k] * (1.0 - m0[k]))
                sigmaW_squared[k] = sigma_squared_T - sigmaB_squared[k]
                if sigmaW_squared[k] == 0:
                    variance_ratio[k] = float('inf')
                else:
                    variance_ratio[k] = sigmaB_squared[k] / sigmaW_squared[k]
        # Thresholds for which one class would be empty carry None; they cannot
        # be compared with numbers on Python 3 and are left out of the search.
        usable_ratios = [ratio for ratio in variance_ratio if ratio is not None]
        if usable_ratios:
            best_index = variance_ratio.index(max(usable_ratios))
        else:
            best_index = 0
        otsu_threshold_testdata = best_index * coarseness
        otsu_threshold_testdata = otsu_threshold_testdata * delta + minval
        if self.debug:
            print("\nbest threshold for test data: ", otsu_threshold_testdata)
        self.decision_threshold_testdata = otsu_threshold_testdata
        if self.debug:
            print("\n\nThe testdata decision threshold for binary recognition: ", \
                  self.decision_threshold_testdata)
        Y_predicted_thresholded = [1 if value > self.decision_threshold_testdata else -1
                                   for value in y_vals]
        if self.debug:
            print(Y_predicted_thresholded)
        Y_predicted_thresholded = numpy.matrix(Y_predicted_thresholded).T
        Y_comparison = numpy.hstack((self.Ytest, Y_predicted_thresholded))
        if self.debug:
            print('''\nShowing true and computed labels side by side. Left column is ''' + \
                  '''true labels and the right column the computed labels:''')
            print(Y_comparison)
        confusion_matrix = numpy.asmatrix(numpy.zeros(shape=(2, 2)))
        testing_pos_images_identified_as_neg = []
        testing_neg_images_identified_as_pos = []
        total_num_testing_pos = len(self.testing_positives_filenames)
        # Rows of Ytest hold the positives first and the negatives after them,
        # which is what maps a row index to a file name below.
        for i, row in enumerate(Y_comparison):
            true_label, computed_label = row[0, 0], row[0, 1]
            if true_label == 1 and computed_label == 1:
                confusion_matrix[0, 0] += 1
            elif true_label == 1 and computed_label == -1:
                confusion_matrix[1, 0] += 1
                testing_pos_images_identified_as_neg.append(self.testing_positives_filenames[i])
            elif true_label == -1 and computed_label == -1:
                confusion_matrix[1, 1] += 1
            elif true_label == -1 and computed_label == 1:
                confusion_matrix[0, 1] += 1
                testing_neg_images_identified_as_pos.append(
                    self.testing_negatives_filenames[i - total_num_testing_pos])
        print("\nDisplaying the confusion matrix: \n\n")
        print(" true pos true neg ")
        print(" ------------------------")
        print(" ")
        print("computed pos: " + str(confusion_matrix[0, 0]) + " " + str(confusion_matrix[0, 1]))
        print(" ")
        print("computed neg: " + str(confusion_matrix[1, 0]) + " " + str(confusion_matrix[1, 1]))
        print("\n")
        true_positive_detection_rate = confusion_matrix[0, 0] / confusion_matrix[:, 0].sum()
        false_positive_detection_rate = confusion_matrix[0, 1] / confusion_matrix[:, 1].sum()
        print("\nEstimated probability for true positives: " + str(true_positive_detection_rate))
        print("\nEstimated probability for false positives: " + str(false_positive_detection_rate))
        answer = _prompt("\nWould you like to see the names of the image files in the\n" +\
                         "`testing' directory that were misclassified? Answer `y' for\n" +\
                         "yes and `n' for no: ")
        if answer == 'n':
            sys.exit(0)
        alist = sorted(testing_pos_images_identified_as_neg, key=_numeric_name_key)
        blist = sorted(testing_neg_images_identified_as_pos, key=_numeric_name_key)
        print("\npositive images in the `testing' directory misclassified as negatives: " + str(alist))
        print("\nnegative images in the `testing' directory misclassified as positives: " + str(blist))

#----------------------- End of PartialLeastSquares Class Definition --------------------------

#----------------------------------    Test code follows      ----------------------------------

if __name__ == '__main__':

    XMatrix_file = "X_data.csv"
    YMatrix_file = "Y_data.csv"

    pls = PartialLeastSquares(
            XMatrix_file = XMatrix_file,
            YMatrix_file = YMatrix_file,
            epsilon = 0.001,
            debug = 1,
          )
    pls.get_XMatrix_from_csv()
    pls.get_YMatrix_from_csv()
    pls.PLS()