# Module metadata: release version, author contact, release date, project
# home page and copyright notice for this distribution.
__version__ = '1.0.3'
__author__ = "Avinash Kak (kak@purdue.edu)"
__date__ = '2015-May-20'
__url__ = 'https://engineering.purdue.edu/kak/distPLS/PartialLeastSquares-1.0.3.html'
__copyright__ = "(C) 2015 Avinash Kak. Python Software Foundation."
from PIL import Image
import numpy
import numpy.linalg
import re
import sys, os
import functools
import glob
# Module-level side effect: every numpy array/matrix printed after this module
# is imported shows floating point values with a precision of 3.
numpy.set_printoptions(precision=3)
def convert(value):
    """
    Return `value` converted to a float when that is possible; otherwise return
    `value` unchanged.

    Only the errors float() raises for unconvertible input (TypeError,
    ValueError, OverflowError) are treated as "not a number".  Anything else,
    KeyboardInterrupt and SystemExit in particular, propagates to the caller
    instead of being swallowed.
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return value
#----------------------------- PartialLeastSquares Class Definition --------------------------------
class PartialLeastSquares(object):
def __init__(self, *args, **kwargs ):
if args:
raise ValueError(
'''constructor can only be called with keyword arguments for
the following keywords: XMatrix_file, YMatrix_file, epsilon,
image_directory, image_type, image_size_for_computations, debug''')
XMatrix_file=YMatrix_file=epsilon=image_directory=image_type=image_size_for_computations=debug=None
if 'XMatrix_file' in kwargs : XMatrix_file=kwargs.pop('XMatrix_file')
if 'YMatrix_file' in kwargs : YMatrix_file=kwargs.pop('YMatrix_file')
if 'epsilon' in kwargs : epsilon=kwargs.pop('epsilon')
if 'image_directory' in kwargs : image_directory=kwargs.pop('image_directory')
if 'image_type' in kwargs : image_type=kwargs.pop('image_type')
if 'debug' in kwargs : debug=kwargs.pop('debug')
if 'image_size_for_computations' in kwargs :
image_size_for_computations=kwargs.pop('image_size_for_computations')
if len(kwargs) != 0:
raise ValueError('''You have provided unrecognizable keyword args''')
if XMatrix_file:
self.XMatrix_file = XMatrix_file
if YMatrix_file:
self.YMatrix_file = YMatrix_file
if epsilon:
self.epsilon = epsilon
else:
self.epsilon = .0001
if image_directory:
self.image_directory = image_directory
if image_type:
self.image_type = image_type
if image_size_for_computations:
self.image_size_for_computations = image_size_for_computations
if debug:
self.debug = debug
else:
self.debug = 0
self.X = None # Each column of X stands for a predictor variable
self.Y = None # Each column of Y stands for a predicted variable
self.mean0X = None # Store column-wise mean for X
self.mean0Y = None # and for Y
self.Xtest = None # X matrix for evaluating PLS regression
self.Ytest = None # Y matrix for evaluating PLS regression
self.B = None # regression coefficients
self.training_positives = [] # list of row vectors
self.training_negatives = []
self.testing_positives = []
self.testing_negatives = []
self.testing_positives_filenames = []
self.testing_negatives_filenames = []
def get_XMatrix_from_csv(self):
"""
If you wish to use your own X and Y matrices for PLS regression, you'd need
to supply them in the form of CSV files. This method extracts the X
matrix from the file named for this purpose by the constructor option
XMatrix_file.
"""
self.X = self._get_matrix_from_csv_file(self.XMatrix_file)
self.N = self.X.shape[0]
self.num_predictor_vars = self.X.shape[1]
print("\n\nThe X matrix: ")
print(self.X)
def get_YMatrix_from_csv(self):
"""
If you wish to use your own X and Y matrices for PLS regression, you'd need
to supply them in the form of CSV files. This method extracts the Y
matrix from the file named for this purpose by the constructor option
YMatrix_file.
"""
self.Y = self._get_matrix_from_csv_file(self.YMatrix_file)
if (self.Y.shape[0] != self.N):
sys.exit("The X and Y matrix data are not consistent")
print("\n\nThe Y matrix: ")
print(self.Y)
def _get_matrix_from_csv_file(self, filename):
    """
    Read a comma-separated file and return its contents as a numpy.matrix.

    Each non-blank line becomes one row.  Every field is passed through the
    module-level convert(), which turns numeric text into floats and leaves
    other text unchanged.

    The program exits through sys.exit() when `filename` does not end in
    '.csv' or when the file holds no data rows.
    """
    if not filename.endswith('.csv'):
        sys.exit("Aborted. get_training_data_from_csv() is only for CSV files")
    # The 'U' flag was removed from open() in Python 3.11; universal newlines
    # are the default for text mode on Python 3.  Python 2 still needs it.
    mode = "r" if sys.version_info[0] >= 3 else "rU"
    with open(filename, mode) as csv_file:
        # Blank lines (typically a trailing one) would otherwise yield a
        # one-field row and make the matrix ragged.
        all_data = [line.rstrip().split(',') for line in csv_file if line.strip()]
    if not all_data:
        sys.exit("Aborted. No data rows found in " + filename)
    num_rows = len(all_data)
    num_cols = len(all_data[0])
    if self.debug:
        print("num rows: " + str(num_rows) + "  num columns: " + str(num_cols))
    all_data = [[convert(entry) for entry in row] for row in all_data]
    if self.debug:
        print(all_data)
    matrix = numpy.matrix(all_data)
    if self.debug:
        print(matrix)
    return matrix
def apply_regression_matrix_interactively_to_one_row_of_X_to_get_one_row_of_Y(self):
first_message = "\n\nEnter your values for the predictor variables.\n" + \
"The numbers you enter must be space separated.\n" + \
"You need to enter as many numbers as the number of\n" + \
"columns in the X matrix used for calculating B.\n\n" + \
"For starters, you could enter a row of the X used\n" + \
"for calculating B: "
if sys.version_info[0] == 3:
answer = input("\n\nWould you like to apply this regression matrix to new\n" + \
"data. Enter `y' for yes or `n' for no: ")
else:
answer = raw_input("\n\nWould you like to apply this regression matrix to new\n" + \
"data. Enter `y' for yes or `n' for no: ")
if answer == 'n':
sys.exit(0)
else:
first_try = 1
while 1:
if first_try:
if sys.version_info[0] == 3:
new_data = input(first_message)
else:
new_data = raw_input(first_message)
else:
if sys.version_info[0] == 3:
new_data = input("\n\nEnter another set of values for the predictor\n" + \
"variables, or `n' to quit: ")
else:
new_data = raw_input("\n\nEnter another set of values for the predictor\n" + \
"variables, or `n' to quit: ")
if new_data == "n": sys.exit(0)
data_values = re.split( r'\s+', new_data )
data_values = list(map(lambda x: int(x), data_values))
if len(data_values) != self.X.shape[1]:
print("Incorrect number of values entered. Aborting.")
sys.exit(1)
self.Xtest = numpy.matrix(data_values)
self.Ytest = (self.Xtest - self.mean0X) * self.B + self.mean0Y
print("\nHere is the tuple of predictions for the data you entered:\n")
print(self.Ytest)
first_try = 0
def PLS(self):
"""
This implementation is based on the description of the algorithm by Herve
Abdi in the article "Partial Least Squares Regression and Projection on
Latent Structure Regression," Computational Statistics, 2010. From my
experiments with the different variants of PLS, this particular version
generates the best regression results. The Examples directory contains a
script that carries out head-pose estimation using this version of PLS.
"""
X,Y = self.X, self.Y
self.mean0X = X.mean(0)
if self.debug:
print("\nColumn-wise mean for X:")
print(self.mean0X)
X = X - self.mean0X
if self.debug:
print("\nZero-mean version of X:")
print(X)
self.mean0Y = Y.mean(0)
if self.debug:
print("\nColumn-wise mean for Y is:")
print(self.mean0Y)
Y = Y - self.mean0Y
if self.debug:
print("\nZero-mean version of Y:")
print(Y)
T=U=W=C=P=Q=B=Bdiag=t=w=u=c=p=q=b=None
u = numpy.random.rand(1,self.N)
u = numpy.asmatrix(u).T
if self.debug:
print("\nThe initial random guess for u: ")
print(u)
i = 0
while (True):
j = 0
while (True):
w = X.T * u
w = w / numpy.linalg.norm(w)
t = X * w
t = t / numpy.linalg.norm(t)
c = Y.T * t
c = c / numpy.linalg.norm(c)
u_old = u
u = Y * c
error = numpy.linalg.norm(u - u_old)
if error < self.epsilon:
if self.debug:
print("Number of iterations for the %dth latent vector: %d" % (i,j+1))
break
j += 1
b = t.T * u
b = b[0,0]
if T is None:
T = t
else:
T = numpy.hstack((T,t))
if U is None:
U = u
else:
U = numpy.hstack((U,u))
if W is None:
W = w
else:
W = numpy.hstack((W,w))
if C is None:
C = c
else:
C = numpy.hstack((C,c))
p = X.T * t / (numpy.linalg.norm(t) ** 2)
q = Y.T * u / (numpy.linalg.norm(u) ** 2)
if P is None:
P = p
else:
P = numpy.hstack((P,p))
if Q is None:
Q = q
else:
Q = numpy.hstack((Q,q))
if Bdiag is None:
Bdiag = [b]
else:
Bdiag.append(b)
X_old = X
Y_old = Y
X = X - t * p.T
Y = Y - b * t * c.T
i += 1
if numpy.linalg.norm(X) < 0.001: break
if self.debug:
print("\n\n\nThe T matrix:")
print(T)
print("\nThe U matrix:")
print(U)
print("\nThe W matrix:")
print(W)
print("\nThe C matrix:")
print(C)
print("\nThe P matrix:")
print(P)
print("\nThe b vector:")
print(Bdiag)
print("\nThe final deflated X matrix:")
print(X)
print("\nThe final deflated Y matrix:")
print(Y)
B = numpy.diag(Bdiag)
B = numpy.asmatrix(B)
if self.debug:
print("\nThe diagonal matrix B of b values:")
print(B)
self.B = numpy.linalg.pinv(P.T) * B * C.T
if self.debug:
print("\nThe matrix B of regression coefficients:")
print(self.B)
# For testing, make a prediction based on the original X:
if self.debug:
Y_predicted = (self.X - self.mean0X) * self.B
print("\nY_predicted from the original X:")
print(Y_predicted)
Y_predicted_with_mean = Y_predicted + self.mean0Y
print("\nThe predicted Y with the original Y's column-wise mean added:")
print(Y_predicted_with_mean)
print("\nThe original Y for comparison:")
print(self.Y)
return self.B
def PLS1(self):
"""
This implementation is based on the description of the algorithm in the article
"Overview and Recent Advances in Partial Least Squares" by Roman Rosipal and
Nicole Kramer, LNCS, 2006. Note that PLS1 assumes that the Y matrix consists
of just one column. That makes it particularly appropriate for solving face
recognition problems. This module uses this method for a two-class
discrimination between the faces. We construct the X and Y matrices from the
positive and the negative examples of the face to be recognized. Each row of
the X matrix consists of the vectorized representation of either a positive
example of a face or a negative example. The corresponding element in the
one-column Y is +1 for the positive examples and -1 for the negative
examples.
"""
X,Y = self.X, self.Y
if Y.shape[1] != 1:
raise ValueError("PLS1 can only be called when the Y has only one column")
self.mean0X = X.mean(0)
X = X - self.mean0X
self.mean0Y = Y.mean(0)
Y = Y - self.mean0Y
T=U=W=C=P=Q=B=t=w=u=c=p=q=None
u = Y
i = 0
while (True):
w = X.T * u
w = w / numpy.linalg.norm(w)
t = X * w
c = Y.T * t
c = c / numpy.linalg.norm(c)
u = Y * c
if T is None:
T = t
else:
T = numpy.hstack((T,t))
if U is None:
U = u
else:
U = numpy.hstack((U,u))
if W is None:
W = w
else:
W = numpy.hstack((W,w))
p = X.T * t / (numpy.linalg.norm(t) ** 2)
q = Y.T * u / (numpy.linalg.norm(u) ** 2)
if P is None:
P = p
else:
P = numpy.hstack((P,p))
if Q is None:
Q = q
else:
Q = numpy.hstack((Q,q))
X_old = X
Y_old = Y
X = X - t * p.T
Y = Y - ( (t * t.T) * Y ) / (numpy.linalg.norm(t) ** 2)
i += 1
if numpy.linalg.norm(X) < 0.001: break
if self.debug:
print("\n\n\nThe T matrix:")
print(T)
print("\nThe U matrix:")
print(U)
print("\nThe W matrix:")
print(W)
print("\nThe C matrix:")
print(C)
print("\nThe X matrix:")
print(X)
print("\nThe Y matrix:")
print(Y)
self.B = W * ((P.T * W).I) * T.T * self.Y
if self.debug:
print("\nThe matrix B of regression coefficients:")
print(self.B)
return self.B
def PLS2(self):
"""
This implementation is based on the description of the algorithm in the article
"Overview and Recent Advances in Partial Least Squares" by Roman Rosipal and
Nicole Kramer, LNCS, 2006. Unlike PLS1, this implementation places no
constraints on the number of columns in the Y matrix.
"""
X,Y = self.X, self.Y
self.mean0X = X.mean(0)
if self.debug:
print("\ncolumn-wise mean for X:")
print(self.mean0X)
X = X - self.mean0X
self.mean0Y = Y.mean(0)
if self.debug:
print("\ncolumn-wise mean for Y:")
print(self.mean0Y)
Y = Y - self.mean0Y
T=U=W=C=P=Q=B=t=w=u=c=p=q=None
u = numpy.random.rand(1,self.N)
u = numpy.asmatrix(u).T
if self.debug:
print("\nu vector initialization: ")
print(u)
i = 0
while (True):
j = 0
while (True):
w = X.T * u
w = w / numpy.linalg.norm(w)
t = X * w
c = Y.T * t
c = c / numpy.linalg.norm(c)
u_old = u
u = Y * c
error = numpy.linalg.norm(u - u_old)
if error < self.epsilon:
if self.debug:
print("Number of iterations for the %dth latent vector: %d" % (i,j+1))
break
j += 1
if T is None:
T = t
else:
T = numpy.hstack((T,t))
if U is None:
U = u
else:
U = numpy.hstack((U,u))
if W is None:
W = w
else:
W = numpy.hstack((W,w))
if C is None:
C = c
else:
C = numpy.hstack((C,c))
p = X.T * t / (numpy.linalg.norm(t) ** 2)
q = Y.T * u / (numpy.linalg.norm(u) ** 2)
if P is None:
P = p
else:
P = numpy.hstack((P,p))
if Q is None:
Q = q
else:
Q = numpy.hstack((Q,q))
X_old = X
Y_old = Y
X = X - t * p.T
Y = Y - ( (t * t.T) * Y ) / (numpy.linalg.norm(t) ** 2)
i += 1
if numpy.linalg.norm(X) < 0.001: break
if self.debug:
print("\n\n\nThe T matrix:")
print(T)
print("\nThe U matrix:")
print(U)
print("\nThe W matrix:")
print(W)
print("\nThe C matrix:")
print(C)
print("\nThe final deflated X matrix:")
print(X)
print("\nThe final deflated Y matrix:")
print(Y)
self.B = W * (P.T * W).I * C.T
if self.debug:
print("\nThe matrix B of regression coefficients:")
print(self.B)
if self.Y.shape[1] > 1:
Y_predicted = (self.X - self.mean0X) * self.B
print("\nY_predicted from the original X:")
print(Y_predicted)
Y_predicted_with_mean = Y_predicted + self.mean0Y
print("\nThe predicted Y with the original Y's column-wise mean added:")
print(Y_predicted_with_mean)
print("\nThe original Y for comparison:")
print(self.Y)
return self.B
def vectorize_images_and_construct_X_and_Y_matrices_for_face_recognition_with_PLS1(self):
"""
This method assumes that the images to be used for training and testing are
organized as follows in the image_directory option supplied to the
constructor of the module:
image_directory
|
|
--------------------------------------
| |
| |
training testing
| |
| |
------------------------- -----------------------------
| | | |
| | | |
positives negatives positives negatives
The module constructs the X and the Y matrices from the images in the
`training/positives' and the `training/negatives' subdirectories. The
vectorized representation of each image constitutes a row of the X
matrix. The corresponding element in the one-column Y matrix is +1 for the
images in the `positives' directory and -1 for the images in the `negatives'
directory. In a similar manner, the method constructs Xtest and Ytest
matrices from the images in the `testing/positives' and `testing/negatives'
subdirectories.
"""
self._vectorize_images_for_PLS1()
self._construct_X_and_Y_matrices_from_image_vectors_for_PLS1('training')
self._construct_X_and_Y_matrices_from_image_vectors_for_PLS1('testing')
if self.debug:
print("\nThe X matrix:")
print(self.X)
print("\nThe Y matrix:")
print(self.Y)
print("\nThe Xtest matrix:")
print(self.Xtest)
print("\nThe Ytest matrix:")
print(self.Ytest)
def _vectorize_images_for_PLS1(self):
os.chdir(self.image_directory)
try:
assert 'training' and 'testing' in glob.glob('*')
except:
raise AssertionError('''The image directory must contain two subdirectories '''
'''named `training' and `testing' ''')
current_dir = os.getcwd()
# Let's start with the images in training/positives/
os.chdir(current_dir + "/training")
cwd = os.getcwd()
try:
assert 'positives' and 'negatives' in glob.glob('*')
except:
raise AssertionError('''The directory ''' + cwd + ''' must contain two '''
'''subdirectories named `positives' and `negatives' ''')
curr_dir = os.getcwd()
os.chdir(curr_dir + "/positives")
cwd = os.getcwd()
for image in glob.glob('*.' + self.image_type):
if image.endswith( self.image_type ):
pixel_list = self.extract_pixels_from_image(image)
self.training_positives.append(numpy.matrix(pixel_list))
# Let's now process training/negatives/
os.chdir(curr_dir + "/negatives")
cwd = os.getcwd()
for image in glob.glob('*.' + self.image_type):
if image.endswith( self.image_type ):
pixel_list = self.extract_pixels_from_image(image)
self.training_negatives.append(numpy.matrix(pixel_list))
# Let's start with the images in testing/positives/
os.chdir(current_dir + "/testing")
cwd = os.getcwd()
try:
assert 'positives' and 'negatives' in glob.glob('*')
except:
raise AssertionError('''The directory ''' + cwd + ''' must contain two '''
'''subdirectories named `positives' and `negatives' ''')
curr_dir = os.getcwd()
os.chdir(curr_dir + "/positives")
cwd = os.getcwd()
for image in glob.glob('*.' + self.image_type):
if image.endswith( self.image_type ):
pixel_list = self.extract_pixels_from_image(image)
self.testing_positives.append(numpy.matrix(pixel_list))
self.testing_positives_filenames.append(image)
# Let's now process testing/negatives/
os.chdir(curr_dir + "/negatives")
cwd = os.getcwd()
for image in glob.glob('*.' + self.image_type):
if image.endswith( self.image_type ):
pixel_list = self.extract_pixels_from_image(image)
self.testing_negatives.append(numpy.matrix(pixel_list))
self.testing_negatives_filenames.append(image)
def vectorize_images_and_construct_X_and_Y_matrices_for_head_pose_estimation_with_PLS(self):
"""
This method assumes that the image directory contains two subdirectories named:
-- training
-- testing
Furthermore, the method assumes that the name of each image file in the two
subdirectories named above is an encoding of the roll, pitch, and yaw values
associated with the face image in that image. For example, the name of the
first image file in the directory `/head_pose_images/training/' is
y1p1r2.jpg
This name implies that the pose of the head in this image corresponds to the
following values for roll, pitch, and yaw:
yaw = -30 degrees
pitch = -30 degrees
roll = -20 degrees
To understand why the name of the file translates into the values shown
above, note that the pose of the head is varied with respect to each of the
roll, pitch, and yaw parameters from -30 degrees to +30 degrees. We use the
following mapping between the integer indices associated with the parameters
in the file names and their actual angles:
1 => -30 deg
2 => -20 deg
3 => -10 deg
4 => 0 deg
5 => +10 deg
6 => +20 deg
7 => +30 deg
This naming convention makes it easy to to create the rows of the Y matrix
for each row of the X matrix. Each row of the X matrix is the vectorized
representation of the pixels in the image and each corresponding row of the
Y matrix consists of the three pose angles associated with that image.
"""
os.chdir(self.image_directory)
try:
assert 'training' and 'testing' in glob.glob('*')
except:
raise AssertionError('''The image directory must contain two subdirectories '''
'''names `training' and `testing' ''')
current_dir = os.getcwd()
os.chdir(current_dir + "/training")
pattern = r'y(.)p(.)r(.)'
for imagename in glob.glob('*.' + self.image_type):
if imagename.endswith( self.image_type ):
pixel_list = self.extract_pixels_from_image(imagename)
if self.X is None:
self.X = numpy.matrix(pixel_list)
else:
self.X = numpy.vstack((self.X, pixel_list))
basename = os.path.splitext(imagename)[0]
m = re.search(pattern, basename)
yaw,pitch,roll = m.group(1),m.group(2),m.group(3)
yaw,pitch,roll = (int(yaw)-4)*10,(int(pitch)-4)*10,(int(roll)-4)*10
if self.Y is None:
self.Y = numpy.matrix([yaw,pitch,roll])
else:
self.Y = numpy.vstack((self.Y, [yaw,pitch,roll]))
self.N = self.X.shape[0]
if self.debug:
print("\nThe X matrix:")
print(self.X)
print("\nThe Y matrix:")
print(self.Y)
os.chdir(current_dir + "/testing")
for imagename in glob.glob('*.' + self.image_type):
if imagename.endswith( self.image_type ):
pixel_list = self.extract_pixels_from_image(imagename)
if self.Xtest is None:
self.Xtest = numpy.matrix(pixel_list)
else:
self.Xtest = numpy.vstack((self.Xtest, pixel_list))
basename = os.path.splitext(imagename)[0]
m = re.search(pattern, basename)
yaw,pitch,roll = m.group(1),m.group(2),m.group(3)
yaw,pitch,roll = (int(yaw)-4)*10,(int(pitch)-4)*10,(int(roll)-4)*10
if self.Ytest is None:
self.Ytest = numpy.matrix([yaw,pitch,roll])
else:
self.Ytest = numpy.vstack((self.Ytest, [yaw,pitch,roll]))
if self.debug:
print("\nThe Xtest matrix:")
print(self.Xtest)
print("\nThe Ytest matrix:")
print(self.Ytest)
def _construct_X_and_Y_matrices_from_image_vectors_for_PLS1(self, training_or_testing):
if training_or_testing is 'training':
for vector in self.training_positives:
if self.X is None:
self.X = numpy.matrix(vector)
else:
self.X = numpy.vstack((self.X, vector))
if self.Y is None:
self.Y = numpy.matrix([1])
else:
self.Y = numpy.vstack((self.Y, numpy.matrix([1])))
for vector in self.training_negatives:
if self.X is None:
self.X = numpy.matrix(vector)
else:
self.X = numpy.vstack((self.X, vector))
if self.Y is None:
self.Y = numpy.matrix([-1])
else:
self.Y = numpy.vstack((self.Y, numpy.matrix([-1])))
self.N = self.X.shape[0]
elif training_or_testing is 'testing':
for vector in self.testing_positives:
if self.Xtest is None:
self.Xtest = numpy.matrix(vector)
else:
self.Xtest = numpy.vstack((self.Xtest, vector))
if self.Ytest is None:
self.Ytest = numpy.matrix([1])
else:
self.Ytest = numpy.vstack((self.Ytest, numpy.matrix([1])))
for vector in self.testing_negatives:
if self.Xtest is None:
self.Xtest = numpy.matrix(vector)
else:
self.Xtest = numpy.vstack((self.Xtest, vector))
if self.Ytest is None:
self.Ytest = numpy.matrix([-1])
else:
self.Ytest = numpy.vstack((self.Ytest, numpy.matrix([-1])))
if self.debug:
print("\nThe size of the X matrix:")
print(self.X.shape)
print("\nprinting out the Y matrix:")
print(self.Y)
def extract_pixels_from_image(self, imagename):
    """
    Return the pixels of an image as a flat, row-major list of gray levels
    whose length is exactly width * height of self.image_size_for_computations.

    The image is opened, converted to gray level (mode 'L') and shrunk in
    place with thumbnail() to fit self.image_size_for_computations.  If the
    result is smaller than that size in either direction, it is centered
    and the missing border is filled with 0.0 (when the padding is odd, the
    extra pixel goes to the bottom / right).

    Args:
        imagename: path of the image file, as accepted by Image.open().

    The program exits through sys.exit(1) if the list does not end up with
    the expected length.
    """
    if self.debug:
        cwd = os.getcwd()
        print("\nimage name is: ", cwd + "/" + imagename)
    target_width, target_height = (self.image_size_for_computations[0],
                                   self.image_size_for_computations[1])
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.  Fall back for very old Pillow/PIL releases.
    resample_filter = getattr(Image, "LANCZOS", None)
    if resample_filter is None:
        resample_filter = Image.ANTIALIAS
    im = Image.open(imagename)
    im = im.convert('L')        ## convert to gray level
    im.thumbnail(self.image_size_for_computations, resample_filter)
    width,height = im.size
    if self.debug: print("width: %d    height: %d" % (width, height))
    left_pad = (target_width - width) // 2
    top_pad = (target_height - height) // 2
    right_pad = target_width - width - left_pad
    pixel_list = []
    for row in range(target_height):
        source_row = row - top_pad
        if 0 <= source_row < height:
            pixel_list.extend([0.0] * left_pad)
            pixel_list.extend(im.getpixel((column, source_row)) for column in range(width))
            pixel_list.extend([0.0] * right_pad)
        else:
            # A row that lies entirely in the top or bottom border.
            pixel_list.extend([0.0] * target_width)
    if len(pixel_list) != target_width * target_height:
        print("Image resizing step is not correct. Aborting.")
        sys.exit(1)
    return pixel_list
def run_evaluation_of_PLS_regression_for_head_pose_estimation(self):
"""
The docstring associated with the method
vectorize_images_and_construct_X_and_Y_matrices_for_head_pose_estimation_with_PLS()
applies here also. The method here uses the Xtest and Yest matrices
constructed by the `vectorize' method named above from the images in the
`testing' directory for evaluating PLS regression for head pose estimation.
"""
if self.Xtest is None:
raise ValueError("There is no data in your Xtest and Ytest matrices. Aborting.")
Y_predicted = (self.Xtest - self.mean0X) * self.B + self. mean0Y
error = numpy.linalg.norm(Y_predicted - self.Ytest) / (self.Ytest.shape[0] * 3)
print("\nAverage error in head pose estimation: " + str(error) + " degrees")
print("\nThe error shown above was calculated by (1) taking the Frobenius norm of\n" +\
"the difference between the true Y matrix (for just the data in the `testing'\n" +\
"directory) and its value estimated by PLS regression; (2) Dividing the norm\n" +\
"by the number of rows in Y to calculate the error per observation; and,\n" +\
"finally, by (3) dividing the result by 3 to estimate the error per degree\n" +\
"of freedom.")
side_by_side_comparison = numpy.hstack((self.Ytest, Y_predicted))
if sys.version_info[0] == 3:
answer = input("\n\nWould you like to see a side-by-side comparison of the\n" +\
"the true values for the pose parameters and the values\n" +\
"as computed by PLS regression? Answer `y' for yes and\n" +\
"`n' for no: ")
else:
answer = raw_input("\n\nWould you like to see a side-by-side comparison of the\n" +\
"the true values for the pose parameters and the values\n" +\
"as computed by PLS regression? Answer `y' for yes and\n" +\
"`n' for no: ")
if answer == 'y':
print("\nDisplay of a side by side comparison. The first three columns show the\n" + \
"true values for the head pose parameters and the last three columns show\n" +\
"the values as estimated by PLS regression.\n")
print(side_by_side_comparison)
return side_by_side_comparison
def run_evaluation_of_PLS_regression_for_face_recognition(self):
"""
The docstring associated with the method
vectorize_images_and_construct_X_and_Y_matrices_for_face_recognition_with_PLS1()
applies here also. The method here uses the Xtest and Yest matrices
constructed by the `vectorize' method named above from the images in the
`testing/positives' and the /testing/negatives/ subdirectories for evaluating
PLS regression for face recognition.
"""
if self.Xtest is None:
raise ValueError("There is no data in your Xtest and Ytest matrices. Aborting.")
Y_predicted = (self.Xtest - self.mean0X) * self.B + self.mean0Y
if self.debug:
print("\nPrinting the predicted Y:")
print(Y_predicted)
y_vals = Y_predicted[:,0].flatten().tolist()[0]
if self.debug:
print("predicted values as a list: ", y_vals)
minval,maxval = min(y_vals),max(y_vals)
if self.debug:
print("min value: ", minval, " and max value: ", maxval)
delta = (maxval - minval) / 100.0
hist = [0] * 101
if self.debug:
print("\nThe bin structure of the histogram --- with empty bins")
print(hist)
for val in y_vals:
bin_index = int((val - minval) / delta)
hist[bin_index] += 1
if self.debug:
print("\nThe populated histogram:")
print(hist)
total_count = functools.reduce(lambda x,y: x+y, hist)
coarseness = 8
probs = [functools.reduce(lambda x,y: x+y, \
hist[coarseness*i:coarseness*i+coarseness])/float(total_count)
for i in range(int(len(hist)/coarseness))]
prob_times_graylevel = [coarseness * i * probs[i] for i in range(len(probs))]
mu_T = functools.reduce(lambda x,y: x+y, prob_times_graylevel) # mean for the image
prob_times_graysquared = [(coarseness * i - mu_T)**2 * probs[i] for i in range(len(probs))]
sigma_squared_T = functools.reduce(lambda x,y: x+y, prob_times_graysquared)
m0 = [functools.reduce(lambda x,y: x+y, probs[:k]) for k in range(1,len(probs)+1)]
m1 = [functools.reduce(lambda x,y: x+y, prob_times_graylevel[:k]) for k in range(1,len(probs)+1)]
sigmaB_squared = [None] * len(m0) # for between-class variance as a func of threshold
sigmaW_squared = [None] * len(m0) # for within-class variance as a func of threshold
variance_ratio = [None] * len(m0) # for the ratio of the two variances
for k in range(len(m0)):
if 0 < m0[k] < 1.0:
sigmaB_squared[k] = (mu_T * m0[k] - m1[k])**2 / (m0[k] * (1.0 - m0[k]))
sigmaW_squared[k] = sigma_squared_T - sigmaB_squared[k]
variance_ratio[k] = sigmaB_squared[k] / sigmaW_squared[k]
otsu_threshold_testdata = variance_ratio.index(max(variance_ratio)) * coarseness
otsu_threshold_testdata = otsu_threshold_testdata * delta + minval
if self.debug: print("\nbest threshold for test data: ", otsu_threshold_testdata)
self.decision_threshold_testdata = otsu_threshold_testdata
if self.debug:
print("\n\nThe testdata decision threshold for binary recognition: ", \
self.decision_threshold_testdata)
Y_predicted_as_list = Y_predicted[:,0].flatten().tolist()[0]
Y_predicted_thresholded = \
list(map(lambda x: 1 if x > self.decision_threshold_testdata else -1, Y_predicted_as_list))
if self.debug:
print(Y_predicted_thresholded)
Y_predicted_thresholded = numpy.matrix(Y_predicted_thresholded).T
Y_comparison = numpy.hstack((self.Ytest, Y_predicted_thresholded))
if self.debug:
print('''\nShowing computed and true labels side by side. Left column is ''' + \
'''computed labels and the right column the true labels:''')
print(Y_comparison)
confusion_matrix = numpy.zeros(shape=(2,2))
confusion_matrix = numpy.asmatrix(confusion_matrix)
testing_pos_images_identified_as_neg = []
testing_neg_images_identified_as_pos = []
total_num_testing_pos = len(self.testing_positives_filenames)
i = 0
for row in Y_comparison:
if (row[0,0] == 1) and (row[0,1] == 1):
confusion_matrix[0,0] += 1
elif (row[0,0] == 1) and (row[0,1] == -1):
confusion_matrix[1,0] += 1
testing_pos_images_identified_as_neg.append(self.testing_positives_filenames[i])
elif (row[0,0] == -1) and (row[0,1] == -1):
confusion_matrix[1,1] += 1
elif (row[0,0] == -1) and (row[0,1] == 1):
confusion_matrix[0,1] += 1
testing_neg_images_identified_as_pos.append(self.testing_negatives_filenames[i - \
total_num_testing_pos])
i += 1
print("\nDisplaying the confusion matrix: \n\n")
print(" true pos true neg ")
print(" ------------------------" )
print(" " )
print("computed pos: " + str(confusion_matrix[0,0]) + " " + str(confusion_matrix[0,1]))
print(" ")
print("computed neg: " + str(confusion_matrix[1,0]) + " " + str(confusion_matrix[1,1]))
print("\n")
true_positive_detection_rate = confusion_matrix[0,0] / confusion_matrix[:,0].sum()
false_positive_detection_rate = confusion_matrix[0,1] / confusion_matrix[:,1].sum()
print("\nEstimated probability for true positives: " + str(true_positive_detection_rate))
print("\nEstimated probability for false positives: " + str(false_positive_detection_rate))
if sys.version_info[0] == 3:
answer = input("\nWould you like to see the names of the image files in the\n" +\
"`testing' directory that were misclassified? Answer `y' for\n" +\
"yes and `n' for no: ")
else:
answer = raw_input("\nWould you like to see the names of the image files in the\n" +\
"`testing' directory that were misclassified? Answer `y' for\n" +\
"yes and `n' for no: ")
if answer == 'n':
sys.exit(0)
else:
alist = sorted(testing_pos_images_identified_as_neg, key = lambda x: int(x.partition('.')[0]))
blist = sorted(testing_neg_images_identified_as_pos, key = lambda x: int(x.partition('.')[0]))
print("\npositive images in the `testing' directory misclassified as negatives: " + str(alist))
print("\nnegative images in the `testing' directory misclassified as positives: " + str(blist))
#----------------------- End of PartialLeastSquares Class Definition --------------------------
#---------------------------------- Test code follows ----------------------------------
if __name__ == '__main__':
    # Demo run: load X and Y from the two CSV files expected in the current
    # directory and compute the regression matrix with debug output on.
    XMatrix_file, YMatrix_file = "X_data.csv", "Y_data.csv"
    demo_options = {
        'XMatrix_file': XMatrix_file,
        'YMatrix_file': YMatrix_file,
        'epsilon': 0.001,
        'debug': 1,
    }
    pls = PartialLeastSquares(**demo_options)
    pls.get_XMatrix_from_csv()
    pls.get_YMatrix_from_csv()
    pls.PLS()