__version__   = '2.0.8'
__author__    = "Avinash Kak (kak@purdue.edu)"
__date__      = '2022-April-16'   
__url__       = 'https://engineering.purdue.edu/kak/distRPG/RegionProposalGenerator-2.0.8.html'
__copyright__ = "(C) 2022 Avinash Kak. Python Software Foundation."


import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as tvt
import torchvision.transforms.functional as F
import torchvision.utils as tutils
import torch.optim as optim
import numpy as np
import time

from PIL import Image
from PIL import ImageDraw
from PIL import ImageTk
from PIL import ImageFont
import sys,os,os.path,glob,signal
import re
import functools
import math
import random
import copy
import gzip
import pickle
if sys.version_info[0] == 3:
    import tkinter as Tkinter
    from tkinter.constants import *
else:
    import Tkinter    
    from Tkconstants import *

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle 
import logging                        ## for suppressing matplotlib warning messages

#____________________________Top level utility functions  ___________________________________
def _gaussian(sigma):
    '''
    A 1-D Gaussian smoothing operator is generated by assuming that the pixel
    sampling interval is a unit distance.  We truncate the operator at 3 times the
    value of sigma.  So when sigma is set to 1, you get a 7-element operator; when
    sigma is set to 2, you get a 13-element operator; and so on.
    '''
    win_half_width = int(3 * sigma)
    xvals = range(-win_half_width, win_half_width+1)
    gauss = lambda x: math.exp(-((x**2)/(2*float(sigma**2))))
    operator = [gauss(x) for x in xvals]
    summed = functools.reduce( lambda x, y: x+y, operator )
    operator = [x/summed for x in operator]
    return operator
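
## A minimal usage sketch for the operator above (illustrative only, not invoked by
## this module): with sigma set to 1, the operator has 2*int(3*1)+1 = 7 taps and is
## normalized to unit sum.
##
##     op = _gaussian(1.0)
##     assert len(op) == 7
##     assert abs(sum(op) - 1.0) < 1e-6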

def _convolution_1D(input_array, operator):
    '''
    Since the Gaussian kernel is separable in its x and y dependencies, 2D convolution
    of an image with the kernel can be decomposed into a sequence of 1D convolutions
    first with the rows of the image and then another sequence of 1D convolutions
    with the columns of the output from the first.  This function carries out a 1D
    convolution.
    '''
    height,width = input_array.shape
    result_array = np.zeros((height, width), dtype="float")
    w = len(operator)                   # should be an odd number
    op_half_width = int((w-1)/2)
    for i in range(height):
        for j in range(width):
            accumulated = 0.0
            for k in range(-op_half_width,op_half_width+1):
                if (j+k) >= 0 and (j+k) < width:
                    accumulated += input_array[i,(j+k)] * operator[k + op_half_width]
            result_array[(i,j)] = accumulated
    return result_array

def _convolution_2D(input_array, operator):
    '''
    Since the Gaussian kernel is separable in its x and y dependencies, 2D convolution
    of an image with the kernel can be decomposed into a sequence of 1D convolutions
    first with the rows of the image and then another sequence of 1D convolutions
    with the columns of the output from the first.  This function orchestrates the
    invocation of 1D convolutions.
    '''
    result_conv_along_x = _convolution_1D(input_array, operator)
    result_conv_along_y = _convolution_1D(result_conv_along_x.transpose(), operator)
    final_result = result_conv_along_y.transpose()
    return final_result
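
## A minimal sketch of separable Gaussian smoothing with the two functions above
## (illustrative only): the 1D operator is applied first along the rows and then
## along the columns, which is exactly what _convolution_2D() orchestrates.
##
##     toy_image = np.tile(np.arange(16, dtype="float"), (16, 1))      # a 16x16 horizontal ramp
##     smoothed  = _convolution_2D(toy_image, _gaussian(1.0))
##     assert smoothed.shape == toy_image.shape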

def _line_intersection(line1, line2):                  ### needed for interactive extraction of
                                                       ### an image portion by using mouse clicks
    '''                                                                                                  
    Each line is defined by a 4-tuple, with its first two elements defining the
    coordinates of the first endpoint and the last two elements defining the
    coordinates of the second endpoint.  This function defines a predicate that
    tells us whether or not two given line segments intersect.
    '''
    line1_endpoint1_x = line1[0]
    line1_endpoint1_y = line1[1]
    line1_endpoint2_x = line1[2]
    line1_endpoint2_y = line1[3]
    line2_endpoint1_x = line2[0] + 0.5
    line2_endpoint1_y = line2[1] + 0.5
    line2_endpoint2_x = line2[2] + 0.5
    line2_endpoint2_y = line2[3] + 0.5
    if max([line1_endpoint1_x,line1_endpoint2_x]) <= min([line2_endpoint1_x,line2_endpoint2_x]):
        return 0
    elif max([line1_endpoint1_y,line1_endpoint2_y]) <= min([line2_endpoint1_y,line2_endpoint2_y]):
        return 0
    elif max([line2_endpoint1_x,line2_endpoint2_x]) <= min([line1_endpoint1_x,line1_endpoint2_x]):
        return 0
    elif max([line2_endpoint1_y,line2_endpoint2_y]) <= min([line1_endpoint1_y,line1_endpoint2_y]):
        return 0
    # Use homogeneous representation of lines:      
    hom_rep_line1 = _cross_product((line1_endpoint1_x,line1_endpoint1_y,1),(line1_endpoint2_x,line1_endpoint2_y,1))
    hom_rep_line2 = _cross_product((line2_endpoint1_x,line2_endpoint1_y,1),(line2_endpoint2_x,line2_endpoint2_y,1))
    hom_intersection = _cross_product(hom_rep_line1, hom_rep_line2)
    if hom_intersection[2] == 0:
        return 0
    intersection_x = hom_intersection[0] / (hom_intersection[2] * 1.0)
    intersection_y = hom_intersection[1] / (hom_intersection[2] * 1.0)
    if intersection_x >= line1_endpoint1_x and intersection_x <= line1_endpoint2_x and \
                                      intersection_y >= line1_endpoint1_y and intersection_y <= line1_endpoint2_y:
        return 1
    return 0

def _cross_product(vector1, vector2):             ### needed by the above line intersection tester
    '''
    Returns the vector cross product of two triples
    '''
    (a1,b1,c1) = vector1
    (a2,b2,c2) = vector2
    p1 = b1*c2 - b2*c1
    p2 = a2*c1 - a1*c2
    p3 = a1*b2 - a2*b1
    return (p1,p2,p3)
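
## A minimal sketch of how the two helpers above cooperate (illustrative only): each
## segment is lifted to a homogeneous line by taking the cross product of its two
## endpoints, and the cross product of the two lines yields their intersection point.
##
##     assert _cross_product((0,0,1), (10,10,1)) == (-10, 10, 0)      # the line y = x through the origin
##     assert _line_intersection((0,0,10,10), (0,10,10,0)) == 1       # the two diagonals of a square cross
##     assert _line_intersection((0,0,1,1), (5,5,6,6)) == 0           # far-apart segments do not cross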

def ctrl_c_handler( signum, frame ):             
    print("Killed by Ctrl C")                       
    os.kill( os.getpid(), signal.SIGKILL )       
signal.signal( signal.SIGINT, ctrl_c_handler )   


#______________________________  RegionProposalGenerator Class Definition  ________________________________

class RegionProposalGenerator(object):

    # Class variables: 
    region_mark_coords = {}
    drawEnable = startX = startY = 0
    canvas = None

    def __init__(self, *args, **kwargs ):
        if args:
            raise ValueError(  
                   '''RegionProposalGenerator constructor can only be called with keyword arguments for 
                      the following keywords: dataroot_train, dataroot_test, image_size, data_image, 
                      binary_or_gray_or_color, kay, image_size_reduction_factor, max_iterations, sigma, 
                      image_normalization_required, momentum, min_size_for_graph_based_blobs, 
                      max_num_blobs_expected, path_saved_RPN_model, path_saved_single_instance_detector_model,
                      path_saved_yolo_model, learning_rate, epochs, batch_size, classes, debug_train, 
                      debug_test, use_gpu, color_homogeneity_thresh, gray_var_thresh, texture_homogeneity_thresh, 
                      yolo_interval, and debug''')
        dataroot_train = dataroot_test = data_image = sigma = image_size_reduction_factor = kay = momentum = None
        learning_rate = epochs = min_size_for_graph_based_blobs = max_num_blobs_expected = path_saved_RPN_model = None
        path_saved_single_instance_detector_model = batch_size = use_gpu = binary_or_gray_or_color =  max_iterations = None
        image_normalization_required = classes = debug_train = color_homogeneity_thresh = gray_var_thresh = None
        image_size = texture_homogeneity_thresh = debug = debug_test = path_saved_yolo_model = yolo_interval = None

        if 'dataroot_train' in kwargs                :   dataroot_train = kwargs.pop('dataroot_train')
        if 'dataroot_test' in kwargs                 :   dataroot_test = kwargs.pop('dataroot_test')
        if 'image_size' in kwargs                    :   image_size = kwargs.pop('image_size')
        if 'path_saved_RPN_model' in kwargs          :   path_saved_RPN_model = kwargs.pop('path_saved_RPN_model')
        if 'path_saved_single_instance_detector_model' in kwargs   :   
                 path_saved_single_instance_detector_model = kwargs.pop('path_saved_single_instance_detector_model')
        if 'path_saved_yolo_model' in kwargs         :   path_saved_yolo_model = kwargs.pop('path_saved_yolo_model')
        if 'yolo_interval' in kwargs                 :   yolo_interval = kwargs.pop('yolo_interval')

        if 'momentum' in kwargs                      :   momentum = kwargs.pop('momentum')
        if 'learning_rate' in kwargs                 :   learning_rate = kwargs.pop('learning_rate')
        if 'epochs' in kwargs                        :   epochs = kwargs.pop('epochs')
        if 'batch_size' in kwargs                    :   batch_size = kwargs.pop('batch_size')
        if 'classes' in kwargs                       :   classes = kwargs.pop('classes')
        if 'debug_train' in kwargs                   :   debug_train = kwargs.pop('debug_train')
        if 'debug_test' in kwargs                    :   debug_test = kwargs.pop('debug_test')
        if 'use_gpu' in kwargs                       :   use_gpu = kwargs.pop('use_gpu')
        if 'data_image' in kwargs                    :   data_image = kwargs.pop('data_image')
        if 'sigma' in kwargs                         :   sigma = kwargs.pop('sigma')
        if 'kay' in kwargs                           :   kay = kwargs.pop('kay')
        if 'image_size_reduction_factor' in kwargs   :   image_size_reduction_factor = kwargs.pop('image_size_reduction_factor')
        if 'binary_or_gray_or_color' in kwargs       :   binary_or_gray_or_color = kwargs.pop('binary_or_gray_or_color')
        if 'image_normalization_required' in kwargs  :   image_normalization_required = kwargs.pop('image_normalization_required')
        if 'max_iterations' in kwargs                :   max_iterations=kwargs.pop('max_iterations')
        if 'color_homogeneity_thresh' in kwargs      :   color_homogeneity_thresh = kwargs.pop('color_homogeneity_thresh')
        if 'gray_var_thresh' in kwargs               :    gray_var_thresh = kwargs.pop('gray_var_thresh')
        if 'texture_homogeneity_thresh' in kwargs    :   texture_homogeneity_thresh = kwargs.pop('texture_homogeneity_thresh')
        if 'min_size_for_graph_based_blobs' in kwargs :  min_size_for_graph_based_blobs = kwargs.pop('min_size_for_graph_based_blobs')
        if 'max_num_blobs_expected' in kwargs        :  max_num_blobs_expected = kwargs.pop('max_num_blobs_expected')
        if 'debug' in kwargs                         :   debug = kwargs.pop('debug') 
#        if len(kwargs) != 0: raise ValueError('''You have provided unrecognizable keyword args''')
        if dataroot_train:
            self.dataroot_train = dataroot_train
        if dataroot_test:
            self.dataroot_test = dataroot_test
        if image_size:   
            self.image_size = image_size      
        if  path_saved_RPN_model:
            self.path_saved_RPN_model = path_saved_RPN_model
        if  path_saved_single_instance_detector_model:
            self.path_saved_single_instance_detector_model = path_saved_single_instance_detector_model
        if  path_saved_yolo_model:
            self.path_saved_yolo_model = path_saved_yolo_model
        if yolo_interval:
            self.yolo_interval = yolo_interval
        if classes:
            self.class_labels = classes
#            self.class_labels_to_index = {label : classes.index(label) for label in classes}
        if learning_rate:
            self.learning_rate = learning_rate
        else:
            self.learning_rate = 1e-6
        if momentum:
            self.momentum = momentum
        if epochs:
            self.epochs = epochs
        if batch_size:
            self.batch_size = batch_size
        if use_gpu is not None:
            self.use_gpu = use_gpu
            if use_gpu is True:
                if torch.cuda.is_available():
                    self.device = torch.device("cuda:0")
                else:
                    raise Exception("You requested GPU support, but there's no GPU on this machine")
            else:
                self.device = torch.device("cpu")
        if debug_train:                             
            self.debug_train = debug_train
        else:
            self.debug_train = 0
        if debug_test:                             
            self.debug_test = debug_test
        else:
            self.debug_test = 0
        if data_image: 
            self.data_im_name = data_image
            self.data_im =  Image.open(data_image)
            self.original_im = Image.open(data_image)
        if binary_or_gray_or_color:
            self.binary_or_gray_or_color = binary_or_gray_or_color
        if sigma is not None: 
            self.sigma = sigma
        else:
            self.sigma = 0
        if kay is not None:   self.kay = kay
        if image_size_reduction_factor is not None:
            self.image_size_reduction_factor = image_size_reduction_factor
        else:
            self.image_size_reduction_factor = 1
        if image_normalization_required is not None:
            self.image_normalization_required = image_normalization_required
        else:
            self.image_normalization_required = False
        if max_iterations is not None:
            self.max_iterations = max_iterations
        else:
            self.max_iterations = 40
        if color_homogeneity_thresh is not None:
            self.color_homogeneity_thresh = color_homogeneity_thresh
        if gray_var_thresh is not None:
            self.gray_var_thresh = gray_var_thresh
        if texture_homogeneity_thresh is not None:
            self.texture_homogeneity_thresh = texture_homogeneity_thresh
        if min_size_for_graph_based_blobs is not None:
            self.min_size_for_graph_based_blobs = min_size_for_graph_based_blobs
        if max_num_blobs_expected is not None:
            self.max_num_blobs_expected = max_num_blobs_expected
        self.image_portion_delineation_coords = []
        if debug:                             
            self.debug = debug
        else:
            self.debug = 0
        self.iterations_used = 0
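
    ## A minimal constructor-usage sketch (illustrative only; the two dataroot
    ## pathnames are placeholders and not part of this module):
    ##
    ##     rpg = RegionProposalGenerator(
    ##                dataroot_train = "./data/Purdue_Dr_Eval_dataset_train/",
    ##                dataroot_test  = "./data/Purdue_Dr_Eval_dataset_test/",
    ##                image_size     = [128,128],
    ##                classes        = ('Dr_Eval','house','watertower'),
    ##                learning_rate  = 1e-4,
    ##                momentum       = 0.9,
    ##                epochs         = 2,
    ##                batch_size     = 4,
    ##                use_gpu        = True,
    ##           )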


    ###%%%
    #######################################################################################################
    ########################  Start Definition of Inner Class PurdueDrEvalDataset  ########################
    class PurdueDrEvalDataset(torch.utils.data.Dataset):        
        """
        This is the dataset to use if you are experimenting with single-instance object
        detection.  The dataset contains three kinds of objects in its images:
        Dr. Eval, and two "objects" in his neighborhood: a house and a watertower.
        Each 128x128 image in the dataset contains one of these objects after it has
        been randomly scaled and colored, with substantial structured noise added in
        addition to 20% Gaussian noise.  Examples of these images are shown in the
        Week 8 lecture material of Purdue's Deep Learning class.

        In order to understand the implementation of the dataloader for the Dr Eval
        dataset for single-instance-based object detection, note that the top-level
        directory for the dataset is organized as follows:

                                          dataroot
                                             |
                                             |
               ______________________________________________________________________
              |           |           |              |               |               | 
              |           |           |              |               |               |
          Dr_Eval       house      watertower    mask_Dr_Eval    mask_house     mask_watertower
              |           |           |              |               |               |
              |           |           |              |               |               |
            images      images      images      binary images    binary images   binary images
        

        As you can see, the three main image directories are Dr_Eval, house, and
        watertower. For each image in each of these directories, the mask for the
        object of interest is supplied in the corresponding directory whose name
        carries the prefix 'mask'.

        For example, if you have an image named 29.jpg in the Dr_Eval directory, you
        will have an image of the same name in the mask_Dr_Eval directory that will
        just be the mask for the Dr_Eval object in the former image.

        As you can see, the dataset does not directly provide the bounding boxes for
        object localization.  So the implementation of the __getitem__() function in
        the dataloader must include code that calculates the bounding boxes from the
        masks.  This you can see in the definition of the dataloader shown below.

        Since this is a ``non-standard'' organization of the data, the dataloader
        must also provide for the indexing of the images so that they can be subject
        to a fresh randomization that is carried out by PyTorch's
        torch.utils.data.DataLoader class for each epoch of training.  The
        index_dataset() function is provided for that purpose.

        After the dataset is downloaded for the first time, the index_dataset()
        function stores away the indexing information as a PyTorch ``.pt'' file so
        that it can be loaded almost instantaneously on subsequent attempts.

        One final note about the dataset: Under the hood, the dataset consists of the
        pathnames to the image files --- and NOT the images themselves.  It is the
        job of the multi-threaded ``workers'' provided by torch.utils.data.DataLoader
        to actually load in the images from those pathnames.
        """
        def __init__(self, rpg, train_or_test, dataroot_train=None, dataroot_test=None, transform=None):
            super(RegionProposalGenerator.PurdueDrEvalDataset, self).__init__()
            self.rpg = rpg
            self.train_or_test = train_or_test
            self.dataroot_train = dataroot_train
            self.dataroot_test  = dataroot_test
            self.database_train = {}
            self.database_test = {}
            self.dataset_size_train = None
            self.dataset_size_test = None
            if train_or_test == 'train':
                self.training_dataset = self.index_dataset()
            if train_or_test == 'test':
                self.testing_dataset = self.index_dataset()
            self.class_labels = None

        def index_dataset(self):
            if self.train_or_test == 'train':
                dataroot = self.dataroot_train
            elif self.train_or_test == 'test': 
                dataroot = self.dataroot_test
            entry_index = 0
            if self.train_or_test == 'train' and dataroot == self.dataroot_train:
                if '10000' in self.dataroot_train and os.path.exists("torch_saved_Purdue_Dr_Eval_dataset_train_10000.pt"):
                    print("\nLoading training data from torch saved file")
                    self.database_train = torch.load("torch_saved_Purdue_Dr_Eval_dataset_train_10000.pt")
                    self.dataset_size_train =  len(self.database_train)
                else: 
                    print("""\n\n\nLooks like this is the first time you will be loading in\n"""
                          """the dataset for this script. First time loading could take\n"""
                          """up to 3 minutes.  Any subsequent attempts will only take\n"""
                          """a few seconds.\n\n\n""")
                    if os.path.exists(dataroot):      
                        files = glob.glob(dataroot + "/*")  
                        files = [os.path.split(file)[1] for file in files]
                        class_names = sorted([file for file in files if not file.startswith("mask")])
                        if self.train_or_test == 'train':
                            self.class_labels = class_names
                        image_label_dict = {class_names[i] : i for i in range(len(class_names))}
                        for image_class in class_names:
                            image_names = glob.glob(dataroot + image_class + "/*")
                            for image_name in image_names:
                                image_real_name = os.path.split(image_name)[-1]
                                mask_name = dataroot + "mask_" + image_class + "/" + image_real_name
                                if self.train_or_test == 'train':
                                    self.database_train[entry_index] = [image_label_dict[image_class], image_name, mask_name]
                                elif self.train_or_test == 'test':
                                    self.database_test[entry_index] = [image_label_dict[image_class], image_name, mask_name]
                                entry_index += 1
                        if self.train_or_test == 'train':           
                            all_training_images = list(self.database_train.values())
                            random.shuffle(all_training_images)
                            self.database_train = {i : all_training_images[i] for i in range(len(all_training_images))}
                            torch.save(self.database_train, "torch_saved_Purdue_Dr_Eval_dataset_train_10000.pt")
                            self.dataset_size_train = entry_index
                        else:
                            all_testing_images = list(self.database_test.values())
                            random.shuffle(all_testing_images)
                            self.database_test = {i : all_testing_images[i] for i in range(len(all_testing_images))}
                            self.dataset_size_test = entry_index
            else:
                if os.path.exists(dataroot):      
                    files = glob.glob(dataroot + "/*")  
                    files = [os.path.split(file)[1] for file in files]
                    class_names = sorted([file for file in files if not file.startswith("mask")])
                    image_label_dict = {class_names[i] : i for i in range(len(class_names))}
                    for image_class in class_names:
                        image_names = glob.glob(dataroot + image_class + "/*")
                        for image_name in image_names:
                            image_real_name = os.path.split(image_name)[-1]
                            mask_name = dataroot + "mask_" + image_class + "/" + image_real_name
                            if self.train_or_test == 'train':
                                self.database_train[entry_index] = [image_label_dict[image_class], image_name, mask_name]
                            elif self.train_or_test == 'test':
                                self.database_test[entry_index] = [image_label_dict[image_class], image_name, mask_name]
                            entry_index += 1
                    if self.train_or_test == 'train':
                        self.dataset_size_train = entry_index
                    if self.train_or_test == 'test':
                        self.dataset_size_test = entry_index
                    if self.train_or_test == 'train':           
                        all_training_images = list(self.database_train.values())
                        random.shuffle(all_training_images)
                        self.database_train = {i : all_training_images[i] for i in range(len(all_training_images))}
                        torch.save(self.database_train, "torch_saved_Purdue_Dr_Eval_dataset_train_10000.pt")
                        self.dataset_size_train = entry_index
                    else:
                        all_testing_images = list(self.database_test.values())
                        random.shuffle(all_testing_images)
                        self.database_test = {i : all_testing_images[i] for i in range(len(all_testing_images))}

        def __len__(self):
            if self.train_or_test == 'train':
                return self.dataset_size_train
            elif self.train_or_test == 'test':
                return self.dataset_size_test

        def __getitem__(self, idx):
            if self.train_or_test == 'train':       
                image_label, image_name, mask_name = self.database_train[idx]
            elif self.train_or_test == 'test':
                image_label, image_name, mask_name = self.database_test[idx]
            im = Image.open(image_name)
            mask = Image.open(mask_name)
            mask_data = mask.getdata()
            non_zero_pixels = []                                                                                   
            for k,pixel_val in enumerate(mask_data): 
                x = k % self.rpg.image_size[1]
                y = k // self.rpg.image_size[0]
                if pixel_val != 0:     
                    non_zero_pixels.append((x,y)) 
            ## x-coord increases to the right and y-coord increases going downward;  origin at upper-left
            x_min = min([pixel[0] for pixel in non_zero_pixels])
            x_max = max([pixel[0] for pixel in non_zero_pixels])
            y_min = min([pixel[1] for pixel in non_zero_pixels])
            y_max = max([pixel[1] for pixel in non_zero_pixels])
            bbox = [x_min,y_min,x_max,y_max]
            im_tensor = tvt.ToTensor()(im)
            mask_tensor = tvt.ToTensor()(mask)
            bbox_tensor = torch.tensor(bbox, dtype=torch.float)
            return im_tensor,mask_tensor,bbox_tensor,image_label
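
        ## An equivalent, vectorized way of deriving the bounding box from a binary
        ## mask (illustrative only; the pixel loop above is what this method uses):
        ##
        ##     mask_arr = np.array(mask)                              # (H, W) array, non-zero where the object is
        ##     ys, xs   = np.nonzero(mask_arr)
        ##     bbox     = [xs.min(), ys.min(), xs.max(), ys.max()]    # [x_min, y_min, x_max, y_max]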


    ###%%%
    #######################################################################################################
    ######################  Start Definition of Inner Class PurdueDrEvalMultiDataset  #####################
    class PurdueDrEvalMultiDataset(torch.utils.data.Dataset):        
        """
        This is the dataset to use if you are experimenting with multi-instance object
        detection.  As with the previous dataset, it contains three kinds of objects
        in its images: Dr. Eval, and two "objects" in his neighborhood: a house and a
        watertower.  Each 128x128 image in the dataset contains up to 5 instances of
        these objects. The instances are randomly scaled and colored, and the exact
        number of instances in each image is also chosen randomly. Subsequently, background
        clutter is added to the images --- these are again randomly chosen
        shapes. The number of clutter objects is also chosen randomly but cannot
        exceed 10.  In addition to the structured clutter, I add 20% Gaussian noise
        to each image.  Examples of these images are shown in Week 8 lecture material
        in Purdue's Deep Learning class.

        On account of the much richer structure of the image annotations, this
        dataset is organized very differently from the previous one:


                                          dataroot
                                             |
                                             |
                                 ___________________________
                                |                           |
                                |                           |
                           annotations.p                  images


        Since each image is allowed to contain instances of the three different types
        of "meaningful" objects, it is not possible to organize the images on the
        basis of what they contain.

        As for the annotations, the annotation for each 128x128 image is a dictionary
        that contains information related to all the object instances in the image. Here
        is an example of the annotation for an image that has three instances in it:

            annotation:  {'filename': None, 
                          'num_objects': 3, 
                          'bboxes': {0: (67, 72, 83, 118), 
                                     1: (65, 2, 93, 26), 
                                     2: (16, 68, 53, 122), 
                                     3: None, 
                                     4: None}, 
                          'bbox_labels': {0: 'Dr_Eval', 
                                          1: 'house', 
                                          2: 'watertower', 
                                          3: None, 
                                          4: None}, 
                          'seg_masks': {0: <PIL.Image.Image image mode=1 size=128x128 at 0x7F5A06C838E0>, 
                                        1: <PIL.Image.Image image mode=1 size=128x128 at 0x7F5A06C837F0>, 
                                        2: <PIL.Image.Image image mode=1 size=128x128 at 0x7F5A06C838B0>, 
                                        3: None, 
                                        4: None}
                         }

        The annotations for the individual images are stored in a global Python
        dictionary called 'all_annotations' whose keys consist of the pathnames to
        the individual image files and the values the annotations dict for the
        corresponding images.  The filename shown above in the directory diagram,
        'annotations.p' is what you get by calling 'pickle.dump()' on the
        'all_annotations' dictionary.
        """

        def __init__(self, rpg, train_or_test, dataroot_train=None, dataroot_test=None, transform=None):
            super(RegionProposalGenerator.PurdueDrEvalMultiDataset, self).__init__()
            self.rpg = rpg
            self.train_or_test = train_or_test
            self.dataroot_train = dataroot_train
            self.dataroot_test  = dataroot_test
            self.database_train = {}
            self.database_test = {}
            self.dataset_size_train = None
            self.dataset_size_test = None
            if train_or_test == 'train':
                self.training_dataset = self.index_dataset()
            if train_or_test == 'test':
                self.testing_dataset = self.index_dataset()
            self.class_labels = None

        def index_dataset(self):
            if self.train_or_test == 'train':
                dataroot = self.dataroot_train
            elif self.train_or_test == 'test': 
                dataroot = self.dataroot_test
            if self.train_or_test == 'train' and dataroot == self.dataroot_train:
                if '10000' in self.dataroot_train and os.path.exists("torch_saved_Purdue_Dr_Eval_multi_dataset_train_10000.pt"):
                    print("\nLoading training data from torch saved file")
                    self.database_train = torch.load("torch_saved_Purdue_Dr_Eval_multi_dataset_train_10000.pt")
                    self.dataset_size_train =  len(self.database_train)
                else: 
                    print("""\n\n\nLooks like this is the first time you will be loading in\n"""
                          """the dataset for this script. First time loading could take\n"""
                          """up to 3 minutes.  Any subsequent attempts will only take\n"""
                          """a few seconds.\n\n\n""")
                    if os.path.exists(dataroot):      
                        all_annotations = pickle.load( open( dataroot + '/annotations.p', 'rb') )
                        all_image_paths = sorted(glob.glob(dataroot + "images/*"))
                        all_image_names = [os.path.split(filename)[1] for filename in all_image_paths]
                        for idx,image_name in enumerate(all_image_names):        
                            annotation = all_annotations[image_name]
                            image_path = dataroot + "images/" + image_name
                            self.database_train[idx] = [image_path, annotation]
                        all_training_images = list(self.database_train.values())
                        random.shuffle(all_training_images)
                        self.database_train = {i : all_training_images[i] for i in range(len(all_training_images))}
                        torch.save(self.database_train, "torch_saved_Purdue_Dr_Eval_multi_dataset_train_10000.pt")
                        self.dataset_size_train = len(all_training_images)
            elif self.train_or_test == 'test' and dataroot == self.dataroot_test:
                if os.path.exists(dataroot):      
                    all_annotations = pickle.load( open( dataroot + '/annotations.p', 'rb') )
                    all_image_paths = sorted(glob.glob(dataroot + "images/*"))
                    all_image_names = [os.path.split(filename)[1] for filename in all_image_paths]                    
                    for idx,image_name in enumerate(all_image_names):        
                        annotation = all_annotations[image_name]
                        image_path = dataroot + "images/" + image_name
                        self.database_test[idx] =  [image_path, annotation]
                    all_testing_images = list(self.database_test.values())
                    random.shuffle(all_testing_images)
                    self.database_test = {i : all_testing_images[i] for i in range(len(all_testing_images))}
                    self.dataset_size_test = len(all_testing_images)

        def __len__(self):
            if self.train_or_test == 'train':
                return self.dataset_size_train
            elif self.train_or_test == 'test':
                return self.dataset_size_test

        def __getitem__(self, idx):
            if self.train_or_test == 'train':       
                image_path, annotation = self.database_train[idx]
            elif self.train_or_test == 'test':
                image_path, annotation = self.database_test[idx]
            im = Image.open(image_path)
            im_tensor = tvt.ToTensor()(im)
            seg_mask_tensor = torch.zeros(5,128,128)
            bbox_tensor     = torch.zeros(5,4, dtype=torch.uint8)
            bbox_label_tensor    = torch.zeros(5, dtype=torch.uint8) + 13     ## label value 13 marks an unused object slot
            num_objects_in_image = annotation['num_objects']
            obj_class_labels = sorted(self.rpg.class_labels)
            self.obj_class_label_dict = {obj_class_labels[i] : i for i in range(len(obj_class_labels))}
            for i in range(num_objects_in_image):
                seg_mask = annotation['seg_masks'][i]
                bbox     = annotation['bboxes'][i]
                label    = annotation['bbox_labels'][i]
                bbox_label_tensor[i] = self.obj_class_label_dict[label]
                seg_mask_arr = np.array(seg_mask)
                seg_mask_tensor[i] = torch.from_numpy(seg_mask_arr)
                bbox_tensor[i] = torch.LongTensor(bbox)      
            return im_tensor, seg_mask_tensor, bbox_tensor, bbox_label_tensor, num_objects_in_image
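
        ## A minimal usage sketch for this dataset (illustrative only; 'rpg' stands
        ## for a configured RegionProposalGenerator instance):
        ##
        ##     dataset = RegionProposalGenerator.PurdueDrEvalMultiDataset(rpg, 'train', dataroot_train=rpg.dataroot_train)
        ##     loader  = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)
        ##     im, masks, bboxes, labels, counts = next(iter(loader))
        ##     ## shapes:  im [4,3,128,128]   masks [4,5,128,128]   bboxes [4,5,4]   labels [4,5]   counts [4]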


    ###%%%
    #######################################################################################################
    ########################  Start Definition of Inner Class SingleInstanceDetector  #####################

    class SingleInstanceDetector(nn.Module):             
        """
        This class demonstrates single-instance object detection on the images in the
        PurdueDrEvalDataset dataset.  Although these images are complex, in the sense
        that each image contains multiple clutter objects in addition to random
        noise, we nonetheless know that each image contains only a single meaningful
        object instance.  The LOADnet network used for detection is an adaptation of
        the LOADnet2 network from DLStudio to the case of 128x128 sized input images.
        The LOADnet network uses the SkipBlock as a building-block element for
        dealing with the problem of vanishing gradients.
        """
        def __init__(self, rpg):
            super(RegionProposalGenerator.SingleInstanceDetector, self).__init__()
            self.rpg = rpg
            self.dataserver_train = None
            self.dataserver_test =  None
            self.train_dataloader = None
            self.test_dataloader = None

        def show_sample_images_from_dataset(self, rpg):
            data = next(iter(self.train_dataloader))    
            real_batch = data[0]
            first_im = real_batch[0]
            self.rpg.display_tensor_as_image(torchvision.utils.make_grid(real_batch, padding=2, pad_value=1, normalize=True))

        def set_dataloaders(self, train=False, test=False):
            if train:
                self.dataserver_train = RegionProposalGenerator.PurdueDrEvalDataset(self.rpg, 
                                                        "train", dataroot_train=self.rpg.dataroot_train)
                self.train_dataloader = torch.utils.data.DataLoader(self.dataserver_train, 
                                                       self.rpg.batch_size, shuffle=True, num_workers=4)
            if test:
                self.dataserver_test = RegionProposalGenerator.PurdueDrEvalDataset(self.rpg, 
                                                           "test", dataroot_test=self.rpg.dataroot_test)
                self.test_dataloader = torch.utils.data.DataLoader(self.dataserver_test, 
                                                      self.rpg.batch_size, shuffle=False, num_workers=4)
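
        ## A minimal end-to-end usage sketch for this inner class (illustrative only;
        ## 'rpg' stands for a configured RegionProposalGenerator instance):
        ##
        ##     detector = RegionProposalGenerator.SingleInstanceDetector(rpg)
        ##     detector.set_dataloaders(train=True, test=True)
        ##     model = RegionProposalGenerator.SingleInstanceDetector.LOADnet(skip_connections=True, depth=8)
        ##     detector.run_code_for_training_single_instance_detector(model, display_images=False)
        ##     detector.run_code_for_testing_single_instance_detector(model)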


        def check_dataloader(self, how_many_batches_to_show, train=False, test=False):
            if train:      
                dataloader = self.train_dataloader
            if test:
                dataloader = self.test_dataloader
            for i, data in enumerate(dataloader): 
                if i >= how_many_batches_to_show:  
                    break
                im_tensor,mask_tensor,bbox_tensor, image_label = data
                logger = logging.getLogger()
                old_level = logger.level
                logger.setLevel(100)
                plt.figure(figsize=[15,4])
                plt.imshow(np.transpose(torchvision.utils.make_grid(im_tensor, normalize=True,
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                plt.show()
                plt.figure(figsize=[15,4])
                plt.imshow(np.transpose(torchvision.utils.make_grid(mask_tensor, normalize=True,
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                plt.show()
                logger.setLevel(old_level)
                print("\n\nbbox tensor for batch:")
                print(bbox_tensor)
                print("\n\nimage labels for batch: ", image_label)


        class SkipBlock(nn.Module):
            """
            This is a building-block class that I have used in several networks
            """
            def __init__(self, in_ch, out_ch, downsample=False, skip_connections=True):
                super(RegionProposalGenerator.SingleInstanceDetector.SkipBlock, self).__init__()
                self.downsample = downsample
                self.skip_connections = skip_connections
                self.in_ch = in_ch
                self.out_ch = out_ch
                self.convo1 = nn.Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
                self.convo2 = nn.Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
                norm_layer1 = nn.BatchNorm2d
                norm_layer2 = nn.BatchNorm2d
                self.bn1 = norm_layer1(out_ch)
                self.bn2 = norm_layer2(out_ch)
                if downsample:
                    self.downsampler = nn.Conv2d(in_ch, out_ch, 1, stride=2)
            def forward(self, x):
                identity = x                                     
                out = self.convo1(x)                              
                out = self.bn1(out)                              
                out = torch.nn.functional.relu(out)
                if self.in_ch == self.out_ch:
                    out = self.convo2(out)                              
                    out = self.bn2(out)                              
                    out = torch.nn.functional.relu(out)
                if self.downsample:
                    out = self.downsampler(out)
                    identity = self.downsampler(identity)
                if self.skip_connections:
                    if self.in_ch == self.out_ch:
                        out += identity                              
                    else:
                        out[:,:self.in_ch,:,:] += identity
                        out[:,self.in_ch:,:,:] += identity
                return out
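
            ## A minimal shape sketch for this block (illustrative only): without
            ## downsampling the spatial size is preserved; with downsample=True the
            ## strided 1x1 convolution halves the height and the width.
            ##
            ##     blk = RegionProposalGenerator.SingleInstanceDetector.SkipBlock(64, 64, downsample=True)
            ##     out = blk(torch.zeros(1, 64, 32, 32))
            ##     assert out.shape == torch.Size([1, 64, 16, 16])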

        class LOADnet(nn.Module):
            """
            The acronym 'LOAD' stands for 'LOcalization And Detection'.
            """ 
            def __init__(self, skip_connections=True, depth=8):
                super(RegionProposalGenerator.SingleInstanceDetector.LOADnet, self).__init__()
                if depth not in [8,10,12,14,16]:
                    sys.exit("LOADnet has only been tested for 'depth' values 8, 10, 12, 14, and 16")
                self.depth = depth // 2
                self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
                self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
                self.pool = nn.MaxPool2d(2, 2)
                self.bn1  = nn.BatchNorm2d(64)
                self.bn2  = nn.BatchNorm2d(128)
                self.bn3  = nn.BatchNorm2d(256)
                self.skip64_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip64_arr.append(RegionProposalGenerator.SingleInstanceDetector.SkipBlock(64, 
                                                                                 64,skip_connections=skip_connections))
                self.skip64ds = RegionProposalGenerator.SingleInstanceDetector.SkipBlock(64,64,downsample=True, 
                                                                                     skip_connections=skip_connections)
                self.skip64to128 = RegionProposalGenerator.SingleInstanceDetector.SkipBlock(64, 128, 
                                                                                    skip_connections=skip_connections )
                self.skip128_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip128_arr.append(RegionProposalGenerator.SingleInstanceDetector.SkipBlock(128,128,
                                                                                    skip_connections=skip_connections))
                self.skip128ds = RegionProposalGenerator.SingleInstanceDetector.SkipBlock(128,128,
                                                                    downsample=True, skip_connections=skip_connections)
                self.skip128to256 = RegionProposalGenerator.SingleInstanceDetector.SkipBlock(128, 256, 
                                                                                    skip_connections=skip_connections )
                self.skip256_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip256_arr.append(RegionProposalGenerator.SingleInstanceDetector.SkipBlock(256,256,
                                                                                    skip_connections=skip_connections))
                self.skip256ds = RegionProposalGenerator.SingleInstanceDetector.SkipBlock(256,256,
                                                                    downsample=True, skip_connections=skip_connections)
                self.fc1 =  nn.Linear(8192, 1000)
                self.fc2 =  nn.Linear(1000, 3)
                self.conv_seqn = nn.Sequential(
                    nn.Conv2d(in_channels=64, out_channels=256, kernel_size=3, padding=1),
                    nn.BatchNorm2d(256),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(2,2),
                    nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(2,2)
                )
                self.fc_seqn = nn.Sequential(
                    nn.Linear(65536, 1024),
                    nn.ReLU(inplace=True),
                    nn.Linear(1024, 512),
                    nn.ReLU(inplace=True),
                    nn.Linear(512, 4)
                )

            def forward(self, x):
                x = self.pool(torch.nn.functional.relu(self.conv1(x)))          
                xR = x.clone()
                ## The labeling section:
                x1 = nn.MaxPool2d(2,2)(torch.nn.functional.relu(self.conv2(x)))       
                for i,skip64 in enumerate(self.skip64_arr[:self.depth//4]):
                    x1 = skip64(x1)                
                x1 = self.skip64ds(x1)
                for i,skip64 in enumerate(self.skip64_arr[self.depth//4:]):
                    x1 = skip64(x1)                
                x1 = self.bn1(x1)
                x1 = self.skip64to128(x1)
                for i,skip128 in enumerate(self.skip128_arr[:self.depth//4]):
                    x1 = skip128(x1)                
                x1 = self.bn2(x1)
                x1 = self.skip128ds(x1)
                x1 = x1.view(-1, 8192 )
                x1 = torch.nn.functional.relu(self.fc1(x1))
                x1 = self.fc2(x1)                                  
                ## for bounding box regression:
                x2 = self.conv_seqn(xR)
                x2 = x2.view(x.size(0), -1)
                x2 = self.fc_seqn(x2)          
                return x1,x2
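
            ## A minimal sketch of the two heads of LOADnet (illustrative only): for a
            ## batch of 128x128 RGB images, the first head returns the class logits and
            ## the second head the regressed bounding-box coordinates.
            ##
            ##     net = RegionProposalGenerator.SingleInstanceDetector.LOADnet(skip_connections=True, depth=8)
            ##     labels_out, bbox_out = net(torch.zeros(2, 3, 128, 128))
            ##     assert labels_out.shape == torch.Size([2, 3]) and bbox_out.shape == torch.Size([2, 4])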
    
        def run_code_for_training_single_instance_detector(self, net, display_images=False):        
            filename_for_out1 = "performance_numbers_" + str(self.rpg.epochs) + "label.txt"
            filename_for_out2 = "performance_numbers_" + str(self.rpg.epochs) + "regres.txt"
            FILE1 = open(filename_for_out1, 'w')
            FILE2 = open(filename_for_out2, 'w')
            net = copy.deepcopy(net)
            net = net.to(self.rpg.device)
            criterion1 = nn.CrossEntropyLoss()
            criterion2 = nn.MSELoss()
            optimizer = optim.SGD(net.parameters(), lr=self.rpg.learning_rate, momentum=self.rpg.momentum)
            print("\n\nStarting training loop...\n\n")
            start_time = time.perf_counter()
            labeling_loss_tally = []
            regression_loss_tally = []
            elapsed_time = 0.0
            for epoch in range(self.rpg.epochs):  
                print("")
                running_loss_labeling = 0.0
                running_loss_regression = 0.0       
                for i, data in enumerate(self.train_dataloader):
                    gt_too_small = False
                    im_tensor,mask_tensor,bbox_tensor, image_label = data
                    if i % 500 == 499:
                        current_time = time.perf_counter()
                        elapsed_time = current_time - start_time 
                        print("\n\n[epoch:%d/%d  iter=%4d  elapsed_time=%5d secs]   Ground Truth:     " % 
                                (epoch+1, self.rpg.epochs, i+1, elapsed_time) + 
                                ' '.join('%15s' % self.rpg.class_labels[image_label[j].item()] 
                                for j in range(self.rpg.batch_size)))
                    im_tensor = im_tensor.to(self.rpg.device)
                    image_label = image_label.to(self.rpg.device)
                    bbox_tensor = bbox_tensor.to(self.rpg.device)
                    optimizer.zero_grad()
                    outputs = net(im_tensor)
                    outputs_label = outputs[0]
                    bbox_pred = outputs[1]
                    bbox_gt = bbox_tensor
                    if i % 500 == 499:
                        inputs_copy = im_tensor.detach().clone()
                        inputs_copy = inputs_copy.cpu()
                        bbox_pc = bbox_pred.detach().clone()
                        bbox_pc[bbox_pc<0] = 0
                        bbox_pc[bbox_pc>127] = 127
                        bbox_pc[torch.isnan(bbox_pc)] = 0
                        _, predicted = torch.max(outputs_label.data, 1)
                        print("[epoch:%d/%d  iter=%4d  elapsed_time=%5d secs]   Predicted Labels: " % 
                         (epoch+1, self.rpg.epochs, i+1, elapsed_time) + 
                         ' '.join('%15s' % self.rpg.class_labels[predicted[j].item()] for j in range(self.rpg.batch_size)))
                        for idx in range(self.rpg.batch_size):
                            i1 = int(bbox_gt[idx][1])
                            i2 = int(bbox_gt[idx][3])
                            j1 = int(bbox_gt[idx][0])
                            j2 = int(bbox_gt[idx][2])
                            k1 = int(bbox_pc[idx][1])
                            k2 = int(bbox_pc[idx][3])
                            l1 = int(bbox_pc[idx][0])
                            l2 = int(bbox_pc[idx][2])
                            print("                    gt_bb:  [%d,%d,%d,%d]"%(j1,i1,j2,i2))
                            print("                  pred_bb:  [%d,%d,%d,%d]"%(l1,k1,l2,k2))
                            inputs_copy[idx,0,i1:i2,j1] = 255
                            inputs_copy[idx,0,i1:i2,j2] = 255
                            inputs_copy[idx,0,i1,j1:j2] = 255
                            inputs_copy[idx,0,i2,j1:j2] = 255
                            inputs_copy[idx,2,k1:k2,l1] = 255                      
                            inputs_copy[idx,2,k1:k2,l2] = 255
                            inputs_copy[idx,2,k1,l1:l2] = 255
                            inputs_copy[idx,2,k2,l1:l2] = 255
                    loss_labeling = criterion1(outputs_label, image_label)
                    loss_labeling.backward(retain_graph=True)        
                    loss_regression = criterion2(bbox_pred, bbox_tensor)
                    loss_regression.backward()
                    optimizer.step()
                    running_loss_labeling += loss_labeling.item()    
                    running_loss_regression += loss_regression.item()                
                    if i % 500 == 499:    
                        avg_loss_labeling = running_loss_labeling / float(500)
                        avg_loss_regression = running_loss_regression / float(500)
                        print("\n[epoch:%d/%d  iter=%4d  elapsed_time=%5d secs]   loss_labeling: %.3f      loss_regression: %.3f  " % 
                                    (epoch + 1, self.rpg.epochs, i + 1, elapsed_time, avg_loss_labeling, avg_loss_regression))
                        FILE1.write("%.3f\n" % avg_loss_labeling)
                        FILE1.flush()
                        FILE2.write("%.3f\n" % avg_loss_regression)
                        FILE2.flush()
                        labeling_loss_tally.append(avg_loss_labeling)
                        regression_loss_tally.append(avg_loss_regression)
                        running_loss_labeling = 0.0
                        running_loss_regression = 0.0
                        if display_images:
                            logger = logging.getLogger()
                            old_level = logger.level
                            logger.setLevel(100)
                            plt.figure(figsize=[15,4])
                            plt.imshow(np.transpose(torchvision.utils.make_grid(inputs_copy, normalize=False, 
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                            plt.show()
                            logger.setLevel(old_level)
            print("\nFinished Training\n")
            self.save_single_instance_detector_model(net)
            plt.figure(figsize=(10,5))
            plt.title("Labeling Loss vs. Iterations")
            plt.plot(labeling_loss_tally, label="labeling loss")
            plt.xlabel("iterations")
            plt.ylabel("labeling loss")
            plt.legend()
            plt.savefig("labeling_loss.png")
            plt.show()
            plt.title("regression Loss vs. Iterations")
            plt.plot(regression_loss_tally)
            plt.xlabel("iterations")
            plt.ylabel("regression loss")
            plt.legend()
            plt.savefig("regression_loss.png")
            plt.show()
    
        def save_single_instance_detector_model(self, model):
            '''
            Save the trained single instance detector model to a disk file
            '''
            torch.save(model.state_dict(), self.rpg.path_saved_single_instance_detector_model)

        def run_code_for_testing_single_instance_detector(self, net, display_images=False):
            net.load_state_dict(torch.load(self.rpg.path_saved_single_instance_detector_model))
            correct = 0
            total = 0
            confusion_matrix = torch.zeros(len(self.rpg.class_labels), len(self.rpg.class_labels))
            class_correct = [0] * len(self.rpg.class_labels)
            class_total = [0] * len(self.rpg.class_labels)
            with torch.no_grad():
                for i, data in enumerate(self.test_dataloader):
                    im_tensor,mask_tensor,bbox_tensor, image_label = data
                    image_label = image_label.tolist()
                    if i % 50 == 49:
                        print("\n\n[i=%4d]       Ground Truth: " % (i+1) + 
                         ' '.join('%15s' % self.rpg.class_labels[image_label[j]] for j in range(self.rpg.batch_size)))
                    outputs = net(im_tensor)
                    outputs_label = outputs[0]
                    bbox_pred = outputs[1]
                    bbox_gt = bbox_tensor
                    _, predicted = torch.max(outputs_label.data, 1)
                    if i % 50 == 49:
                        inputs_copy = im_tensor.detach().clone()
                        inputs_copy = inputs_copy.cpu()
                        bbox_pc = bbox_pred.detach().clone()
                        bbox_pc[bbox_pc<0] = 0
                        bbox_pc[bbox_pc>127] = 127
                        bbox_pc[torch.isnan(bbox_pc)] = 0
                        print("[i=%4d]   Predicted Labels: " % (i+1) + 
                             ' '.join('%15s' % self.rpg.class_labels[predicted[j].item()] for j in range(self.rpg.batch_size)))
                        for idx in range(self.rpg.batch_size):
                            i1 = int(bbox_gt[idx][1])
                            i2 = int(bbox_gt[idx][3])
                            j1 = int(bbox_gt[idx][0])
                            j2 = int(bbox_gt[idx][2])
                            k1 = int(bbox_pc[idx][1])
                            k2 = int(bbox_pc[idx][3])
                            l1 = int(bbox_pc[idx][0])
                            l2 = int(bbox_pc[idx][2])
                            print("                    gt_bb:  [%d,%d,%d,%d]"%(j1,i1,j2,i2))
                            print("                  pred_bb:  [%d,%d,%d,%d]"%(l1,k1,l2,k2))
                            inputs_copy[idx,0,i1:i2,j1] = 255
                            inputs_copy[idx,0,i1:i2,j2] = 255
                            inputs_copy[idx,0,i1,j1:j2] = 255
                            inputs_copy[idx,0,i2,j1:j2] = 255
                            inputs_copy[idx,2,k1:k2,l1] = 255                      
                            inputs_copy[idx,2,k1:k2,l2] = 255
                            inputs_copy[idx,2,k1,l1:l2] = 255
                            inputs_copy[idx,2,k2,l1:l2] = 255
                        if display_images:
                            logger = logging.getLogger()
                            old_level = logger.level
                            logger.setLevel(100)
                            plt.figure(figsize=[15,4])
                            plt.imshow(np.transpose(torchvision.utils.make_grid(inputs_copy, normalize=False, 
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                            plt.show()
                            logger.setLevel(old_level)
                    for label,prediction in zip(image_label, predicted):
                        confusion_matrix[label][prediction] += 1
                    total += len(image_label)
                    correct +=  [predicted[ele] == image_label[ele] for ele in range(len(predicted))].count(True)
                    comp = [predicted[ele] == image_label[ele] for ele in range(len(predicted))]

                    for j in range(len(image_label)):
                        label = image_label[j]
                        class_correct[label] += comp[j]
                        class_total[label] += 1
            print("\n")
            for j in range(len(self.rpg.class_labels)):
                print('Prediction accuracy for %5s : %2d %%' % (
              self.rpg.class_labels[j], 100 * class_correct[j] / float(class_total[j])))
            print("\n\n\nOverall accuracy of the network on the 1000 test images: %d %%" % (100 * correct / float(total)))
            print("\n\nDisplaying the confusion matrix:\n")
            out_str = "                "
            for j in range(len(self.rpg.class_labels)):  
                out_str +=  "%15s" % self.rpg.class_labels[j]   
            print(out_str + "\n")
            for i,label in enumerate(self.rpg.class_labels):
                out_percents = [100 * confusion_matrix[i,j] / float(class_total[i]) 
                                 for j in range(len(self.rpg.class_labels))]
                out_percents = ["%.2f" % item.item() for item in out_percents]
                out_str = "%12s:  " % self.rpg.class_labels[i]
                for j in range(len(self.rpg.class_labels)): 
                    out_str +=  "%15s" % out_percents[j]
                print(out_str)
    

    ###%%%
    #######################################################################################################
    ##########################        A class for multi instance detection        #########################
    ##########################  Start Definition of Inner Class YoloLikeDetector  #########################

    class YoloLikeDetector(nn.Module):             
        """
        The primary purpose of this class is to demonstrate multi-instance object detection with YOLO-like 
        logic.  A key parameter of the logic for YOLO-like detection is the variable 'yolo_interval'.  
        The image gridding that is required is based on the value assigned to this variable.  The grid is 
        represented by an SxS array of cells where S is the image width divided by yolo_interval. So for
        images of size 128x128 and 'yolo_interval=20', you will get a 6x6 grid of cells over the image. Since
        my goal is merely to explain the principles of the YOLO logic, I have not bothered with the bottom
        8 rows and the right-most 8 columns of the image that get left out of the area covered by such a grid.

        An important element of the YOLO logic is defining a set of Anchor Boxes for each cell in the SxS 
        grid.  The anchor boxes are characterized by their aspect ratios.  By aspect ratio I mean the
        'height/width' characterization of the boxes.  My implementation provides for 5 anchor boxes for 
        each cell with the following aspect ratios: 1/5, 1/3, 1/1, 3/1, 5/1.  

        At training time, each instance in the image is assigned to that cell whose central pixel is 
        closest to the center of the bounding box for the instance. After the cell assignment, the 
        instance is assigned to that anchor box whose aspect ratio comes closest to matching the aspect 
        ratio of the instance.

        The assigning of an object instance to a <cell, anchor_box> pair is encoded in the form of a 
        '5+C' element long YOLO vector where C is the number of classes for the object instances.  
        In our case, C is 3 for the three classes 'Dr_Eval', 'house' and 'watertower', so we end up 
        with an 8-element vector encoding when we assign an instance to a <cell, anchor_box> pair.  
        The last C elements of the encoding vector can be thought of as a one-hot representation 
        of the class label for the instance.

        The first five elements of the vector encoding for each anchor box in a cell are set as follows: 
        The first element is set to 1 if an object instance was actually assigned to that anchor box. 
        The next two elements are the (x,y) displacements of the center of the actual bounding box 
        for the object instance vis-a-vis the center of the cell. These two displacements are expressed 
        as a fraction of the width and the height of the cell.  The next two elements of the YOLO vector
        are the actual height and the actual width of the true bounding box for the instance in question 
        as a multiple of the cell dimension.

        The 8-element YOLO vectors are packed into a YOLO tensor of shape (num_cells, num_anch_boxes, 8)
        where num_cells is 36 for a 6x6 gridding of an image and num_anch_boxes is 5.

        Classpath:  RegionProposalGenerator  ->  YoloLikeDetector
        """
        def __init__(self, rpg):
            super(RegionProposalGenerator.YoloLikeDetector, self).__init__()
            self.rpg = rpg
            self.train_dataloader = None
            self.test_dataloader = None

        def show_sample_images_from_dataset(self, rpg):
            data = next(iter(self.train_dataloader))    
            real_batch = data[0]
            first_im = real_batch[0]
            self.rpg.display_tensor_as_image(torchvision.utils.make_grid(real_batch, padding=2, pad_value=1, normalize=True))

        def set_dataloaders(self, train=False, test=False):
            if train:
                dataserver_train = RegionProposalGenerator.PurdueDrEvalMultiDataset(self.rpg, 
                                                       "train", dataroot_train=self.rpg.dataroot_train)
                self.train_dataloader = torch.utils.data.DataLoader(dataserver_train, 
                                                      self.rpg.batch_size, shuffle=True, num_workers=4)
            if test:
                dataserver_test = RegionProposalGenerator.PurdueDrEvalMultiDataset(self.rpg, 
                                                          "test", dataroot_test=self.rpg.dataroot_test)
                self.test_dataloader = torch.utils.data.DataLoader(dataserver_test, 
                                                     self.rpg.batch_size, shuffle=False, num_workers=4)

        def check_dataloader(self, how_many_batches_to_show, train=False, test=False):
            if train:      
                dataloader = self.train_dataloader
            if test:
                dataloader = self.test_dataloader
            for idx, data in enumerate(dataloader): 
                if idx >= how_many_batches_to_show:  
                    break
                im_tensor, seg_mask_tensor, bbox_tensor, bbox_label_tensor, num_objects_in_image = data
                print("\n\nNumber of objects in the batch images: ", num_objects_in_image)
                print("\n\nlabels for the objects found:")
                print(bbox_label_tensor)

                mask_shape = seg_mask_tensor.shape
                logger = logging.getLogger()
                old_level = logger.level
                logger.setLevel(100)
                #  Let's now display the batch images:
                plt.figure(figsize=[15,4])
                plt.imshow(np.transpose(torchvision.utils.make_grid(im_tensor, normalize=True,
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                plt.show()
                #  Let's now display the batch with JUST the masks:
                composite_mask_tensor = torch.zeros(im_tensor.shape[0], 1,128,128)
                for bdx in range(im_tensor.shape[0]):
                    for i in range(num_objects_in_image[bdx]):
                         composite_mask_tensor[bdx] += seg_mask_tensor[bdx][i]
                plt.figure(figsize=[15,4])
                plt.imshow(np.transpose(torchvision.utils.make_grid(composite_mask_tensor, normalize=True,
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                plt.show()
                #  Let's now display the batch and masks in a side-by-side display:
                display_image_and_mask_tensor = torch.zeros(2*im_tensor.shape[0], 3,128,128)
                display_image_and_mask_tensor[:im_tensor.shape[0],:,:,:]  = im_tensor
                display_image_and_mask_tensor[im_tensor.shape[0]:,:,:,:]  = composite_mask_tensor
                plt.figure(figsize=[15,4])
                plt.imshow(np.transpose(torchvision.utils.make_grid(display_image_and_mask_tensor, normalize=False,
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                plt.show()
                #  Let's now display the batch with GT bboxes for the objects:
                im_with_bbox_tensor = torch.clone(im_tensor)
                for bdx in range(im_tensor.shape[0]):
                    bboxes_for_image = bbox_tensor[bdx]
                    for i in range(num_objects_in_image[bdx]):
                        ii = bbox_tensor[bdx][i][0].item()
                        ji = bbox_tensor[bdx][i][1].item()
                        ki = bbox_tensor[bdx][i][2].item()
                        li = bbox_tensor[bdx][i][3].item()
                        im_with_bbox_tensor[bdx,:,ji,ii:ki] = 255    
                        im_with_bbox_tensor[bdx,:,li,ii:ki] = 255                
                        im_with_bbox_tensor[bdx,:,ji:li,ii] = 255  
                        im_with_bbox_tensor[bdx,:,ji:li,ki] = 255  
                plt.figure(figsize=[15,4])
                plt.imshow(np.transpose(torchvision.utils.make_grid(im_with_bbox_tensor, normalize=False,
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                plt.show()
                #  Let's now display the batch with GT bboxes and the object labels
                im_with_bbox_tensor = torch.clone(im_tensor)
                for bdx in range(im_tensor.shape[0]):
                    labels_for_image = bbox_label_tensor[bdx]
                    bboxes_for_image = bbox_tensor[bdx]
                    for i in range(num_objects_in_image[bdx]):
                        ii = bbox_tensor[bdx][i][0].item()
                        ji = bbox_tensor[bdx][i][1].item()
                        ki = bbox_tensor[bdx][i][2].item()
                        li = bbox_tensor[bdx][i][3].item()
                        im_with_bbox_tensor[bdx,:,ji,ii:ki] = 40    
                        im_with_bbox_tensor[bdx,:,li,ii:ki] = 40                
                        im_with_bbox_tensor[bdx,:,ji:li,ii] = 40  
                        im_with_bbox_tensor[bdx,:,ji:li,ki] = 40  
                        im_pil = tvt.ToPILImage()(im_with_bbox_tensor[bdx]).convert('RGBA')
                        text = Image.new('RGBA', im_pil.size, (255,255,255,0))
                        draw = ImageDraw.Draw(text)
                        horiz = ki-10 if ki>10 else ki
                        vert = li
                        label = self.rpg.class_labels[labels_for_image[i]]
                        label = "wtower" if label == "watertower" else label
                        label = "Dr Eval" if label == "Dr_Eval" else label
                        draw.text( (horiz,vert), label, fill=(255,255,255,200) )
                        im_pil = Image.alpha_composite(im_pil, text)
                        im_with_bbox_tensor[bdx] = tvt.ToTensor()(im_pil.convert('RGB'))

                plt.figure(figsize=[15,4])
                plt.imshow(np.transpose(torchvision.utils.make_grid(im_with_bbox_tensor, normalize=False,
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                plt.show()
                logger.setLevel(old_level)


        class SkipBlock(nn.Module):
            """
            This is a building-block class that I have used in several networks
            """
            def __init__(self, in_ch, out_ch, downsample=False, skip_connections=True):
                super(RegionProposalGenerator.YoloLikeDetector.SkipBlock, self).__init__()
                self.downsample = downsample
                self.skip_connections = skip_connections
                self.in_ch = in_ch
                self.out_ch = out_ch
                self.convo1 = nn.Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
                self.convo2 = nn.Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
                norm_layer1 = nn.BatchNorm2d
                norm_layer2 = nn.BatchNorm2d
                self.bn1 = norm_layer1(out_ch)
                self.bn2 = norm_layer2(out_ch)
                if downsample:
                    self.downsampler = nn.Conv2d(in_ch, out_ch, 1, stride=2)
            def forward(self, x):
                identity = x                                     
                out = self.convo1(x)                              
                out = self.bn1(out)                              
                out = torch.nn.functional.relu(out)
                if self.in_ch == self.out_ch:
                    out = self.convo2(out)                              
                    out = self.bn2(out)                              
                    out = torch.nn.functional.relu(out)
                if self.downsample:
                    out = self.downsampler(out)
                    identity = self.downsampler(identity)
                if self.skip_connections:
                    if self.in_ch == self.out_ch:
                        out += identity                              
                    else:
                        out[:,:self.in_ch,:,:] += identity
                        out[:,self.in_ch:,:,:] += identity
                return out

        class NetForYolo(nn.Module):
            """
            Recall that each YOLO vector is of size 5+C where C is the number of classes.  Since C
            equals 3 for the dataset used in the demo code in the Examples directory, our YOLO vectors
            are 8 elements long.  A YOLO tensor is a tensor representation of all the YOLO vectors
            created for a given training image.  The network shown below assumes that the input to
            the network is a flattened form of the YOLO tensor.  With an 8-element YOLO vector, a
            6x6 gridding of an image, and with 5 anchor boxes for each cell of the grid, the 
            flattened version of the YOLO tensor would be of size 1440.

            In Version 2.0.6 of the RPG module, I introduced a new loss function for this network
            that calls for using nn.CrossEntropyLoss for just the last C elements of each YOLO
            vector. [See Lines 64 through 83 of the code for "run_code_for_training_multi_instance_
            detection()" for how the loss is calculated in 2.0.6.]  Using nn.CrossEntropyLoss 
            required augmenting the last C elements of the YOLO vector with one additional 
            element for the purpose of representing the absence of an object in any given anchor
            box of a cell.  

            With the above-mentioned augmentation, the flattened version of a YOLO tensor is
            of size 1620.  That is the reason for the one-line change at the end of the 
            constructor initialization code shown below.
            """ 
            def __init__(self, skip_connections=True, depth=8):
                super(RegionProposalGenerator.YoloLikeDetector.NetForYolo, self).__init__()
                if depth not in [8,10,12,14,16]:
                    sys.exit("This network has only been tested for 'depth' values 8, 10, 12, 14, and 16")
                self.depth = depth // 2
                self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
                self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
                self.pool = nn.MaxPool2d(2, 2)
                self.bn1  = nn.BatchNorm2d(64)
                self.bn2  = nn.BatchNorm2d(128)
                self.bn3  = nn.BatchNorm2d(256)
                self.skip64_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip64_arr.append(RegionProposalGenerator.YoloLikeDetector.SkipBlock(64, 64,
                                                                                    skip_connections=skip_connections))
                self.skip64ds = RegionProposalGenerator.YoloLikeDetector.SkipBlock(64,64,downsample=True, 
                                                                                     skip_connections=skip_connections)
                self.skip64to128 = RegionProposalGenerator.YoloLikeDetector.SkipBlock(64, 128, 
                                                                                    skip_connections=skip_connections )
                self.skip128_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip128_arr.append(RegionProposalGenerator.YoloLikeDetector.SkipBlock(128,128,
                                                                                    skip_connections=skip_connections))
                self.skip128ds = RegionProposalGenerator.YoloLikeDetector.SkipBlock(128,128,
                                                                    downsample=True, skip_connections=skip_connections)
                self.skip128to256 = RegionProposalGenerator.YoloLikeDetector.SkipBlock(128, 256, 
                                                                                    skip_connections=skip_connections )
                self.skip256_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip256_arr.append(RegionProposalGenerator.YoloLikeDetector.SkipBlock(256,256,
                                                                                    skip_connections=skip_connections))
                self.skip256ds = RegionProposalGenerator.YoloLikeDetector.SkipBlock(256,256,
                                                                    downsample=True, skip_connections=skip_connections)
                self.fc_seqn = nn.Sequential(
                    nn.Linear(8192, 4096),
                    nn.ReLU(inplace=True),
                    nn.Linear(4096, 2048),
                    nn.ReLU(inplace=True),
#                    nn.Linear(2048, 1440)
                    nn.Linear(2048, 1620)
                )

            def forward(self, x):
                x = self.pool(torch.nn.functional.relu(self.conv1(x)))          
                x = self.pool(torch.nn.functional.relu(self.conv2(x)))
                for i,skip64 in enumerate(self.skip64_arr[:self.depth//4]):
                    x = skip64(x)                
                x = self.skip64ds(x)
                for i,skip64 in enumerate(self.skip64_arr[self.depth//4:]):
                    x = skip64(x)                
                x = self.bn1(x)
                x = self.skip64to128(x)
                for i,skip128 in enumerate(self.skip128_arr[:self.depth//4]):
                    x = skip128(x)                
                x = self.bn2(x)
                x = self.skip128ds(x)
                x = x.view(-1, 8192 )
                x = self.fc_seqn(x)
                return x

        class AnchorBox( nn.Module ):
            """
            About the role of the 'adx' constructor parameter:  Recall that our goal is to use
            the annotations for each batch to fill up the 'yolo_tensor' that was defined above.
            For the case of 5 anchor boxes per cell, this tensor has the following shape:

                     torch.zeros( self.rpg.batch_size, num_yolo_cells, 5, 8 )

            The index 'adx' shown below tells us which of the 5 positions along the third axis
            of the 'yolo_tensor' is RESERVED for a given anchor box.  We reserve the 
            coordinate 0 on the third axis for the "1/5" anchor boxes, the coordinate 1 for
            the "1/3" anchor boxes, and so on.  This coordinate choice is set by 'adx'. 
            """
            #               aspect_ratio top_left_corner  anchor_box height & width   anchor_box index
            def __init__(self,   AR,     tlc,             ab_height,   ab_width,      adx):     
                super(RegionProposalGenerator.YoloLikeDetector.AnchorBox, self).__init__()
                self.AR = AR
                self.tlc = tlc
                self.ab_height = ab_height
                self.ab_width = ab_width
                self.adx = adx
            def __str__(self):
                return "AnchorBox type (h/w): %s    tlc for yolo cell: %s    anchor-box height: %d     \
                   anchor-box width: %d   adx: %d" % (self.AR, str(self.tlc), self.ab_height, self.ab_width, self.adx)
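        ##  An illustrative sketch (not used elsewhere in this module) of the 'adx' convention described
        ##  above: given the aspect ratio AR = height/width of an object's bounding box, it returns the
        ##  index along the third axis of the yolo_tensor reserved for the best-matching anchor box.
        ##  The method name is an assumption made only for this illustration; the AR thresholds mirror
        ##  those used in run_code_for_training_multi_instance_detection() below.
        @staticmethod
        def _sketch_adx_for_aspect_ratio(AR):
            if AR <= 0.2:      return 0        ##  "1/5" anchor boxes
            elif AR <= 0.5:    return 1        ##  "1/3" anchor boxes
            elif AR <= 1.5:    return 2        ##  "1/1" anchor boxes
            elif AR <= 4.0:    return 3        ##  "3/1" anchor boxes
            else:              return 4        ##  "5/1" anchor boxes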

    
        def run_code_for_training_multi_instance_detection(self, net, display_labels=False, display_images=False):        
            """
            Version 2.0.6 introduced a loss function that respects the semantics of the different elements 
            of the YOLO vector.  Recall that when you assign an object bounding box to an anchor-box in a 
            specific cell of the grid over the images, you create a 5+C element YOLO vector where C is 
            the number of object classes in your dataset.  Since C=3 in our case, the YOLO vectors in our 
            case are 8-element vectors. See Slide 36 of the Week 8 slides for the meaning to be associated 
            with the different elements of a YOLO vector.

            Lines 64 through 83 in the code shown below are the implementation of the new loss function.

            Since the first element of the YOLO vector indicates the presence or the absence of an object 
            in a specific anchor-box of a specific cell, I use nn.BCELoss for that element.  The next four 
            elements carry purely numerical values that give the precise location of the object 
            vis-a-vis the center of the cell to which the object is assigned, along with the precise height 
            and width of the object bounding-box; I use nn.MSELoss for these four elements.  The last 
            three elements are a one-hot representation of the object class label, so I use the regular 
            nn.CrossEntropyLoss for these elements.

            As I started writing code for incorporating the nn.CrossEntropyLoss mentioned above, I realized
            that (for the purpose of loss calculation) I needed to append one more element to the last three 
            class-label elements of the YOLO vector to take care of the case when there is no object 
            instance present in an anchor box.  You see, the dataset assumes that an image can have a 
            maximum of 5 objects. If an image has fewer than 5 objects, that fact is expressed in the 
            annotations by using the label value of 13 for the 'missing' objects.  To illustrate, say a
            training image has just two objects in it, one being Dr. Eval and the other a house. In this
            case, the annotation for the class labels would be the list [0,1,13,13,13].  If I had not 
            augmented the YOLO vector for loss calculation, the network would be forced to choose
            one of the actual class labels --- 0, 1, or 2 --- in the prediction for a YOLO vector even 
            when there was no object present in the training image for that cell and that anchor box. So 
            when the object label is 13, I throw all the probability mass related to class labels into the 
            additional element (the 9th element) for a YOLO vector.

            See Lines 57 through 60 for the above-mentioned augmentation of the YOLO vectors for all the
            anchor boxes in all of the cells of the grid.

            An important consequence of augmenting the YOLO vectors in the manner explained above is that 
            you must factor the augmentations in the processing of the predictions made by the network.
            An example of that is shown in Line 91 where we supply 9 as the size of the vectors that
            need to be recovered from the predictions.
            """
            if self.rpg.batch_size > 1:                                                                                    ## (1)
                sys.exit("YOLO-like multi-instance object detection has only been tested for batch_size of 1")             ## (2)
            yolo_debug = False
            filename_for_out1 = "performance_numbers_" + str(self.rpg.epochs) + "label.txt"                                
            filename_for_out2 = "performance_numbers_" + str(self.rpg.epochs) + "regres.txt"                               
            FILE1 = open(filename_for_out1, 'w')                                                                           
            FILE2 = open(filename_for_out2, 'w')                                                                           
            net = net.to(self.rpg.device)                                                                                  
            criterion1 = nn.BCELoss()                    # For the first element of the 8 element yolo vector              ## (3)
            criterion2 = nn.MSELoss()                    # For the four regression elements (the 2nd through 5th) of the yolo vector  ## (4)
            criterion3 = nn.CrossEntropyLoss()           # For the last three elements of the 8 element yolo vector        ## (5)
            print("\n\nLearning Rate: ", self.rpg.learning_rate)
            optimizer = optim.SGD(net.parameters(), lr=self.rpg.learning_rate, momentum=self.rpg.momentum)                 ## (6)
            print("\n\nStarting training loop...\n\n")
            start_time = time.perf_counter()
            Loss_tally = []
            elapsed_time = 0.0
            yolo_interval = self.rpg.yolo_interval                                                                         ## (7)
            num_yolo_cells = (self.rpg.image_size[0] // yolo_interval) * (self.rpg.image_size[1] // yolo_interval)         ## (8)
            num_anchor_boxes =  5    # (height/width)   1/5  1/3  1/1  3/1  5/1                                            ## (9)
            max_obj_num  = 5                                                                                               ## (10)
            ## The 8 in the following is the size of the yolo_vector for each anchor-box in a given cell.  The 8 elements 
            ## are: [obj_present, bx, by, bh, bw, c1, c2, c3] where bx and by are the delta diffs between the centers
            ## of the yolo cell and the center of the object bounding box in terms of a unit for the cell width and cell 
            ## height.  bh and bw are the height and the width of object bounding box in terms of the cell height and width.
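            ## For example, a 'house' instance (class index 1) whose bbox center coincides with the center of its yolo
            ## cell and whose bbox is exactly one cell high and one cell wide would be encoded as [1, 0, 0, 1, 1, 0, 1, 0].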
            for epoch in range(self.rpg.epochs):                                                                           ## (11)
                print("")
                running_loss = 0.0                                                                                         ## (12)
                for iter, data in enumerate(self.train_dataloader):   
                    if yolo_debug:
                        print("\n\n\n======================================= iteration: %d ========================================\n" % iter)
                    yolo_tensor = torch.zeros( self.rpg.batch_size, num_yolo_cells, num_anchor_boxes, 8 )                  ## (13)
                    im_tensor, seg_mask_tensor, bbox_tensor, bbox_label_tensor, num_objects_in_image = data                ## (14)
                    im_tensor   = im_tensor.to(self.rpg.device)                                                            ## (15)
                    seg_mask_tensor = seg_mask_tensor.to(self.rpg.device)                 
                    bbox_tensor = bbox_tensor.to(self.rpg.device)
                    bbox_label_tensor = bbox_label_tensor.to(self.rpg.device)
                    yolo_tensor = yolo_tensor.to(self.rpg.device)
                    if yolo_debug:
                        logger = logging.getLogger()
                        old_level = logger.level
                        logger.setLevel(100)
                        plt.figure(figsize=[15,4])
                        plt.imshow(np.transpose(torchvision.utils.make_grid(im_tensor,normalize=True,padding=3,pad_value=255).cpu(), (1,2,0)))
                        plt.show()
                        logger.setLevel(old_level)
                    cell_height = yolo_interval                                                                            ## (16)
                    cell_width = yolo_interval                                                                             ## (17)
                    if yolo_debug:
                        print("\n\nnum_objects_in_image: ")
                        print(num_objects_in_image)
                    num_cells_image_width = self.rpg.image_size[0] // yolo_interval                                        ## (18)
                    num_cells_image_height = self.rpg.image_size[1] // yolo_interval                                       ## (19)
                    height_center_bb = torch.zeros(im_tensor.shape[0], 1).float().to(self.rpg.device)                      ## (20)
                    width_center_bb = torch.zeros(im_tensor.shape[0], 1).float().to(self.rpg.device)                       ## (21)
                    obj_bb_height = torch.zeros(im_tensor.shape[0], 1).float().to(self.rpg.device)                         ## (22)
                    obj_bb_width =  torch.zeros(im_tensor.shape[0], 1).float().to(self.rpg.device)                         ## (23)

                    ## idx is for object index
                    for idx in range(max_obj_num):                                                                         ## (24)
                        ## In the mask, 1 means good image instance in batch, 0 means bad image instance in batch
#                        batch_mask = torch.ones( self.rpg.batch_size, dtype=torch.int8).to(self.rpg.device)
                        if yolo_debug:
                            print("\n\n               ================  object indexed %d ===============              \n\n" % idx)
                        ## Note that the bounding-box coordinates are in the (x,y) format, with x-positive going to
                        ## right and the y-positive going down. A bbox is specified by (x_min,y_min,x_max,y_max):
                        if yolo_debug:
                            print("\n\nshape of bbox_tensor: ", bbox_tensor.shape)
                            print("\n\nbbox_tensor:")
                            print(bbox_tensor)
                        ## in what follows, the first index (set to 0) is for the batch axis
                        height_center_bb =  (bbox_tensor[0,idx,1] + bbox_tensor[0,idx,3]) // 2                             ## (25)
                        width_center_bb =  (bbox_tensor[0,idx,0] + bbox_tensor[0,idx,2]) // 2                              ## (26)
                        obj_bb_height = bbox_tensor[0,idx,3] -  bbox_tensor[0,idx,1]                                       ## (27)
                        obj_bb_width = bbox_tensor[0,idx,2] - bbox_tensor[0,idx,0]                                         ## (28)
                        if (obj_bb_height < 4.0) or (obj_bb_width < 4.0): continue                                         ## (29)

                        cell_row_indx =  (height_center_bb / yolo_interval).int()          ## for the i coordinate         ## (30)
                        cell_col_indx =  (width_center_bb / yolo_interval).int()           ## for the j coordinates        ## (31)
                        cell_row_indx = torch.clamp(cell_row_indx, max=num_cells_image_height - 1)                         ## (32)
                        cell_col_indx = torch.clamp(cell_col_indx, max=num_cells_image_width - 1)                          ## (33)

                        ## The bh and bw elements in the yolo vector for this object:  bh and bw are measured relative 
                        ## to the size of the grid cell to which the object is assigned.  For example, bh is the 
                        ## height of the bounding-box divided by the actual height of the grid cell.
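                        ## For example, with yolo_interval=20, a bounding box that is 50 pixels tall and 30 pixels
                        ## wide yields bh = 50/20 = 2.5 and bw = 30/20 = 1.5.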
                        bh  =  obj_bb_height.float() / yolo_interval                                                       ## (34)
                        bw  =  obj_bb_width.float()  / yolo_interval                                                       ## (35)

                        ## You have to be CAREFUL about object center calculation since bounding-box coordinates
                        ## are in (x,y) format --- with x-positive going to the right and y-positive going down.
                        obj_center_x =  (bbox_tensor[0,idx][2].float() +  bbox_tensor[0,idx][0].float()) / 2.0             ## (36)
                        obj_center_y =  (bbox_tensor[0,idx][3].float() +  bbox_tensor[0,idx][1].float()) / 2.0             ## (37)
                        ## Now you need to switch back from (x,y) format to (i,j) format:
                        yolocell_center_i =  cell_row_indx*yolo_interval + float(yolo_interval) / 2.0                      ## (38)
                        yolocell_center_j =  cell_col_indx*yolo_interval + float(yolo_interval) / 2.0                      ## (39)
                        del_x  =  (obj_center_x.float() - yolocell_center_j.float()) / yolo_interval                       ## (40)
                        del_y  =  (obj_center_y.float() - yolocell_center_i.float()) / yolo_interval                       ## (41)
                        class_label_of_object = bbox_label_tensor[0,idx].item()                                            ## (42)
                        ## When batch_size is only 1, it is easy to discard an image that has no known objects in it.
                        ## To generalize this notion to arbitrary batch sizes, you will need a batch mask to indicate
                        ## the images in a batch that should not be considered in the rest of this code.
                        if class_label_of_object == 13: continue                                                           ## (43)
                        AR = obj_bb_height.float() / obj_bb_width.float()                                                  ## (44)
                        if AR <= 0.2:               anch_box_index = 0                                                     ## (45)
                        if 0.2 < AR <= 0.5:         anch_box_index = 1                                                     ## (46)
                        if 0.5 < AR <= 1.5:         anch_box_index = 2                                                     ## (47)
                        if 1.5 < AR <= 4.0:         anch_box_index = 3                                                     ## (48)
                        if AR > 4.0:                anch_box_index = 4                                                     ## (49)
                        yolo_vector = torch.FloatTensor([0,del_x.item(), del_y.item(), bh.item(), bw.item(), 0, 0, 0] )    ## (50)
                        yolo_vector[0] = 1                                                                                 ## (51)
                        yolo_vector[5 + class_label_of_object] = 1                                                         ## (52)
                        yolo_cell_index =  cell_row_indx.item() * num_cells_image_width  +  cell_col_indx.item()           ## (53)
                        yolo_tensor[0,yolo_cell_index, anch_box_index] = yolo_vector                                       ## (54)
                        if yolo_debug: 
                            print("\n\nyolo_tensor specific: ")
                            print(yolo_tensor[0,18,2])
                    ## The augmented tensor (with the extra no-object element) is built once per batch, AFTER the
                    ## object loop above, so that it exists even when every object in the image was skipped:
                    yolo_tensor_aug = torch.zeros(self.rpg.batch_size, num_yolo_cells, \
                                                               num_anchor_boxes,9).float().to(self.rpg.device)            ## (55) 
                    yolo_tensor_aug[:,:,:,:-1] =  yolo_tensor                                                              ## (56)
                    if yolo_debug: 
                        print("\nyolo_tensor_aug: ")
                        print(yolo_tensor_aug[0,18,2])
                    ## If no object is present, throw all the prob mass into the extra 9th ele of yolo_vector
                    for icx in range(num_yolo_cells):                                                                      ## (57)
                        for iax in range(num_anchor_boxes):                                                                ## (58)
                            if yolo_tensor_aug[0,icx,iax,0] == 0:                                                          ## (59)
                                yolo_tensor_aug[0,icx,iax,-1] = 1                                                          ## (60)
                    if yolo_debug:
                        logger = logging.getLogger()
                        old_level = logger.level
                        logger.setLevel(100)
                        plt.figure(figsize=[15,4])
                        plt.imshow(np.transpose(torchvision.utils.make_grid(im_tensor, normalize=True,
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                        plt.show()

                    optimizer.zero_grad()                                                                                  ## (61)
                    output = net(im_tensor)                                                                                ## (62)
                    predictions_aug = output.view(self.rpg.batch_size,num_yolo_cells,num_anchor_boxes,9)                   ## (63)
                    loss = torch.tensor(0.0, requires_grad=True).float().to(self.rpg.device)                               ## (64)
                    for icx in range(num_yolo_cells):                                                                      ## (65)
                        for iax in range(num_anchor_boxes):                                                                ## (66)
                            pred_yolo_vector = predictions_aug[0,icx,iax]                                                  ## (67)
                            target_yolo_vector = yolo_tensor_aug[0,icx,iax]                                                ## (68)
                            ##  Estimating presence/absence of object and the Binary Cross Entropy section:
                            object_presence = nn.Sigmoid()(torch.unsqueeze(pred_yolo_vector[0], dim=0))                    ## (69)
                            target_for_prediction = torch.unsqueeze(target_yolo_vector[0], dim=0)                          ## (70)
                            bceloss = criterion1(object_presence, target_for_prediction)                                   ## (71)
                            loss += bceloss                                                                                ## (72)
                            ## MSE section for regression params:
                            pred_regression_vec = pred_yolo_vector[1:5]                                                    ## (73)
                            pred_regression_vec = torch.unsqueeze(pred_regression_vec, dim=0)                              ## (74)
                            target_regression_vec = torch.unsqueeze(target_yolo_vector[1:5], dim=0)                        ## (75)
                            regression_loss = criterion2(pred_regression_vec, target_regression_vec)                       ## (76)
                            loss += regression_loss                                                                        ## (77)
                            ##  CrossEntropy section for object class label:
                            probs_vector = pred_yolo_vector[5:]                                                            ## (78)
                            probs_vector = torch.unsqueeze( probs_vector, dim=0 )                                          ## (79)
                            target = torch.argmax(target_yolo_vector[5:])                                                  ## (80)
                            target = torch.unsqueeze( target, dim=0 )                                                      ## (81)
                            class_labeling_loss = criterion3(probs_vector, target)                                         ## (82)
                            loss += class_labeling_loss                                                                    ## (83)
                    if yolo_debug:
                        print("\n\nshape of loss: ", loss.shape)
                        print("\n\nloss: ", loss)
                    loss.backward()                                                                                        ## (84)
                    optimizer.step()                                                                                       ## (85)
                    running_loss += loss.item()                                                                            ## (86)
                    if iter%500==499:                                                                                      ## (87)
                        current_time = time.perf_counter()
                        elapsed_time = current_time - start_time 
                        avg_loss = running_loss / float(500)                 ## 500 iterations between reports             ## (88)
                        print("\n[epoch:%d/%d, iter=%4d  elapsed_time=%5d secs]      mean value for loss: %7.4f" % 
                                                            (epoch+1,self.rpg.epochs, iter+1, elapsed_time, avg_loss))     ## (89)
                        Loss_tally.append(running_loss)
                        FILE1.write("%.3f\n" % avg_loss)
                        FILE1.flush()
                        running_loss = 0.0                                                                                 ## (90)
                        if display_labels:
                            predictions = output.view(self.rpg.batch_size,num_yolo_cells,num_anchor_boxes,9)               ## (91)
                            if yolo_debug:
                                print("\n\nyolo_vector for first image in batch, cell indexed 18, and AB indexed 2: ")
                                print(predictions[0, 18, 2])
                            for ibx in range(predictions.shape[0]):                             # for each batch image     ## (92)
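                                ## The 36 in the next line is num_yolo_cells for the 6x6 gridding described in the class docstring: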
                                icx_2_best_anchor_box = {ic : None for ic in range(36)}                                    ## (93)
                                for icx in range(predictions.shape[1]):                         # for each yolo cell       ## (94)
                                    cell_predi = predictions[ibx,icx]                                                      ## (95)
                                    prev_best = 0                                                                          ## (96)
                                    for anchor_bdx in range(cell_predi.shape[0]):                                          ## (97)
                                        if cell_predi[anchor_bdx][0] > cell_predi[prev_best][0]:                           ## (98)
                                            prev_best = anchor_bdx                                                         ## (99)
                                    best_anchor_box_icx = prev_best                                                        ## (100)
                                    icx_2_best_anchor_box[icx] = best_anchor_box_icx                                       ## (101)
                                sorted_icx_to_box = sorted(icx_2_best_anchor_box,                                   
                                      key=lambda x: predictions[ibx,x,icx_2_best_anchor_box[x]][0].item(), reverse=True)   ## (102)
                                retained_cells = sorted_icx_to_box[:5]                                                     ## (103)
                                objects_detected = []                                                                      ## (104)
                                for icx in retained_cells:                                                                 ## (105)
                                    pred_vec = predictions[ibx,icx, icx_2_best_anchor_box[icx]]                            ## (106)
                                    class_labels_predi  = pred_vec[-4:]                                                    ## (107)
                                    class_labels_probs = torch.nn.Softmax(dim=0)(class_labels_predi)                       ## (108)
                                    class_labels_probs = class_labels_probs[:-1]                                           ## (109)
                                    ##  Because of the augmentation, the Softmax is computed over N+1 values: the N class
                                    ##  nodes plus the no-object node.  In the absence of an object, each of the N class
                                    ##  probabilities should stay below the uniform value 1.0/(N+1), which is 0.25 for
                                    ##  our N=3 classes.  That is where the threshold used below comes from.
                                    if torch.all(class_labels_probs < 0.25):                                               ## (110)
                                        predicted_class_label = None                                                       ## (111)
                                    else:                                                                                
                                        best_predicted_class_index = (class_labels_probs == class_labels_probs.max())      ## (112)
                                        best_predicted_class_index =torch.nonzero(best_predicted_class_index,as_tuple=True)## (113)
                                        predicted_class_label =self.rpg.class_labels[best_predicted_class_index[0].item()] ## (114)
                                        objects_detected.append(predicted_class_label)                                     ## (115)
                                print("[batch image=%d]  objects found in descending probability order: " % ibx, 
                                                                                                     objects_detected)     ## (116)
                        if display_images:
                            logger = logging.getLogger()
                            old_level = logger.level
                            logger.setLevel(100)
                            plt.figure(figsize=[15,4])
                            plt.imshow(np.transpose(torchvision.utils.make_grid(im_tensor, normalize=True,
                                                                             padding=3, pad_value=255).cpu(), (1,2,0)))
                            plt.show()
                            logger.setLevel(old_level)
            print("\nFinished Training\n")
            plt.figure(figsize=(10,5))
            plt.title("Loss vs. Iterations")
            plt.plot(Loss_tally, label="training loss")
            plt.xlabel("iterations")
            plt.ylabel("Loss")
            plt.legend()
            plt.savefig("training_loss.png")
            plt.show()
            torch.save(net.state_dict(), self.rpg.path_saved_yolo_model)
            return net


        def save_yolo_model(self, model):
            '''
            Save the trained yolo model to a disk file
            '''
            torch.save(model.state_dict(), self.rpg.path_saved_yolo_model)


        def run_code_for_testing_multi_instance_detection(self, net, display_images=False):        
            yolo_debug = False
            net.load_state_dict(torch.load(self.rpg.path_saved_yolo_model))
            net = net.to(self.rpg.device)
            yolo_interval = self.rpg.yolo_interval
            num_yolo_cells = (self.rpg.image_size[0] // yolo_interval) * (self.rpg.image_size[1] // yolo_interval)
            num_anchor_boxes =  5    # (height/width)   1/5  1/3  1/1  3/1  5/1
            ##  The next five assignments are for the calculation of the confusion matrix:
            confusion_matrix = torch.zeros(3,3)   #  We have only 3 classes:  Dr. Eval, house, and watertower
            class_correct = [0] * len(self.rpg.class_labels)
            class_total = [0] * len(self.rpg.class_labels)
            totals_for_conf_mat = 0
            totals_correct = 0
            ##  We also need to report the IoU values for the different types of objects
            iou_scores = [0] * len(self.rpg.class_labels)
            num_of_validation_images = len(self.test_dataloader)
            print("\n\nNumber of images in the validation dataset: ", num_of_validation_images)
            with torch.no_grad():
                for iter, data in enumerate(self.test_dataloader):
                    ##  In the following, the tensor bbox_label_tensor looks like: tensor([0,0,13,13,13], device='cuda:0',dtype=torch.uint8)
                    ##  where '0' is a genuine class label for 'Dr.Eval' and the number 13 as a label represents the case when there is no
                    ##  object.  You see, each image has a max of 5 objects in it. So the 5 positions in the tensor are for each of those objects.
                    ##  The bounding-boxes for each of those five objects are in the tensor bbox_tensor and segmentation masks in seg_mask_tensor.
                    im_tensor, seg_mask_tensor, bbox_tensor, bbox_label_tensor, num_objects_in_image = data
                    if iter % 50 == 49:
                        if display_images:
                            print("\n\n\n\nShowing output for test image %d: " % (iter+1))
                        im_tensor   = im_tensor.to(self.rpg.device)
                        seg_mask_tensor = seg_mask_tensor.to(self.rpg.device)                 
                        bbox_tensor = bbox_tensor.to(self.rpg.device)
                        output = net(im_tensor)
                        predictions = output.view(self.rpg.batch_size,num_yolo_cells,num_anchor_boxes,9)
                        for ibx in range(predictions.shape[0]):                             # for each batch image
                            ## Our goal is to look through all the cells and identify at most five cell/anchor_box pairs for
                            ## which the value in the first element of the predicted yolo_vectors is the highest:
                            icx_2_best_anchor_box = {ic : None for ic in range(36)}
                            for icx in range(predictions.shape[1]):                         # for each yolo cell
                                cell_predi = predictions[ibx,icx]               
                                prev_best = 0
                                for anchor_bdx in range(cell_predi.shape[0]):
                                    if cell_predi[anchor_bdx][0] > cell_predi[prev_best][0]:
                                        prev_best = anchor_bdx
                                best_anchor_box_icx = prev_best   
                                icx_2_best_anchor_box[icx] = best_anchor_box_icx
                            sorted_icx_to_box = sorted(icx_2_best_anchor_box, 
                                       key=lambda x: predictions[ibx,x,icx_2_best_anchor_box[x]][0].item(), reverse=True)
                            retained_cells = sorted_icx_to_box[:5]
                        ## We will now identify the objects in the retained cells and also extract their bounding boxes:
                        objects_detected = []
                        predicted_bboxes  = []
                        predicted_labels_for_bboxes = []
                        predicted_label_index_vals = []
                        for icx in retained_cells:
                            pred_vec = predictions[ibx,icx, icx_2_best_anchor_box[icx]]
                            class_labels_predi  = pred_vec[-4:]                        
                            class_labels_probs = torch.nn.Softmax(dim=0)(class_labels_predi)
                            class_labels_probs = class_labels_probs[:-1]
                            if torch.all(class_labels_probs < 0.2): 
                                predicted_class_label = None
                            else:
                                ## Get the predicted class label:
                                best_predicted_class_index = (class_labels_probs == class_labels_probs.max())
                                best_predicted_class_index = torch.nonzero(best_predicted_class_index, as_tuple=True)
                                predicted_label_index_vals.append(best_predicted_class_index[0].item())
                                predicted_class_label = self.rpg.class_labels[best_predicted_class_index[0].item()]
                                predicted_labels_for_bboxes.append(predicted_class_label)
                                ## Analyze the predicted regression elements:
                                pred_regression_vec = pred_vec[1:5].cpu()
                                del_x,del_y = pred_regression_vec[0], pred_regression_vec[1]
                                h,w = pred_regression_vec[2], pred_regression_vec[3]
                                h *= yolo_interval
                                w *= yolo_interval                               
                                cell_row_index =  icx // 6
                                cell_col_index =  icx % 6
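                                ## The 6 in the two lines above is the number of yolo cells per image row/column,
                                ## i.e., 128 // yolo_interval for 128x128 images with yolo_interval = 20.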
                                bb_center_x = cell_col_index * yolo_interval  +  yolo_interval/2  +  del_x * yolo_interval
                                bb_center_y = cell_row_index * yolo_interval  +  yolo_interval/2  +  del_y * yolo_interval
                                bb_top_left_x =  int(bb_center_x - w / 2.0)
                                bb_top_left_y =  int(bb_center_y - h / 2.0)
                                predicted_bboxes.append( [bb_top_left_x, bb_top_left_y, int(w), int(h)] )
                        ## make a deep copy of the predicted_bboxes for eventual visual display:
                        saved_predicted_bboxes =[ predicted_bboxes[i][:] for i in range(len(predicted_bboxes)) ]
                        ##  To account for the batch axis, bbox_tensor is of shape [1,5,4]. At this point, we get rid of the batch axis
                        ##  and turn the tensor into a numpy array.
                        gt_bboxes = torch.squeeze(bbox_tensor).cpu().numpy()
                        ## NOTE:  At this point the GT bboxes are in the (x1,y1,x2,y2) format and the predicted bboxes in the (x,y,w,h) format,
                        ##        where (x1,y1) are the coords of the top-left corner and (x2,y2) of the bottom-right corner of a bbox.
                        ##        In the (x,y) convention, x is the horiz coord positive to the right and y is the vert coord positive downwards.
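                        ##        For example, a predicted bbox [20, 30, 40, 50] in the (x,y,w,h) format becomes [20, 30, 60, 80]
                        ##        after the conversion below to the (x1,y1,x2,y2) format.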
                        for pred_bbox in predicted_bboxes:
                            w,h = pred_bbox[2], pred_bbox[3]
                            pred_bbox[2] = pred_bbox[0] + w
                            pred_bbox[3] = pred_bbox[1] + h
                        if yolo_debug:
                            print("\n\nAFTER FIXING:")                        
                            print("\npredicted_bboxes: ")
                            print(predicted_bboxes)
                            print("\nGround Truth bboxes:")
                            print(gt_bboxes)
                        ## These are the mappings from indexes for the predicted bboxes to the indexes for the gt bboxes:
                        mapping_from_pred_to_gt = { i : None for i in range(len(predicted_bboxes))}
                        for i in range(len(predicted_bboxes)):
                            gt_possibles = {k : 0.0 for k in range(5)}      ## 0.0 for IoU 
                            for j in range(len(gt_bboxes)):
                                if all(gt_bboxes[j][x] == 0 for x in range(4)): continue       ## 4 is for the four coords of a bbox
                                gt_possibles[j] = self.IoU_calculator(predicted_bboxes[i], gt_bboxes[j])
                            sorted_gt_possibles =  sorted(gt_possibles, key=lambda x: gt_possibles[x], reverse=True)
                            if display_images:
                                print("For predicted bbox %d: the best gt bbox is: %d" % (i, sorted_gt_possibles[0]))
                            mapping_from_pred_to_gt[i] = (sorted_gt_possibles[0], gt_possibles[sorted_gt_possibles[0]])
                        ##  If you want to see the IoU scores for the overlap between each predicted bbox and all of the individual gt bboxes:
                        if display_images:
                            print("\n\nmapping_from_pred_to_gt: ", mapping_from_pred_to_gt)
                        ## For each predicted bbox, we now know the best gt bbox in terms of the maximal IoU.
                        ## Given a pair of corresponding (pred_bbox, gt_bbox), the next question is how their labels compare.
                        ## These are the numeric class labels for each of the gt bboxes in the image.
                        gt_labels = torch.squeeze(bbox_label_tensor).cpu().numpy()
                        ## These are the predicted numeric class labels for the predicted bboxes in the image
                        pred_labels_ints = predicted_label_index_vals
                        for i,bbox_pred in enumerate(predicted_bboxes):
                            if display_images:
                                print("for i=%d, the predicted label: %s    the ground_truth label: %s" % (i, predicted_labels_for_bboxes[i], 
                                                                                  self.rpg.class_labels[gt_labels[mapping_from_pred_to_gt[i][0]]]))
                            if gt_labels[pred_labels_ints[i]] != 13:
                                confusion_matrix[gt_labels[mapping_from_pred_to_gt[i][0]]][pred_labels_ints[i]]  +=  1
                            totals_for_conf_mat += 1
                            class_total[gt_labels[mapping_from_pred_to_gt[i][0]]] += 1
                            if gt_labels[mapping_from_pred_to_gt[i][0]] == pred_labels_ints[i]:
                                totals_correct += 1
                                class_correct[gt_labels[mapping_from_pred_to_gt[i][0]]] += 1
                            iou_scores[gt_labels[mapping_from_pred_to_gt[i][0]]] += mapping_from_pred_to_gt[i][1]
                        ## If the user wants to see the image with the predicted bboxes and also the predicted labels:
                        if display_images:
                            predicted_bboxes = saved_predicted_bboxes
                            if yolo_debug:
                                print("[batch image=%d]  objects found in descending probability order: " % ibx, predicted_labels_for_bboxes)
                            logger = logging.getLogger()
                            old_level = logger.level
                            logger.setLevel(100)
                            fig = plt.figure(figsize=[12,12])
                            ax = fig.add_subplot(111)
                            display_scale = 2
                            new_im_tensor = torch.nn.functional.interpolate(im_tensor, scale_factor=display_scale, mode='bilinear', align_corners=False)
                            ax.imshow(np.transpose(torchvision.utils.make_grid(new_im_tensor, normalize=True, padding=3, pad_value=255).cpu(), (1,2,0)))
                            for i,bbox_pred in enumerate(predicted_bboxes):
                                x,y,w,h = np.array(bbox_pred)                                                                     
                                x,y,w,h = [item * display_scale for item in (x,y,w,h)]
                                rect = Rectangle((x,y),w,h,angle=0.0,edgecolor='r',fill = False,lw=2) 
                                ax.add_patch(rect)                                                                      
                                ax.annotate(predicted_labels_for_bboxes[i], (x,y-1), color='red', weight='bold', fontsize=10*display_scale)
                            plt.show()
                            logger.setLevel(old_level)
                                                                                            
            ##  Our next job is to present to the user the information collected for the confusion matrix for the validation dataset:
            if yolo_debug:
                print("\nConfusion Matrix: ", confusion_matrix)
                print("\nclass_correct: ", class_correct)
                print("\nclass_total: ", class_total)
                print("\ntotals_for_conf_mat: ", totals_for_conf_mat)
                print("\ntotals_correct: ", totals_correct)
            for j in range(len(self.rpg.class_labels)):
                print('Prediction accuracy for %5s : %2d %%' % (self.rpg.class_labels[j], 100 * class_correct[j] / class_total[j]))
            print("\n\n\nOverall accuracy of multi-instance detection on %d test images: %d %%" % (num_of_validation_images, 
                                                                                          100 * sum(class_correct) / float(sum(class_total))))
            print("""\nNOTE 1: This accuracy does not factor in the missed detection. This number is related to just the 
       mis-labeling errors for the detected instances.  Percentage of the missed detections are shown in
       the last column of the table shown below.""")
            print("\n\nDisplaying the confusion matrix:\n")
            out_str = "               "
            for j in range(len(self.rpg.class_labels)):  out_str +=  "%15s" % self.rpg.class_labels[j]   
            out_str +=  "%15s" % "missing"
            print(out_str + "\n")
            for i,label in enumerate(self.rpg.class_labels):
                out_percents = [100 * confusion_matrix[i,j] / float(class_total[i]) for j in range(len(self.rpg.class_labels))]
                missing_percent = 100 - sum(out_percents)
                out_percents.append(missing_percent)
                out_percents = ["%.2f" % item.item() for item in out_percents]
                out_str = "%10s:  " % self.rpg.class_labels[i]
                for j in range(len(self.rpg.class_labels)+1): out_str +=  "%15s" % out_percents[j]
                print(out_str)
            print("\n\nNOTE 2: 'missing' means that an object instance of that label was NOT extracted from the image.")   
            print("\nNOTE 3: 'prediction accuracy' means the labeling accuracy for the extracted objects.")   
            print("\nNOTE 4: True labels are in the left-most column and the predicted labels at the top of the table.")

            ##  Finally, we present to the user the IoU scores for each of the object types:
            iou_score_by_label = {self.rpg.class_labels[i] : 0.0 for i in range(len(self.rpg.class_labels))}
            for i,label in enumerate(self.rpg.class_labels):
                iou_score_by_label[self.rpg.class_labels[i]] = iou_scores[i]/float(class_total[i])
            print("\n\nIoU scores for the different types of objects: ")
            for obj_type in iou_score_by_label:
                print("\n    %10s:    %.4f" % (obj_type, iou_score_by_label[obj_type]))



        def IoU_calculator(self, bbox1, bbox2, seg_mask1=None, seg_mask2=None):
            """
            I assume that a bbox is defined by a 4-tuple, with the first two integers standing for the
            top-left corner and the last two integers for the bottom-right corner, both in the (x,y) format.
            By (x,y) format I mean that x stands for the horizontal coord, positive to the right, and y for
            the vertical coord, positive pointing downwards.
            """
            union = intersection = 0
            b1x1,b1y1,b1x2,b1y2 = bbox1                             ## b1 refers to bbox1
            b2x1,b2y1,b2x2,b2y2 = bbox2                             ## b2 refers to bbox2
            for x in range(self.rpg.image_size[0]):                 ## image is 128x128
                for y in range(self.rpg.image_size[1]):
                    if  ( ( ( (x >= b1x1) and (x >= b2x1) ) and  ( (y >= b1y1) and (y >= b2y1) ) )  and  \
                        ( ( (x < b1x2) and (x < b2x2) )  and  ((y < b1y2)  and (y < b2y2)) ) ): 
                        intersection += 1
                    if  ( ( (x >= b1x1) and (x <b1x2) ) and  ((y >= b1y1) and (y < b1y2)) ):
                        union += 1            
                    if  ( ( (x >= b2x1) and (x <b2x2) ) and  ((y >= b2y1) and (y < b2y2)) ):
                        union += 1            
            union = union - intersection
            if union == 0.0:
                raise Exception("IoU calculation: the union of the two bounding boxes has zero area")
            iou = intersection / float(union)
            return iou
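
        ##  As an aside, the same IoU value can also be obtained in closed form without scanning the image
        ##  pixels.  The following is a minimal illustrative sketch that is NOT invoked anywhere in this
        ##  module; it assumes the same (x1,y1,x2,y2) corner convention as IoU_calculator() above:
        def IoU_calculator_closed_form(self, bbox1, bbox2):
            """
            Illustrative sketch only (not part of the module's interface).  Computes IoU directly from the
            corner coordinates:  intersection = overlap_width * overlap_height  and
            union = area1 + area2 - intersection.
            """
            b1x1,b1y1,b1x2,b1y2 = bbox1                             ## b1 refers to bbox1
            b2x1,b2y1,b2x2,b2y2 = bbox2                             ## b2 refers to bbox2
            overlap_w = max(0, min(b1x2, b2x2) - max(b1x1, b2x1))
            overlap_h = max(0, min(b1y2, b2y2) - max(b1y1, b2y1))
            intersection = overlap_w * overlap_h
            union = (b1x2 - b1x1) * (b1y2 - b1y1)  +  (b2x2 - b2x1) * (b2y2 - b2y1)  -  intersection
            return intersection / float(union) if union > 0 else 0.0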


    ###%%%
    #######################################################################################################
    ###################  Start Definition of Inner Class RPN (Region Proposal Network)  ###################
    class RPN(nn.Module):             
        """
        I have not yet mentioned this class in the documentation page for this module
        because its implementation is not finished.
        """
        def __init__(self, rpg):
            super(RegionProposalGenerator.RPN, self).__init__()
            self.rpg = rpg
            self.train_dataloader = None
            self.test_dataloader = None

        def show_sample_images_from_dataset(self, rpg):
            data = next(iter(self.train_dataloader))    
            real_batch = data[0]
            first_im = real_batch[0]
            self.rpg.display_tensor_as_image(torchvision.utils.make_grid(real_batch, padding=2, pad_value=1, normalize=True))

        def set_dataloaders(self, train=False, test=False):
            if train:
                dataserver_train = RegionProposalGenerator.PurdueDrEvalDataset(self.rpg, "train", 
                                                                                      dataroot_train=self.rpg.dataroot_train)
                self.train_dataloader = torch.utils.data.DataLoader(dataserver_train, self.rpg.batch_size, 
                                                                                                 shuffle=True, num_workers=4)
            if test:
                dataserver_test = RegionProposalGenerator.PurdueDrEvalDataset(self.rpg, "test", 
                                                                                        dataroot_test=self.rpg.dataroot_test)
                self.test_dataloader = torch.utils.data.DataLoader(dataserver_test, self.rpg.batch_size, shuffle=False, 
                                                                                                               num_workers=4)

        def check_dataloader(self, train=False, test=False):
            if train:      
                dataloader = self.train_dataloader
            if test:
                dataloader = self.test_dataloader
            for i, data in enumerate(dataloader):          
                im_tensor,mask_tensor,bbox_tensor, image_label = data
                for idx in range(im_tensor.shape[0]):
                    self.rpg.display_tensor_as_image( im_tensor[idx], "batch number: %d" % i)
                    print("\n\nbbox tensor: ", bbox_tensor[idx])
                    self.rpg.display_tensor_as_image( mask_tensor[idx], "batch number: %d" % i)
                im_tensor   = im_tensor.to(self.rpg.device)
                mask_tensor = mask_tensor.type(torch.FloatTensor)
                mask_tensor = mask_tensor.to(self.rpg.device)                 
                bbox_tensor = bbox_tensor.to(self.rpg.device)

        class SkipBlockDN(nn.Module):
            def __init__(self, in_ch, out_ch, downsample=False, skip_connections=True):
                super(RegionProposalGenerator.RPN.SkipBlockDN, self).__init__()
                self.downsample = downsample
                self.skip_connections = skip_connections
                self.in_ch = in_ch
                self.out_ch = out_ch
                self.convo1 = nn.Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
                self.convo2 = nn.Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
                self.bn1 = nn.BatchNorm2d(out_ch)
                self.bn2 = nn.BatchNorm2d(out_ch)
                if downsample:
                    self.downsampler = nn.Conv2d(in_ch, out_ch, 1, stride=2)
            def forward(self, x):
                identity = x                                     
                out = self.convo1(x)                              
                out = self.bn1(out)                              
                out = torch.nn.functional.relu(out)
                if self.in_ch == self.out_ch:
                    out = self.convo2(out)                              
                    out = self.bn2(out)                              
                    out = torch.nn.functional.relu(out)
                if self.downsample:
                    out = self.downsampler(out)
                    identity = self.downsampler(identity)
                if self.skip_connections:
                    if self.in_ch == self.out_ch:
                        out += identity                              
                    else:
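                        ##  The two slice-wise additions below implicitly assume that out_ch == 2 * in_ch,
                        ##  which is how this block is instantiated in mUnet_for_RPN (the 64 --> 128 transition).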
                        out[:,:self.in_ch,:,:] += identity
                        out[:,self.in_ch:,:,:] += identity
                return out

        class SkipBlockUP(nn.Module):
            def __init__(self, in_ch, out_ch, upsample=False, skip_connections=True):
                super(RegionProposalGenerator.RPN.SkipBlockUP, self).__init__()
                self.upsample = upsample
                self.skip_connections = skip_connections
                self.in_ch = in_ch
                self.out_ch = out_ch
                self.convoT1 = nn.ConvTranspose2d(in_ch, out_ch, 3, padding=1)
                self.convoT2 = nn.ConvTranspose2d(in_ch, out_ch, 3, padding=1)
                self.bn1 = nn.BatchNorm2d(out_ch)
                self.bn2 = nn.BatchNorm2d(out_ch)
                if upsample:
                    self.upsampler = nn.ConvTranspose2d(in_ch, out_ch, 1, stride=2, dilation=2, output_padding=1, padding=0)

            def forward(self, x):
                identity = x                                     
                out = self.convoT1(x)                              
                out = self.bn1(out)                              
                out = torch.nn.functional.relu(out)
                if self.in_ch == self.out_ch:
                    out = self.convoT2(out)                              
                    out = self.bn2(out)                              
                    out = torch.nn.functional.relu(out)
                if self.upsample:
                    out = self.upsampler(out)
                    identity = self.upsampler(identity)
                if self.skip_connections:
                    if self.in_ch == self.out_ch:
                        out += identity                              
                    else:
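                        ##  The addition below implicitly assumes that in_ch == 2 * out_ch, which is how this
                        ##  block is instantiated in mUnet_for_RPN (the 128 --> 64 transition); only the second
                        ##  half of the identity's channels is re-used.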
                        out += identity[:,self.out_ch:,:,:]
                return out

        class mUnet_for_RPN(nn.Module):
            def __init__(self, skip_connections=True, depth=16):
                super(RegionProposalGenerator.RPN.mUnet_for_RPN, self).__init__()
                self.depth = depth // 2
                self.conv_in = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
                self.pool = nn.MaxPool2d(2, 2)
                ##  For the DN arm of the U:
                self.bn1DN  = nn.BatchNorm2d(64)
                self.bn2DN  = nn.BatchNorm2d(128)
                self.skip64DN_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip64DN_arr.append(RegionProposalGenerator.RPN.SkipBlockDN(64, 64, skip_connections=skip_connections))
                self.skip64dsDN = RegionProposalGenerator.RPN.SkipBlockDN(64, 64, downsample=True, skip_connections=skip_connections)
                self.skip64to128DN = RegionProposalGenerator.RPN.SkipBlockDN(64, 128, skip_connections=skip_connections )
                self.skip128DN_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip128DN_arr.append(RegionProposalGenerator.RPN.SkipBlockDN(128, 128, skip_connections=skip_connections))
                self.skip128dsDN = RegionProposalGenerator.RPN.SkipBlockDN(128,128, downsample=True, skip_connections=skip_connections)

                ##  For the UP arm of the U:
                self.bn1UP  = nn.BatchNorm2d(128)
                self.bn2UP  = nn.BatchNorm2d(64)
                self.skip64UP_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip64UP_arr.append(RegionProposalGenerator.RPN.SkipBlockUP(64, 64, skip_connections=skip_connections))
                self.skip64usUP = RegionProposalGenerator.RPN.SkipBlockUP(64, 64, upsample=True, skip_connections=skip_connections)
                self.skip128to64UP = RegionProposalGenerator.RPN.SkipBlockUP(128, 64, skip_connections=skip_connections )
                self.skip128UP_arr = nn.ModuleList()
                for i in range(self.depth):
                    self.skip128UP_arr.append(RegionProposalGenerator.RPN.SkipBlockUP(128, 128, skip_connections=skip_connections))
                self.skip128usUP = RegionProposalGenerator.RPN.SkipBlockUP(128,128,
                                            upsample=True, skip_connections=skip_connections)
                self.conv_out = nn.ConvTranspose2d(64, 2, 3, stride=2,dilation=2,output_padding=1,padding=2)

            def forward(self, x):
                x = self.pool(torch.nn.functional.relu(self.conv_in(x)))          
                for i,skip64 in enumerate(self.skip64DN_arr[:self.depth//4]):
                    x = skip64(x)                
                num_channels_to_save1 = x.shape[1] // 2
                save_for_upside_1 = x[:,:num_channels_to_save1,:,:].clone()
                x = self.skip64dsDN(x)
                for i,skip64 in enumerate(self.skip64DN_arr[self.depth//4:]):
                    x = skip64(x)                
                x = self.bn1DN(x)
                num_channels_to_save2 = x.shape[1] // 2
                save_for_upside_2 = x[:,:num_channels_to_save2,:,:].clone()
                x = self.skip64to128DN(x)
                for i,skip128 in enumerate(self.skip128DN_arr[:self.depth//4]):
                    x = skip128(x)                
                x = self.bn2DN(x)
                num_channels_to_save3 = x.shape[1] // 2
                save_for_upside_3 = x[:,:num_channels_to_save3,:,:].clone()
                for i,skip128 in enumerate(self.skip128DN_arr[self.depth//4:]):
                    x = skip128(x)                
                x = self.skip128dsDN(x)

                ## Coming up from the bottom of U on the other side:
                x = self.skip128usUP(x)          
                for i,skip128 in enumerate(self.skip128UP_arr[:self.depth//4]):
                    x = skip128(x)                
                x[:,:num_channels_to_save3,:,:] =  save_for_upside_3
                x = self.bn1UP(x)
                for i,skip128 in enumerate(self.skip128UP_arr[:self.depth//4]):
                    x = skip128(x)                
                x = self.skip128to64UP(x)
                for i,skip64 in enumerate(self.skip64UP_arr[self.depth//4:]):
                    x = skip64(x)                
                x[:,:num_channels_to_save2,:,:] =  save_for_upside_2
                x = self.bn2UP(x)
                x = self.skip64usUP(x)
                for i,skip64 in enumerate(self.skip64UP_arr[:self.depth//4]):
                    x = skip64(x)                
                x[:,:num_channels_to_save1,:,:] =  save_for_upside_1
                x = self.conv_out(x)
                return x
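
        ##  A minimal shape sanity-check for the network defined above; with the default settings the output
        ##  should be a 2-channel map at the input resolution:
        ##
        ##      net = RegionProposalGenerator.RPN.mUnet_for_RPN(skip_connections=True, depth=16)
        ##      x = torch.zeros(1, 3, 128, 128)                ## a dummy batch with one 128x128 RGB image
        ##      y = net(x)
        ##      print(y.shape)                                 ## expected: torch.Size([1, 2, 128, 128])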

        class SegmentationLoss(nn.Module):
            def __init__(self, batch_size):
                super(RegionProposalGenerator.RPN.SegmentationLoss, self).__init__()
                self.batch_size = batch_size
            def forward(self, output, mask_tensor):
                composite_loss = torch.zeros(1,self.batch_size)
                mask_based_loss = torch.zeros(1,5)
                for idx in range(self.batch_size):
                    outputh = output[idx,0,:,:]
                    for mask_layer_idx in range(mask_tensor.shape[1]):       ## axis 1 indexes the mask layers; axis 0 is the batch axis
                        mask = mask_tensor[idx,mask_layer_idx,:,:]
                        element_wise = (outputh - mask)**2                   
                        mask_based_loss[0,mask_layer_idx] = torch.mean(element_wise)
                    composite_loss[0,idx] = torch.sum(mask_based_loss)
                return torch.sum(composite_loss) / self.batch_size
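
        ##  A hypothetical usage sketch for the SegmentationLoss class above (note that it is not invoked by
        ##  run_code_for_training_RPN() below, which uses nn.MSELoss instead); the tensor shapes shown are
        ##  assumptions for illustration only:
        ##
        ##      criterion = RegionProposalGenerator.RPN.SegmentationLoss(batch_size=4)
        ##      loss = criterion(net_output, mask_tensor)   ## net_output: (4, C, H, W), mask_tensor: (4, num_layers, H, W)
        ##      loss.backward()                             ## with num_layers no greater than 5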


        def run_code_for_training_RPN(self, net):        
            filename_for_out1 = "performance_numbers_" + str(self.rpg.epochs) + ".txt"
            FILE1 = open(filename_for_out1, 'w')
            net = copy.deepcopy(net)
            net = net.to(self.rpg.device)
            criterion1 = nn.MSELoss()
            optimizer = optim.SGD(net.parameters(), lr=self.rpg.learning_rate, momentum=self.rpg.momentum)
            print("\n\nStarting training loop...\n\n")
            start_time = time.perf_counter()
            Loss_tally = []
            composite_mask_tensor = torch.zeros(self.rpg.batch_size, 2, self.rpg.image_size[0], self.rpg.image_size[1])
            for epoch in range(self.rpg.epochs):  
                print("")
                running_loss_segmentation = 0.0
                for i, data in enumerate(self.train_dataloader):   
                    im_tensor,mask_tensor,bbox_tensor, image_label = data
                    im_tensor   = im_tensor.to(self.rpg.device)
                    mask_tensor = mask_tensor.type(torch.FloatTensor)
                    mask_tensor = mask_tensor.to(self.rpg.device)                 
                    bbox_tensor = bbox_tensor.to(self.rpg.device)
                    im_tensor_masked = im_tensor * mask_tensor
                    mask_tensor_complement = 1 - mask_tensor
                    composite_mask_tensor[:,0,:,:] = mask_tensor[:,0,:,:]
                    composite_mask_tensor[:,1,:,:] = mask_tensor_complement[:,0,:,:]
                    composite_mask_tensor = composite_mask_tensor.to(self.rpg.device)
                    optimizer.zero_grad()
                    output = net(im_tensor_masked) 
                    segmentation_loss = criterion1(output, composite_mask_tensor)  
                    segmentation_loss.backward()
                    optimizer.step()
                    running_loss_segmentation += segmentation_loss.item()    
                    if i%100==99:    
                        current_time = time.perf_counter()
                        elapsed_time = current_time - start_time 
                        avg_loss_segmentation = running_loss_segmentation / float(100)
                        print("[epoch:%d/%d, iter=%4d  elapsed_time=%5d secs]      mean MSE loss: %7.4f" % 
                                                         (epoch+1,self.rpg.epochs, i+1, elapsed_time, avg_loss_segmentation))
                        Loss_tally.append(running_loss_segmentation)
                        FILE1.write("%.3f\n" % avg_loss_segmentation)
                        FILE1.flush()
                        running_loss_segmentation = 0.0
            print("\nFinished Training\n")
            plt.figure(figsize=(10,5))
            plt.title("RPN Training: Loss vs. Iterations")
            plt.plot(Loss_tally, label="MSE loss (summed over 100 iterations)")
            plt.xlabel("iterations")
            plt.ylabel("Loss")
            plt.legend()
            plt.savefig("rpn_training_loss.png")
            plt.show()
            self.save_RPN_model(net)
            return net
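
        ##  One way to exercise the training code above (a sketch; the constructor options for
        ##  RegionProposalGenerator are omitted here since they are set elsewhere):
        ##
        ##      rpg = RegionProposalGenerator( ... )
        ##      rpn = RegionProposalGenerator.RPN(rpg)
        ##      rpn.set_dataloaders(train=True)
        ##      model = RegionProposalGenerator.RPN.mUnet_for_RPN(skip_connections=True, depth=16)
        ##      trained_model = rpn.run_code_for_training_RPN(model)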


        def save_RPN_model(self, model):
            '''
            Save the trained RPN model to a disk file
            '''
            torch.save(model.state_dict(), self.rpg.path_saved_RPN_model)

        def run_code_for_testing_RPN(self, net):
            net.load_state_dict(torch.load(self.rpg.path_saved_RPN_model))
            with torch.no_grad():
                for i, data in enumerate(self.test_dataloader):
                    im_tensor,mask_tensor,bbox_tensor, image_label = data
                    if self.rpg.debug_test and i % 50 == 0:
                        print("\n\n\n\nShowing output for test batch %d: " % (i+1))
                        outputs = net(im_tensor)                           #### Network Output
                        outputs = outputs[:,0,:,:]
                        outputs_smoothed = outputs.clone().detach().cpu()
                        display_tensor = torch.zeros(self.rpg.batch_size,3, 4 * self.rpg.image_size[0],
                                                                           self.rpg.image_size[1], dtype=float).cpu()
                        display_tensor[:,:,:128,:]     =  im_tensor
                        display_tensor[:,:,128:256,:]  =  mask_tensor
                        display_tensor[:,:,256:384,:]  =  torch.unsqueeze(outputs,1)
                        display_tensor[:,:,384:512,:]  =  torch.unsqueeze(outputs_smoothed,1)
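                        ##  The display_tensor assembled above stacks four 128-row panels vertically for each
                        ##  image in the batch: the input image, the ground-truth mask, the raw network output,
                        ##  and a copy of that output into which the gt bounding box is drawn by the loop below.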
                        for idx in range(self.rpg.batch_size):
                            bb_tensor = bbox_tensor[idx]
                            i1 = int(bb_tensor[1]) + 384
                            i2 = int(bb_tensor[3]) + 384
                            j1 = int(bb_tensor[0])
                            j2 = int(bb_tensor[2])
                            display_tensor[idx,0,i1:i2,j1] = 1.0
                            display_tensor[idx,0,i1:i2,j2] = 1.0
                            display_tensor[idx,0,i1,j1:j2] = 1.0
                            display_tensor[idx,0,i2,j1:j2] = 1.0
                        display_tensor[:,:,126:129,:] = 1.0
                        display_tensor[:,:,254:257,:] = 1.0
                        display_tensor[:,:,382:385,:] = 1.0
                        plt.imshow(np.transpose(torchvision.utils.make_grid(display_tensor, normalize=False, 
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                        plt.show()

        def test_rpn_model_on_one_image(self, image, rpn_model=None):
            if rpn_model is None:
                rpn_model = self.mUnet_for_RPN(skip_connections=True, depth=16)
                rpn_model.load_state_dict(torch.load(self.rpg.path_saved_RPN_model))
            resize_and_normalize = tvt.Compose( [tvt.Resize((128,128)), 
                                                 tvt.ToTensor(), 
                                                 tvt.Normalize(mean=[0.5], std=[0.5]) ] )
            display_tensor = torch.zeros(1,3,self.rpg.image_size[0], 2*self.rpg.image_size[1], dtype=float).cpu()
            with torch.no_grad():
                image = Image.open(image)
                im_tensor = resize_and_normalize(image)
                display_tensor[:,:,:,:128] = im_tensor
                im_tensor = torch.unsqueeze(im_tensor,0)
                output = rpn_model(im_tensor)     

                output = output[:,0,:,:]
                output[output <= 0.0] = 0
                output[output > 0.0] = 1.0
                display_tensor[:,:,:,128:] = output
                plt.imshow(np.transpose(torchvision.utils.make_grid(display_tensor, normalize=False, 
                                                                         padding=3, pad_value=255).cpu(), (1,2,0)))
                plt.show()


    ###%%%
    #######################################################################################################    
    ################################ Class based utility functions #######################################

    def graying_resizing_binarizing(self, image_file, polarity=1, area_threshold=0, min_brightness_level=100):
        '''
        This is a demonstration of some of the more basic and commonly used image
        transformations from the torchvision.transforms module.  The large comment
        blocks are meant to serve as a tutorial introduction to the syntax used for invoking
        these transformations.  The transformations shown can be used for converting a
        color image into a grayscale image, for resizing an image, for converting a
        PIL.Image into a tensor and a tensor back into a PIL.Image object, and so on.
        '''
        if os.path.isfile(image_file):
            im_pil = Image.open(image_file)
        else:
            sys.exit("the image file %s does not exist --- aborting" % image_file)
        self.displayImage6(im_pil, "input_image")

        ###  The next three statements are three examples of calls to the constructor
        ###  of the torchvision.transforms.Compose class whose contract, as its
        ###  name implies, is to compose a sequence of transformations to be applied to an
        ###  image.  The instance of Compose constructed in line (A) has only one
        ###  transformation in it, which would resize an image to a 64x64 array of pixels.
        ###  On the other hand, the instance constructed in line (B) includes two
        ###  transformations: the first transformation is for converting an image from
        ###  "RGB" to gray scale, and the second for resizing an image as before to an
        ###  array of 64x64 pixels.  The instance of Compose constructed in line (C)
        ###  incorporates a sequence of five transformations. If invoked on a color image,
        ###  it will convert the image into grayscale, then resize it to an array of 64x64
        ###  pixels, convert the array to a tensor, normalize the tensor using a mean of 0.5
        ###  and a standard deviation of 0.5 (which maps the values from [0,1] to [-1,1]),
        ###  and, finally, convert the tensor back into a PIL image object.
        ###
        ###  A most important thing to note here is that each of the instances returned in
        ###  lines (A), (B), and (C) is a callable object, meaning that the instance can
        ###  be called directly, with the image to which the transformations are to be
        ###  applied, as the argument to the instance.
        ###
        ###  Note that in the Compose instance constructed in line (C), we had to
        ###  interpose the "ToTensor" transformation between the Resize and the Normalize
        ###  transformations because the Resize transformation returns an Image object that
        ###  cannot be normalized directly.  That is, the Normalize transformation is
        ###  meant for the normalization of tensors --- it takes a tensor as its input and
        ###  returns a tensor at its output.  If you want the final result of the sequence
        ###  of transformations in line (C) to be an Image, then you also need to include
        ###  the ToPILImage transformation at the end, as shown.
        ###  
        resize_xform = tvt.Compose( [ tvt.Resize((64,64)) ] )                               ## (A)

        gray_and_resize = tvt.Compose( [tvt.Grayscale(num_output_channels = 1),  
                                        tvt.Resize((64,64)) ] )                             ## (B)

        gray_resize_normalize = tvt.Compose( [tvt.Grayscale(num_output_channels = 1), 
                                              tvt.Resize((64,64)), 
                                              tvt.ToTensor(), 
                                              tvt.Normalize(mean=[0.5], std=[0.5]), 
                                              tvt.ToPILImage() ] )                          ## (C)

        ###  As explained in the previous comment block, the three statements shown above
        ###  are merely calls to the constructor of the Compose class for the creation of
        ###  instances.  As also mentioned previously, these instances are designed to be
        ###  callable; that is, they can be treated like function objects for actually
        ###  applying the transformations to a given image.  This is shown in the lines of
        ###  code that follow.
        ###
        ###  Applying the resize_xform of line (A) to an image:
        img = resize_xform( im_pil )
        self.displayImage6(img, "output_of_resize_xform")

        ###  Applying gray_and_resize of line (B) to an image:
        img = gray_and_resize( im_pil )
        self.displayImage6(img, "output_of_gray_and_resize")

        ###  Applying gray_resize_normalize of line (C) to an image:
        img = gray_resize_normalize( im_pil )
        self.displayImage6(img, "output_of_gray_resize_normalize")

        ###  Demonstrating the ToTensor transformation all by itself.  As in earlier
        ###  examples, first construct a callable instance of Compose and then invoke it
        ###  on the image which must of type PIL.Image.
        img_tensor = tvt.Compose([tvt.ToTensor()])
        img_data = img_tensor(img)
        print("\nshape of the img_data tensor: %s" % str(img_data.shape))               ##  (1,64,64)
        print("\n\n\nimg_tensor: %s" % str(img_data))
                           #
                           #  tensor([[[0.9333, 0.9569, 0.9647,  ..., 0.6745, 0.5882, 0.5569],
                           #           [0.8392, 0.8392, 0.7922,  ..., 0.6275, 0.6980, 0.7922],
                           #           [0.9255, 0.9176, 0.8157,  ..., 0.9725, 0.9725, 0.9882],
                           #           ...,
                           #           [0.4431, 0.4745, 0.5882,  ..., 0.6588, 0.7373, 0.6667],
                           #           [0.4431, 0.5098, 0.5725,  ..., 0.4667, 0.5255, 0.5412],
                           #           [0.5098, 0.5490, 0.5255,  ..., 0.4980, 0.6118, 0.5804]]])
                           #               

        ###  With the image in its 1x64x64 numeric tensor representation, we can apply a
        ###  comparison operator to the individual elements of the tensor to threshold
        ###  the image data.  Since the pixel values in a grayscale image (we have
        ###  grayscale because of an earlier transformation applied to the originally color
        ###  image) are between 0 and 255, and since ToTensor converts these numbers into
        ###  floating point numbers between 0.0 and 1.0, the thresholding operation applied
        ###  below sets to FALSE all pixel values below (roughly) the gray level 128 and to
        ###  TRUE all pixel values above it.
        img_data = img_data > 0.5                                                           ## (D)
        print("\n\n\nimg_data: %s" % str(img_data))
                           #
                           #  tensor([[[ True,  True,  True,  ...,  True,  True,  True],
                           #           [ True,  True,  True,  ...,  True,  True,  True],
                           #           [ True,  True,  True,  ...,  True,  True,  True],
                           #           ...,
                           #           [False, False,  True,  ...,  True,  True,  True],
                           #           [False,  True,  True,  ..., False,  True,  True],
                           #           [ True,  True,  True,  ..., False,  True,  True]]])

        ###  In order to visualize the thresholding effect achieved above, we need to
        ###  convert the Boolean pixel values back into numbers, which we can do by
        ###  calling float() on the output image tensor as shown below:
        img_data = img_data.float()                                                         ## (E)
        ###  Now we need to construct a Compose instance with the ToPILImage
        ###  transformation at its heart.  This we can do by:
        to_image_xform = tvt.Compose([tvt.ToPILImage()])                                    ## (F)
        ###  Invoking the callable to_image_xform instance on the tensor returned by the
        ###  call in line (E) gives us the desired PIL.Image object that can be
        ###  visualized.
        img = to_image_xform(img_data)
        self.displayImage6(img, "after_thresholding")

    def display_tensor_as_image2(self, tensor, title=""):
        '''
        This method converts the argument tensor into a photo image that you can display
        on your screen.  It can convert tensors of three different shapes
        into images: (3,H,W), (1,H,W), and (H,W), where H, for height, stands for the
        number of pixels in the vertical direction and W, for width, for the same
        along the horizontal direction.  When the first element of the shape is 3,
        that means that the tensor represents a color image in which each pixel in
        the (H,W) plane has three values for the three color channels.  On the other
        hand, when the first element is 1, that stands for a tensor that will be
        shown as a grayscale image.  And when the shape is just (H,W), that is
        automatically taken to be for a grayscale image.
        '''
        tensor_range = (torch.min(tensor).item(), torch.max(tensor).item())
        if tensor_range == (-1.0,1.0):
            ##  The tensors must be between 0.0 and 1.0 for the display:
            print("\n\n\nimage un-normalization called")
            tensor = tensor/2.0 + 0.5     # unnormalize
        plt.figure(title)
        ###  The call to plt.imshow() shown below needs a numpy array. We must also
        ###  transpose the array so that the number of channels (the same thing as the
        ###  number of color planes) is in the last element.  For a tensor, it would be in
        ###  the first element.
        if tensor.shape[0] == 3 and len(tensor.shape) == 3:
            plt.imshow( tensor.numpy().transpose(1,2,0) )
        ###  If the grayscale image was produced by calling torchvision.transforms'
        ###  ".ToPILImage()", and the result converted to a tensor, the tensor shape will
        ###  again have three elements in it; however, the first element, which stands for
        ###  the number of channels, will now be 1.
        elif tensor.shape[0] == 1 and len(tensor.shape) == 3:
            tensor = tensor[0,:,:]
            plt.imshow( tensor.numpy(), cmap = 'gray' )
        ###  For any one color channel extracted from the tensor representation of a color
        ###  image, the shape of the tensor will be (H,W):
        elif len(tensor.shape) == 2:
            plt.imshow( tensor.numpy(), cmap = 'gray' )
        else:
            sys.exit("\n\n\nfrom 'display_tensor_as_image2()': tensor for image is ill formed -- aborting")
        plt.show()


    def display_tensor_as_image(self, tensor, title=""):
        '''
        This method converts the argument tensor into a photo image that you can display
        on your screen.  It can convert tensors of three different shapes
        into images: (3,H,W), (1,H,W), and (H,W), where H, for height, stands for the
        number of pixels in the vertical direction and W, for width, for the same
        along the horizontal direction.  When the first element of the shape is 3,
        that means that the tensor represents a color image in which each pixel in
        the (H,W) plane has three values for the three color channels.  On the other
        hand, when the first element is 1, that stands for a tensor that will be
        shown as a grayscale image.  And when the shape is just (H,W), that is
        automatically taken to be for a grayscale image.
        '''
        print("\n\n\ndisplay_tensor_as_image() called with a tensor of type: %s" % tensor.type())
                                                                                  ##  torch.FloatTensor
        ###  The 'plt' in the following statement stands for the plotting module 
        ###  matplotlib.pyplot.  See the module import declarations at the beginning of
        ###  this module.
        plt.figure(title)

        ###  The call to plt.imshow() shown below needs a numpy array. We must also
        ###  transpose the array so that the number of channels (the same thing as the
        ###  number of color planes) is in the last element.  For a tensor, it would be in
        ###  the first element.
        if tensor.shape[0] == 3 and len(tensor.shape) == 3:
            plt.imshow( tensor.numpy().transpose(1,2,0) )

        ###  If the grayscale image was produced by calling torchvision.transforms'
        ###  ".ToPILImage()", and the result converted to a tensor, the tensor shape will
        ###  again have three elements in it; however, the first element, which stands for
        ###  the number of channels, will now be 1.
        elif tensor.shape[0] == 1 and len(tensor.shape) == 3:
            tensor = tensor[0,:,:]
            plt.imshow( tensor.numpy(), cmap = 'gray' )

        ###  For any one color channel extracted from the tensor representation of a color
        ###  image, the shape of the tensor will be (H,W):
        elif len(tensor.shape) == 2:
            plt.imshow( tensor.numpy(), cmap = 'gray' )
        else:
            sys.exit("\n\n\ntensor for image is ill formed -- aborting")
        plt.show()
            
    def accessing_one_color_plane(self, image_file, n):
        '''
        This method shows how you can access the n-th color plane of the argument color image.
        '''
        if os.path.isfile(image_file):
            im_pil = Image.open(image_file)
        else:
            sys.exit("the image file %s does not exist --- aborting" % image_file)

        ###  In order to access the color planes individually, it is best to first convert
        ###  the image into a tensor of shape 3xHxW where 3 is for the three color planes,
        ###  H for the height of the image in pixels, and W for the width of the image in
        ###  pixels. To accomplish this PIL.Image to tensor conversion, we first need to
        ###  construct an instance of the ToTensor class by calling its constructor.
        ###  Since the resulting instance will be a callable object, we can treat it like
        ###  a function object and invoke it directly as shown below while supplying the
        ###  image to it as its argument.
        image_to_tensor_converter = tvt.ToTensor()
        image_as_tensor = image_to_tensor_converter(im_pil)

        ###  IT IS VERY IMPORTANT TO REALIZE that while the pixels in the original color
        ###  image are one-byte integers, with values between 0 and 255 for each of the
        ###  color channels, after the image is turned into a tensor, the three values at
        ###  each pixel are converted into a floating point number between 0.0 and 1.0.
        print("\n\n\nimage as tensor: %s" % str(image_as_tensor))
                            #
                            #  tensor([[[0.4588, 0.4588, 0.4627,  ..., 0.2667, 0.2627, 0.2549],    r-plane
                            #           [0.4588, 0.4627, 0.4667,  ..., 0.2784, 0.2745, 0.2667],
                            #           [0.4588, 0.4667, 0.4745,  ..., 0.2784, 0.2745, 0.2667],
                            #           ...,
                            #           [0.2078, 0.2235, 0.2392,  ..., 0.2941, 0.2627, 0.2392],
                            #           [0.2118, 0.2314, 0.2431,  ..., 0.2902, 0.2706, 0.2549],
                            #           [0.2235, 0.2392, 0.2471,  ..., 0.2706, 0.2588, 0.2510]],
                            # 
                            #          [[0.4784, 0.4784, 0.4824,  ..., 0.2902, 0.2863, 0.2784],    g-plane
                            #           [0.4745, 0.4784, 0.4824,  ..., 0.3020, 0.2980, 0.2902],
                            #           [0.4824, 0.4902, 0.4980,  ..., 0.3020, 0.2980, 0.2902],
                            #           ...,
                            #           [0.2510, 0.2667, 0.2824,  ..., 0.3529, 0.3216, 0.2980],
                            #           [0.2549, 0.2745, 0.2863,  ..., 0.3373, 0.3176, 0.3020],
                            #           [0.2667, 0.2824, 0.2902,  ..., 0.3098, 0.2980, 0.2902]],
                            # 
                            #          [[0.2275, 0.2275, 0.2314,  ..., 0.1490, 0.1529, 0.1451],    b-plane
                            #           [0.2353, 0.2392, 0.2431,  ..., 0.1608, 0.1569, 0.1490],
                            #           [0.2392, 0.2471, 0.2549,  ..., 0.1529, 0.1490, 0.1490],
                            #           ...,
                            #           [0.1176, 0.1333, 0.1490,  ..., 0.2000, 0.1686, 0.1451],
                            #           [0.1216, 0.1412, 0.1529,  ..., 0.1882, 0.1686, 0.1529],
                            #           [0.1333, 0.1490, 0.1569,  ..., 0.1647, 0.1529, 0.1451]]])
                            # 
        ###  Two different ways of checking the type of the tensor.  The second call is more
        ###  informative
        print("\n\n\nType of image_as_tensor: %s" % type(image_as_tensor))       ## <class 'torch.Tensor'>
        print("\n[More informative]  Type of image_as_tensor: %s" % image_as_tensor.type())    
                                                                                 ## <class 'torch.FloatTensor'>
        print("\n\n\nShape of image_as_tensor: %s" % str(image_as_tensor.shape)) ## (3, 366, 320)

        ###  The following call will automatically re-convert the 0.0 to 1.0 floating
        ###  point values at the pixels into the integer one-byte representations for
        ###  displaying the image.
        self.display_tensor_as_image(image_as_tensor,"color image in 'accessing each color plane method'")

        ###  n=0 means the R channel, n=1 the G channel, and n=2 the B channel
        channel_image = image_as_tensor[n]
        print("\n\n\nchannel image: %s" % str(channel_image))
                            # tensor([[0.4588, 0.4588, 0.4627,  ..., 0.2667, 0.2627, 0.2549],
                            #         [0.4588, 0.4627, 0.4667,  ..., 0.2784, 0.2745, 0.2667],
                            #         [0.4588, 0.4667, 0.4745,  ..., 0.2784, 0.2745, 0.2667],
                            #         ...,
                            #         [0.2078, 0.2235, 0.2392,  ..., 0.2941, 0.2627, 0.2392],
                            #         [0.2118, 0.2314, 0.2431,  ..., 0.2902, 0.2706, 0.2549],
                            #         [0.2235, 0.2392, 0.2471,  ..., 0.2706, 0.2588, 0.2510]]) 

        self.display_tensor_as_image(channel_image, "showing just the designated channel" )

        ###   In the statement shown below, the coefficients (0.4, 0.4, 0.2) are a measure
        ###   of how sensitive the human visual system is to the three different color
        ###   channels.  Index 0 is for R, index 1 for G, and the index 2 for B.
        ###
        ###   Note that these weights are predicated on the pixel values being
        ###   represented by floating-point numbers between 0.0 and 1.0 (as opposed
        ###   to the more commonly used one-byte integers).
        gray_tensor = 0.4 * image_as_tensor[0]  +   0.4 * image_as_tensor[1]   + 0.2 * image_as_tensor[2]
        self.display_tensor_as_image(gray_tensor, "showing the grayscale version")
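
        ##  The (0.4, 0.4, 0.2) weights used above are only a rough approximation.  If you prefer the
        ##  standard Rec. 601 luma weighting, the same construction would read:
        ##
        ##      gray_tensor = 0.299 * image_as_tensor[0]  +  0.587 * image_as_tensor[1]  +  0.114 * image_as_tensor[2]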


    def extract_data_pixels_in_bb(self, image_file, bounding_box):
        '''
        Mainly used for testing
        '''
        im_arr  =  np.asarray(Image.open(image_file))
        height,width,_ = im_arr.shape
        hmin,hmax = bounding_box[0],bounding_box[2]
        wmin,wmax = bounding_box[1],bounding_box[3]
        im_arr_portion = im_arr[hmin:hmax,wmin:wmax,:]
        return im_arr_portion

    def working_with_hsv_color_space(self, image_file, test=False):
        ''' 
        Shows color image conversion to HSV
        '''
        if os.path.isfile(image_file):
            im_pil = Image.open(image_file)
        else:
            sys.exit("the image file %s does not exist --- aborting" % image_file)

        ###   Get the HSV representation of the PIL Image object by invoking
        ###   "convert('HSV')" on it as shown below:
        hsv_image = im_pil.convert('HSV')
        hsv_arr = np.asarray(hsv_image)
        np.save("hsv_arr.npy", hsv_arr)
        image_to_tensor_converter = tvt.ToTensor()
        hsv_image_as_tensor = image_to_tensor_converter( hsv_image )
        ###   The index "1" as the last argument means that we want the three images
        ###   to be concatenated horizontally (meaning, along the 'width' dimension
        ###   as opposed to the 'height' dimension).  If you change that value to
        ###   "0", you will see the three images lined up vertically.
        if test is False:
            self.display_tensor_as_image(torch.cat((hsv_image_as_tensor[0], hsv_image_as_tensor[1], 
                                           hsv_image_as_tensor[2] ),1), "displaying the HSV channels separately")


    def histogramming_the_image(self, image_file):
        '''
        PyTorch based experiments with histogramming the grayscale and the color values in an
        image
        '''
        if os.path.isfile(image_file):
            im_pil = Image.open(image_file)
        else:
            sys.exit("the image file %s does not exist --- aborting" % image_file)

        image_to_tensor_converter = tvt.ToTensor()
        color_image_as_tensor = image_to_tensor_converter( im_pil )

        ###   Let's first plot the histogram of the grayscale version of the image:
        gray_tensor = 0.4 * color_image_as_tensor[0]  +   0.4 * color_image_as_tensor[1]   + 0.2 * color_image_as_tensor[2]
        hist_gray = torch.histc(gray_tensor, bins = 10, min = 0.0, max = 1.0)
        hist_gray = hist_gray.div( hist_gray.sum() )

        fig = plt.figure("histogram of the grayscale")
        ax = fig.add_subplot(111)
        ax.bar( np.linspace(1.0, 10.0, num = 10), hist_gray.numpy(), color='black' )
        plt.show()

        ###   We will now plot separately the histogram for each color channel
        ###
        r_tensor = color_image_as_tensor[0]
        g_tensor = color_image_as_tensor[1]
        b_tensor = color_image_as_tensor[2]
        
        ###  Computing the hist for each color channel separately
        hist_r = torch.histc(r_tensor, bins = 10, min = 0.0, max = 1.0)
        hist_g = torch.histc(g_tensor, bins = 10, min = 0.0, max = 1.0)
        hist_b = torch.histc(b_tensor, bins = 10, min = 0.0, max = 1.0)
        
        ###  Normalizing the channel based hists so that the bin counts in each sum to 1.
        hist_r = hist_r.div(hist_r.sum())
        hist_g = hist_g.div(hist_g.sum())
        hist_b = hist_b.div(hist_b.sum())
        
        ### Displaying the channel histograms separately in one figure:
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey = True)
        fig.suptitle("histogramming the color components separately")
        ax1.bar(np.linspace(1.0, 10.0, num = 10), hist_r.numpy(), color='r')
        ax2.bar(np.linspace(1.0, 10.0, num = 10), hist_g.numpy(), color='g')
        ax3.bar(np.linspace(1.0, 10.0, num = 10), hist_b.numpy(), color='b')
        plt.show()


    def displaying_and_histogramming_images_in_batch1(self, dir_name, batch_size):
        '''
        This method is the first of three such methods in this module for illustrating the
        functionality of matplotlib for simultaneously displaying multiple images and
        the results obtained on them in gridded arrangements.  In the implementation
        shown below, the core idea in this method is to call
        "plt.subplots(2,batch_size)" to create 'batch_size' number of subplot
        objects, called "axes", in the form of a '2xbatch_size' array. We use the first
        row of this grid to display each image in its own subplot object.  And we use
        the second row the grid to display the histogram of the corresponding image
        in the first row.
        '''
        fig, axes = plt.subplots(2,batch_size)
        image_files = glob.glob(dir_name + '/*.jpg')[:batch_size]
        images = list(map(Image.open, image_files))
        images = [tvt.Grayscale()(x) for x in images]
        images = [tvt.Resize((64,64), Image.ANTIALIAS)(x) for x in images]
        im_tensors = [tvt.ToTensor()(x) for x in images]
        im_tensors = [tvt.Normalize(mean=[0.5], std=[0.5])(x) for x in im_tensors]

        for j in range(batch_size):
            axes[0,j].imshow(im_tensors[j][0,:,:].numpy(), cmap='gray')

        hists = [torch.histc(x,  bins=10) for x in im_tensors]
        total_counts = list(map(sum, hists)) 
        hists_normed = [hists[i] / total_counts[i] for i in range(len(hists))]
        for j in range(batch_size):
            axes[1,j].bar(np.linspace(1.0, 10.0, num = 10), hists_normed[j].numpy())  
            axes[1,j].set_yticks([])
        plt.show()

    def displaying_and_histogramming_images_in_batch2(self, dir_name, batch_size):
        '''
        I now show a second approach to display multiple images and their corresponding
        histograms in a gridded display.  Unlike in the previous implementation of
        this method, now we do not call on "plt.subplots()" to create a grid
        structure for displaying the images.  On the other hand, we now call on
        "torchvision.utils.make_grid()" to construct a grid for us.  The grid is
        created by giving an argument like "nrow=4" to it.  When using this method,
        an important thing to keep in mind is that the first argument to make_grid()
        must be a tensor of shape "(B, C, H, W)" where B stands for batch_size, C for
        channels (3 for color, 1 for gray), and (H,W) for the height and width of the
        image. What that means in our example is that we need to synthesize a tensor
        of shape "(8,1,64,64)" in order to be able to call the "make_grid()"
        function. Note that the object returned by the call to make_grid() is a
        tensor unto itself.  For the example shown, if we had called
        "print(grid.shape)" on the "grid" returned by "make_grid()", the answer would
        be "torch.Size([3, 158, 306])" which, after it is converted into a numpy
        array, can be construed by a plotting function as a color image of size
        158x306.
        '''
        image_files = glob.glob(dir_name + '/*.jpg')[:batch_size]
        images = list(map(Image.open, image_files))
        images = [tvt.Grayscale()(x) for x in images]
        images = [tvt.Resize((64,64), Image.ANTIALIAS)(x) for x in images]
        im_tensors = [tvt.ToTensor()(x) for x in images]
        im_tensors = [tvt.Normalize(mean=[0.5], std=[0.5])(x) for x in im_tensors]
        IM_Tensor = torch.zeros(batch_size,1,64,64, dtype=float)
        for i in range(batch_size):
            IM_Tensor[i,0,:,:] = im_tensors[i][0,:,:]
        # for the display:
        grid = tutils.make_grid(IM_Tensor, nrow=4, padding=10, normalize=True)
        npgrid = grid.cpu().numpy()
        plt.imshow(np.transpose(npgrid, (1,2,0)), interpolation='nearest')
        plt.show()

        hists = [torch.histc(x,  bins=10) for x in im_tensors]
        total_counts = list(map(sum, hists)) 
        hists_normed = [hists[i] / total_counts[i] for i in range(len(hists))]
        fig, axes = plt.subplots(nrows=2, ncols=4, sharey = True)    

        for i in range(2):
            for j in range(batch_size // 2):
                k = i * (batch_size//2) + j
                axes[i,j].bar(np.linspace(1.0, 10.0, num = 10), hists_normed[k].numpy())  
        plt.show()

    def displaying_and_histogramming_images_in_batch3(self, dir_name, batch_size):
        '''
        The core idea here is to illustrate two things: (1) The syntax used for the
        'singular' version of the subplot function "plt.subplot()" --- although I'll
        be doing so by actually calling "fig.add_subplot()".  And (2) How you can put
        together multiple multi-image plots by creating multiple Figure objects.
        Figure is the top-level container of plots in matplotlib.  In the 
        implementation shown below, the key statements are: 

            fig1 = plt.figure(1)    
            axis = fig1.add_subplot(241)              
                                                                                                                      
        Calling "add_subplot()" on a Figure object returns an "axis" object.  The
        word "axis" is a misnomer for what should really be called a "subplot".
        Subsequently, you can call display functions like "imshow()", "bar()", etc.,
        on the axis object to display an individual plot in a gridded arrangement.

        The argument "241" in the first call to "add_subplot()" means that your
        larger goal is to create a 2x4 display of plots and that you are supplying
        the 1st plot for that grid.  Similarly, the argument "242" in the next call
        to "add_subplot()" means that for your goal of creating a 2x4 gridded
        arrangement of plots, you are now supplying the second plot.  Along the same
        lines, the argument "248" toward the end of the code block that you are now
        supplying the 8th plot for the 2x4 arrangement of plots.

        Note how we create a second Figure object in the second major code block.  We
        use it to display the histograms for each of the images shown in the first
        Figure object.  The two Figure containers will be shown in two separate
        windows on your laptop screen.
        '''
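        ##  The add_subplot() calls in the body below are written out one by one on purpose,
        ##  to make the "241" ... "248" indexing explicit.  Just as a sketch (not used here),
        ##  the same 2x4 grid of images could be produced with a loop:
        ##
        ##      fig1 = plt.figure(1)
        ##      for i in range(8):
        ##          axis = fig1.add_subplot(2, 4, i+1)
        ##          axis.imshow(im_tensors[i][0,:,:].numpy(), cmap='gray')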
        image_files = glob.glob(dir_name + '/*.jpg')[:batch_size]
        images = list(map(Image.open, image_files))
        images = [tvt.Grayscale()(x) for x in images]
        images = [tvt.Resize((64,64), Image.ANTIALIAS)(x) for x in images]
        im_tensors = [tvt.ToTensor()(x) for x in images]
        im_tensors = [tvt.Normalize(mean=[0.5], std=[0.5])(x) for x in im_tensors]

        # Let's make a Figure for the 8 images:
        fig1 = plt.figure(1)
        axis = fig1.add_subplot(241)
        axis.imshow(im_tensors[0][0,:,:].numpy(), cmap='gray') 
        axis = fig1.add_subplot(242)
        axis.imshow(im_tensors[1][0,:,:].numpy(), cmap='gray') 
        axis = fig1.add_subplot(243)
        axis.imshow(im_tensors[2][0,:,:].numpy(), cmap='gray') 
        axis = fig1.add_subplot(244)
        axis.imshow(im_tensors[3][0,:,:].numpy(), cmap='gray') 
        axis = fig1.add_subplot(245)
        axis.imshow(im_tensors[4][0,:,:].numpy(), cmap='gray') 
        axis = fig1.add_subplot(246)
        axis.imshow(im_tensors[5][0,:,:].numpy(), cmap='gray') 
        axis = fig1.add_subplot(247)
        axis.imshow(im_tensors[6][0,:,:].numpy(), cmap='gray') 
        axis = fig1.add_subplot(248)
        axis.imshow(im_tensors[7][0,:,:].numpy(), cmap='gray') 

        # Now let's make a second figure for the 8 corresponding histograms:
        hists = [torch.histc(x,  bins=10) for x in im_tensors]
        total_counts = list(map(sum, hists)) 
        hists_normed = [hists[i] / total_counts[i] for i in range(len(hists))]
        fig2 = plt.figure(2)
        axis = fig2.add_subplot(241)
        axis.bar(np.linspace(1.0, 10.0, num = 10), hists_normed[0].numpy())
        axis.set_yticks([])
        axis = fig2.add_subplot(242)
        axis.bar(np.linspace(1.0, 10.0, num = 10), hists_normed[1].numpy())
        axis.set_yticks([])
        axis = fig2.add_subplot(243)
        axis.bar(np.linspace(1.0, 10.0, num = 10), hists_normed[2].numpy())
        axis.set_yticks([])
        axis = fig2.add_subplot(244)
        axis.bar(np.linspace(1.0, 10.0, num = 10), hists_normed[3].numpy())
        axis.set_yticks([])
        axis = fig2.add_subplot(245)
        axis.bar(np.linspace(1.0, 10.0, num = 10), hists_normed[4].numpy())
        axis.set_yticks([])
        axis = fig2.add_subplot(246)
        axis.bar(np.linspace(1.0, 10.0, num = 10), hists_normed[5].numpy())
        axis.set_yticks([])
        axis = fig2.add_subplot(247)
        axis.bar(np.linspace(1.0, 10.0, num = 10), hists_normed[6].numpy())
        axis.set_yticks([])
        axis = fig2.add_subplot(248)
        axis.bar(np.linspace(1.0, 10.0, num = 10), hists_normed[7].numpy())
        axis.set_yticks([])
        plt.show()

    def histogramming_and_thresholding(self, image_file):
        '''
        PyTorch based experiments with histogramming and thresholding
        '''
        if os.path.isfile(image_file):
            im_pil = Image.open(image_file)
        else:
            sys.exit("the image file %s does not exist --- aborting" % image_file)

        image_to_tensor_converter = tvt.ToTensor()
        ###   Note that "im_pil" is a PIL Image object
        color_image_as_tensor = image_to_tensor_converter( im_pil )
        print("\n\n\nshape of the image tensor: %s" % str(color_image_as_tensor.shape))
        print("\n\n\ndisplaying the original color image")
        self.display_tensor_as_image(color_image_as_tensor, "original color image")
        ###   Let's first plot the histogram of the grayscale version of the image:
        gray_tensor = 0.4 * color_image_as_tensor[0]  +   0.4 * color_image_as_tensor[1]   + 0.2 * color_image_as_tensor[2]
        print("\n\n\ndisplaying the grayscale version of the color image")
        self.display_tensor_as_image(gray_tensor, "grayscale version of color image")
#        hist_gray = torch.histc(gray_tensor, bins = 255, min = 0.0, max = 256.0)
        hist_gray = torch.histc(gray_tensor, bins = 255, min = 0.0, max = 1.0)
        fig = plt.figure("plot of the histogram")
        ax = fig.add_subplot(111)
        ax.bar( np.linspace(1.0, 256, num = 255), hist_gray.numpy(), color='black' )
        print("\n\n\ndisplaying the histogram of the graylevels")
        plt.show()
        print("\n\n\nNumber of pixels in the histogram: %s" % str(hist_gray.sum()))
        print("\n\n\nhist_gray: %s" % str(hist_gray))
        prob = hist_gray.div( hist_gray.sum() )
        cumulative_prob = prob.cumsum(0)           ##  this gives us a cumulative probability distribution
        print("\n\n\ncumulative_probability: %s" % str(cumulative_prob))
        print("\n\n\nnumber of bins in the cumulative prob: %s" % str(len(cumulative_prob)))       ## 255

        ###  For the rest of the implementation of the Otsu algo, the fact that the
        ###  histogram of the gray levels was calculated with the grayscale values scaled
        ###  to floating point numbers between 0 and 1 by the tensor representation is NOT
        ###  an issue.  That is because cumulative_prob is an array of 255 numbers, with
        ###  each number corresponding to one of the gray-level bins.
        hist = prob
        cumu_hist = cumulative_prob
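        ###  As a reminder of the quantities computed in the loop below, with p_i denoting
        ###  the normalized histogram value for gray-level bin i, Otsu's between-class
        ###  variance for a candidate threshold k is
        ###
        ###      omega0 = sum_{i<=k} p_i          omega1 = 1 - omega0
        ###      mu0    = (1/omega0) * sum_{i<=k} i*p_i
        ###      mu1    = (1/omega1) * sum_{i>k}  i*p_i
        ###      sigmaB^2(k) = omega0 * omega1 * (mu1 - mu0)^2
        ###
        ###  and the threshold chosen is the k that maximizes sigmaB^2(k).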
        sigmaBsquared = {k : None for k in range(255)}
        for k in range(255):
             ###   Notice calling ".item()" on one-element tensors to extract the number being
             ###   held by them:
            omega0 = cumu_hist[k].item()
            omega1 = 1 - omega0
            if omega0 > 0 and omega1 > 0:
                mu0 = (1.0/omega0) * sum([i * hist[i].item() for i in range(0,k+1)])     
                mu1 = (1.0/omega1) * sum([i * hist[i].item() for i in range(k+1,255)])      
                sigmaBsquared[k] = omega0 * omega1 * (mu1 - mu0) ** 2
        sigmaBsquared = {k : sigmaBsquared[k] for k in range(255) if sigmaBsquared[k] is not None}

        sorted_thresholds = sorted(sigmaBsquared.items(), key=lambda x: x[1], reverse=True)
        print("\nThe threshold discovered by Otsu: %d" % sorted_thresholds[0][0])
        otsu_threshold = sorted_thresholds[0][0]
        ###  We work on a copy of the gray image so that the loop below can set to zero
        ###  all the pixels that are darker than the Otsu threshold:
        thresholded_gray_image_as_tensor =  torch.clone(gray_tensor)
#        thresholded_gray_image_as_tensor =  torch.clamp( gray_tensor, min=(otsu_threshold / float(256) ) )
        tensor_shape = thresholded_gray_image_as_tensor.shape
        for i in range(tensor_shape[0]):
            for j in range(tensor_shape[1]):
                if thresholded_gray_image_as_tensor[i,j] < (otsu_threshold / float(256)):
                    thresholded_gray_image_as_tensor[i,j] = 0.0
        print("\n\n\nDisplaying the Otsu thresholded image")
        self.display_tensor_as_image(thresholded_gray_image_as_tensor, "otsu thresholded version")


    def convolutions_with_pytorch(self, image_file, kernel):
        '''
        Using torch.nn.functional.conv2d() for demonstrating a single image convolution with
        a specified kernel
        '''
        if os.path.isfile(image_file):
            im_pil = Image.open(image_file)
        else:
            sys.exit("the image file %s does not exist --- aborting" % image_file)

        image_to_tensor_converter = tvt.ToTensor()
        color_image_as_tensor = image_to_tensor_converter( im_pil )
        gray_tensor = 0.4 * color_image_as_tensor[0]  +   0.4 * color_image_as_tensor[1]   + 0.2 * color_image_as_tensor[2] 
        self.display_tensor_as_image(gray_tensor, "grayscale version for demonstrating convolution")

        gray_section = gray_tensor[100:200, 100:250]
        self.display_tensor_as_image(gray_section, "showing a 100x150 section of the grayscale image")

        ###  converting the convolutional kernel into a tensor:
        op = torch.Tensor( kernel )

        ###  The input to the "nn.functional.conv2d()" requires a tensor of shape
        ###
        ###       (batch_size, num_input_channels, image_height, image_width)
        ###
        ###  Since we are dealing with a single image for demonstrating how you can carry
        ###  out a convolution, in our case batch_size=1.  Additionally, the convolution
        ###  demo is on a grayscale image, so we must set num_input_channels=1.
        input = torch.Tensor(1, 1, gray_section.shape[0], gray_section.shape[1])

        ###   The second arg to the "nn.functional.conv2d()" invoked below is the kernel
        ###   you want to use for the convolution.  Like the input, the kernel must be a
        ###   4D tensor --- its shape is (num_output_channels, num_input_channels,
        ###   kernel_height, kernel_width).  For this single-channel demo, the first two
        ###   shape parameters are both 1.
        kernel = torch.Tensor(1, 1, op.shape[0], op.shape[1])

        print("\n\n\nshape of input: %s" % str(input.shape))
        print("\n\n\nshape of kernel: %s" % str(kernel.shape))  

        ###   Now we must stuff the gray image where it belongs in the input tensor:
        input[0,0,:,:] = gray_section

        ###   Finally, we must stuff the kernel where it belongs in the kernel tensor:
        kernel[0,0,:,:] = op
        output = nn.functional.conv2d( input, kernel, stride=1, padding=1 )
        print(output)
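        ###   For reference, with stride=1 and padding=1, the spatial size of the output
        ###   of conv2d() is related to the input and kernel sizes by
        ###
        ###        out_height = in_height + 2*1 - kernel_height + 1
        ###        out_width  = in_width  + 2*1 - kernel_width  + 1
        ###
        ###   so for a 3x3 kernel the output has the same spatial size as the input.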

        ###   In order to respect the input conditions required by the display function,
        ###   we now extract the convolved output from the result of the convolution:
        output = output[0,0,:,:]
        self.display_tensor_as_image( output, "the result of convolution on the image section" )


    def gaussian_smooth(self, pil_grayscale_image):
        '''
        This method smooths an image with a Gaussian of specified sigma.
        '''
        sigma = self.sigma
        width,height = pil_grayscale_image.size
#        gray_image = self.data_im
        gray_image_as_array = np.zeros((height, width), dtype="float")
        for i in range(0, height):
            for j in range(0, width):
                gray_image_as_array[(i,j)] = pil_grayscale_image.getpixel((j,i))
        self.gray_image_as_array = gray_image_as_array
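        ##  (The pixel-by-pixel copy above could equivalently be carried out with
        ##   "np.asarray(pil_grayscale_image, dtype='float')", which also returns an
        ##   array indexed as [row, column].)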
        smoothing_op = _gaussian(sigma)
        smoothed_image_array = _convolution_2D(gray_image_as_array, smoothing_op)
#        self._display_and_save_array_as_image( smoothed_image_array, "_gaussian_smoothed__" + str(sigma) )
        height,width = smoothed_image_array.shape
        maxVal = smoothed_image_array.max()
        minVal = smoothed_image_array.min()
        newimage = Image.new("L", (width,height), (0,))
        for i in range(0, height):
            for j in range(0, width):
                displayVal = int( (smoothed_image_array[(i,j)] - minVal) * (255/(maxVal-minVal)) )
                newimage.putpixel((j,i), displayVal)
        self.displayImage3(newimage, "Gaussian Smoother: close window when done viewing")
#        image_name = "smoothed" + "_" + self.data_im_name
        image_name = "smoothed.png"
        newimage.save(image_name)        
        return newimage


    def visualize_segmentation_in_pseudocolor(self, pixel_blobs, color_map, label=""):
        '''
        Assigns a random color to each blob in the output of an image segmentation algorithm
        '''
        height,width = self.im_array.shape
        colorized_mask_image = Image.new("RGB", (width,height), (0,0,0))
        for blob_idx in sorted(pixel_blobs, key=lambda x: len(pixel_blobs[x]), reverse=True):
            for (i,j) in pixel_blobs[blob_idx]:
                colorized_mask_image.putpixel((j,i), color_map[blob_idx])
        seg_mask_image = colorized_mask_image.resize((width*self.image_size_reduction_factor,
                                          height*self.image_size_reduction_factor), Image.ANTIALIAS)
        self.displayImage6(seg_mask_image, label + "_segmentation")

    def visualize_segmentation_with_mean_gray(self, pixel_blobs, label=""):
        '''
        Assigns the mean gray level to each blob in the output of an image segmentation algorithm
        '''
        height,width = self.im_array.shape
        gray_mask_image = Image.new("L", (width,height), (0))
        for blob_idx in sorted(pixel_blobs, key=lambda x: len(pixel_blobs[x]), reverse=True):
            pixel_blob = pixel_blobs[blob_idx]
            pixel_vals = np.array([self.im_array[pixel] for pixel in pixel_blob])
            gray_mean = int(np.mean(pixel_vals))
            for (i,j) in pixel_blobs[blob_idx]:
                gray_mask_image.putpixel((j,i), gray_mean)
        seg_mask_image = gray_mask_image.resize((width*self.image_size_reduction_factor,
                                        height*self.image_size_reduction_factor), Image.ANTIALIAS)
        self.displayImage6(seg_mask_image, label)

    def repair_blobs(self, merged_blobs, color_map, all_pairwise_similarities):
        '''
        The goal here is to do a final clean-up of the blobs by merging tiny pixel blobs
        with an immediate neighbor, etc.  Such a cleanup requires adjacency info regarding
        the blobs in order to figure out which larger blob a small blob should be merged with.
        '''
        pairwise_adjacency  =  all_pairwise_similarities['adjacency']
        pairwise_color_homogeneity_val  =  all_pairwise_similarities['color_homogeneity']
        pairwise_gray_var_comp =  all_pairwise_similarities['gray_var']
        pairwise_texture_comp = all_pairwise_similarities['texture']

        singleton_blobs = [blob_id for blob_id in merged_blobs if len(merged_blobs[blob_id]) == 1]
        sorted_blobs = sorted(merged_blobs, key=lambda x: len(merged_blobs[x]))
        for blob_id in singleton_blobs:
            if blob_id not in merged_blobs: continue
            for blb_id in sorted_blobs:            
                if blb_id == blob_id: continue
                if blb_id not in merged_blobs: continue
                if blb_id > blob_id:
                    pair_label = "%d,%d" % (blb_id,blob_id)
                else:
                    pair_label = "%d,%d" % (blob_id,blb_id)                    
                if blb_id in merged_blobs and blob_id in merged_blobs and pairwise_adjacency[pair_label] is True:
                    merged_blobs[blb_id] += merged_blobs[blob_id]
                    del merged_blobs[blob_id]
        sorted_blobs = sorted(merged_blobs, key=lambda x: len(merged_blobs[x]))
        for blob_id in sorted_blobs:
            if len(merged_blobs[blob_id]) > 200: continue
            neighboring_blobs = []          
            for blb_id in sorted_blobs:
                if blb_id == blob_id: continue
                if blb_id > blob_id:
                    pair_label = "%d,%d" % (blb_id,blob_id)
                else:
                    pair_label = "%d,%d" % (blob_id,blb_id)                    

                if ( (pairwise_adjacency[pair_label] is True) and 
                     (pairwise_color_homogeneity_val[pair_label] < self.color_homogeneity_thresh) and 
                     (pairwise_gray_var_comp[pair_label] < self.gray_var_thresh) and 
                     (pairwise_texture_comp[pair_label] < self.texture_homogeneity_thresh) ):
                    neighboring_blobs.append(blb_id)
            if self.debug: 
                print("\n\n\nneighboring_blobs for blob %d: %s" % (blob_id, str(neighboring_blobs)))
            if len(neighboring_blobs) == 1 and len(merged_blobs[neighboring_blobs[0]]) > len(merged_blobs[blob_id]):
                merged_blobs[neighboring_blobs[0]] += merged_blobs[blob_id] 
                del merged_blobs[blob_id]
        return merged_blobs,color_map                              


    def selective_search_for_region_proposals(self, graph, image_name):
        '''
        This method implements the Selective Search (SS) algorithm proposed by Uijlings,
        van de Sande, Gevers, and Smeulders for creating region proposals for object
        detection.  As mentioned elsewhere here, that algorithm sits on top of the graph
        based image segmentation algorithm that was proposed by Felzenszwalb and
        Huttenlocher.  The 'graph' parameter of the method presented here is supposed to
        be the (pixel_blobs, edges) tuple produced by the Felzenszwalb and Huttenlocher
        algorithm.
        '''
        def are_two_blobs_adjacent(blob1, blob2):
            '''
            We say that two pixel blobs with no pixels in common are adjacent if at
            least one pixel in one blob is 8-adjacent to at least one of the pixels in
            the other blob.
            '''
            for pixel_u in blob1:
                for pixel_v in blob2:
                    if abs(pixel_u[0] - pixel_v[0])  <= 1 and abs(pixel_u[1] - pixel_v[1]) <= 1:
                        return True
            return False
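        ###  Note: are_two_blobs_adjacent() compares every pixel of blob1 with every pixel
        ###  of blob2, so its cost grows with the product of the two blob sizes.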

        def estimate_lbp_texture(blob, im_array):
            '''
            This method implements the Local Binary Patterns (LBP) method of characterizing
            image textures.  This algorithm, proposed by Ojala, Pietikainen, and Maenpaa,
            generates a grayscale and rotationally invariant texture signature through
            what is referred to as an LBP histogram.  For a tutorial introduction to this
            method, see:
                   https://engineering.purdue.edu/kak/Tutorials/TextureAndColor.pdf
            The code presented below is borrowed from this tutorial.
            '''
            import BitVector
            height_coords = [p[0] for p in blob]
            width_coords  = [p[1] for p in blob]
            bb_height_min = min(height_coords)
            bb_height_max = max(height_coords)
            bb_width_min  = min(width_coords)
            bb_width_max  = max(width_coords) 
            ###  Create bounding box for each blob to make it more convenient to apply
            ###  the LBP logic to the blob:
            bb = [[0 for w in range(bb_width_max - bb_width_min + 1)] 
                              for h in range(bb_height_max - bb_height_min + 1)]
            for h in range(bb_height_max - bb_height_min + 1):
                for w in range(bb_width_max - bb_width_min + 1):
                    if (h+bb_height_min, w+bb_width_min) in blob:
                        bb[h][w] = im_array[h+bb_height_min,w+bb_width_min]
            if self.debug:
                print("\n\n\nbb: %s" % str(bb))
            R,P = 1,8
            rowmax,colmax = bb_height_max-bb_height_min+1 - R, bb_width_max - bb_width_min + 1 - R
            lbp_hist = {t:0 for t in range(P+2)}                                  
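            ###  lbp_hist has P+2 bins: for "uniform" patterns (at most two runs of 0s and
            ###  1s in the circular bit pattern), the bin index is the number of 1s in the
            ###  pattern (0 through P); bin P+1 collects all the non-uniform patterns.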
            ###  Visit each pixel and find the LBP vector at that pixel.            
            for h in range(rowmax):           
                for w in range(colmax):       
                    pattern = [] 
                    for p in range(P):                                               
                        #  We use the index k to point straight down and l to point to the 
                        #  right in a circular neighborhood around the point (h,w). And we 
                        #  use (del_k, del_l) as the offset from (h,w) to the point on the 
                        #  R-radius circle as p varies.
                        del_k,del_l = R*math.cos(2*math.pi*p/P), R*math.sin(2*math.pi*p/P)  
                        if abs(del_k) < 0.001: del_k = 0.0                                  
                        if abs(del_l) < 0.001: del_l = 0.0                                  
                        k, l =  h + del_k, w + del_l                                        
                        k_base,l_base = int(k),int(l)                                       
                        delta_k,delta_l = k-k_base,l-l_base                                 
                        if (delta_k < 0.001) and (delta_l < 0.001):                         
                            image_val_at_p = float(bb[k_base][l_base])                   
                        elif (delta_l < 0.001):                                             
                            image_val_at_p = (1 - delta_k) * bb[k_base][l_base] +  \
                                                          delta_k * bb[k_base+1][l_base] 
                        elif (delta_k < 0.001):                                             
                            image_val_at_p = (1 - delta_l) * bb[k_base][l_base] +  \
                                                          delta_l * bb[k_base][l_base+1] 
                        else:                                                               
                            image_val_at_p = (1-delta_k)*(1-delta_l)*bb[k_base][l_base] + \
                                             (1-delta_k)*delta_l*bb[k_base][l_base+1]  + \
                                             delta_k*delta_l*bb[k_base+1][l_base+1]  + \
                                             delta_k*(1-delta_l)*bb[k_base+1][l_base]   
                        if image_val_at_p >= bb[h][w]:                                  
                            pattern.append(1)                                              
                        else:                                                              
                            pattern.append(0)                                              
                    if self.debug:
                        print("pattern: %s" % pattern)                                         
                    bv =  BitVector.BitVector( bitlist = pattern )                         
                    intvals_for_circular_shifts  =  [int(bv << 1) for _ in range(P)]       
                    minbv = BitVector.BitVector( intVal = min(intvals_for_circular_shifts), size = P )   
                    if self.debug:
                        print("minbv: %s" % minbv)                                               
                    bvruns = minbv.runs()                                                    
                    encoding = None
                    if len(bvruns) > 2:                                                
                        lbp_hist[P+1] += 1                                             
                        encoding = P+1                                                 
                    elif len(bvruns) == 1 and bvruns[0][0] == '1':                     
                        lbp_hist[P] += 1                                               
                        encoding = P                                                   
                    elif len(bvruns) == 1 and bvruns[0][0] == '0':                     
                        lbp_hist[0] += 1                                               
                        encoding = 0                                                   
                    else:                                                              
                        lbp_hist[len(bvruns[1])] += 1                                  
                        encoding = len(bvruns[1])                                      
                    if self.debug:
                        print("encoding: %s" % encoding)                                   
            if self.debug:
                print("\nLBP Histogram: %s" % lbp_hist)                                    
            lbp_array = np.zeros(len(lbp_hist))
            for i in range(len(lbp_hist)): lbp_array[i] = lbp_hist[i]
            return lbp_array
            ###  End of Texture operator definition

        ###  BEGIN CODE FOR SELECTIVE-SEARCH BASED MERGING OF THE BLOBS
        ###  BUT FIRST WE COMPUTE UNARY AND BINARY ATTRIBUTES OF THE BLOBS.
        pixel_blobs,E = graph
        ###  We need access to the underlying image to fetch the pixel values for the blobs
        ###  in question:
        im_array_color  =    np.asarray(self.low_res_PIL_image_color)
        im_array_gray = self.im_array
        ###  Compute unary properties of blobs:
        color_mean_vals = {}
        gray_mean_vals = {}
        gray_vars = {}
        texture_vals = {}
        sorted_blobs = sorted(pixel_blobs, key=lambda x: len(pixel_blobs[x]), reverse=True)
        for blob_id in sorted_blobs:
            pixel_blob = pixel_blobs[blob_id]
#            pixel_vals_color = np.array([im_array_color[pixel] for pixel in pixel_blob])
            pixel_vals_color = [im_array_color[pixel[0],pixel[1],:].tolist() for pixel in pixel_blob]
            pixel_vals_gray =  np.array([im_array_gray[pixel] for pixel in pixel_blob])
            color_mean_vals[blob_id]  = [ float(sum([pix[j] for pix in pixel_vals_color])) / float(len(pixel_vals_color)) for j in range(3) ]
            gray_mean_vals[blob_id] =  np.mean(pixel_vals_gray)
            gray_vars[blob_id] = np.var(pixel_vals_gray)
            texture_vals[blob_id] = estimate_lbp_texture(pixel_blob, im_array_gray)
        if self.debug:
            print("\n\n\ncolor_mean_vals: %s" % str(color_mean_vals))
            print("\n\n\ngray_mean_vals: %s" % str(gray_mean_vals))
            print("\n\n\ngray_vars: %s" % str(gray_vars))
            print("\n\n\ntexture_vals: %s" % str(texture_vals))

        ###  Compute pairwise similarity scores:
        all_pairwise_similarities = {}
        pairwise_adjacency = {}
        pairwise_gray_homogeneity_val = {}
        pairwise_color_homogeneity_val = {}
        pairwise_gray_var_comp = {}
        pairwise_texture_comp   = {}
        all_pairwise_similarities['adjacency'] = pairwise_adjacency
        all_pairwise_similarities['color_homogeneity'] = pairwise_color_homogeneity_val
        all_pairwise_similarities['gray_var'] = pairwise_gray_var_comp
        all_pairwise_similarities['texture'] = pairwise_texture_comp
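        ###  NOTE: All the pairwise dictionaries are keyed by strings of the form "id1,id2"
        ###  in which the LARGER of the two blob ids always comes first.  The same
        ###  convention is used later in repair_blobs().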

        for blob_id_1 in pixel_blobs:
            for blob_id_2 in pixel_blobs:
                if blob_id_1 > blob_id_2:
                    pair_id = str("%d,%d" % (blob_id_1,blob_id_2))
                    pairwise_adjacency[pair_id] = True if pair_id in E else False
                    pairwise_gray_homogeneity_val[pair_id] = abs(gray_mean_vals[blob_id_1]
                                                                  - gray_mean_vals[blob_id_2])
                    pairwise_color_homogeneity_val[pair_id] = [abs(color_mean_vals[blob_id_1][j] 
                                               - color_mean_vals[blob_id_2][j]) for j in range(3)]
                    pairwise_gray_var_comp[pair_id] = abs(gray_vars[blob_id_1] - gray_vars[blob_id_2])
                    pairwise_texture_comp[pair_id] =  np.linalg.norm(texture_vals[blob_id_1] - 
                                                                     texture_vals[blob_id_2])
        if self.debug:
            print("\n\n\npairwise_adjacency: %s" % str(pairwise_adjacency))
            print("\n\n\npairwise_gray_homogeneity_val: %s" % str(pairwise_gray_homogeneity_val))
            print("\n\n\npairwise_color_homogeneity_val: %s" % str(pairwise_color_homogeneity_val))
            print("\n\n\npairwise_gray_var_comp: %s" % str(pairwise_gray_var_comp))
            print("\n\n\npairwise_texture_comp: %s" % str(pairwise_texture_comp)) 

        ###  Initialize merged blobs
        merged_blobs = pixel_blobs
        if self.debug:
            print("\n\n\ninitial blobs: %s" % str(pixel_blobs))
        next_blob_id = max(merged_blobs.keys()) + 1
        ###  You have to be careful with the program flow in the blob merging block of
        ###  code shown below in order to deal with the fact that you are modifying the
        ###  blobs as you iterate through them.  You merge two blobs because they are
        ###  adjacent and because they are color and texture homogeneous.  However, when
        ###  you merge two blobs, the original blobs must be deleted from the blob
        ###  dictionary.  At the same time, you must compute the unary properties of the
        ###  newly formed blob and also estimate its pairwise properties with respect to
        ###  all the other blobs in the blob dictionary.
        ss_iterations = 0
        '''
        In this version, we make only one pass through the 'while' loop shown below
        because, in the update of the pairwise properties, I have not yet included
        those pairs that involve the latest newly created blob vis-a-vis the other,
        older, newly created blobs.  In any case, my experience has shown that you need
        just one pass for the images in the Examples directory.  However, it is
        possible that, for complex imagery, you may need multiple (even an
        indeterminate number of) passes through the blob merging code shown below.
        '''
        while ss_iterations < 1:
            sorted_up_blobs = sorted(merged_blobs, key=lambda x: len(merged_blobs[x]))
            sorted_down_blobs = sorted(merged_blobs, key=lambda x: len(merged_blobs[x]), reverse=True)
            for blob_id_1 in sorted_up_blobs:
                if blob_id_1 not in merged_blobs: continue
                for blob_id_2 in sorted_down_blobs[:-1]:        # the largest blob is typically background
                    if blob_id_2 not in merged_blobs: continue
                    if blob_id_1 not in merged_blobs: break
                    if blob_id_1 > blob_id_2:
                        pair_id = "%d,%d" % (blob_id_1,blob_id_2)   
#                        if (pair_id in pairwise_adjacency) and (pairwise_adjacency[pair_id] is True):
                        if (pairwise_color_homogeneity_val[pair_id][0] < self.color_homogeneity_thresh[0])\
                           and                                                                   \
                           (pairwise_color_homogeneity_val[pair_id][1] < self.color_homogeneity_thresh[1])\
                           and                                                                   \
                           (pairwise_color_homogeneity_val[pair_id][2] < self.color_homogeneity_thresh[2])\
                           and                                                                    \
                           (pairwise_gray_var_comp[pair_id] < self.gray_var_thresh)               \
                           and                                                                    \
                           (pairwise_texture_comp[pair_id] < self.texture_homogeneity_thresh):          
                            if self.debug:
                                print("\n\n\nmerging blobs of id %d and %d" % (blob_id_1, blob_id_2))
                            new_merged_blob = merged_blobs[blob_id_1] +  merged_blobs[blob_id_2] 
                            merged_blobs[next_blob_id] = new_merged_blob
                            del merged_blobs[blob_id_1]
                            del merged_blobs[blob_id_2]
                            ###  We need to estimate the unary properties of the newly created
                            ###  blob:
                            pixel_vals_color = [im_array_color[pixel[0],pixel[1],:].tolist() for pixel in 
                                                                                    new_merged_blob]
                            pixel_vals_gray = np.array([im_array_gray[pixel] for pixel in new_merged_blob])
                            color_mean_vals[next_blob_id]  = [float(sum([pix[j] for pix in \
                                pixel_vals_color])) / float(len(pixel_vals_color)) for j in range(3)]
                            gray_mean_vals[next_blob_id] = np.mean(pixel_vals_gray)
                            gray_vars[next_blob_id] = np.var(pixel_vals_gray)
                            texture_vals[next_blob_id] = estimate_lbp_texture(new_merged_blob, im_array_gray)
                            ###  Now that we have merged two blobs, we need to create entries
                            ###  in pairwise dictionaries for entries related to this new blob
                            for blb_id in sorted_up_blobs:
                                if blb_id not in merged_blobs: continue
                                if next_blob_id > blb_id:
                                    pair_id = "%d,%d" % (next_blob_id, blb_id)
                                    pairwise_adjacency[pair_id] = \
                  True if are_two_blobs_adjacent(new_merged_blob, pixel_blobs[blb_id]) else False
                                    pairwise_color_homogeneity_val[pair_id] = \
                  [abs(color_mean_vals[next_blob_id][j]  - color_mean_vals[blb_id][j]) for j in range(3)]
                                    pairwise_gray_homogeneity_val[pair_id] = \
                                         abs(gray_mean_vals[next_blob_id] - gray_mean_vals[blb_id])
                                    pairwise_gray_var_comp[pair_id] = \
                                         abs(gray_vars[next_blob_id] - gray_vars[blb_id])
                                    pairwise_texture_comp[pair_id] =  \
                                np.linalg.norm(texture_vals[next_blob_id] - texture_vals[blb_id])
                    next_blob_id += 1                           
            ss_iterations += 1

        num_pixels_in_final_merged_blobs = sum( [len(blob) for _,blob in merged_blobs.items()] )
        print("\n\n\ntotal number of pixels in all merged blobs: %d" % num_pixels_in_final_merged_blobs)
        ###  color_map is a dictionary with blob_ids as keys and the assigned values the color
        ###  assigned to each blob for its visualization

        bounding_boxes = {}
        retained_vertex_list = []
        total_pixels_in_output = 0
        color_map = {}
        for blob_idx in sorted(merged_blobs, key=lambda x: len(merged_blobs[x]), reverse=True)[:self.max_num_blobs_expected]:
            color_map[blob_idx] = (random.randint(0,255), random.randint(0,255),random.randint(0,255))
            all_pixels_in_blob = merged_blobs[blob_idx]
            total_pixels_in_output += len(all_pixels_in_blob)
            retained_vertex_list.append(blob_idx)
            height_coords = [p[0] for p in all_pixels_in_blob]
            width_coords  = [p[1] for p in all_pixels_in_blob]
            bb_height_min = min(height_coords)
            bb_height_max = max(height_coords)
            bb_width_min  = min(width_coords)
            bb_width_max  = max(width_coords)
            bounding_boxes[blob_idx] = [bb_height_min, bb_width_min, bb_height_max, bb_width_max]

        print("\n\n\nTotal number of pixels in output blobs: %d" % total_pixels_in_output)
        title = "selective_search_based_bounding_boxes"
        arr_height,arr_width = im_array_gray.shape
        colorized_mask_image = Image.new("RGB", (arr_width,arr_height), (0,0,0))
        for blob_idx in retained_vertex_list:
            for (i,j) in merged_blobs[blob_idx]:
                colorized_mask_image.putpixel((j,i), color_map[blob_idx])
        mw = Tkinter.Tk()
        winsize_w,winsize_h = None,None
        screen_width,screen_height = mw.winfo_screenwidth(),mw.winfo_screenheight()
        if screen_width <= screen_height:
            winsize_w = int(0.5 * screen_width)
            winsize_h = int(winsize_w * (arr_height * 1.0 / arr_width))            
        else:
            winsize_h = int(0.5 * screen_height)
            winsize_w = int(winsize_h * (arr_width * 1.0 / arr_height))
        scaled_image =  colorized_mask_image.copy().resize((winsize_w,winsize_h), Image.ANTIALIAS)
        mw.title(title) 
        mw.configure( height = winsize_h, width = winsize_w )         
        canvas = Tkinter.Canvas( mw,                         
                             height = winsize_h,            
                             width = winsize_w,             
                             cursor = "crosshair" )   
        canvas.pack(fill=BOTH, expand=True)
        frame = Tkinter.Frame(mw)                            
        frame.pack( side = 'bottom' )                             
        Tkinter.Button( frame,         
                text = 'Save',                                    
                command = lambda: canvas.postscript(file = title + ".eps") 
              ).pack( side = 'left' )                             
        Tkinter.Button( frame,                        
                text = 'Exit',                                    
                command = lambda: mw.destroy(),                    
              ).pack( side = 'right' )                            
        photo = ImageTk.PhotoImage( scaled_image )
        canvas.create_image(winsize_w//2,winsize_h//2,image=photo)
        scale_w = winsize_w / float(arr_width)
        scale_h = winsize_h / float(arr_height)
        for v in bounding_boxes:
            bb = bounding_boxes[v]
            canvas.create_rectangle( (bb[1]*scale_w,bb[0]*scale_h,(bb[3]+1)*scale_w,(bb[2]+1)*scale_h), width='3', outline='red' )
        canvas.update()
        mw.update()
        print("\n\n\nIterations used: %d" % self.iterations_used)
        print("\n\n\nNumber of region proposals: %d" % len(bounding_boxes))
        mw.mainloop()
        if os.path.isfile(title + ".eps"):
            Image.open(title + ".eps").save(title + ".png")
            os.remove(title + ".eps")
        retained_vertices = {}
        for u in retained_vertex_list:
            retained_vertices[u] = merged_blobs[u]
        return retained_vertices, color_map

        ###  NOTE:  The two helper functions shown below are defined after the 'return'
        ###  statement above and are therefore never executed; they are retained here
        ###  only for reference.
        def are_two_blobs_color_homogeneous(blob1, blob2, image):
            color_in_1 = [image[pixel] for pixel in blob1]
            color_in_2 = [image[pixel] for pixel in blob2]
            mean_diff = abs(np.mean(color_in_1) - np.mean(color_in_2))
            var1 = np.var(color_in_1)
            var2 = np.var(color_in_2)
            if var1 < self.var_thresh and var2 < self.var_thresh and mean_diff < self.mean_diff_thresh:
                return True
            return False

        def are_two_blobs_texture_homogeneous(blob1, blob2, image):
            lbp_hist_1 = estimate_lbp_texture(blob1, image)
            lbp_hist_2 = estimate_lbp_texture(blob2, image)
            if np.linalg.norm(lbp_hist_1 - lbp_hist_2) < self.texture_thresh:
                return True
            return False               

    def graph_based_segmentation(self, image_name, num_blobs_wanted=None):
        '''
        This is an implementation of the Felzenszwalb and Huttenlocher algorithm for
        graph-based segmentation of images.  At the moment, it is limited to working
        on grayscale images.
        '''
        ###  image_name may be a file, in which case it needs to be opened, or directly
        ###  a PIL.Image object
        try:
            image_pil_color = Image.open(image_name)
        except:
            image_pil_color = image_name                           ### needed for the interactive mode
        width,height = image_pil_color.size
        kay = self.kay
        print("\n\n\nImage of WIDTH=%d  and  HEIGHT=%d   being processed by graph_based_segmentation" % (width,height))
        self.displayImage6(image_pil_color, "input_image -- size: width=%d height=%d" % (width,height))
        input_image_gray = image_pil_color.copy().convert("L")      ## first convert the image to grayscale
        if self.sigma > 0:
            smoothed_image_gray = self.gaussian_smooth(input_image_gray) 
            ##  we do NOT need a smoothed version of the original color image
        else:
            smoothed_image_gray = input_image_gray
        image_size_reduction_factor = self.image_size_reduction_factor
        width_to = width // image_size_reduction_factor
        height_to = height // image_size_reduction_factor
        if self.image_normalization_required:
            gray_resized_normalized = tvt.Compose( [tvt.Grayscale(num_output_channels = 1), tvt.Resize((height_to,width_to)), tvt.ToTensor(), tvt.Normalize(mean=[0.5], std=[0.5]) ] )
            color_resized_normalized = tvt.Compose( [tvt.Resize((height_to,width_to)), tvt.ToTensor(), tvt.Normalize(mean=[0.5], std=[0.5]) ] )
            img_tensor_gray = gray_resized_normalized(smoothed_image_gray)
            ### we do NOT need a smoothed version of the color image:
            img_tensor_color = color_resized_normalized(image_pil_color)
            to_image_xform = tvt.Compose([tvt.ToPILImage()])
            low_res_PIL_image_gray = to_image_xform(img_tensor_gray)
            low_res_PIL_image_color = to_image_xform(img_tensor_color)
        else:
            low_res_PIL_image_gray = smoothed_image_gray.resize((width_to,height_to), Image.ANTIALIAS)
            low_res_PIL_image_color = image_pil_color.resize((width_to,height_to), Image.ANTIALIAS)
        self.low_res_PIL_image_gray = low_res_PIL_image_gray
        self.low_res_PIL_image_color = low_res_PIL_image_color
        self.displayImage6(low_res_PIL_image_gray, "low_res_version_of_original")
        ###   VERY IMPORTANT:  In PIL.Image, the first coordinate refers to the width-wise coordinate
        ###                    and the second coordinate to the height-wise coordinate pointing downwards
        ###   However:         In numpy and tensor based representations, the first coordinate is the
        ###                    height-wise coordinate and the second coordinate the width-wise coordinate.
        ###   Since the tensor operations cause the grayscale image to be represented
        ###   by a 3D array, with the first dimension set to the number of channels
        ###   (which would be 1 for a grayscale image), we need to ignore it:
        img_array = np.asarray(low_res_PIL_image_gray)
        self.im_array = img_array
        arr_height,arr_width = img_array.shape
        print("\n\n\nimage array size: height=%d  width=%d" % (arr_height,arr_width))

        initial_num_graph_vertices = arr_height * arr_width
        print("\n\n\nnumber of vertices in graph: %d" % initial_num_graph_vertices)
        initial_graph_vertices = {i : None for i in range(initial_num_graph_vertices)}
        for i in range(initial_num_graph_vertices):
            h,w  =  i // arr_width, i - (i // arr_width)*arr_width
            initial_graph_vertices[i] = [(h,w)]
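        ###  (The index arithmetic above is just the usual row-major mapping between a
        ###   single vertex index i and the pixel coordinates (h,w):  i = h*arr_width + w,
        ###   which is equivalent to  h, w = divmod(i, arr_width).)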
      
        initial_graph_edges = {}
        MInt = {}
        for i in range(initial_num_graph_vertices):
            hi,wi =  initial_graph_vertices[i][0]
            for j in range(initial_num_graph_vertices):
                hj,wj =  initial_graph_vertices[j][0]
                if i > j:
                    if abs(hi - hj) <= 1 and abs(wi - wj) <= 1:
                        ###  In order to take care of the error report: "overflow encountered in 
                        ###  ubyte_scalars":
                        ###  Since pixels are stored as the uint8 datatype (which implies that
                        ###  their values are only expected to be between 0 and 255), any 
                        ###  arithmetic on them could violate that range.  So we must first convert
                        ###  into the int datatype:
                        initial_graph_edges["%d,%d" % (i,j) ]  =  abs(int(img_array[hi,wi]) - int(img_array[hj,wj]))
                        MInt[ "%d,%d" % (i,j) ] = kay

        ###   INTERNAL DIFFERENCE property at the initial vertices in the graph
        ###   Internal Difference is defined as the max edge weight between the pixels in the pixel
        ###   blob represented by a graph vertex.
        Int_prop = {v : 0.0 for v in initial_graph_vertices}
        MInt_prop = {v : kay for v in initial_graph_vertices}   
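        ###  In the Felzenszwalb-Huttenlocher formulation, two components C1 and C2 are
        ###  merged only if
        ###
        ###      Dif(C1,C2)  <=  min( Int(C1) + kay/|C1|,  Int(C2) + kay/|C2| )
        ###
        ###  The MInt dictionaries store the right-hand side of this test for each edge;
        ###  since every initial blob is a single pixel, Int starts at 0 and MInt at kay.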
        if self.debug:
            print("\n\n\ninitial graph_vertices: %s" % str(sorted(initial_graph_vertices.items())))
            print("\n\n\nnumber of vertices in initial graph: %d" % len(initial_graph_vertices))
            print("\n\n\ninitial graph_edges: %s" % str(sorted(initial_graph_edges.items())))
            print("\n\n\nnumber of edges in initial graph: %d" % len(initial_graph_edges))
            print("\n\n\ninitial MInt: %s" % str(sorted(MInt.items())))
            print("\n\n\nnumber of edges in initial MInt: %d" % len(MInt))

        initial_graph = (copy.deepcopy(initial_graph_vertices), copy.deepcopy(initial_graph_edges))

        def find_all_connections_for_a_vertex(vert, graph):
            vertices = graph[0]
            edges    = graph[1]
            print("pixels in vertex %d: %s" % (vert, str(vertices[vert])))
            connected_verts_in_graph = []
            for edge in edges:
                end1,end2 = int(edge[:edge.find(',')]), int(edge[edge.find(',')+1 :])                 
                if vert == end1:
                    connected_verts_in_graph.append(end2)
                elif vert == end2:
                    connected_verts_in_graph.append(end1)
            return connected_verts_in_graph

        index_for_new_vertex = len(initial_graph_vertices)
        master_iteration_index = 0
        self.iterations_terminated = False
        ###   graph = (V,E) with both V and E as dictionaries.
        ###   NOTE: The edges E in the graph stand for 'Dif(C1,C2)' in F&H
        def seg_gen( graph, MInt, index_for_new_vertex, master_iteration_index, Int_prop, MInt_prop, kay ):
            print("\n\n\n=========================== Starting iteration %d ==========================" % master_iteration_index)
            V,E = graph
            if num_blobs_wanted is not None and len(initial_graph[0]) > num_blobs_wanted:
                if len(V) <= num_blobs_wanted: return graph
            if self.debug:
                print("\n\n\nV: %s" % str(V))
                print("\n\n\nE: %s" % str(E))
                print("\n\n\nMInt: %s" % str(MInt))
            max_iterations = self.max_iterations
            print("\n\n\nNumber of region proposals at the current level of merging: %d" % len(V))
            if len(E) == 0:
                print("\n\n\nThe graph has no edges left")
                return graph
            sorted_vals_and_edges = list( sorted( (v,k) for k,v in E.items() ) )
            sorted_edges = [x[1] for x in sorted_vals_and_edges]
            print("\n\n\n[Iter Index: %d] Dissimilarity value associated with the most similar edge: %s" % (master_iteration_index, str(sorted_vals_and_edges[0])))
            print("\nOne dot represents 100 possible merge operations in the graph representation of the image\n")
            edge_counter = 0

            ###  You have to be careful when debugging the code in the following 'for' loop.  The
            ###  problem is that the sorted edge list is made from the original edge list which is
            ###  modified by the code in the 'for' loop.  Let's say that the edge 'u,v' is a good
            ###  candidate for the merging of the pixel blobs corresponding to u and v.  After the
            ###  'for' loop has merged these two blobs corresponding to these two vertices, the 'u'
            ###  and 'v' vertices in the graph do not exist and must be deleted.  Deleting these two
            ###  vertices requires that we must also delete from E all the other edges that connect
            ###  with either u and v.  So if you are not careful, it is possible that in the next
            ###  go-around in the 'for' loop you will run into one of those edges as the next
            ###  candidate for the merging of two vertices.
            for edge in sorted_edges:
                if edge not in E: continue
                edge_counter += 1
                if edge_counter % 100 == 0: 
                    sys.stdout.write(". ")
                    sys.stdout.flush()
                ###  This is the fundamental condition for merging the pixel blobs corresponding to
                ###  two different vertices: The 'Diff' edge weight, which is represented by the
                ###  edge weight E[edge], must be LESS than the minimum of the Internal component
                ###  edge weight, the minimum being over the two vertices for the two pixel blobs.
                if E[edge] >  MInt[edge]: 
                    del E[edge]
                    del MInt[edge]
                    continue 
                ###  Let us now find the identities of the vertices of the edge whose two vertices
                ###  are the best candidates for the merging of the two pixel blobs.
                vert1,vert2 = int(edge[:edge.find(',')]), int(edge[edge.find(',')+1 :])
                if self.debug:
                    print("\n\n\n[Iter Index: %d] The least dissimilar two vertices in the graph are: %s and %s" % 
                                                                              (master_iteration_index, vert1, vert2))
                ###   Since we want to go through all the edges in 'sorted_edges" WHILE we are
                ###   deleting the vertices that are merged and the edges that are no longer
                ###   relevant because of vertex deletion, we need to be careful going forward:
                if (vert1 not in V) or (vert2 not in V): continue
                affected_edges = []
                for edg in E:
                    end1,end2 = int(edg[:edg.find(',')]), int(edg[edg.find(',')+1 :])                
                    if (vert1 == end1) or (vert1 == end2) or (vert2 == end1) or (vert2 == end2):
                        affected_edges.append(edg)
                if self.debug:
                    print("\n\n\naffected edges to be deleted: %s" % str(affected_edges))
                for edg in affected_edges:
                    del E[edg]
                    del MInt[edg]
                merged_blob = V[vert1] + V[vert2]
#                change_flag = True
                if self.debug:
                    print("\n\n\nAdded vertex %d to V" % index_for_new_vertex)
                V[index_for_new_vertex] = merged_blob
                if self.debug:
                    print("\n\n\n[Iter Index: %d] index for new vertex: %d   and the merged blob: %s" % 
                                                   (master_iteration_index, index_for_new_vertex, str(merged_blob)))
                ###   We will now calculate the Int (Internal Difference) and MInt properties
                ###   to be associated with the newly created vertex in the graph:
                within_blob_edge_weights = []
                for u1 in merged_blob:
                    i = u1[0] * arr_width + u1[1]
                    for u2 in merged_blob:
                        j = u2[0] * arr_width + u2[1]
                        if i > j:
                            ij_key = "%d,%d" % (i,j)
                            if ij_key in initial_graph_edges:
                                within_blob_edge_weights.append( initial_graph_edges[ ij_key ] )
                Int_prop[index_for_new_vertex] = max(within_blob_edge_weights) 
                MInt_prop[index_for_new_vertex] = Int_prop[index_for_new_vertex] + kay / float(len(merged_blob))
                ###   Now we must calculate the new graph edges formed by the connections between the newly
                ###   formed node and all other nodes.  However, we first must delete the two nodes that
                ###   we just merged:
                del V[vert1] 
                del V[vert2]
                del Int_prop[vert1]
                del Int_prop[vert2]
                del MInt_prop[vert1]
                del MInt_prop[vert2]
                if self.debug:
                    print("\n\n\nThe modified vertices: %s" % str(V))
                for v in sorted(V):
                    if v == index_for_new_vertex: continue
                    ###   we need to store the edge weights for the pixel-to-pixel edges
                    ###   in the initial graph with one pixel in the newly constructed
                    ###   blob and other in a target blob
                    pixels_in_v = V[v]
                    for u_pixel in merged_blob:
                        i = u_pixel[0] * arr_width + u_pixel[1]
                        inter_blob_edge_weights = []
                        for v_pixel in pixels_in_v:
                            j = v_pixel[0] * arr_width + v_pixel[1]
                            if i > j: 
                                ij_key = "%d,%d" % (i,j)
                            else:
                                ij_key = "%d,%d" % (j,i)
                            if ij_key in initial_graph_edges:
                                inter_blob_edge_weights.append( initial_graph_edges[ij_key ] )
                        if len(inter_blob_edge_weights) > 0:
                            uv_key = str("%d,%d" % (index_for_new_vertex,v))
                            E[uv_key] = min(inter_blob_edge_weights)                        
                            MInt[uv_key] = min( MInt_prop[index_for_new_vertex], MInt_prop[v] )
                if self.debug:
                    print("\n\n\nAt the bottom of for-loop for edges ---   E: %s" % str(E))
                    print("\n\nMInt: %s" % str(MInt))
                index_for_new_vertex = index_for_new_vertex + 1   
#                if change_flag is False: break
    
            new_graph = (copy.deepcopy(V), copy.deepcopy(E))
            MInt = copy.deepcopy(MInt)
            if self.debug:
                print("\n\n\nnew graph at end of iteration: %s" % str(new_graph))
            if master_iteration_index == max_iterations:
                return new_graph
            else:
                self.iterations_used = master_iteration_index
                master_iteration_index += 1
                if self.iterations_terminated:
                    return new_graph
                else:
                    return seg_gen(new_graph, MInt, index_for_new_vertex, master_iteration_index, Int_prop, MInt_prop, kay)  
        segmented_graph = seg_gen(initial_graph, MInt, index_for_new_vertex, master_iteration_index, Int_prop, MInt_prop, kay)
        if self.debug:
            print("\n\n\nsegmented_graph: %s" % str(segmented_graph))
        bounding_boxes = {}
        total_pixels_in_output = 0
        retained_vertex_list = []
        for vertex in sorted(segmented_graph[0]):
            all_pixels_in_blob = segmented_graph[0][vertex]
            total_pixels_in_output += len(all_pixels_in_blob)
            if len(all_pixels_in_blob) > self.min_size_for_graph_based_blobs:
                print("\n\n\npixels in blob indexed %d: %s" % (vertex, str(segmented_graph[0][vertex])))
                retained_vertex_list.append(vertex)
                height_coords = [p[0] for p in all_pixels_in_blob]
                width_coords  = [p[1] for p in all_pixels_in_blob]
                bb_height_min = min(height_coords)
                bb_height_max = max(height_coords)
                bb_width_min  = min(width_coords)
                bb_width_max  = max(width_coords)
                """
                if (abs(bb_width_max - bb_width_min) <= 2 or abs(bb_height_max - bb_height_min) <= 2): continue
                if abs(bb_width_max - bb_width_min) < 0.1 * abs(bb_height_max - bb_height_min): continue
                if abs(bb_height_max - bb_height_min)  <  0.1 * abs(bb_width_max - bb_width_min): continue
                """
                bounding_boxes[vertex] = [bb_height_min, bb_width_min, bb_height_max, bb_width_max]

        print("\n\n\nTotal number of pixels in output blobs: %d" % total_pixels_in_output)
        title = "graph_based_bounding_boxes"
        mw = Tkinter.Tk()
        winsize_w,winsize_h = None,None
        screen_width,screen_height = mw.winfo_screenwidth(),mw.winfo_screenheight()
        if screen_width <= screen_height:
            winsize_w = int(0.5 * screen_width)
            winsize_h = int(winsize_w * (arr_height * 1.0 / arr_width))            
        else:
            winsize_h = int(0.5 * screen_height)
            winsize_w = int(winsize_h * (arr_width * 1.0 / arr_height))
        scaled_image =  image_pil_color.copy().resize((winsize_w,winsize_h), Image.ANTIALIAS)
        mw.title(title) 
        mw.configure( height = winsize_h, width = winsize_w )         
        canvas = Tkinter.Canvas( mw,                         
                             height = winsize_h,            
                             width = winsize_w,             
                             cursor = "crosshair" )   
        canvas.pack(fill=BOTH, expand=True)
        frame = Tkinter.Frame(mw)                            
        frame.pack( side = 'bottom' )                             
        Tkinter.Button( frame,         
                text = 'Save',                                    
                command = lambda: canvas.postscript(file = title + ".eps") 
              ).pack( side = 'left' )                             
        Tkinter.Button( frame,                        
                text = 'Exit',                                    
                command = lambda: mw.destroy(),                    
              ).pack( side = 'right' )                            
        photo = ImageTk.PhotoImage( scaled_image )
        canvas.create_image(winsize_w//2,winsize_h//2,image=photo)
        scale_w = winsize_w / float(arr_width)
        scale_h = winsize_h / float(arr_height)
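        ###   Each bounding box is stored as [bb_height_min, bb_width_min, bb_height_max, bb_width_max]
        ###   in array (row,col) coordinates.  Since the Tkinter canvas wants (x1,y1,x2,y2) in display
        ###   coordinates, the width (col) values are multiplied by scale_w and the height (row) values
        ###   by scale_h in the create_rectangle() call below.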
        for v in bounding_boxes:
            bb = bounding_boxes[v]
            print("\n\n\nFor region proposal with ID %d, the bounding box is: %s" % (v, str(bb)))
            canvas.create_rectangle( (bb[1]*scale_w,bb[0]*scale_h,(bb[3]+1)*scale_w,(bb[2]+1)*scale_h), width='3', outline='red' )
        canvas.update()
        mw.update()
        print("\n\n\nIterations used: %d" % self.iterations_used)
        print("\n\n\nNumber of region proposals: %d" % len(bounding_boxes))
        mw.mainloop()
        if os.path.isfile(title + ".eps"):
            Image.open(title + ".eps").save(title + ".png")
            os.remove(title + ".eps")

        retained_vertices = {}
        retained_edges = {}
        for u in retained_vertex_list:
            retained_vertices[u] = segmented_graph[0][u]
            for v in retained_vertex_list:
                if u > v:
                    uv_label = "%d,%d"%(u,v)
                    if uv_label in segmented_graph[1]:
                        retained_edges[uv_label] = segmented_graph[1][uv_label]
        output_segmentation_graph = (retained_vertices, retained_edges)

        ###  color_map is a dictionary with blob ids as keys and, as values, the random RGB
        ###  color assigned to each blob for its visualization
        color_map = {}
        for blob_idx in sorted(output_segmentation_graph[0], key=lambda x: len(output_segmentation_graph[0][x]), reverse=True):
            if blob_idx not in color_map:
                color_map[blob_idx] = (random.randint(0,255), random.randint(0,255),random.randint(0,255))
        return output_segmentation_graph, color_map

    def graph_based_segmentation_for_arrays(self, which_one):
        '''
        This method is provided to enable the user to play with small arrays when
        experimenting with graph-based logic for image segmentation.  At the moment, it
        provides three small arrays, one under the "which_one==1" option, one under the
        "which_one==2" option, and the last under the "which_one==3" option.
        '''
        print("\nExperiments with selective-search logic on made-up arrays")
        kay = self.kay
        if which_one == 1:
            img_array = np.zeros((20,24), dtype = float)                ## height=20  width=24
            arr_height,arr_width = img_array.shape
            print("\n\n\nimage array size: height=%d  width=%d" % (arr_height,arr_width))
            for h in range(arr_height):
                for w in range(arr_width):
                    if ((4 < h < 8) or (12 < h < 16)) and ((4 < w < 10) or (14 < w < 20)):
                        img_array[h,w] = 200
            print("\n\n\nimg_array:")
            print(img_array)
            image_pil = Image.fromarray(img_array.astype('uint8'), 'L')
            self.displayImage3(image_pil, "made-up image")
            image_pil.save("array1.png")
        elif which_one == 2:
            img_array = np.zeros((6,10), dtype = float)                ## height=6  width=10
            arr_height,arr_width = img_array.shape
            print("\n\n\nimage array size: height=%d  width=%d" % (arr_height,arr_width))
            for h in range(arr_height):
                for w in range(arr_width):
                    if (1 < h < 4) and ((1 < w < 4) or (6 < w < 9)):
                        img_array[h,w] = 128
            print("\n\n\nimg_array:")
            print(img_array)
            image_pil = Image.fromarray(img_array.astype('uint8'), 'L')
            self.displayImage3(image_pil, "made-up image")
            image_pil.save("array2.png")
        else:
            img_array = np.zeros((20,24), dtype = float)               ## height=20  width=24
            arr_height,arr_width = img_array.shape
            print("\n\n\nimage array size: height=%d  width=%d" % (arr_height,arr_width))
            for h in range(4,arr_height-4):
                for w in range(4,arr_width-4):
                    img_array[h,w] = 100
            for h in range(8,arr_height-8):
                for w in range(8,arr_width-8):
                    img_array[h,w] = 200
            print("\n\n\nimg_array:")
            print(img_array)
            image_pil = Image.fromarray(img_array.astype('uint8'), 'L')
            self.displayImage3(image_pil, "made-up image")
            image_pil.save("array3.png")

        initial_num_graph_vertices = arr_height * arr_width
        print("\n\n\nnumber of vertices in graph: %d" % initial_num_graph_vertices)
        initial_graph_vertices = {i : None for i in range(initial_num_graph_vertices)}
        for i in range(initial_num_graph_vertices):
            h,w  =  i // arr_width, i - (i // arr_width)*arr_width
            initial_graph_vertices[i] = [(h,w)]
      
        initial_graph_edges = {}
        MInt = {}
        for i in range(initial_num_graph_vertices):
            hi,wi =  initial_graph_vertices[i][0]
            for j in range(initial_num_graph_vertices):
                hj,wj =  initial_graph_vertices[j][0]
                if i > j:
                    if abs(hi - hj) <= 1 and abs(wi - wj) <= 1:
                        initial_graph_edges[ "%d,%d" % (i,j) ]  =  abs(img_array[hi,wi] - img_array[hj,wj])
                        MInt[ "%d,%d" % (i,j) ] = kay

        ###   INTERNAL DIFFERENCE property at the initial vertices in the graph
        ###   Internal Difference is defined as the max edge weight between the pixels in the pixel
        ###   blob represented by a graph vertex.
        Int_prop = {v : 0.0 for v in initial_graph_vertices}
        ###   MInt_prop at each vertex is the Int_prop plus the kay divided by the cardinality of the blob
        MInt_prop = {v : kay for v in initial_graph_vertices}   
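        ###   In the F&H formulation, two blobs C1 and C2 are merged only if
        ###
        ###        Dif(C1,C2)  <=  MInt(C1,C2)  =  min( Int(C1) + kay/|C1|,  Int(C2) + kay/|C2| )
        ###
        ###   which is exactly the "E[edge] <= MInt[edge]" condition that survives the
        ###   "E[edge] > MInt[edge]" pruning test in seg_gen() below.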
        if self.debug:
            print("\n\n\ninitial graph_vertices: %s" % str(sorted(initial_graph_vertices.items())))
            print("\n\n\nnumber of vertices in initial graph: %d" % len(initial_graph_vertices))
            print("\n\n\ninitial graph_edges: %s" % str(sorted(initial_graph_edges.items())))
            print("\n\n\nnumber of edges in initial graph: %d" % len(initial_graph_edges))
            print("\n\n\ninitial MInt: %s" % str(sorted(MInt.items())))
            print("\n\n\nnumber of edges in initial MInt: %d" % len(MInt))

        initial_graph = (copy.deepcopy(initial_graph_vertices), copy.deepcopy(initial_graph_edges))

        def find_all_connections_for_a_vertex(vert, graph):
            vertices = graph[0]
            edges    = graph[1]
            print("pixels in vertex %d: %s" % (vert, str(vertices[vert])))
            connected_verts_in_graph = []
            for edge in edges:
                end1,end2 = int(edge[:edge.find(',')]), int(edge[edge.find(',')+1 :])                 
                if vert == end1:
                    connected_verts_in_graph.append(end2)
                elif vert == end2:
                    connected_verts_in_graph.append(end1)
            return connected_verts_in_graph

        index_for_new_vertex = len(initial_graph_vertices)
        master_iteration_index = 0
        self.iterations_terminated = False

        ###   graph = (V,E) with both V and E as dictionaries.
        ###   NOTE: The edges E in the graph stand for 'Dif(C1,C2)' in F&H
        def seg_gen( graph, MInt, index_for_new_vertex, master_iteration_index, Int_prop, MInt_prop, kay ):
            print("\n\n\n=========================== Starting iteration %d ========================== \n\n\n" % 
                                                                                             master_iteration_index)
            V,E = graph
            if self.debug:
                print("\n\n\nV: %s" % str(V))
                print("\n\n\nE: %s" % str(E))
                print("\n\n\nMInt: %s" % str(MInt))
            max_iterations = self.max_iterations
            print("\n\n\nNumber of region proposals at the current level of merging: %d" % len(V))
            if len(E) == 0:
                print("\n\n\nThe graph has no edges left")
                return graph
            sorted_vals_and_edges = sorted( (v,k) for k,v in E.items() )
            sorted_edges = [x[1] for x in sorted_vals_and_edges]
            print("\n\n\n[Iter Index: %d] Dissimilarity value associated with the most similar edge: %s" % 
                                                              (master_iteration_index, str(sorted_vals_and_edges[0])))
            """    
            if sorted_vals_and_edges[0][0] > 0.5:
                print("\n\n\nIterations terminated at iteration index: %d" % master_iteration_index)
                self.iterations_terminated = True
                return graph
            """
#            change_flag = False
            print("\nOne dot represents TEN possible merge operations in the graph representation of the image\n")
            if self.debug:
                print("\n\n\nBefore entering the edge loop --- sorted_edges: %s" % str(sorted_edges))
                print("\n\n\nBefore entering the edge loop --- E: %s" % str(E))
                print("\n\n\nBefore entering the edge loop --- MInt: %s" % str(MInt))
                print("\n\n\nBefore entering the edge loop --- vertices: %s" % str(V))
            edge_counter = 0
            for edge in sorted_edges:
                if edge not in E: continue
                edge_counter += 1
                if edge_counter % 10 == 0: 
                    sys.stdout.write(". ") 
                    sys.stdout.flush()
                if edge not in MInt:
                    sys.exit("MInt does not have an entry for %s" % edge)
                if edge not in E:
                    sys.exit("\n\n\nE does not have an entry for %s" % edge)
                if E[edge] >  MInt[edge]: 
                    del E[edge]
                    del MInt[edge]
                    continue 
                vert1,vert2 = int(edge[:edge.find(',')]), int(edge[edge.find(',')+1 :])
                if self.debug:
                    print("\n\n\n[Iter Index: %d] The least dissimilar two vertices in the graph are: %s and %s" % 
                                                                                (master_iteration_index, vert1, vert2))
                ###   Since we want to go through all the edges in 'sorted_edges' WHILE we are
                ###   deleting the vertices that are merged and the edges that are no longer
                ###   relevant because of vertex deletion, we need to be careful going forward:
                if (vert1 not in V) or (vert2 not in V): continue
                affected_edges = []
                for edg in E:
                    end1,end2 = int(edg[:edg.find(',')]), int(edg[edg.find(',')+1 :])                
                    if (vert1 == end1) or (vert1 == end2) or (vert2 == end1) or (vert2 == end2):
                        affected_edges.append(edg)
                if self.debug:
                    print("\n\n\naffected edges to be deleted: %s" % str(affected_edges))
                for edg in affected_edges:
                    del E[edg]
                    del MInt[edg]
                merged_blob = V[vert1] + V[vert2]
#                change_flag = True
                if self.debug:
                    print("\n\n\nAdded vertex %d to V" % index_for_new_vertex)
                V[index_for_new_vertex] = merged_blob
                if self.debug:
                    print("\n\n\n[Iter Index: %d] index for new vertex: %d   and the merged blob: %s" % 
                                                     (master_iteration_index, index_for_new_vertex, str(merged_blob)))
                ###   We will now calculate the Int (Internal Difference) and MInt properties to be
                ###   associated with the newly created vertex in the graph:
                within_blob_edge_weights = []
                for u1 in merged_blob:
                    i = u1[0] * arr_width + u1[1]
                    for u2 in merged_blob:
                        j = u2[0] * arr_width + u2[1]
                        if i > j:
                            ij_key = "%d,%d" % (i,j)
                            if ij_key in initial_graph_edges:
                                within_blob_edge_weights.append( initial_graph_edges[ ij_key ] )
                Int_prop[index_for_new_vertex] = max(within_blob_edge_weights) 
                MInt_prop[index_for_new_vertex] = Int_prop[index_for_new_vertex] + kay / float(len(merged_blob))
                ###   Now we must calculate the new graph edges formed by the connections between the newly
                ###   formed node and all other nodes.  However, we first must delete the two nodes that
                ###   we just merged:
                del V[vert1] 
                del V[vert2]
                del Int_prop[vert1]
                del Int_prop[vert2]
                del MInt_prop[vert1]
                del MInt_prop[vert2]
                if self.debug:
                    print("\n\n\nThe modified vertices: %s" % str(V))
                for v in sorted(V):
                    if v == index_for_new_vertex: continue
                    ###   we need to store the edge weights for the pixel-to-pixel edges
                    ###   in the initial graph with one pixel in the newly constructed
                    ###   blob and other in a target blob
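                    ###   Following F&H, the dissimilarity Dif() between the merged blob and a
                    ###   neighboring blob is taken to be the SMALLEST of these pixel-to-pixel
                    ###   edge weights, which is why min() is applied to inter_blob_edge_weights
                    ###   below.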
                    pixels_in_v = V[v]
                    for u_pixel in merged_blob:
                        i = u_pixel[0] * arr_width + u_pixel[1]
                        inter_blob_edge_weights = []
                        for v_pixel in pixels_in_v:
                            j = v_pixel[0] * arr_width + v_pixel[1]
                            if i > j: 
                                ij_key = "%d,%d" % (i,j)
                            else:
                                ij_key = "%d,%d" % (j,i)
                            if ij_key in initial_graph_edges:
                                inter_blob_edge_weights.append( initial_graph_edges[ij_key ] )
                        if len(inter_blob_edge_weights) > 0:
                            uv_key = "%d,%d" % (index_for_new_vertex,v)
                            E[uv_key] = min(inter_blob_edge_weights)                        
                            MInt[uv_key] = min( MInt_prop[index_for_new_vertex], MInt_prop[v] )
                if self.debug:
                    print("\n\n\nAt the bottom of for-loop for edges ---   E: %s" % str(E))
                    print("\n\nMInt: %s" % str(MInt))
                index_for_new_vertex = index_for_new_vertex + 1   
#                if change_flag is False: break
            new_graph = (copy.deepcopy(V), copy.deepcopy(E))
            MInt = copy.deepcopy(MInt)
            if self.debug:
                print("\n\n\nnew graph at end of iteration: %s" % str(new_graph))
            if master_iteration_index == max_iterations:
                return new_graph
            else:
                self.iterations_used = master_iteration_index - 1
                master_iteration_index += 1
                if self.iterations_terminated:
                    return new_graph
                else:
                    return seg_gen(new_graph, MInt, index_for_new_vertex, master_iteration_index, Int_prop, MInt_prop, kay)  
        segmented_graph = seg_gen(initial_graph, MInt, index_for_new_vertex, master_iteration_index, Int_prop, MInt_prop, kay)
        if self.debug:
            print("\n\n\nsegmented_graph: %s" % str(segmented_graph))
        bounding_boxes = {}
        total_pixels_in_output = 0
        for vertex in sorted(segmented_graph[0]):
            all_pixels_in_blob = segmented_graph[0][vertex]
            total_pixels_in_output += len(all_pixels_in_blob)
            if len(all_pixels_in_blob) > 1:
                print("\n\n\npixels in blob indexed %d: %s" % (vertex, str(segmented_graph[0][vertex])))
                height_coords = [p[0] for p in all_pixels_in_blob]
                width_coords  = [p[1] for p in all_pixels_in_blob]
                bb_height_min = min(height_coords)
                bb_height_max = max(height_coords)
                bb_width_min  = min(width_coords)
                bb_width_max  = max(width_coords)
                """
                if (abs(bb_width_max - bb_width_min) <= 2 or abs(bb_height_max - bb_height_min) <= 2): continue
                if abs(bb_width_max - bb_width_min) < 0.1 * abs(bb_height_max - bb_height_min): continue
                if abs(bb_height_max - bb_height_min)  <  0.1 * abs(bb_width_max - bb_width_min): continue
                """
                bounding_boxes[vertex] = [bb_height_min, bb_width_min, bb_height_max, bb_width_max]

        print("\n\n\nTotal number of pixels in output blobs: %d" % total_pixels_in_output)
        title = "graph_based_bounding_boxes"
        mw = Tkinter.Tk()
        winsize_w,winsize_h = None,None
        screen_width,screen_height = mw.winfo_screenwidth(),mw.winfo_screenheight()
        if screen_width <= screen_height:
            winsize_w = int(0.5 * screen_width)
            winsize_h = int(winsize_w * (arr_height * 1.0 / arr_width))            
        else:
            winsize_h = int(0.5 * screen_height)
            winsize_w = int(winsize_h * (arr_width * 1.0 / arr_height))
        scaled_image =  image_pil.copy().resize((winsize_w,winsize_h), Image.ANTIALIAS)
        mw.title(title) 
        mw.configure( height = winsize_h, width = winsize_w )
        canvas = Tkinter.Canvas( mw,                         
                             height = winsize_h,            
                             width = winsize_w,             
                             cursor = "crosshair" )   
        canvas.pack(fill=BOTH, expand=True)
        frame = Tkinter.Frame(mw)                            
        frame.pack( side = 'bottom' )                             
        Tkinter.Button( frame,         
                text = 'Save',                                    
                command = lambda: canvas.postscript(file = title + ".eps") 
              ).pack( side = 'left' )                             
        Tkinter.Button( frame,                        
                text = 'Exit',                                    
                command = lambda: mw.destroy(),                    
              ).pack( side = 'right' )                            
        photo = ImageTk.PhotoImage( scaled_image )
        canvas.create_image(winsize_w//2,winsize_h//2,image=photo)
        scale_w = winsize_w / float(arr_width)
        scale_h = winsize_h / float(arr_height)
        for v in bounding_boxes:
            bb = bounding_boxes[v]
            print("\n\n\nFor region proposal with ID %d, the bounding box is: %s" % (v, str(bb)))
            canvas.create_rectangle( (bb[1]*scale_w,bb[0]*scale_h,(bb[3]+1)*scale_w,(bb[2]+1)*scale_h), width='3', outline='red' )
        canvas.update()
        mw.update()
        print("\n\n\nIterations used: %d" % self.iterations_used)
        print("\n\n\nNumber of region proposals: %d" % len(bounding_boxes))
        mw.mainloop()
        if os.path.isfile(title + ".eps"):
            Image.open(title + ".eps").save(title + ".png")
            os.remove(title + ".eps")
        return segmented_graph[0]


    def extract_image_region_interactively_by_dragging_mouse(self, image_name):
        '''
        This is one method you can use to apply selective search algorithm to just a
        portion of your image.  This method extracts the portion you want.  You click
        at the upper left corner of the rectangular portion of the image you are
        interested in and you then drag the mouse pointer to the lower right corner.
        Make sure that you click on "save" and "exit" after you have delineated the
        area.
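
        A typical call, assuming that 'rpg' is your RegionProposalGenerator instance and that
        'my_image.jpg' (a file name used here purely for illustration) exists:

            extracted = rpg.extract_image_region_interactively_by_dragging_mouse("my_image.jpg")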
        '''
        global delineator_image      ### global so that methods like _on_mouse_motion etc can access it
        global delineator_polygon    ###                  """
        print("Drag the mouse pointer to delineate the portion of the image you want to extract:")
        RegionProposalGenerator.image_portion_delineation_coords = []
        pil_image = Image.open(image_name).convert("L")
        RegionProposalGenerator.image_type = "L"
        image_portion_delineation_coords = []
        mw = Tkinter.Tk() 
        mw.title("Click and then drag the mouse pointer --- THEN CLICK SAVE and EXIT")
        width,height = pil_image.size

        screen_width,screen_height = mw.winfo_screenwidth(),mw.winfo_screenheight()
        if screen_width <= screen_height:
            winsize_x = int(0.5 * screen_width)
            winsize_y = int(winsize_x * (height * 1.0 / width))            
        else:
            winsize_y = int(0.5 * screen_height)
            winsize_x = int(winsize_y * (width * 1.0 / height))
        display_pil_image = pil_image.resize((winsize_x,winsize_y), Image.ANTIALIAS)
        scale_w = width / float(winsize_x)
        scale_h = height / float(winsize_y)
        delineator_image =  display_pil_image.copy()
        extracted_image =  display_pil_image.copy()
        self.extracted_image_portion_file_name = os.path.basename(image_name)
        mw.configure(height = winsize_y, width = winsize_x) 
        photo_image = ImageTk.PhotoImage(display_pil_image)
        canvasM = Tkinter.Canvas( mw,   
                                  width = winsize_x,
                                  height =  winsize_y,
                                  cursor = "crosshair" )  
        canvasM.pack( side = 'top' )   
        frame = Tkinter.Frame(mw)  
        frame.pack( side = 'bottom' ) 
        Tkinter.Button( frame, 
                        text = 'Save', 
                        command = lambda: RegionProposalGenerator.extracted_image.save(self.extracted_image_portion_file_name) 
                      ).pack( side = 'left' )  
        Tkinter.Button( frame,  
                        text = 'Exit',
                        command = lambda: mw.destroy()
                      ).pack( side = 'right' )  
        canvasM.bind("<ButtonPress-1>", lambda e: self._start_mouse_motion(e, delineator_image))
        canvasM.bind("<ButtonRelease-1>", lambda e:self._stop_mouse_motion(e, delineator_image))
        canvasM.bind("<B1-Motion>", lambda e: self._on_mouse_motion(e, delineator_image))
        canvasM.create_image( 0,0, anchor=NW, image=photo_image)
        canvasM.pack(fill=BOTH, expand=1)
        mw.mainloop()       
        self.displayImage3(RegionProposalGenerator.extracted_image, "extracted image -- close window when done viewing")
        extracted_image = RegionProposalGenerator.extracted_image
        width_ex, height_ex = extracted_image.size
        extracted_image = extracted_image.resize( (int(width_ex * scale_w), int(height_ex * scale_h)), Image.ANTIALIAS )
        self.displayImage6(extracted_image, "extracted image")
        return extracted_image


    def extract_image_region_interactively_through_mouse_clicks(self, image_file):
        '''
        This method allows a user to use a sequence of mouse clicks in order to specify a
        region of the input image that should be subject to further processing.  The
        mouse clicks taken together define a polygon. The method encloses the
        polygonal region by a minimum bounding rectangle, which then becomes the new
        input image for the rest of processing.
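
        A typical call, assuming that 'rpg' is your RegionProposalGenerator instance and that
        'my_image.jpg' is again just an illustrative file name:

            extracted = rpg.extract_image_region_interactively_through_mouse_clicks("my_image.jpg")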
        '''
        global delineator_image
        global delineator_coordinates
        print("Click mouse in a clockwise fashion to specify the portion you want to extract:")
        RegionProposalGenerator.image_portion_delineation_coords = []

        if os.path.isfile(image_file):
            pil_image = Image.open(image_file).convert("L")
        else:
            sys.exit("the image file %s does not exist --- aborting" % image_file)
        RegionProposalGenerator.image_type = "L"
        mw = Tkinter.Tk() 
        mw.title("Place mouse clicks clockwise --- THEN CLICK SAVE and EXIT")
        width,height = pil_image.size
        screen_width,screen_height = mw.winfo_screenwidth(),mw.winfo_screenheight()
        if screen_width <= screen_height:
            winsize_x = int(0.5 * screen_width)
            winsize_y = int(winsize_x * (height * 1.0 / width))            
        else:
            winsize_y = int(0.5 * screen_height)
            winsize_x = int(winsize_y * (width * 1.0 / height))
        display_pil_image = pil_image.resize((winsize_x,winsize_y), Image.ANTIALIAS)
        scale_w = width / float(winsize_x)
        scale_h = height / float(winsize_y)
        delineator_image =  display_pil_image.copy()
        extracted_image =  display_pil_image.copy()
        self.extracted_image_portion_file_name = "image_portion_of_" + image_file
        mw.configure(height = winsize_y, width = winsize_x) 
        photo_image = ImageTk.PhotoImage(display_pil_image)
        canvasM = Tkinter.Canvas( mw,   
                                  width = winsize_x,
                                  height = winsize_y,
                                  cursor = "crosshair" )  
        canvasM.pack( side = 'top' )   
        frame = Tkinter.Frame(mw)  
        frame.pack( side = 'bottom' ) 
        Tkinter.Button( frame, 
                        text = 'Save', 
                        command = RegionProposalGenerator._extract_and_save_image_portion_polygonal
                      ).pack( side = 'left' )  
        Tkinter.Button( frame,  
                        text = 'Exit',
                        command = lambda: mw.destroy()
                      ).pack( side = 'right' )  
        canvasM.bind("<Button-1>", lambda e: self._image_portion_delineator(e, delineator_image))
        canvasM.create_image( 0,0, anchor=NW, image=photo_image)
        canvasM.pack(fill=BOTH, expand=1)
        mw.mainloop()       
#        self.displayImage3(RegionProposalGenerator.extracted_image, "extracted image -- close window when done viewing")
        self.displayImage3(extracted_image, "extracted image -- close window when done viewing")
        extracted_image = RegionProposalGenerator.extracted_image
        width_ex, height_ex = extracted_image.size
        extracted_image = extracted_image.resize( (int(width_ex * scale_w), int(height_ex * scale_h)), Image.ANTIALIAS )
        self.displayImage6(extracted_image, "extracted image")
        return extracted_image


    def extract_rectangular_masked_segment_of_image(self, horiz_start, horiz_end, vert_start, vert_end):
        '''
        Keep in mind the following convention used in the PIL's Image class: the first
        coordinate in the args supplied to the getpixel() and putpixel() methods is for
        the horizontal axis (the x-axis, if you will) and the second coordinate for the
        vertical axis (the y-axis).  On the other hand, in the args supplied to the
        array and matrix processing functions, the first coordinate is for the row
        index (meaning the vertical) and the second coordinate for the column index
        (meaning the horizontal).  In what follows, I use the index 'i' with its
        positive direction going down for the vertical coordinate and the index 'j'
        with its positive direction going to the right as the horizontal coordinate. 
        The origin is at the upper left corner of the image.
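
        As an illustration, assuming that 'rpg' is a RegionProposalGenerator instance whose
        'original_im' attribute has already been set to a PIL image, the call

            rpg.extract_rectangular_masked_segment_of_image(100, 300, 50, 200)

        would black out every pixel outside the rectangle whose j (horizontal) index runs from
        100 to 300 and whose i (vertical) index runs from 50 to 200.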
        '''
        masked_image = self.original_im.copy()
        width,height = masked_image.size 
        mask_array = np.zeros((height, width), dtype="float")
        for i in range(0, height):
            for j in range(0, width):
                if (vert_start < i < vert_end) and (horiz_start < j < horiz_end):
                    mask_array[(i,j)] = 1
        self._display_and_save_array_as_image( mask_array, "_mask__" )
        for i in range(0, height):
            for j in range(0, width):
                if mask_array[(i,j)] == 0:
                    masked_image.putpixel((j,i), (0,0,0)) 
        self.displayImage3(masked_image, "a segment of the image")

    def displayImage(self, argimage, title=""):
        '''
        Displays the argument image.  The display shuts off automatically after the delay,
        in milliseconds, that is passed as the first argument to tk.after(), which amounts
        to one second with the value used below.
        '''
        width,height = argimage.size
        winsize_x,winsize_y = width,height
        if width > height:
            winsize_x = 600
            winsize_y = int(600.0 * (height * 1.0 / width))
        else:
            winsize_y = 600
            winsize_x = int(600.0 * (width * 1.0 / height))
        display_image = argimage.resize((winsize_x,winsize_y), Image.ANTIALIAS)
        tk = Tkinter.Tk()
        tk.title(title)   
        frame = Tkinter.Frame(tk, relief=RIDGE, borderwidth=2)
        frame.pack(fill=BOTH,expand=1)
        photo_image = ImageTk.PhotoImage( display_image )
        label = Tkinter.Label(frame, image=photo_image)
        label.pack(fill=X, expand=1)
        tk.after(1000, self._callback, tk)    # display will stay on for just one second
        tk.mainloop()

    def displayImage2(self, argimage, title=""):
        '''
        Displays the argument image.  The display stays on until the user closes the
        window.  If you want a display that automatically shuts off after a certain
        number of seconds, use the previous method displayImage().
        '''
        width,height = argimage.size
        winsize_x,winsize_y = width,height
        if width > height:
            winsize_x = 600
            winsize_y = int(600.0 * (height * 1.0 / width))
        else:
            winsize_y = 600
            winsize_x = int(600.0 * (width * 1.0 / height))
        display_image = argimage.resize((winsize_x,winsize_y), Image.ANTIALIAS)
        tk = Tkinter.Tk()
        tk.title(title)   
        frame = Tkinter.Frame(tk, relief=RIDGE, borderwidth=2)
        frame.pack(fill=BOTH,expand=1)
        photo_image = ImageTk.PhotoImage( display_image )
        label = Tkinter.Label(frame, image=photo_image)
        label.pack(fill=X, expand=1)
        tk.mainloop()

    def displayImage3(self, argimage, title=""):
        '''
        Displays the argument image (which must be of type Image) in its actual size.  The 
        display stays on until the user closes the window.  If you want a display that 
        automatically shuts off after a certain number of seconds, use the method 
        displayImage().
        '''
        width,height = argimage.size
        tk = Tkinter.Tk()
        winsize_x,winsize_y = None,None
        screen_width,screen_height = tk.winfo_screenwidth(),tk.winfo_screenheight()
        if screen_width <= screen_height:
            winsize_x = int(0.5 * screen_width)
            winsize_y = int(winsize_x * (height * 1.0 / width))            
        else:
            winsize_y = int(0.5 * screen_height)
            winsize_x = int(winsize_y * (width * 1.0 / height))
        display_image = argimage.resize((winsize_x,winsize_y), Image.ANTIALIAS)
        tk.title(title)   
        frame = Tkinter.Frame(tk, relief=RIDGE, borderwidth=2)
        frame.pack(fill=BOTH,expand=1)
        photo_image = ImageTk.PhotoImage( display_image )
        label = Tkinter.Label(frame, image=photo_image)
        label.pack(fill=X, expand=1)
        tk.mainloop()

    def displayImage4(self, argimage, title=""):
        '''
        Displays the argument image (which must be of type Image) in its actual size without 
        imposing the constraint that the larger dimension of the image be at most half the 
        corresponding screen dimension.
        '''
        width,height = argimage.size
        tk = Tkinter.Tk()
        tk.title(title)   
        frame = Tkinter.Frame(tk, relief=RIDGE, borderwidth=2)
        frame.pack(fill=BOTH,expand=1)
        photo_image = ImageTk.PhotoImage( argimage )
        label = Tkinter.Label(frame, image=photo_image)
        label.pack(fill=X, expand=1)
        tk.mainloop()

    def displayImage5(self, argimage, title=""):
        '''
        This does the same thing as displayImage4() except that it also provides for
        "save" and "exit" buttons.  This method displays the argument image with more 
        liberal sizing constraints than the previous methods.  This method is 
        recommended for showing a composite of all the segmented objects, with each
        object displayed separately.  Note that 'argimage' must be of type Image.
        '''
        width,height = argimage.size
        winsize_x,winsize_y = None,None
        mw = Tkinter.Tk()
        screen_width,screen_height = mw.winfo_screenwidth(),mw.winfo_screenheight()
        if screen_width <= screen_height:
            winsize_x = int(0.8 * screen_width)
            winsize_y = int(winsize_x * (height * 1.0 / width))            
        else:
            winsize_y = int(0.8 * screen_height)
            winsize_x = int(winsize_y * (width * 1.0 / height))
        mw.configure(height = winsize_y, width = winsize_x)         
        mw.title(title)   
        canvas = Tkinter.Canvas( mw,                         
                             height = winsize_y,
                             width = winsize_x,
                             cursor = "crosshair" )   
        canvas.pack( side = 'top' )                               
        frame = Tkinter.Frame(mw)                            
        frame.pack( side = 'bottom' )                             
        Tkinter.Button( frame,         
                text = 'Save',                                    
                command = lambda: canvas.postscript(file = title + ".eps") 
              ).pack( side = 'left' )                             
        Tkinter.Button( frame,                        
                text = 'Exit',                                    
                command = lambda: mw.destroy(),                    
              ).pack( side = 'right' )                            
        photo = ImageTk.PhotoImage(argimage.resize((winsize_x,winsize_y), Image.ANTIALIAS))
        canvas.create_image(winsize_x/2,winsize_y/2,image=photo)
        mw.mainloop()
        if os.path.isfile(title + ".eps"):
            Image.open(title + ".eps").save(title + ".png")
            os.remove(title + ".eps")

    def displayImage6(self, argimage, title=""):
        '''
        For the argimage, which must be of type PIL.Image, this does the same thing as
        displayImage3() except that it also provides for "save" and "exit" buttons.
        '''
        width,height = argimage.size
        mw = Tkinter.Tk()
        winsize_x,winsize_y = None,None
        screen_width,screen_height = mw.winfo_screenwidth(),mw.winfo_screenheight()
        if screen_width <= screen_height:
            winsize_x = int(0.5 * screen_width)
            winsize_y = int(winsize_x * (height * 1.0 / width))            
        else:
            winsize_y = int(0.5 * screen_height)
            winsize_x = int(winsize_y * (width * 1.0 / height))
        display_image = argimage.resize((winsize_x,winsize_y), Image.ANTIALIAS)
        mw.title(title)   
        canvas = Tkinter.Canvas( mw,                         
                             height = winsize_y,
                             width = winsize_x,
                             cursor = "crosshair" )   
        canvas.pack( side = 'top' )                               
        frame = Tkinter.Frame(mw)                            
        frame.pack( side = 'bottom' )                             
        Tkinter.Button( frame,         
                text = 'Save',                                    
                command = lambda: canvas.postscript(file = title + ".eps") 
              ).pack( side = 'left' )                             
        Tkinter.Button( frame,                        
                text = 'Exit',                                    
                command = lambda: mw.destroy(),                    
              ).pack( side = 'right' )                            
        photo = ImageTk.PhotoImage(argimage.resize((winsize_x,winsize_y), Image.ANTIALIAS))
        canvas.create_image(winsize_x/2,winsize_y/2,image=photo)
        mw.mainloop()
        if os.path.isfile(title + ".eps"):
            Image.open(title + ".eps").save(title + ".png")
            os.remove(title + ".eps")

    @staticmethod    
    def _start_mouse_motion(evt, input_image):
        global delineator_image
        display_width, display_height = delineator_image.size
        canvasM = evt.widget   
        markX, markY = evt.x, evt.y   
        RegionProposalGenerator.image_portion_delineation_coords.append((markX,markY))
        print("Button pressed at: x=%s  y=%s\n" % (markX, markY)) 
        canvasM.create_oval( markX-5, markY-5, markX+5, markY+5, outline="red", fill="green", width = 2 )  

    @staticmethod    
    def _stop_mouse_motion(evt, input_image):
        global delineator_image
        display_width, display_height = delineator_image.size
        canvasM = evt.widget   
        markX, markY = evt.x, evt.y   
        RegionProposalGenerator.image_portion_delineation_coords.append((markX,markY))
        print("Button pressed at: x=%s  y=%s\n" % (markX, markY))
        points = RegionProposalGenerator.image_portion_delineation_coords
        canvasM.create_rectangle(points[0][0], points[0][1], points[-1][0], points[-1][1], outline="red", fill="green", width = 2 ) 
        RegionProposalGenerator.extracted_image = RegionProposalGenerator._extract_image_portion_rectangular()

    @staticmethod    
    def _on_mouse_motion(evt, input_image):
        global delineator_image
        display_width, display_height = delineator_image.size
        canvasM = evt.widget   
        markX, markY = evt.x, evt.y   
        RegionProposalGenerator.image_portion_delineation_coords.append((markX,markY))
        points = RegionProposalGenerator.image_portion_delineation_coords
        canvasM.create_rectangle(points[0][0], points[0][1], points[-1][0], points[-1][1], outline="red", fill="green", width = 2 ) 

    @staticmethod    
    def _image_portion_delineator(evt, input_image):
        global delineator_image
        display_width, display_height = delineator_image.size
        canvasM = evt.widget   
        markX, markY = evt.x, evt.y   
        RegionProposalGenerator.image_portion_delineation_coords.append((markX,markY))
        print("Button pressed at: x=%s  y=%s\n" % (markX, markY)) 
        canvasM.create_oval( markX-10, markY-10, markX+10, markY+10, outline="red", fill="green", width = 2 )  

    @staticmethod    
    def _extract_image_portion_rectangular():
        '''
        This extracts a rectangular region of the image as specified by dragging the mouse pointer
        from the upper left corner of the region to its lower right corner.  After extracting the
        region, it returns it to the caller, which then stores it in the class-level 'extracted_image'
        attribute of RegionProposalGenerator.
        '''
        global delineator_image
        width,height = delineator_image.size
        polygon = RegionProposalGenerator.image_portion_delineation_coords
        extracted_width = polygon[-1][0] - polygon[0][0]
        extracted_height = polygon[-1][1] - polygon[0][1]
        extracted_image = Image.new(RegionProposalGenerator.image_type, (extracted_width,extracted_height), (0))
        for x in range(0, extracted_width):        
            for y in range(0, extracted_height):
                extracted_image.putpixel((x,y), delineator_image.getpixel((polygon[0][0]+x, polygon[0][1]+y)))
        return extracted_image

    @staticmethod    
    def _extract_and_save_image_portion_polygonal():
        '''
        This extracts a polygonal region of the image as specified by clicking the mouse in a clockwise
        direction.  After extracting the region, it stores in the class-level 'extracted_image'
        attribute of RegionProposalGenerator the minimum bounding rectangle portion of the original
        image that encloses the polygonal region --- with the pixels outside the polygonal area set to 0.
        '''
        global delineator_image
        width,height = delineator_image.size
        polygon = RegionProposalGenerator.image_portion_delineation_coords
        if len(polygon) <= 2:
            sys.exit("You need MORE THAN TWO mouse clicks (in a clockwise fashion) to extract a region --- aborting!")
        x_min,x_max = min([x for (x,y) in polygon]),max([x for (x,y) in polygon])
        y_min,y_max = min([y for (x,y) in polygon]),max([y for (x,y) in polygon]) 
        extracted_width = x_max - x_min
        extracted_height = y_max - y_min
        extracted_image = Image.new(RegionProposalGenerator.image_type, (extracted_width,extracted_height), (0))
        polygon = [(x - x_min, y - y_min) for (x,y) in polygon]
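        ###   The nested loops below implement the even-odd (ray casting) test for deciding
        ###   whether a pixel lies inside the user-specified polygon: a horizontal ray from
        ###   (0,y) to (x,y) is intersected with every polygon edge (including the closing
        ###   edge back to the first vertex), and the pixel is copied over only when the
        ###   total number of crossings is odd.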
        for x in range(0, extracted_width):        
            for y in range(0, extracted_height):
                number_of_crossings = 0
                raster_line = (0,y,x,y)
                for l in range(0,len(polygon)-1):
                    line = (polygon[l][0],polygon[l][1],polygon[l+1][0],polygon[l+1][1])
                    if _line_intersection(raster_line, line):
                        number_of_crossings += 1
                last_line = (polygon[l+1][0],polygon[l+1][1],polygon[0][0],polygon[0][1])
                number_of_crossings += _line_intersection(raster_line, last_line)
                if number_of_crossings % 2 == 1:
                    extracted_image.putpixel((x,y), delineator_image.getpixel((x+x_min, y + y_min)))
        RegionProposalGenerator.extracted_image = extracted_image

#______________________  Private Methods of the RegionProposalGenerator Class  ________________

    def _callback(self,arg):                       ## needed in Test directory for flashing an
        arg.destroy()                              ##  image momentarily


#_________________________  End of RegionProposalGenerator Class Definition ___________________________



#______________________________    Test code follows    _________________________________

if __name__ == '__main__': 
    pass
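
    ##  A minimal usage sketch, kept commented out because the constructor options shown
    ##  here (such as 'kay' and 'max_iterations') are only assumptions about how a
    ##  RegionProposalGenerator instance would normally be configured:
    ##
    ##      rpg = RegionProposalGenerator(kay=0.05, max_iterations=40)
    ##      rpg.graph_based_segmentation_for_arrays(1)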