diff --git a/.gitignore b/.gitignore index 72364f9..3287d2a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@
+*.npy
+*.xml
+pedestrian/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/.idea/PyBOW.iml b/.idea/PyBOW.iml deleted file mode 100644 index 6711606..0000000 --- a/.idea/PyBOW.iml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - \ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index b45b51e..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index cf1bf7a..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file
diff --git a/FLANN_histogram.png b/FLANN_histogram.png deleted file mode 100644 index 64e3c99..0000000 Binary files a/FLANN_histogram.png and /dev/null differ
diff --git a/LICENSE b/LICENSE index fc465b5..975f47f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@
 MIT License

-Copyright (c) 2017 nextgensparx
+Copyright (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+Copyright (c) 2017 Sipho Mateke (github: siphomateke)

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md index 6bb2d8b..0da541c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,97 @@
-# PyBOW
-A Python implementation of the Bag of words algorithm using OpenCV
-
-Requires numpy, matplotlib and OpenCV 3.2.0
+# Python Bag of (Visual) Words (BoW) and Histogram of Oriented Gradient (HOG) based Object Detection
+
+An exemplar Python implementation of Bag of (Visual) Words and Histogram of Oriented Gradient (HOG) feature based object detection (BoW or HOG features, SVM classification) using [OpenCV](http://www.opencv.org).
+
+Examples used for teaching within the undergraduate Computer Science programme
+at [Durham University](http://www.durham.ac.uk) (UK) by [Prof. Toby Breckon](https://breckon.org/toby/).
+
+All tested with [OpenCV](http://www.opencv.org) 3.4.x and Python 3.x (also requiring numpy).
+
+----
+
+### OpenCV Setup:
+
+To ensure you have [OpenCV](http://www.opencv.org) set up correctly to use these examples, please follow the suite of testing examples [here](https://github.com/tobybreckon/python-examples-ip/blob/master/TESTING.md).
+
+----
+
+### Details:
+
+You are provided with a set of 7 example files that can be run individually as follows:
+
+- ```hog_train.py``` - performs object detection batch training using Histogram of Oriented Gradients (HOG) and SVM classification.
+
+- ```hog_test.py``` - performs object detection batch testing using Histogram of Oriented Gradients (HOG) and SVM classification.
+
+- ```hog_detector.py``` - performs object detection via sliding window search using Histogram of Oriented Gradients (HOG) and SVM classification over a directory of specified images.
+
+- ```bow_train.py``` - performs object detection batch training using a bag of visual words (BoW) approach and SVM classification.
+
+- ```bow_test.py``` - performs object detection batch testing using a bag of visual words (BoW) approach and SVM classification.
+
+- ```bow_detector.py``` - performs object detection via sliding window search using a bag of visual words (BoW) approach and SVM classification over a directory of specified images.
+
+- ```selective_search.py``` - performs selective search to generate object windows as an alternative to sliding window search (it generates windows only; it does not perform integrated object detection).
+
+and additional supporting code in ```utils.py``` (image loading / feature extraction) and ```sliding_window.py``` (multi-scale sliding window), which are imported into the above.
+
+----
+
+### How to download and run:
+
+Download each file as needed (or download/uncompress a zip from [here](https://github.com/tobybreckon/python-bow-hog-object-detection/archive/master.zip)) or, to download the entire repository in an environment where you have git installed, try:
+```
+git clone https://github.com/tobybreckon/python-bow-hog-object-detection
+cd python-bow-hog-object-detection
+```
+In order to perform training you will first need to download the dataset, which can be achieved as follows on a linux/unix based system (or it can alternatively be downloaded from [here](ftp://ftp.inrialpes.fr/pub/lear/douze/data/INRIAPerson.tar) - ftp://ftp.inrialpes.fr/pub/lear/douze/data/INRIAPerson.tar ):
+```
+sh ./download-data.sh
+```
+If you run into errors such as _"libpng error: IDAT: invalid distance too far back"_ when running the commands below, you may also need to run:
+```
+sh ./fix_pngs.sh /path/to/dataset/files
+```
+[Durham Students - just download the data sets from the [DUO](http://duo.dur.ac.uk) to avoid this.]
+
+To perform training of the bag of (visual) words approach you can simply run the following (or alternatively however you run python scripts in your environment):
+```
+python3 ./bow_train.py
+```
+which will perform the stages of loading the image training set, feature descriptor extraction, k-means clustering and SVM classifier training, and output two resulting files: ```svm_bow.xml``` (the trained SVM classifier) and ```bow_dictionary.npy``` (the BoW set of visual codewords / cluster centres - known as the dictionary).
+
+To perform batch testing of the bag of (visual) words approach you can then simply use (or alternatively ...):
+```
+python3 ./bow_test.py
+```
+which will load the ```svm_bow.xml``` and ```bow_dictionary.npy``` created from training and report statistical testing performance over an independent set of test images (not used during training).
+
+To perform detection over a set of images you can then simply use (or alternatively ...):
+```
+python3 ./bow_detector.py
+```
+which will again load the ```svm_bow.xml``` and ```bow_dictionary.npy``` created from training but now perform multi-scale sliding window based detection over a set of images in a directory specified by the variable ```directory_to_cycle = "...."``` at the top of this python script file.
+
+The above instructions can be repeated for the set of ```hog_...py``` examples to perform training (producing a single ```svm_hog.xml``` file), testing and subsequent detection as before.
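+
+As a concrete illustration of how the two saved files are used together, the following minimal sketch (assuming the ```utils.py``` / ```params.py``` provided here; ```patch.png``` is a hypothetical window-sized test image) loads them and classifies a single image patch:
+```python
+import cv2
+import numpy as np
+import params
+from utils import ImageData
+
+# load the two artefacts produced by bow_train.py
+dictionary = np.load(params.BOW_DICT_PATH)
+svm = cv2.ml.SVM_load(params.BOW_SVM_PATH)
+
+# build a BoW histogram for a single window-sized image patch
+img_data = ImageData(cv2.imread("patch.png", cv2.IMREAD_COLOR))
+img_data.compute_bow_descriptors()
+img_data.generate_bow_hist(dictionary)
+
+# classify the histogram with the SVM (0 = other, 1 = pedestrian)
+_, result = svm.predict(np.float32([img_data.bow_histogram]))
+print(result[0][0] == params.DATA_CLASS_NAMES["pedestrian"])
+```
+
+----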
+
+### References
+
+This code base was informed by the research work carried out in the following publications:
+
+- [On using Feature Descriptors as Visual Words for Object Detection within X-ray Baggage Security Screening](https://breckon.org/toby/publications/papers/kundegorski16xray.pdf) (M.E. Kundegorski, S. Akcay, M. Devereux, A. Mouton, T.P. Breckon), In Proc. International Conference on Imaging for Crime Detection and Prevention, IET, pp. 12 (6 .)-12 (6 .)(1), 2016. [[pdf](https://breckon.org/toby/publications/papers/kundegorski16xray.pdf)] [[doi](http://dx.doi.org/10.1049/ic.2016.0080)]
+
+- [Real-time Classification of Vehicle Types within Infra-red Imagery](https://breckon.org/toby/publications/papers/kundegorski16vehicle.pdf) (M.E. Kundegorski, S. Akcay, G. Payen de La Garanderie, T.P. Breckon), In Proc. SPIE Optics and Photonics for Counterterrorism, Crime Fighting and Defence, SPIE, Volume 9995, pp. 1-16, 2016. [[pdf](https://breckon.org/toby/publications/papers/kundegorski16vehicle.pdf)] [[doi](http://dx.doi.org/10.1117/12.2241106)]
+
+- [A Photogrammetric Approach for Real-time 3D Localization and Tracking of Pedestrians in Monocular Infrared Imagery](http://community.dur.ac.uk/toby.breckon/publications/papers/kundegorski14photogrammetric.pdf) (M.E. Kundegorski, T.P. Breckon), In Proc. SPIE Optics and Photonics for Counterterrorism, Crime Fighting and Defence, SPIE, Volume 9253, No. 01, pp. 1-16, 2014. [[pdf](https://breckon.org/toby/publications/papers/kundegorski14photogrammetric.pdf)] [[doi](http://dx.doi.org/10.1117/12.2065673)]
+
+----
+
+**Acknowledgements:** originally forked from an earlier Bag of Visual Words only version at https://github.com/siphomateke/PyBOW, with the additional HOG and selective search code added to this newer version.
+
+_[ but it appears some code portions may have broader origins elsewhere, such as from this tutorial - https://www.pyimagesearch.com/2015/03/23/sliding-windows-for-object-detection-with-python-and-opencv/ ]_
+
+**Bugs:** _I do not claim this code to be bug free._ If you find any bugs, raise an issue (or, much better still, submit a git pull request with a fix) - toby.breckon@durham.ac.uk
+
+_"may the source be with you"_ - anon.
diff --git a/bow_detector.py b/bow_detector.py new file mode 100644 index 0000000..7b35f5c --- /dev/null +++ b/bow_detector.py @@ -0,0 +1,165 @@
+################################################################################
+
+# functionality: perform detection based on Bag of (visual) Words SVM classification
+# using a very basic multi-scale, sliding window (exhaustive search) approach
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import cv2
+import os
+import numpy as np
+import math
+import params
+from utils import *
+from sliding_window import *
+
+################################################################################
+
+directory_to_cycle = "pedestrian/INRIAPerson/Test/pos/";
+
+show_scan_window_process = True;
+
+################################################################################
+
+# load dictionary and SVM data
+
+try:
+    dictionary = np.load(params.BOW_DICT_PATH)
+    svm = cv2.ml.SVM_load(params.BOW_SVM_PATH)
+except:
+    print("Missing files - dictionary and/or SVM!");
+    print("-- have you performed training to produce these files ?");
+    exit();
+
+# print some checks
+
+print("dictionary size : ", dictionary.shape)
+print("svm size : ", len(svm.getSupportVectors()))
+print("svm var count : ", svm.getVarCount())
+
+################################################################################
+
+# process all images in directory (sorted by filename)
+
+for filename in sorted(os.listdir(directory_to_cycle)):
+
+    # if it is a PNG file
+
+    if '.png' in filename:
+        print(os.path.join(directory_to_cycle, filename));
+
+        # read image data
+
+        img = cv2.imread(os.path.join(directory_to_cycle, filename), cv2.IMREAD_COLOR)
+
+        # make a copy for drawing the output
+
+        output_img = img.copy();
+
+        # for a range of different image scales in an image pyramid
+
+        current_scale = -1
+        detections = []
+        rescaling_factor = 1.25
+
+        ################################ for each re-scale of the image
+
+        for resized in pyramid(img, scale=rescaling_factor):
+
+            # at the start our scale = 1, because we catch the flag value -1
+
+            if current_scale == -1:
+                current_scale = 1
+
+            # after this rescale downwards each time (division by re-scale factor)
+
+            else:
+                current_scale /= rescaling_factor
+
+            rect_img = resized.copy()
+
+            # if we want to see progress show each scale
+
+            if (show_scan_window_process):
+                cv2.imshow('current scale',rect_img)
+                cv2.waitKey(10);
+
+            # loop over the sliding window for each layer of the pyramid (re-sized image)
+
+            window_size = params.DATA_WINDOW_SIZE
+            step = math.floor(resized.shape[0] / 16)
+
+            if step > 0:
+
+                ############################# for each scan window
+
+                for (x, y, window) in sliding_window(resized, window_size, step_size=step):
+
+                    # if we want to see progress show each scan window
+
+                    if (show_scan_window_process):
+                        cv2.imshow('current window',window)
+                        key = cv2.waitKey(10) # wait 10ms
+
+                    # for each window region get the BoW feature point descriptors
+
+                    img_data = ImageData(window)
+                    img_data.compute_bow_descriptors()
+
+                    # generate and classify each window by constructing a BoW
+                    # histogram and passing it through the SVM classifier
+
+                    if img_data.bow_descriptors is not None:
+                        img_data.generate_bow_hist(dictionary)
+
+                        print("detecting with SVM ...")
+
+                        retval, [result] = svm.predict(np.float32([img_data.bow_histogram]))
+
+                        print(result)
+
+                        # if we get a detection, then record it
+
+                        if result[0] == params.DATA_CLASS_NAMES["pedestrian"]:
+
+                            # store rect as (x1, y1) (x2,y2) pair
+
+                            rect = np.float32([x, y, x + window_size[0], y + window_size[1]])
+
+                            # if we want to see progress show each detection, at each scale
+
+                            if (show_scan_window_process):
+                                cv2.rectangle(rect_img, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 2)
+                                cv2.imshow('current scale',rect_img)
+                                cv2.waitKey(10)
+
+                            rect *= (1.0 / current_scale)
+                            detections.append(rect)
+
+        ########################################################
+
+        # For the overall set of detections (over all scales) perform
+        # non maximal suppression (i.e. remove overlapping boxes etc).
+
+        detections = non_max_suppression_fast(np.int32(detections), 0.4)
+
+        # finally draw all the detections on the original image
+
+        for rect in detections:
+            cv2.rectangle(output_img, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 2)
+
+        cv2.imshow('detected objects',output_img)
+        key = cv2.waitKey(40) # wait 40ms
+        if (key == ord('x')):
+            break
+
+# close all windows
+
+cv2.destroyAllWindows()
+
+#####################################################################
diff --git a/bow_test.py b/bow_test.py new file mode 100644 index 0000000..b9e8f43 --- /dev/null +++ b/bow_test.py @@ -0,0 +1,75 @@
+################################################################################
+
+# functionality: perform Bag of (visual) Words (BoW) testing over
+# a specified dataset and compute the resulting prediction/classification error
+# over that same dataset, using the pre-saved SVM model + BoW dictionary
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import numpy as np
+import cv2
+from utils import *
+
+################################################################################
+
+def main():
+
+    # load up the dictionary and SVM stored from prior training
+
+    try:
+        dictionary = np.load(params.BOW_DICT_PATH)
+        svm = cv2.ml.SVM_load(params.BOW_SVM_PATH)
+    except:
+        print("Missing files - dictionary and/or SVM!");
+        print("-- have you performed training to produce these files ?");
+        exit();
+
+    # load ** testing ** data sets in the same class order as training
+    # (here we perform no patch sampling of the data as we are not training,
+    # hence the [0,0] sample sizes and [False,False] centre weighting flags)
+
+    print("Loading test data as a batch ...")
+
+    paths = [params.DATA_testing_path_neg, params.DATA_testing_path_pos]
+    use_centre_weighting = [False, False];
+    class_names = params.DATA_CLASS_NAMES
+    imgs_data = load_images(paths, class_names, [0,0], use_centre_weighting)
+
+    print("Computing descriptors...") # for each testing image
+    start = cv2.getTickCount()
+    [img_data.compute_bow_descriptors() for img_data in imgs_data]
+    print_duration(start)
+
+    print("Generating histograms...") # for each testing image
+    start = cv2.getTickCount()
+    [img_data.generate_bow_hist(dictionary) for img_data in imgs_data]
+    print_duration(start)
+
+    # get the example/sample bow histograms and class labels
+
+    samples, class_labels = get_bow_histograms(imgs_data), get_class_labels(imgs_data)
+
+    # perform batch SVM classification over the whole set
+
+    print("Performing batch SVM classification over all data ...")
+
+    results = svm.predict(samples)
+    output = results[1].ravel()
+
+    # compute and report the error over the whole set
+
+    error = ((np.absolute(class_labels.ravel() - output).sum()) / float(output.shape[0]))
+    print("Successfully tested SVM with {}% testing set error".format(round(error * 100,2)))
+    print("-- meaning the SVM got {}% of the testing examples correct!".format(round((1.0 - error) * 100,2)))
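+
+    # as an illustrative aside (a sketch, not part of the original script), the
+    # same variables can be used to report recall on the positive class alone -
+    # this assumes the binary 0/1 labels defined in params.DATA_CLASS_NAMES:
+
+    positives = (class_labels.ravel() == params.DATA_CLASS_NAMES["pedestrian"])
+    recovered = np.sum(output[positives] == params.DATA_CLASS_NAMES["pedestrian"])
+    print("-- recovered {} of {} positive test examples".format(recovered, np.sum(positives)))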
+
+################################################################################
+
+if __name__ == "__main__":
+    main()
+
+################################################################################
diff --git a/bow_train.py b/bow_train.py new file mode 100644 index 0000000..dada084 --- /dev/null +++ b/bow_train.py @@ -0,0 +1,158 @@
+################################################################################
+
+# functionality: perform all stages of Bag of (visual) Words (BoW) training over
+# a specified dataset and compute the resulting prediction/classification error
+# over that same dataset, having saved the SVM model to file for subsequent re-use
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import cv2
+from utils import *
+
+################################################################################
+
+def generate_dictionary(imgs_data, dictionary_size):
+
+    # extract descriptors from all training images
+
+    desc = stack_array([img_data.bow_descriptors for img_data in imgs_data])
+
+    # important: cv2.kmeans() clustering only accepts float32 descriptors
+
+    desc = np.float32(desc)
+
+    # perform clustering - increase iterations and reduce EPS to change performance
+
+    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, params.BOW_clustering_iterations, 0.01)
+    flags = cv2.KMEANS_PP_CENTERS
+
+    # desc is a float32 numpy array of vstacked descriptors
+
+    compactness, labels, dictionary = cv2.kmeans(desc, dictionary_size, None, criteria, 1, flags)
+    np.save(params.BOW_DICT_PATH, dictionary)
+
+    return dictionary
+
+################################################################################
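+
+# a quick sanity check one might add here (a hedged sketch, not part of the
+# original pipeline): the dictionary saved above should be a float32 array of
+# cluster centres with one row per visual word
+
+def check_dictionary():
+    dictionary = np.load(params.BOW_DICT_PATH)
+    assert dictionary.shape[0] == params.BOW_dictionary_size
+    assert dictionary.dtype == np.float32
+    return dictionary.shape
+
+################################################################################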
+
+def main():
+
+    ############################################################################
+    # load our training data set of image examples
+
+    program_start = cv2.getTickCount()
+
+    print("Loading images...")
+    start = cv2.getTickCount()
+
+    # N.B. specify data path names in same order as class names (neg, pos)
+
+    paths = [params.DATA_training_path_neg, params.DATA_training_path_pos]
+
+    # build a list of class names automatically from our dictionary of class (name,number) pairs
+
+    class_names = [get_class_name(class_number) for class_number in range(len(params.DATA_CLASS_NAMES))]
+
+    # specify number of sub-window samples to take from each positive and negative
+    # example image in the data set
+    # N.B. specify in same order as class names (neg, pos) - again
+
+    sampling_sizes = [params.DATA_training_sample_count_neg, params.DATA_training_sample_count_pos]
+
+    # do we want to take samples only from the centre of the example image, or randomly?
+    # No - for background -ve images (first class)
+    # Yes - for object samples +ve images (second class)
+
+    sample_from_centre = [False, True];
+
+    # perform image loading
+
+    imgs_data = load_images(paths, class_names, sampling_sizes, sample_from_centre,
+                            params.DATA_WINDOW_OFFSET_FOR_TRAINING_SAMPLES, params.DATA_WINDOW_SIZE);
+
+    print(("Loaded {} image(s)".format(len(imgs_data))))
+    print_duration(start)
+
+    ############################################################################
+    # perform bag of visual words feature construction
+
+    print("Computing descriptors...") # for each training image
+    start = cv2.getTickCount()
+    [img_data.compute_bow_descriptors() for img_data in imgs_data]
+    print_duration(start)
+
+    print("Clustering...") # over all images to generate dictionary code book/words
+    start = cv2.getTickCount()
+    dictionary = generate_dictionary(imgs_data, params.BOW_dictionary_size)
+    print_duration(start)
+
+    print("Generating histograms...") # for each training image
+    start = cv2.getTickCount()
+    [img_data.generate_bow_hist(dictionary) for img_data in imgs_data]
+    print_duration(start)
+
+    ############################################################################
+    # train an SVM based on these BoW histogram features
+
+    print("Training SVM...")
+    start = cv2.getTickCount()
+
+    # define SVM parameters
+
+    svm = cv2.ml.SVM_create()
+    svm.setType(cv2.ml.SVM_C_SVC) # change this for multi-class
+    svm.setKernel(params.BOW_SVM_kernel) # use specific kernel type (alternatives exist)
+
+    # compile samples (i.e. visual word histograms) for each training image
+
+    samples = get_bow_histograms(imgs_data)
+
+    # get class label for each training image
+
+    class_labels = get_class_labels(imgs_data);
+
+    # specify the termination criteria for the SVM training
+
+    svm.setTermCriteria((cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_COUNT, params.BOW_SVM_max_training_iterations, 1.e-06))
+
+    # perform auto training for the SVM which will essentially perform grid
+    # search over the set of parameters for the chosen kernel and the penalty
+    # cost term, C (N.B. trainAuto() syntax is correct as of OpenCV 3.4.x)
+
+    svm.trainAuto(samples, cv2.ml.ROW_SAMPLE, class_labels, kFold = 10, balanced = True);
+
+    # save the trained SVM to file so that we can load it again for testing / detection
+
+    svm.save(params.BOW_SVM_PATH)
+
+    ############################################################################
+    # measure performance of the SVM trained on the bag of visual word features
+
+    # perform prediction over the set of examples we trained over
+
+    output = svm.predict(samples)[1].ravel()
+    error = (np.absolute(class_labels.ravel() - output).sum()) / float(output.shape[0])
+
+    # we are successful if our prediction error is better than random chance
+    # e.g. for 2 class labels random chance error = 1/2 = 0.5 (i.e. 50%)
+
+    if error < (1.0 / len(params.DATA_CLASS_NAMES)):
+        print("Trained SVM obtained {}% training set error".format(round(error * 100,2)))
+        print("-- meaning the SVM got {}% of the training examples correct!".format(round((1.0 - error) * 100,2)))
+    else:
+        print("Failed to train SVM. {}% error".format(round(error * 100,2)))
+
+    print_duration(start)
+
+    print(("Finished training BOW detector. 
{}".format(format_time(get_elapsed_time(program_start))))) + +################################################################################ + +if __name__ == '__main__': + main() + +################################################################################ diff --git a/bowutils.py b/bowutils.py deleted file mode 100644 index eefef73..0000000 --- a/bowutils.py +++ /dev/null @@ -1,158 +0,0 @@ -import os -import numpy as np -import cv2 -import params - - -def get_elapsed_time(start): - """ Helper function for timing code execution""" - return (cv2.getTickCount() - start) / cv2.getTickFrequency() - - -def format_time(time): - time_str = "" - if time < 60.0: - time_str = "{}s".format(round(time, 1)) - elif time > 60.0: - minutes = time / 60.0 - time_str = "{}m : {}s".format(int(minutes), round(time % 60, 2)) - return time_str - - -def print_duration(start): - time = get_elapsed_time(start) - print("Took {}".format(format_time(time))) - - -def resize_img(img, width=-1, height=-1): - if height == -1 and width == -1: - raise TypeError("Invalid arguments. Width or height must be provided.") - h = img.shape[0] - w = img.shape[1] - if height == -1: - aspect_ratio = float(w) / h - new_height = int(width / aspect_ratio) - return cv2.resize(img, (width, new_height)) - elif width == -1: - aspect_ratio = h / float(w) - new_width = int(height / aspect_ratio) - return cv2.resize(img, (new_width, height)) - - -def imreads(path): - """ - This reads all the images in a given folder and returns the results - """ - images_path = [os.path.join(path, f) for f in os.listdir(path)] - images = [] - for image_path in images_path: - img = cv2.imread(image_path) - images.append(img) - return images - - -def stack_array(arr): - stacked_arr = np.array([]) - for item in arr: - # Only stack if it is not empty - if len(item) > 0: - if len(stacked_arr) == 0: - stacked_arr = np.array(item) - else: - stacked_arr = np.vstack((stacked_arr, item)) - return stacked_arr - - -def get_descriptors(img): - # returns descriptors of an image - return params.DETECTOR.detectAndCompute(img, None)[1] - - -def get_class_code(class_name): - return params.CLASS_NAMES.get(class_name, 0) - - -def get_class_name(class_code): - for name, code in params.CLASS_NAMES.iteritems(): - if code == class_code: - return name - - -class ImageData(object): - def __init__(self, img): - self.img = img - self.class_name = "" - self.response = None - self.descriptors = np.array([]) - - def set_class(self, class_name): - self.class_name = class_name - self.response = get_class_code(self.class_name) - - def compute_descriptors(self): - self.descriptors = get_descriptors(self.img) - if self.descriptors is None: - self.descriptors = np.array([]) - - def hog(self): - gx = cv2.Sobel(self.img, cv2.CV_32F, 1, 0) - gy = cv2.Sobel(self.img, cv2.CV_32F, 0, 1) - mag, ang = cv2.cartToPolar(gx, gy) - bins = np.int32(params.HOG_BIN_N * ang / (2 * np.pi)) # quantizing binvalues in (0...16) - bin_cells = bins[:10, :10], bins[10:, :10], bins[:10, 10:], bins[10:, 10:] - mag_cells = mag[:10, :10], mag[10:, :10], mag[:10, 10:], mag[10:, 10:] - hists = [np.bincount(b.ravel(), m.ravel(), params.HOG_BIN_N) for b, m in zip(bin_cells, mag_cells)] - hist = np.hstack(hists) # hist is a 64 bit vector - return hist - - def generate_bow_hist(self, dictionary): - self.features = np.zeros((len(dictionary), 1)) - # FLANN matcher needs descriptors to be type32 - matches = params.MATCHER.match(np.float32(self.descriptors), dictionary) - for match in matches: - # Get which visual word this 
descriptor matches in the dictionary - # match.trainIdx is the visual_word - # Increase count for this visual word in histogram - self.features[match.trainIdx] += 1 - - -def add_to_imgs_data(path, class_name, imgs_data): - imgs = imreads(path) - - img_count = len(imgs_data) - for img in imgs: - if img.shape[0] > params.MAX_IMG_WIDTH: - img = resize_img(img, params.MAX_IMG_WIDTH) - img_data = ImageData(img) - img_data.set_class(class_name) - imgs_data.insert(img_count, img_data) - img_count += 1 - - return imgs_data - - -def get_imgs_data(paths, class_names, dictionary=None): - imgs_data = [] # type: list[ImageData] - - for path, class_name in zip(paths, class_names): - add_to_imgs_data(path, class_name, imgs_data) - - [img_data.compute_descriptors() for img_data in imgs_data] - if dictionary is not None: - [img_data.generate_bow_hist(dictionary) for img_data in imgs_data] - - return imgs_data - - -def get_samples(imgs_data): - # Important! Normalize histograms to remove bias for number of descriptors - norm_features = [cv2.normalize(img_data.features, None, 0, len(img_data.features), cv2.NORM_MINMAX) for img_data in - imgs_data] - samples = stack_array([[feature] for feature in norm_features]) - # samples = stack_array([[img_data.features] for img_data in imgs_data]) - return np.float32(samples) - - -def get_responses(imgs_data): - responses = [img_data.response for img_data in imgs_data] - return np.int32(responses) diff --git a/classify.py b/classify.py deleted file mode 100644 index 83dd066..0000000 --- a/classify.py +++ /dev/null @@ -1,36 +0,0 @@ -import numpy as np -import cv2 -from bowutils import * -from matplotlib import pyplot as plt - -def main(): - # Load up the dictionary - dictionary = np.load(params.DICT_PATH) - - paths = ["test/pos", "test/neg"] - class_names = ["pos", "neg"] - imgs_data = get_imgs_data(paths, class_names, dictionary) - - samples, responses = get_samples(imgs_data), get_responses(imgs_data) - - svm = cv2.ml.SVM_load(params.SVM_PATH) - results = svm.predict(samples) - output = results[1].ravel() - - error = ((np.absolute(responses.ravel() - output).sum()) / float(output.shape[0])) * 100 - print "Error in test data: {}%".format(error) - - for i in xrange(len(imgs_data)): - img_data = imgs_data[i] - - title = "Prediction: {0}".format(output[i]) - plt.axis("off") - plt.imshow(cv2.cvtColor(img_data.img, cv2.COLOR_BGR2RGB)) - plt.suptitle(title) - plt.draw() - plt.waitforbuttonpress(0) # this will wait for indefinite time - plt.clf() - - -if __name__ == "__main__": - main() diff --git a/download-data.sh b/download-data.sh new file mode 100644 index 0000000..69cf111 --- /dev/null +++ b/download-data.sh @@ -0,0 +1,86 @@ +################################################################################ + +# simple data downloader / unpacker - (c) 2018 Toby Breckon, Durham University, UK + +################################################################################ + +# set this script to fail on error + +set -e + +# check for required commands to download and md5 check + +(command -v curl | grep curl > /dev/null) || + (echo "Error: curl command not found, cannot download!") + +(command -v md5sum | grep md5sum > /dev/null) || + (echo "Error: md5sum command not found, md5sum check will fail!") + +################################################################################ + +STARTING_DIR=`pwd` + +################################################################################ + +## INRIA Pedestrian Dataset + 
+################################################################################
+
+URL_PERSON=ftp://ftp.inrialpes.fr/pub/lear/douze/data/INRIAPerson.tar
+
+DIR_LOCAL_TARGET_PERSON=/tmp/pedestrian
+
+PERSON_FILE_NAME=INRIAPerson.tar
+
+DIR_NAME_UNPACKED=INRIAPerson
+PERSON_MD5_SUM=6af009c6386c86f78f77e81003df84dc
+
+################################################################################
+
+# perform download
+
+echo "Downloading pedestrian data set ..."
+
+mkdir -p $DIR_LOCAL_TARGET_PERSON
+
+TARGET=$DIR_LOCAL_TARGET_PERSON/$PERSON_FILE_NAME
+
+curl --progress-bar $URL_PERSON > $TARGET
+
+################################################################################
+
+# perform md5 check and move to required local target directory
+
+cd $DIR_LOCAL_TARGET_PERSON
+
+echo "checking the MD5 checksum for downloaded data ..."
+
+CHECK_SUM_CHECKPOINTS="$PERSON_MD5_SUM $PERSON_FILE_NAME"
+
+echo $CHECK_SUM_CHECKPOINTS | md5sum -c
+
+echo "Unpacking the tar file..."
+
+tar -xvf $PERSON_FILE_NAME
+
+chmod -R +w $DIR_LOCAL_TARGET_PERSON
+
+echo "Tidying up..."
+
+ln -s $DIR_LOCAL_TARGET_PERSON $STARTING_DIR/pedestrian
+
+# mv $DIR_NAME_UNPACKED/* .
+
+rm $TARGET # && rm -r $DIR_NAME_UNPACKED
+
+################################################################################
+
+echo "... completed -> required pedestrian data in $DIR_LOCAL_TARGET_PERSON/"
+
+################################################################################
+
+# reset to the directory we started in
+
+cd $STARTING_DIR
+
+################################################################################
diff --git a/fix_pngs.sh b/fix_pngs.sh new file mode 100644 index 0000000..7fc0868 --- /dev/null +++ b/fix_pngs.sh @@ -0,0 +1,41 @@
+
+################################################################################
+
+# simple png file fixer script using pngcrush
+
+# (c) 2018 Toby Breckon, Durham University, UK
+
+################################################################################
+# check for command line argument
+
+if (test $# -ne 1)
+then
+    echo "usage: sh ./fix_pngs.sh /path/to/dataset/files"
+    exit 1
+fi
+
+################################################################################
+
+# set this script to fail on error
+
+set -e
+
+################################################################################
+# check for required commands to perform fix
+
+(command -v pngcrush | grep pngcrush > /dev/null) ||
+    (echo "Error: pngcrush command not found, cannot fix!";
+     echo "install from your package manager or from https://pmt.sourceforge.io/pngcrush/";
+     exit 1)
+
+################################################################################
+# go to the right place to write
+
+cd $1
+
+################################################################################
+# perform fix in place
+
+for i in `find * | grep png`; do pngcrush -fix -force $i tmp.png; mv tmp.png $i; done
+
+################################################################################
diff --git a/hog_detector.py b/hog_detector.py new file mode 100644 index 0000000..5573f87 --- /dev/null +++ b/hog_detector.py @@ -0,0 +1,162 @@
+################################################################################
+
+# functionality: perform detection based on HOG feature descriptor / SVM classification
+# using a very basic multi-scale, sliding window (exhaustive search) approach
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Minor portions: based on fork from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import cv2
+import os
+import numpy as np
+import math
+import params
+from utils import *
+from sliding_window import *
+
+################################################################################
+
+directory_to_cycle = "pedestrian/INRIAPerson/Test/pos/";
+
+show_scan_window_process = True;
+
+################################################################################
+
+# load SVM from file
+
+try:
+    svm = cv2.ml.SVM_load(params.HOG_SVM_PATH)
+except:
+    print("Missing file - SVM!");
+    print("-- have you performed training to produce this file ?");
+    exit();
+
+# print some checks
+
+print("svm size : ", len(svm.getSupportVectors()))
+print("svm var count : ", svm.getVarCount())
+
+################################################################################
+
+# process all images in directory (sorted by filename)
+
+for filename in sorted(os.listdir(directory_to_cycle)):
+
+    # if it is a PNG file
+
+    if '.png' in filename:
+        print(os.path.join(directory_to_cycle, filename));
+
+        # read image data
+
+        img = cv2.imread(os.path.join(directory_to_cycle, filename), cv2.IMREAD_COLOR)
+
+        # make a copy for drawing the output
+
+        output_img = img.copy();
+
+        # for a range of different image scales in an image pyramid
+
+        current_scale = -1
+        detections = []
+        rescaling_factor = 1.25
+
+        ################################ for each re-scale of the image
+
+        for resized in pyramid(img, scale=rescaling_factor):
+
+            # at the start our scale = 1, because we catch the flag value -1
+
+            if current_scale == -1:
+                current_scale = 1
+
+            # after this rescale downwards each time (division by re-scale factor)
+
+            else:
+                current_scale /= rescaling_factor
+
+            rect_img = resized.copy()
+
+            # if we want to see progress show each scale
+
+            if (show_scan_window_process):
+                cv2.imshow('current scale',rect_img)
+                cv2.waitKey(10);
+
+            # loop over the sliding window for each layer of the pyramid (re-sized image)
+
+            window_size = params.DATA_WINDOW_SIZE
+            step = math.floor(resized.shape[0] / 16)
+
+            if step > 0:
+
+                ############################# for each scan window
+
+                for (x, y, window) in sliding_window(resized, window_size, step_size=step):
+
+                    # if we want to see progress show each scan window
+
+                    if (show_scan_window_process):
+                        cv2.imshow('current window',window)
+                        key = cv2.waitKey(10) # wait 10ms
+
+                    # for each window region compute the HOG feature descriptor
+
+                    img_data = ImageData(window)
+                    img_data.compute_hog_descriptor();
+
+                    # classify each window by passing its HOG descriptor
+                    # through the SVM classifier
+
+                    if img_data.hog_descriptor is not None:
+
+                        print("detecting with SVM ...")
+
+                        retval, [result] = svm.predict(np.float32([img_data.hog_descriptor]))
+
+                        print(result)
+
+                        # if we get a detection, then record it
+
+                        if result[0] == params.DATA_CLASS_NAMES["pedestrian"]:
+
+                            # store rect as (x1, y1) (x2,y2) pair
+
+                            rect = np.float32([x, y, x + window_size[0], y + window_size[1]])
+
+                            # if we want to see progress show each detection, at each scale
+
+                            if (show_scan_window_process):
+                                cv2.rectangle(rect_img, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 2)
+                                cv2.imshow('current scale',rect_img)
+                                cv2.waitKey(40)
+
+                            rect *= (1.0 / current_scale)
+                            detections.append(rect)
+
+        ########################################################
+
+        # For the overall set of detections (over all scales) perform
+        # non maximal suppression (i.e. remove overlapping boxes etc).
+
+        detections = non_max_suppression_fast(np.int32(detections), 0.4)
+
+        # finally draw all the detections on the original image
+
+        for rect in detections:
+            cv2.rectangle(output_img, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 2)
+
+        cv2.imshow('detected objects',output_img)
+        key = cv2.waitKey(200) # wait 200ms
+        if (key == ord('x')):
+            break
+
+# close all windows
+
+cv2.destroyAllWindows()
+
+#####################################################################
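The non-maximal suppression step above (shared with ```bow_detector.py```) can be understood in isolation; the following small hedged sketch runs ```non_max_suppression_fast()``` from ```sliding_window.py``` on three toy boxes - the two heavily overlapping boxes collapse into one, while the distinct box survives:
```python
import numpy as np
from sliding_window import non_max_suppression_fast

# boxes as (x1, y1, x2, y2); the first two overlap almost completely
boxes = np.int32([[10, 10, 74, 138], [12, 12, 76, 140], [200, 50, 264, 178]])
print(non_max_suppression_fast(boxes, 0.4))  # expect two boxes to remain
```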
diff --git a/hog_test.py b/hog_test.py new file mode 100644 index 0000000..2930208 --- /dev/null +++ b/hog_test.py @@ -0,0 +1,70 @@
+################################################################################
+
+# functionality: perform HOG/SVM testing over a specified dataset and compute the
+# resulting prediction/classification error over that same dataset, using the
+# pre-saved SVM model trained on HOG feature descriptors
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Minor portions: based on fork from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import numpy as np
+import cv2
+from utils import *
+
+################################################################################
+
+def main():
+
+    # load up the SVM stored from prior training
+
+    try:
+        svm = cv2.ml.SVM_load(params.HOG_SVM_PATH)
+    except:
+        print("Missing file - SVM!");
+        print("-- have you performed training to produce this file ?");
+        exit();
+
+    # load ** testing ** data sets in the same class order as training
+    # (here we perform patch sampling only from the centre of the +ve
+    # class and only a single sample is taken,
+    # hence the [0,0] sample sizes and [False,True] centre weighting flags)
+
+    print("Loading test data as a batch ...")
+
+    paths = [params.DATA_testing_path_neg, params.DATA_testing_path_pos]
+    use_centre_weighting = [False, True];
+    class_names = params.DATA_CLASS_NAMES
+    imgs_data = load_images(paths, class_names, [0,0], use_centre_weighting)
+
+    print("Computing HOG descriptors...") # for each testing image
+    start = cv2.getTickCount()
+    [img_data.compute_hog_descriptor() for img_data in imgs_data]
+    print_duration(start)
+
+    # get the example/sample HOG descriptors and class labels
+
+    samples, class_labels = get_hog_descriptors(imgs_data), get_class_labels(imgs_data)
+
+    # perform batch SVM classification over the whole set
+
+    print("Performing batch SVM classification over all data ...")
+
+    results = svm.predict(samples)
+    output = results[1].ravel()
+
+    # compute and report the error over the whole set
+
+    error = ((np.absolute(class_labels.ravel() - output).sum()) / float(output.shape[0]))
+    print("Successfully tested SVM with {}% testing set error".format(round(error * 100,2)))
+    print("-- meaning the SVM got {}% of the testing examples correct!".format(round((1.0 - error) * 100,2)))
+
+################################################################################
+
+if __name__ == "__main__":
+    main()
+
+################################################################################
diff --git a/hog_train.py b/hog_train.py new file mode 100644 index 0000000..c7fdc24 --- /dev/null +++ b/hog_train.py @@ -0,0 +1,125 @@
+################################################################################
+
+# functionality: perform all stages of HOG/SVM training over
+# a specified dataset and compute the resulting prediction/classification error
+# over that same dataset, having saved the SVM model to file for subsequent re-use
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Minor portions: based on fork from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import cv2
+from utils import *
+
+################################################################################
+
+def main():
+
+    ############################################################################
+    # load our training data set of image examples
+
+    program_start = cv2.getTickCount()
+
+    print("Loading images...")
+    start = cv2.getTickCount()
+
+    # N.B. specify data path names in same order as class names (neg, pos)
+
+    paths = [params.DATA_training_path_neg, params.DATA_training_path_pos]
+
+    # build a list of class names automatically from our dictionary of class (name,number) pairs
+
+    class_names = [get_class_name(class_number) for class_number in range(len(params.DATA_CLASS_NAMES))]
+
+    # specify number of sub-window samples to take from each positive and negative
+    # example image in the data set
+    # N.B. specify in same order as class names (neg, pos) - again
+
+    sampling_sizes = [params.DATA_training_sample_count_neg, params.DATA_training_sample_count_pos]
+
+    # do we want to take samples only from the centre of the example image, or randomly?
+    # No - for background -ve images (first class)
+    # Yes - for object samples +ve images (second class)
+
+    sample_from_centre = [False, True];
+
+    # perform image loading
+
+    imgs_data = load_images(paths, class_names, sampling_sizes, sample_from_centre,
+                            params.DATA_WINDOW_OFFSET_FOR_TRAINING_SAMPLES, params.DATA_WINDOW_SIZE);
+
+    print(("Loaded {} image(s)".format(len(imgs_data))))
+    print_duration(start)
+
+    ############################################################################
+    # perform HOG feature extraction
+
+    print("Computing HOG descriptors...") # for each training image
+    start = cv2.getTickCount()
+    [img_data.compute_hog_descriptor() for img_data in imgs_data]
+    print_duration(start)
+
+    ############################################################################
+    # train an SVM based on these HOG features
+
+    print("Training SVM...")
+    start = cv2.getTickCount()
+
+    # define SVM parameters
+
+    svm = cv2.ml.SVM_create()
+    svm.setType(cv2.ml.SVM_C_SVC) # change this for multi-class
+    svm.setKernel(params.HOG_SVM_kernel) # use specific kernel type (alternatives exist)
+
+    # compile samples (i.e. HOG descriptors) for each training image
+
+    samples = get_hog_descriptors(imgs_data)
+
+    # get class label for each training image
+
+    class_labels = get_class_labels(imgs_data);
+
+    # specify the termination criteria for the SVM training
+
+    svm.setTermCriteria((cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_COUNT, params.HOG_SVM_max_training_iterations, 1.e-06))
+
+    # perform auto training for the SVM which will essentially perform grid
+    # search over the set of parameters for the chosen kernel and the penalty
+    # cost term, C (N.B. 
trainAuto() syntax is correct as of OpenCV 3.4.x)
+
+    svm.trainAuto(samples, cv2.ml.ROW_SAMPLE, class_labels, kFold = 10, balanced = True);
+
+    # save the trained SVM to file so that we can load it again for testing / detection
+
+    svm.save(params.HOG_SVM_PATH)
+
+    ############################################################################
+    # measure performance of the SVM trained on the HOG features
+
+    # perform prediction over the set of examples we trained over
+
+    output = svm.predict(samples)[1].ravel()
+    error = (np.absolute(class_labels.ravel() - output).sum()) / float(output.shape[0])
+
+    # we are successful if our prediction error is better than random chance
+    # e.g. for 2 class labels random chance error = 1/2 = 0.5 (i.e. 50%)
+
+    if error < (1.0 / len(params.DATA_CLASS_NAMES)):
+        print("Trained SVM obtained {}% training set error".format(round(error * 100,2)))
+        print("-- meaning the SVM got {}% of the training examples correct!".format(round((1.0 - error) * 100,2)))
+    else:
+        print("Failed to train SVM. {}% error".format(round(error * 100,2)))
+
+    print_duration(start)
+
+    print(("Finished training HOG detector. {}".format(format_time(get_elapsed_time(program_start)))))
+
+################################################################################
+
+if __name__ == '__main__':
+    main()
+
+################################################################################
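The HOG computation itself lives in ```utils.py``` (truncated at the end of this diff); as a general illustration of the kind of feature vector involved, OpenCV's built-in ```cv2.HOGDescriptor``` with its default 64x128 person window yields a 3780-dimensional descriptor - a minimal, self-contained sketch (the all-zero patch is just a stand-in):
```python
import cv2
import numpy as np

# defaults: 64x128 window, 16x16 blocks, 8x8 block stride, 8x8 cells, 9 bins
hog = cv2.HOGDescriptor()
patch = np.zeros((128, 64, 3), dtype=np.uint8)  # stand-in window-sized patch
descriptor = hog.compute(patch)
print(descriptor.shape)  # (3780, 1) under OpenCV 3.4.x
```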
diff --git a/label_histogram.png b/label_histogram.png deleted file mode 100644 index 5c4f4cb..0000000 Binary files a/label_histogram.png and /dev/null differ
diff --git a/params.py b/params.py index 2f87baa..d4bcfd3 100644 --- a/params.py +++ b/params.py @@ -1,20 +1,127 @@
+################################################################################
+
+# functionality: parameter settings for detection algorithm training/testing
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
 import cv2
+import os
+
+################################################################################
+# settings for datasets in general
+
+master_path_to_dataset = "/tmp/pedestrian"; # ** need to edit this **
+
+# data location - training examples
+
+DATA_training_path_neg = os.path.join(master_path_to_dataset,"INRIAPerson/Train/neg/");
+DATA_training_path_pos = os.path.join(master_path_to_dataset,"INRIAPerson/train_64x128_H96/pos/");
+
+# data location - testing examples
+
+DATA_testing_path_neg = os.path.join(master_path_to_dataset,"INRIAPerson/Test/neg/");
+DATA_testing_path_pos = os.path.join(master_path_to_dataset,"INRIAPerson/test_64x128_H96/pos/");

-MAX_IMG_WIDTH = 320
-SVM_PATH = "ml/svm.xml"
-DICT_PATH = "ml/dictionary.npy"
-CLASS_NAMES = {
-    "pos": 0,
-    "neg": 1
+# size of the sliding window patch / image patch to be used for classification
+# (for larger window sizes, for example from selective search - resize the
+# window to this size before feature descriptor extraction / classification)
+
+DATA_WINDOW_SIZE = [64, 128];
+
+# the maximum left/right, up/down offset to use when generating samples for training
+# that are centred around the centre of the image
+
+DATA_WINDOW_OFFSET_FOR_TRAINING_SAMPLES = 3;
+
+# number of sample patches to extract from each negative training example
+
+DATA_training_sample_count_neg = 10;
+
+# number of sample patches to extract from each positive training example
+
+DATA_training_sample_count_pos = 5;
+
+# class names - N.B. ordering of 0, 1 for neg/pos = order of paths
+
+DATA_CLASS_NAMES = {
+    "other": 0,
+    "pedestrian": 1
 }

-# algorithm = FLANN_INDEX_KDTREE
-_index_params = dict(algorithm=0, trees=5)
-_search_params = dict(checks=50)
+################################################################################
+# settings for BOW - Bag of (visual) Word - approaches
+
+BOW_SVM_PATH = "svm_bow.xml"
+BOW_DICT_PATH = "bow_dictionary.npy"
+
+BOW_dictionary_size = 512; # in general, larger = better performance, but potentially slower
+BOW_SVM_kernel = cv2.ml.SVM_RBF; # see opencv manual for other options
+BOW_SVM_max_training_iterations = 500; # stop training after max iterations
+
+BOW_clustering_iterations = 20; # reduce to improve speed, reduce quality
+
+BOW_fixed_feature_per_image_to_use = 100; # reduce to improve speed, set to 0 for variable number
+
+# specify the type of feature points to use
+# -- refer to the OpenCV manual for options here; by default this is set to work on
+# -- all systems "out of the box" rather than using the best available option
+
+BOW_use_ORB_always = False; # set to True to always use ORB over SIFT where available
+
+try:
+
+    if BOW_use_ORB_always:
+        print("Forced use of ORB features, not SIFT")
+        raise Exception('force use of ORB')
+
+    DETECTOR = cv2.xfeatures2d.SIFT_create(nfeatures=BOW_fixed_feature_per_image_to_use) # -- requires extra modules and non-free build flag
+    # DETECTOR = cv2.xfeatures2d.SURF_create(nfeatures=BOW_fixed_feature_per_image_to_use) # -- requires extra modules and non-free build flag
+
+    # as SIFT/SURF feature descriptors are floating point use KD_TREE approach
+
+    _algorithm = 0 # FLANN_INDEX_KDTREE
+    _index_params = dict(algorithm=_algorithm, trees=5)
+    _search_params = dict(checks=50)
+
+except:
+
+    DETECTOR =
 cv2.ORB_create(nfeatures=BOW_fixed_feature_per_image_to_use) # check these params
+
+    # if using ORB points
+    # taken from: https://docs.opencv.org/3.3.0/dc/dc3/tutorial_py_matcher.html
+    # N.B. "commented values are recommended as per the docs,
+    # but it didn't provide required results in some cases"
+
+    # as ORB feature descriptors are binary, use a HASHING (LSH) approach
+
+    _algorithm = 6 # FLANN_INDEX_LSH
+    _index_params= dict(algorithm = _algorithm,
+                        table_number = 6, # 12
+                        key_size = 12, # 20
+                        multi_probe_level = 1) # 2
+    _search_params = dict(checks=50)
+
+    if (not(BOW_use_ORB_always)):
+        print("Falling back to using features: ", DETECTOR.__class__())
+        BOW_use_ORB_always = True; # set this as a flag we can check later to know which feature type is in use
+
+print("For BOW - features in use are: ", DETECTOR.__class__(), "(ignore for HOG)")
+
+# based on the choice and availability of feature points, set up the FLANN matcher

 MATCHER = cv2.FlannBasedMatcher(_index_params, _search_params)

-DETECTOR = cv2.AKAZE_create()
-#DETECTOR = cv2.KAZE_create()
-#DETECTOR = cv2.ORB_create(nfeatures=100000, scoreType=cv2.ORB_FAST_SCORE)
-HOG_BIN_N = 16
+################################################################################
+# settings for HOG approaches
+
+HOG_SVM_PATH = "svm_hog.xml"
+
+HOG_SVM_kernel = cv2.ml.SVM_LINEAR; # see opencv manual for other options
+HOG_SVM_max_training_iterations = 500; # stop training after max iterations
+
+################################################################################
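Because the block above silently falls back from SIFT to ORB depending on the local OpenCV build, a short smoke test (a hedged sketch; ```test.png``` is a hypothetical image file) confirms which detector is actually active before training:
```python
import cv2
import params

img = cv2.imread("test.png", cv2.IMREAD_GRAYSCALE)  # hypothetical image
keypoints, descriptors = params.DETECTOR.detectAndCompute(img, None)

print("active detector  :", type(params.DETECTOR).__name__)
print("keypoints found  :", len(keypoints))
print("descriptor shape :", descriptors.shape)  # rows = keypoints
```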
diff --git a/selective_search.py b/selective_search.py new file mode 100644 index 0000000..fd0a218 --- /dev/null +++ b/selective_search.py @@ -0,0 +1,122 @@
+#####################################################################
+
+# Example : performs selective search bounding box identification
+
+# Author : Toby Breckon, toby.breckon@durham.ac.uk
+# Copyright (c) 2018 Department of Computer Science, Durham University, UK
+
+# License: MIT License
+
+# acknowledgements: based on the code and examples presented at:
+# https://www.learnopencv.com/selective-search-for-object-detection-cpp-python/
+
+#####################################################################
+
+import cv2
+import os
+import sys
+import math
+import numpy as np
+
+#####################################################################
+
+# press all the go-faster buttons - i.e. speed-up using multi-threading
+
+cv2.setUseOptimized(True);
+cv2.setNumThreads(4);
+
+#####################################################################
+
+directory_to_cycle = "pedestrian/INRIAPerson/Test/pos/" # edit this
+
+#####################################################################
+
+# create Selective Search Segmentation Object using default parameters
+
+ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
+
+#####################################################################
+
+# loop over all images in directory (sorted by filename)
+
+for filename in sorted(os.listdir(directory_to_cycle)):
+
+    # if it is a PNG file
+
+    if '.png' in filename:
+        print(os.path.join(directory_to_cycle, filename));
+
+        # read image from file
+
+        frame = cv2.imread(os.path.join(directory_to_cycle, filename), cv2.IMREAD_COLOR)
+
+        # start a timer (to see how long processing and display takes)
+
+        start_t = cv2.getTickCount();
+
+        # set input image on which we will run segmentation
+
+        ss.setBaseImage(frame)
+
+        # switch to fast but low recall Selective Search method
+        ss.switchToSelectiveSearchFast()
+
+        # switch to high recall but slow Selective Search method (slower)
+        # ss.switchToSelectiveSearchQuality()
+
+        # run selective search segmentation on input image
+        rects = ss.process()
+        print('Total Number of Region Proposals: {}'.format(len(rects)))
+
+        # number of region proposals to show
+        numShowRects = 100
+
+        # iterate over all the region proposals
+        for i, rect in enumerate(rects):
+            # draw rectangle for region proposal until numShowRects is reached
+            if (i < numShowRects):
+                x, y, w, h = rect
+                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 1, cv2.LINE_AA)
+            else:
+                break
+
+        # display image
+
+        cv2.imshow('Selective Search - Object Region Proposals', frame);
+
+        # stop the timer and convert to ms. (to see how long processing and display takes)
+
+        stop_t = ((cv2.getTickCount() - start_t)/cv2.getTickFrequency()) * 1000;
+
+        print('Processing time (ms): {}'.format(stop_t))
+        print()
+
+        # start the event loop - essential
+
+        # cv2.waitKey() is a keyboard binding function (argument is the time in milliseconds).
+        # It waits for the specified milliseconds for any keyboard event.
+        # If you press any key in that time, the program continues.
+        # If 0 is passed, it waits indefinitely for a key stroke.
+        # (bitwise and with 0xFF to extract least significant byte of multi-byte response)
+        # here we use a wait time in ms. that takes account of processing time already used in the loop
+
+        # wait 40ms or less depending on processing time taken (i.e. 1000ms / 25 fps = 40 ms)
+
+        key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF;
+
+        # it can also be set to detect specific key strokes by recording which key is pressed
+
+        # e.g. if user presses "x" then exit / press "f" for fullscreen
+
+        if (key == ord('x')):
+            break
+        elif (key == ord('f')):
+            cv2.setWindowProperty('Selective Search - Object Region Proposals', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN);
+
+        ss.clearImages()
+
+# close all windows
+
+cv2.destroyAllWindows()
+
+#####################################################################
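As the header notes, ```selective_search.py``` only generates candidate windows; a minimal hedged sketch of how its ```(x, y, w, h)``` proposals could be fed to one of the classifiers (resizing each region to the training patch size, per the note in ```params.py```) might look like:
```python
import cv2
import params

# rects as returned by ss.process(); frame as read in the loop above
for (x, y, w, h) in rects[:100]:
    window = frame[y:y + h, x:x + w]
    # resize the region to the 64x128 training window before descriptor extraction
    window = cv2.resize(window, tuple(params.DATA_WINDOW_SIZE))
    # ... then classify the window as in bow_detector.py / hog_detector.py
```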
diff --git a/objdetector.py b/sliding_window.py similarity index 57% rename from objdetector.py rename to sliding_window.py index b06e80b..4985659 100644 --- a/objdetector.py +++ b/sliding_window.py @@ -1,9 +1,40 @@
+################################################################################
+
+# functionality: functions for multi-scale sliding window (exhaustive) search
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
 import numpy as np
 import cv2

-from bowutils import resize_img
-from bowutils import ImageData
-import params
+################################################################################
+
+# re-size an image with respect to its aspect ratio if needed.
+# used in the multi-scale image pyramid approach
+
+def resize_img(img, width=-1, height=-1):
+    if height == -1 and width == -1:
+        raise TypeError("Invalid arguments. Width or height must be provided.")
+    h = img.shape[0]
+    w = img.shape[1]
+    if height == -1:
+        aspect_ratio = float(w) / h
+        new_height = int(width / aspect_ratio)
+        return cv2.resize(img, (width, new_height))
+    elif width == -1:
+        aspect_ratio = h / float(w)
+        new_width = int(height / aspect_ratio)
+        return cv2.resize(img, (new_width, height))
+
+################################################################################
+
+# a very basic approach to produce an image at multiple scales (i.e. 
variant +# re-sized resolutions) def pyramid(img, scale=1.5, min_size=(30, 30)): # yield the original image @@ -23,16 +54,22 @@ def pyramid(img, scale=1.5, min_size=(30, 30)): # yield the next image in the pyramid yield img +################################################################################ + +# generate a set of sliding window locations across the image def sliding_window(image, window_size, step_size=8): # slide a window across the image - for y in xrange(0, image.shape[0], step_size): - for x in xrange(0, image.shape[1], step_size): + for y in range(0, image.shape[0], step_size): + for x in range(0, image.shape[1], step_size): # yield the current window window = image[y:y + window_size[1], x:x + window_size[0]] if not (window.shape[0] != window_size[1] or window.shape[1] != window_size[0]): yield (x, y, window) +################################################################################ + +# perform basic non-maximal suppression of overlapping object detections def non_max_suppression_fast(boxes, overlapThresh): # if there are no boxes, return an empty list @@ -82,7 +119,7 @@ def non_max_suppression_fast(boxes, overlapThresh): # compute the ratio of overlap overlap = (w * h) / area[idxs[:last]] - # delete all indexes from the index list that have + # delete all indexes from the index list that have a significant overlap idxs = np.delete(idxs, np.concatenate(([last], np.where(overlap > overlapThresh)[0]))) @@ -90,55 +127,4 @@ def non_max_suppression_fast(boxes, overlapThresh): # integer data type return boxes[pick].astype("int") - -image = cv2.imread("test/pos/test17.jpg") -window_size = (640, 480) - -dictionary = np.load(params.DICT_PATH) -svm = cv2.ml.SVM_load(params.SVM_PATH) - -detections = [] -current_scale = -1 -for resized in pyramid(image, scale=1.25): - if current_scale == -1: - current_scale = 1 - else: - current_scale /= 1.25 - rect_img = resized.copy() - # loop over the sliding window for each layer of the pyramid - #step = (resized.shape[0] / window_size[0]) * 32 - step = resized.shape[0] / 16 - if step > 0: - for (x, y, window) in sliding_window(resized, window_size, step_size=step): - - img_data = ImageData(window) - img_data.compute_descriptors() - - if img_data.descriptors is not None: - img_data.generate_bow_hist(dictionary) - - results = svm.predict(np.float32([img_data.features])) - output = results[1].ravel()[0] - - if output == 0.0: - rect = np.float32([x, y, x + window_size[0], y + window_size[1]]) - rect *= (1.0 / current_scale) - detections.append(rect) - cv2.rectangle(rect_img, (x, y), (x + window_size[0], y + window_size[1]), (0, 0, 255), 2) - - clone = rect_img.copy() - cv2.rectangle(clone, (x, y), (x + window_size[0], y + window_size[1]), (0, 255, 0), 2) - """if clone.shape[0] > params.MAX_IMG_WIDTH: - clone = resize_img(clone, width=640)""" - cv2.imshow("Window", clone) - cv2.waitKey(1) - -detections = non_max_suppression_fast(np.int32(detections), 0.4) -detections = np.int32(detections) -rect_img = image.copy() -for rect in detections: - cv2.rectangle(rect_img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 0, 255), 2) - -if len(detections)>0: - cv2.imshow("Window", resize_img(rect_img, 640)) - cv2.waitKey(0) +################################################################################ diff --git a/train.py b/train.py deleted file mode 100644 index 1a3aa86..0000000 --- a/train.py +++ /dev/null @@ -1,106 +0,0 @@ -import cv2 -from bowutils import * - - -def generate_dictionary(imgs_data, dictionary_size): - # Extracting 
diff --git a/train.py b/train.py
deleted file mode 100644
index 1a3aa86..0000000
--- a/train.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import cv2
-from bowutils import *
-
-
-def generate_dictionary(imgs_data, dictionary_size):
-    # Extracting descriptors
-    desc = stack_array([img_data.descriptors for img_data in imgs_data])
-    # important, cv2.kmeans only accepts type32 descriptors
-    desc = np.float32(desc)
-
-    # Clustering
-    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.01)
-    flags = cv2.KMEANS_PP_CENTERS
-    # desc is a type32 numpy array of vstacked descriptors
-    compactness, labels, dictionary = cv2.kmeans(desc, dictionary_size, None, criteria, 1, flags)
-    np.save(params.DICT_PATH, dictionary)
-
-    return dictionary
-
-
-def main():
-    dictionary_size = 512
-    # Loading images
-    """imgs_data = []  # type: list[ImageData]
-
-    pos_imgs_path = "train/pos"
-    neg_imgs_path = "train/neg"
-
-    print("Loading images...")
-
-    # imreads returns a list of all images in that directory
-    pos_imgs = imreads(pos_imgs_path)
-    neg_imgs = imreads(neg_imgs_path)
-
-    img_count = 0
-    for img in pos_imgs:
-        img_data = ImageData(img)
-        img_data.set_class("pos")
-        imgs_data.insert(img_count, img_data)
-        img_count += 1
-
-    for img in neg_imgs:
-        img_data = ImageData(img)
-        img_data.set_class("neg")
-        imgs_data.insert(img_count, img_data)
-        img_count += 1"""
-
-    program_start = cv2.getTickCount()
-
-    print("Loading images...")
-    start = cv2.getTickCount()
-    paths = ["train/pos", "train/neg"]
-    class_names = ["pos", "neg"]
-    imgs_data = get_imgs_data(paths, class_names)
-    print("Loaded {} image(s)".format(len(imgs_data)))
-    print_duration(start)
-
-    print("Computing descriptors...")
-    start = cv2.getTickCount()
-    [img_data.compute_descriptors() for img_data in imgs_data]
-    print_duration(start)
-
-    print("Clustering...")
-    start = cv2.getTickCount()
-    dictionary = generate_dictionary(imgs_data, dictionary_size)
-    print_duration(start)
-
-    print("Generating histograms...")
-    start = cv2.getTickCount()
-    [img_data.generate_bow_hist(dictionary) for img_data in imgs_data]
-    print_duration(start)
-
-    print imgs_data[0].hog().shape
-    print imgs_data[0].features.shape
-
-    print("Training SVM...")
-    start = cv2.getTickCount()
-    # Begin training SVM
-    svm = cv2.ml.SVM_create()
-    svm.setType(cv2.ml.SVM_C_SVC)
-    svm.setKernel(cv2.ml.SVM_LINEAR)
-    svm.setC(2.67)
-    svm.setGamma(5.383)
-
-    # Compile samples
-    samples = get_samples(imgs_data)
-    responses = np.int32([img_data.response for img_data in imgs_data])
-
-    svm.setTermCriteria((cv2.TERM_CRITERIA_COUNT, 1000, 1.e-06))
-    svm.train(samples, cv2.ml.ROW_SAMPLE, responses)
-    svm.save(params.SVM_PATH)
-
-    output = svm.predict(samples)[1].ravel()
-    error = (np.absolute(responses.ravel() - output).sum()) / float(output.shape[0])
-
-    if error < 0.2:
-        print "Successfully trained SVM with {}% error".format(error * 100)
-    else:
-        print "Failed to train SVM. {}% error".format(error * 100)
-    print_duration(start)
-
-    print("Finished training BOW detector. {}".format(format_time(get_elapsed_time(program_start))))
-
-
-if __name__ == '__main__':
-    main()
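The removed `train.py` (superseded by `bow_training.py`) built the BoW dictionary by k-means clustering of the stacked feature descriptors via `cv2.kmeans`. For reference, a minimal self-contained sketch of that clustering step, with random data standing in for real descriptors (the descriptor count and dimension here are illustrative):
```
import cv2
import numpy as np

# random stand-in for N stacked feature descriptors (cv2.kmeans needs float32)
desc = np.float32(np.random.rand(1000, 64))

# cluster into 512 visual codewords using k-means++ initialisation
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.01)
compactness, labels, dictionary = cv2.kmeans(desc, 512, None, criteria, 1,
                                             cv2.KMEANS_PP_CENTERS)
print(dictionary.shape)  # (512, 64) - one cluster centre per codeword
```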
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..4de51d1
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,331 @@
+################################################################################
+
+# functionality: utility functions for BOW and HOG detection algorithms
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import os
+import numpy as np
+import cv2
+import params
+import math
+import random
+
+################################################################################
+# global flags to facilitate output of additional info per stage/function
+
+show_additional_process_information = False;
+show_images_as_they_are_loaded = False;
+show_images_as_they_are_sampled = False;
+
+################################################################################
+
+# timing information - for training
+# - helper functions for timing code execution
+
+def get_elapsed_time(start):
+    return (cv2.getTickCount() - start) / cv2.getTickFrequency()
+
+
+def format_time(time):
+    time_str = ""
+    if time < 60.0:
+        time_str = "{}s".format(round(time, 1))
+    else:
+        minutes = time / 60.0
+        time_str = "{}m : {}s".format(int(minutes), round(time % 60, 2))
+    return time_str
+
+
+def print_duration(start):
+    time = get_elapsed_time(start)
+    print(("Took {}".format(format_time(time))))
+
+################################################################################
+
+# reads all the images in a given folder path and returns the results
+
+# for obvious reasons this will break with a very large dataset as you will run
+# out of memory - so an alternative approach may be required in that case
+
+def read_all_images(path):
+    images_path = [os.path.join(path, f) for f in os.listdir(path)]
+    images = []
+    for image_path in images_path:
+
+        # add in a check to skip non jpg or png (lower case) named files
+        # as some OSs helpfully create hidden files (.DS_Store on macOS,
+        # Thumbs.db on Windows) when you browse image folders - which are
+        # then not images when we try to load them
+
+        if (('.png' in image_path) or ('.jpg' in image_path)):
+            img = cv2.imread(image_path)
+            images.append(img)
+            if show_additional_process_information:
+                print("loading file - ", image_path);
+        else:
+            if show_additional_process_information:
+                print("skipping non PNG/JPG file - ", image_path);
+
+    return images
+
+################################################################################
+
+# stack an array of items as basic Python data manipulation
+
+def stack_array(arr):
+    stacked_arr = np.array([])
+    for item in arr:
+        # Only stack if it is not empty
+        if len(item) > 0:
+            if len(stacked_arr) == 0:
+                stacked_arr = np.array(item)
+            else:
+                stacked_arr = np.vstack((stacked_arr, item))
+    return stacked_arr
+
+################################################################################
+
+# transform between class numbers (i.e. codes) - {0, 1, 2, ... N} and
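As a quick illustration of `stack_array()`'s behaviour - a sketch with made-up shapes, showing that empty per-image descriptor sets are skipped before vstacking:
```
import numpy as np
from utils import stack_array  # as defined above

a = np.random.rand(5, 64)    # e.g. 5 descriptors from one image
b = np.empty((0, 64))        # an image that yielded no descriptors
c = np.random.rand(3, 64)    # 3 descriptors from another image

print(stack_array([a, b, c]).shape)  # (8, 64) - the empty entry is skipped
```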
+# names {dog, cat, cow, ...} - used in training and testing
+
+def get_class_number(class_name):
+    return params.DATA_CLASS_NAMES.get(class_name, 0)
+
+def get_class_name(class_code):
+    for name, code in params.DATA_CLASS_NAMES.items():
+        if code == class_code:
+            return name
+
+################################################################################
+
+# image data class object that contains the images, descriptors and bag of
+# words histograms
+
+class ImageData(object):
+    def __init__(self, img):
+        self.img = img
+        self.class_name = ""
+        self.class_number = None
+
+        # use default parameters for construction of HOG
+        # examples of non-default parameter use here:
+        # https://www.programcreek.com/python/example/84776/cv2.HOGDescriptor
+
+        self.hog = cv2.HOGDescriptor();  # default is 64 x 128
+        self.hog_descriptor = np.array([])
+        self.bow_descriptors = np.array([])
+
+    def set_class(self, class_name):
+        self.class_name = class_name
+        self.class_number = get_class_number(self.class_name)
+        if show_additional_process_information:
+            print("class name : ", class_name, " - ", self.class_number);
+
+    def compute_hog_descriptor(self):
+
+        # generate the HOG descriptor for a given image
+
+        img_hog = cv2.resize(self.img, (params.DATA_WINDOW_SIZE[0], params.DATA_WINDOW_SIZE[1]), interpolation=cv2.INTER_AREA)
+
+        self.hog_descriptor = self.hog.compute(img_hog)
+
+        if self.hog_descriptor is None:
+            self.hog_descriptor = np.array([])
+
+        if show_additional_process_information:
+            print("HOG descriptor computed - dimension: ", self.hog_descriptor.shape);
+
+    def compute_bow_descriptors(self):
+
+        # generate the feature descriptors for a given image
+
+        self.bow_descriptors = params.DETECTOR.detectAndCompute(self.img, None)[1]
+
+        if self.bow_descriptors is None:
+            self.bow_descriptors = np.array([])
+
+        if show_additional_process_information:
+            print("# feature descriptors computed - ", len(self.bow_descriptors));
+
+    def generate_bow_hist(self, dictionary):
+        self.bow_histogram = np.zeros((len(dictionary), 1))
+
+        # generate the bow histogram of feature occurrence from descriptors
+
+        if (params.BOW_use_ORB_always):
+            # FLANN matcher with ORB needs dictionary to be uint8
+            matches = params.MATCHER.match(self.bow_descriptors, np.uint8(dictionary));
+        else:
+            # FLANN matcher with SIFT/SURF needs descriptors to be type32
+            matches = params.MATCHER.match(np.float32(self.bow_descriptors), dictionary)
+
+        for match in matches:
+            # get which visual word this descriptor matches in the dictionary
+            # (match.trainIdx is the visual word) and increase the count for
+            # this visual word in the histogram (known as hard assignment)
+            self.bow_histogram[match.trainIdx] += 1
+
+        # Important! - normalize the histogram to L1 to remove bias for number
+        # of descriptors per image or class (could use L2?)
+
+        self.bow_histogram = cv2.normalize(self.bow_histogram, None, alpha=1, beta=0, norm_type=cv2.NORM_L1);
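A minimal sketch of turning one image into a BoW histogram via `ImageData` - it assumes `params.py` is configured as in this repository (`DETECTOR` / `MATCHER` set up) and that training has already produced `bow_dictionary.npy`; the image filename is a placeholder:
```
import cv2
import numpy as np
from utils import ImageData  # as defined above

img = cv2.imread("example.jpg")              # placeholder image path
dictionary = np.load("bow_dictionary.npy")   # produced by bow_training.py

img_data = ImageData(img)
img_data.compute_bow_descriptors()
if len(img_data.bow_descriptors) > 0:
    img_data.generate_bow_hist(dictionary)
    print(img_data.bow_histogram.ravel())    # L1-normalised codeword frequencies
```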
+
+################################################################################
+
+# generate a set of random sample patches from a given image of a specified size
+# with an optional flag to train only from patches centred around the centre of
+# the image
+
+def generate_patches(img, sample_patches_to_generate=0, centre_weighted=False,
+                     centre_sampling_offset=10, patch_size=(64,128)):
+
+    patches = [];
+
+    # if no patches are specified just return the original image
+
+    if (sample_patches_to_generate == 0):
+        return [img];
+
+    # otherwise generate N sub patches
+
+    else:
+
+        # get all heights and widths
+
+        img_height, img_width, _ = img.shape;
+        patch_height = patch_size[1];
+        patch_width = patch_size[0];
+
+        # iterate to find up to N patches (0 -> N-1)
+
+        for patch_count in range(sample_patches_to_generate):
+
+            # if we are using centre weighted patches, first grab the centre patch
+            # from the image as the first sample then take the rest around centre
+
+            if (centre_weighted):
+
+                # compute a patch location centred on the centre of the image
+
+                patch_start_h = math.floor(img_height / 2) - math.floor(patch_height / 2);
+                patch_start_w = math.floor(img_width / 2) - math.floor(patch_width / 2);
+
+                # for the first sample we'll just keep the centre one, for any
+                # others take them from the centre position +/- centre_sampling_offset
+                # in both height and width position
+
+                if (patch_count > 0):
+                    patch_start_h = random.randint(patch_start_h - centre_sampling_offset, patch_start_h + centre_sampling_offset);
+                    patch_start_w = random.randint(patch_start_w - centre_sampling_offset, patch_start_w + centre_sampling_offset);
+
+                # print("centre weighted patch")
+
+            # else get patches randomly from anywhere in the image
+
+            else:
+
+                # print("non centre weighted patch")
+
+                # randomly select a patch, ensuring we stay inside the image
+
+                patch_start_h = random.randint(0, (img_height - patch_height));
+                patch_start_w = random.randint(0, (img_width - patch_width));
+
+            # add the patch to the list of patches
+
+            patch = img[patch_start_h:patch_start_h + patch_height, patch_start_w:patch_start_w + patch_width]
+
+            if (show_images_as_they_are_sampled):
+                cv2.imshow("patch", patch);
+                cv2.waitKey(5);
+
+            patches.insert(patch_count, patch);
+
+        return patches;
+
+################################################################################
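A small usage sketch of `generate_patches()` with a synthetic image - the image shape and patch count here are illustrative only:
```
import numpy as np
from utils import generate_patches  # as defined above

# synthetic 320x240 colour image standing in for a real negative example
img = np.random.randint(0, 256, (240, 320, 3), dtype=np.uint8)

# sample 5 random 64x128 patches from anywhere within the image
patches = generate_patches(img, sample_patches_to_generate=5,
                           centre_weighted=False, patch_size=(64, 128))
print(len(patches), patches[0].shape)  # 5 (128, 64, 3)
```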
", centre_sampling_offset) + + # add each image patch to the data set + + img_data = ImageData(img_patch) + img_data.set_class(class_name) + imgs_data.insert(img_count, img_data) + img_count += 1 + + return imgs_data + +################################################################################ + +# load image data from specified paths + +def load_images(paths, class_names, sample_set_sizes, use_centre_weighting_flags, centre_sampling_offset=10, patch_size=(64,128)): + imgs_data = [] # type: list[ImageData] + + # for each specified path and corresponding class_name and required number + # of samples - add them to the data set + + for path, class_name, sample_count, centre_weighting in zip(paths, class_names, sample_set_sizes, use_centre_weighting_flags): + load_image_path(path, class_name, imgs_data, sample_count, centre_weighting, centre_sampling_offset, patch_size) + + return imgs_data + +################################################################################ + +# return the global set of bow histograms for the data set of images + +def get_bow_histograms(imgs_data): + + samples = stack_array([[img_data.bow_histogram] for img_data in imgs_data]) + return np.float32(samples) + +################################################################################ + +# return the global set of hog descriptors for the data set of images + +def get_hog_descriptors(imgs_data): + + samples = stack_array([[img_data.hog_descriptor] for img_data in imgs_data]) + return np.float32(samples) + +################################################################################ + +# return global the set of numerical class labels for the data set of images + +def get_class_labels(imgs_data): + class_labels = [img_data.class_number for img_data in imgs_data] + return np.int32(class_labels) + +################################################################################