diff --git a/.gitignore b/.gitignore
index 72364f9..3287d2a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+*.npy
+*.xml
+pedestrian/
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/.idea/PyBOW.iml b/.idea/PyBOW.iml
deleted file mode 100644
index 6711606..0000000
--- a/.idea/PyBOW.iml
+++ /dev/null
@@ -1,11 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module type="PYTHON_MODULE" version="4">
-  <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
-    <orderEntry type="sourceFolder" forTests="false" />
-  </component>
-  <component name="TestRunnerService">
-    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
-  </component>
-</module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index b45b51e..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,4 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index cf1bf7a..0000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="ProjectModuleManager">
-    <modules>
-      <module fileurl="file://$PROJECT_DIR$/.idea/PyBOW.iml" filepath="$PROJECT_DIR$/.idea/PyBOW.iml" />
-    </modules>
-  </component>
-</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/FLANN_histogram.png b/FLANN_histogram.png
deleted file mode 100644
index 64e3c99..0000000
Binary files a/FLANN_histogram.png and /dev/null differ
diff --git a/LICENSE b/LICENSE
index fc465b5..975f47f 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,7 @@
MIT License

-Copyright (c) 2017 nextgensparx
+Copyright (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+Copyright (c) 2017 Sipho Mateke (github: siphomateke)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 6bb2d8b..0da541c 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,97 @@
-# PyBOW
-A Python implementation of the Bag of words algorithm using OpenCV
-Requires numpy, matplotlib and OpenCV 3.2.0
+# Python Bag of (Visual) Words (BoW) and Histogram of Oriented Gradient (HOG) based Object Detection
+An exemplar python implementation of the Bag of (Visual) Words and Histogram of Oriented Gradient (HOG) feature based object detection (BoW or HOG features, SVM classification) using [OpenCV](http://www.opencv.org).
+
+Examples used for teaching within the undergraduate Computer Science programme
+at [Durham University](http://www.durham.ac.uk) (UK) by [Prof. Toby Breckon](https://breckon.org/toby/).
+
+All tested with [OpenCV](http://www.opencv.org) 3.4.x and Python 3.x (requiring numpy also).
+
+----
+
+### OpenCV Setup:
+
+To ensure you have your [OpenCV](http://www.opencv.org) setup correctly for use with these examples - please follow the suite of testing examples [here](https://github.com/tobybreckon/python-examples-ip/blob/master/TESTING.md).
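+
+As a quick sanity check that a suitable OpenCV version is available (a minimal sketch, run from any python3 prompt):
+```
+import cv2
+print(cv2.__version__)   # expecting 3.4.x for these examples
+```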
+
+----
+
+### Details:
+
+You are provided with a set of 7 example files that can be run individually as follows:
+
+- ```hog_training.py``` - performs object detection batch training using Histogram of Oriented Gradients (HOG) and SVM classification.
+
+- ```hog_testing.py``` - performs object detection batch testing using Histogram of Oriented Gradients (HOG) and SVM classification.
+
+- ```hog_detector.py``` - performs object detection via sliding window search using Histogram of Oriented Gradients (HOG) and SVM classification over a directory of specified images.
+
+- ```bow_training.py``` - performs object detection batch training using a bag of visual words (BoW) approach and SVM classification.
+
+- ```bow_testing.py``` - performs object detection batch testing using a bag of visual words (BoW) approach and SVM classification.
+
+- ```bow_detector.py``` - performs object detection via sliding window search using a bag of visual words (BoW) approach and SVM classification over a directory of specified images.
+
+- ```selective_search.py``` - performs selective search to generate object windows as an alternative to sliding window search (generates windows only, does not perform integrated object detection).
+
+and additional supporting code in ```utils.py``` (image loading / feature extraction) and ```sliding_window.py``` (multi-scale sliding window) which are imported into the above.
+
+----
+
+### How to download and run:
+
+Download each file as needed (or download/uncompress a zip from [here](https://github.com/tobybreckon/python-bow-hog-object-detection/archive/master.zip)) or to download the entire repository in an environment where you have git installed try:
+```
+git clone https://github.com/tobybreckon/python-bow-hog-object-detection
+cd python-bow-hog-object-detection
+```
+In order to perform training you will first need to download the dataset, which can be achieved as follows on a linux/unix based system (or it can alternatively be downloaded from [here](ftp://ftp.inrialpes.fr/pub/lear/douze/data/INRIAPerson.tar) - ftp://ftp.inrialpes.fr/pub/lear/douze/data/INRIAPerson.tar )
+```
+sh ./download-data.sh
+```
+If you run into errors such as _"libpng error: IDAT: invalid distance too far back"_ when running the commands below you may need to also run:
+```
+sh ./fix-pngs.sh pedestrian/INRIAPerson
+```
+[Durham Students - just download the data sets from the [DUO](http://duo.dur.ac.uk) to avoid this.]
+
+To perform training of the bag of words approach you can simply run the following (or alternatively however you run python scripts in your environment):
+```
+python3 ./bow_training.py
+```
+which will perform the stages of loading the image training set, feature descriptor extraction, k-means clustering and SVM classifier training, and output two resulting files ```svm_bow.xml``` (the trained SVM classifier) and ```bow_dictionary.npy``` (the BoW set of visual codewords / cluster centres - known as the dictionary).
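+
+For reference, these two output files can be re-loaded for subsequent use via the same OpenCV calls used by the testing and detection scripts in this repository (a minimal sketch):
+```
+import cv2
+import numpy as np
+
+# re-load the BoW dictionary (visual codewords) and trained SVM from training
+dictionary = np.load("bow_dictionary.npy")
+svm = cv2.ml.SVM_load("svm_bow.xml")
+```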
+
+To perform batch testing of the bag of words approach you can then simply use (or alternatively ...):
+```
+python3 ./bow_testing.py
+```
+which will load the ```svm_bow.xml``` and ```bow_dictionary.npy``` created from training and report statistical testing performance over an independent set of test images (not used during training).
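+
+For the curious, the reported testing error is computed from the batch SVM prediction over all test examples, roughly as follows (a sketch of the logic inside ```bow_testing.py```):
+```
+# samples / class_labels are the BoW histograms and labels of the test set;
+# error = fraction of test examples where the predicted class label differs
+output = svm.predict(samples)[1].ravel()
+error = np.absolute(class_labels.ravel() - output).sum() / float(output.shape[0])
+```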
+
+To perform detection over a set of images you can then simply use (or alternatively ...):
+```
+python3 ./bow_detector.py
+```
+which will again load the ```svm_bow.xml``` and ```bow_dictionary.npy``` created from training but now perform multi-scale sliding window based detection over a set of images in a directory specified by the variable ```directory_to_cycle = "...."``` at the top of this python script file.
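+
+The detection itself follows a classic multi-scale sliding window pattern, built on the ```pyramid()``` and ```sliding_window()``` generators provided in ```sliding_window.py``` (a simplified sketch of the loop used in both detector scripts):
+```
+for resized in pyramid(img, scale=1.25):    # each re-scaled image in the pyramid
+    for (x, y, window) in sliding_window(resized, window_size, step_size=step):
+        pass   # extract features for this window and classify it via the SVM
+```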
+
+The above instructions can be repeated for the set of ```hog_...py``` examples to perform training (to produce a single ```svm_hog.xml``` file), testing and subsequent detection as before.
+
+----
+
+### References
+
+This code base was informed by the research work carried out in the following publications:
+
+- [On using Feature Descriptors as Visual Words for Object Detection within X-ray Baggage Security Screening](https://breckon.org/toby/publications/papers/kundegorski16xray.pdf) (M.E. Kundegorski, S. Akcay, M. Devereux, A. Mouton, T.P. Breckon), In Proc. International Conference on Imaging for Crime Detection and Prevention, IET, pp. 12 (6 .)-12 (6 .)(1), 2016. [[pdf](https://breckon.org/toby/publications/papers/kundegorski16xray.pdf)] [[doi](http://dx.doi.org/10.1049/ic.2016.0080)]
+
+- [Real-time Classification of Vehicle Types within Infra-red Imagery](https://breckon.org/toby/publications/papers/kundegorski16vehicle.pdf) (M.E. Kundegorski, S. Akcay, G. Payen de La Garanderie, T.P. Breckon), In Proc. SPIE Optics and Photonics for Counterterrorism, Crime Fighting and Defence, SPIE, Volume 9995, pp. 1-16, 2016. [[pdf](https://breckon.org/toby/publications/papers/kundegorski16vehicle.pdf)] [[doi](http://dx.doi.org/10.1117/12.2241106)]
+
+- [A Photogrammetric Approach for Real-time 3D Localization and Tracking of Pedestrians in Monocular Infrared Imagery](http://community.dur.ac.uk/toby.breckon/publications/papers/kundegorski14photogrammetric.pdf) (M.E. Kundegorski, T.P. Breckon), In Proc. SPIE Optics and Photonics for Counterterrorism, Crime Fighting and Defence, SPIE, Volume 9253, No. 01, pp. 1-16, 2014. [[pdf](https://breckon.org/toby/publications/papers/kundegorski14photogrammetric.pdf)] [[doi](http://dx.doi.org/10.1117/12.2065673)]
+
+----
+
+**Acknowledgements:** originally forked from an earlier Bag of Visual Words only version at https://github.com/siphomateke/PyBOW with the additional HOG and selective search code added to this newer version.
+
+_[ but it appears some code portions may have broader origins elsewhere, such as from this tutorial - https://www.pyimagesearch.com/2015/03/23/sliding-windows-for-object-detection-with-python-and-opencv/ ]_
+
+**Bugs:** _I do not claim this code to be bug free._ If you find any bugs raise an issue (or much better still submit a git pull request with a fix) - toby.breckon@durham.ac.uk
+
+_"may the source be with you"_ - anon.
diff --git a/bow_detector.py b/bow_detector.py
new file mode 100644
index 0000000..7b35f5c
--- /dev/null
+++ b/bow_detector.py
@@ -0,0 +1,165 @@
+################################################################################
+
+# functionality: perform detection based on Bag of (visual) Words SVM classification
+# using a very basic multi-scale, sliding window (exhaustive search) approach
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import cv2
+import os
+import numpy as np
+import math
+import params
+from utils import *
+from sliding_window import *
+
+################################################################################
+
+directory_to_cycle = "pedestrian/INRIAPerson/Test/pos/";
+
+show_scan_window_process = True;
+
+################################################################################
+
+# load dictionary and SVM data
+
+try:
+ dictionary = np.load(params.BOW_DICT_PATH)
+ svm = cv2.ml.SVM_load(params.BOW_SVM_PATH)
+except:
+ print("Missing files - dictionary and/or SVM!");
+ print("-- have you performed training to produce these files ?");
+ exit();
+
+# print some checks
+
+print("dictionary size : ", dictionary.shape)
+print("svm size : ", len(svm.getSupportVectors()))
+print("svm var count : ", svm.getVarCount())
+
+################################################################################
+
+# process all images in directory (sorted by filename)
+
+for filename in sorted(os.listdir(directory_to_cycle)):
+
+ # if it is a PNG file
+
+ if '.png' in filename:
+ print(os.path.join(directory_to_cycle, filename));
+
+ # read image data
+
+ img = cv2.imread(os.path.join(directory_to_cycle, filename), cv2.IMREAD_COLOR)
+
+ # make a copy for drawing the output
+
+ output_img = img.copy();
+
+ # for a range of different image scales in an image pyramid
+
+ current_scale = -1
+ detections = []
+ rescaling_factor = 1.25
+
+ ################################ for each re-scale of the image
+
+ for resized in pyramid(img, scale=rescaling_factor):
+
+ # at the start our scale = 1, because we catch the flag value -1
+
+ if current_scale == -1:
+ current_scale = 1
+
+ # after this rescale downwards each time (division by re-scale factor)
+
+ else:
+ current_scale /= rescaling_factor
+
+ rect_img = resized.copy()
+
+ # if we want to see progress show each scale
+
+ if (show_scan_window_process):
+ cv2.imshow('current scale',rect_img)
+ cv2.waitKey(10);
+
+ # loop over the sliding window for each layer of the pyramid (re-sized image)
+
+ window_size = params.DATA_WINDOW_SIZE
+ step = math.floor(resized.shape[0] / 16)
+
+ if step > 0:
+
+ ############################# for each scan window
+
+ for (x, y, window) in sliding_window(resized, window_size, step_size=step):
+
+ # if we want to see progress show each scan window
+
+ if (show_scan_window_process):
+ cv2.imshow('current window',window)
+ key = cv2.waitKey(10) # wait 10ms
+
+ # for each window region get the BoW feature point descriptors
+
+ img_data = ImageData(window)
+ img_data.compute_bow_descriptors()
+
+ # generate and classify each window by constructing a BoW
+ # histogram and passing it through the SVM classifier
+
+ if img_data.bow_descriptors is not None:
+ img_data.generate_bow_hist(dictionary)
+
+ print("detecting with SVM ...")
+
+ retval, [result] = svm.predict(np.float32([img_data.bow_histogram]))
+
+ print(result)
+
+ # if we get a detection, then record it
+
+ if result[0] == params.DATA_CLASS_NAMES["pedestrian"]:
+
+ # store rect as (x1, y1) (x2,y2) pair
+
+ rect = np.float32([x, y, x + window_size[0], y + window_size[1]])
+
+ # if we want to see progress show each detection, at each scale
+
+ if (show_scan_window_process):
+ cv2.rectangle(rect_img, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 2)
+ cv2.imshow('current scale',rect_img)
+ cv2.waitKey(10)
+
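+                            # rescale the detection window back into the
+                            # co-ordinate frame of the original (un-resized) image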
+ rect *= (1.0 / current_scale)
+ detections.append(rect)
+
+ ########################################################
+
+ # For the overall set of detections (over all scales) perform
+ # non maximal suppression (i.e. remove overlapping boxes etc).
+
+ detections = non_max_suppression_fast(np.int32(detections), 0.4)
+
+ # finally draw all the detection on the original image
+
+ for rect in detections:
+ cv2.rectangle(output_img, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 2)
+
+ cv2.imshow('detected objects',output_img)
+ key = cv2.waitKey(40) # wait 40ms
+ if (key == ord('x')):
+ break
+
+# close all windows
+
+cv2.destroyAllWindows()
+
+#####################################################################
diff --git a/bow_testing.py b/bow_testing.py
new file mode 100644
index 0000000..b9e8f43
--- /dev/null
+++ b/bow_testing.py
@@ -0,0 +1,75 @@
+################################################################################
+
+# functionality: perform Bag of (visual) Words (BoW) testing over
+# a specified dataset and compute the resulting prediction/classification error
+# over that same dataset, using the pre-saved SVM model + BoW dictionary
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import numpy as np
+import cv2
+from utils import *
+
+################################################################################
+
+def main():
+
+ # load up the dictionary and SVM stored from prior training
+
+ try:
+ dictionary = np.load(params.BOW_DICT_PATH)
+ svm = cv2.ml.SVM_load(params.BOW_SVM_PATH)
+ except:
+ print("Missing files - dictionary and/or SVM!");
+ print("-- have you performed training to produce these files ?");
+ exit();
+
+ # load ** testing ** data sets in the same class order as training
+ # (here we perform no patch sampling of the data as we are not training
+ # hence [0,0] sample sizes and [False,False] centre weighting flags)
+
+ print("Loading test data as a batch ...")
+
+ paths = [params.DATA_testing_path_neg, params.DATA_testing_path_pos]
+ use_centre_weighting = [False, False];
+ class_names = params.DATA_CLASS_NAMES
+ imgs_data = load_images(paths, class_names, [0,0], use_centre_weighting)
+
+ print("Computing descriptors...") # for each testing image
+ start = cv2.getTickCount()
+ [img_data.compute_bow_descriptors() for img_data in imgs_data]
+ print_duration(start)
+
+ print("Generating histograms...") # for each testing image
+ start = cv2.getTickCount()
+ [img_data.generate_bow_hist(dictionary) for img_data in imgs_data]
+ print_duration(start)
+
+ # get the example/sample bow histograms and class labels
+
+ samples, class_labels = get_bow_histograms(imgs_data), get_class_labels(imgs_data)
+
+ # perform batch SVM classification over the whole set
+
+ print("Performing batch SVM classification over all data ...")
+
+ results = svm.predict(samples)
+ output = results[1].ravel()
+
+ # compute and report the error over the whole set
+
+ error = ((np.absolute(class_labels.ravel() - output).sum()) / float(output.shape[0]))
+    print("Trained SVM obtained {}% testing set error".format(round(error * 100,2)))
+    print("-- meaning the SVM got {}% of the testing examples correct!".format(round((1.0 - error) * 100,2)))
+
+################################################################################
+
+if __name__ == "__main__":
+ main()
+
+################################################################################
diff --git a/bow_training.py b/bow_training.py
new file mode 100644
index 0000000..dada084
--- /dev/null
+++ b/bow_training.py
@@ -0,0 +1,158 @@
+################################################################################
+
+# functionality: perform all stages of Bag of (visual) Words (BoW) training over
+# a specified dataset and compute the resulting prediction/classification error
+# over that same dataset, having saved the SVM model to file for subsequent re-use
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import cv2
+from utils import *
+
+################################################################################
+
+def generate_dictionary(imgs_data, dictionary_size):
+
+ # Extracting descriptors
+ desc = stack_array([img_data.bow_descriptors for img_data in imgs_data])
+
+    # important, cv2.kmeans() clustering only accepts float32 descriptors
+
+ desc = np.float32(desc)
+
+ # perform clustering - increase iterations and reduce EPS to change performance
+
+ criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, params.BOW_clustering_iterations, 0.01)
+ flags = cv2.KMEANS_PP_CENTERS
+
+    # desc is a float32 numpy array of vstacked descriptors
+
+ compactness, labels, dictionary = cv2.kmeans(desc, dictionary_size, None, criteria, 1, flags)
+ np.save(params.BOW_DICT_PATH, dictionary)
+
+ return dictionary
+
+################################################################################
+
+def main():
+
+ ############################################################################
+ # load our training data set of images examples
+
+ program_start = cv2.getTickCount()
+
+ print("Loading images...")
+ start = cv2.getTickCount()
+
+ # N.B. specify data path names in same order as class names (neg, pos)
+
+ paths = [params.DATA_training_path_neg, params.DATA_training_path_pos]
+
+    # build a list of class names automatically from our dictionary of class (name,number) pairs
+
+ class_names = [get_class_name(class_number) for class_number in range(len(params.DATA_CLASS_NAMES))]
+
+ # specify number of sub-window samples to take from each positive and negative
+ # example image in the data set
+ # N.B. specify in same order as class names (neg, pos) - again
+
+ sampling_sizes = [params.DATA_training_sample_count_neg, params.DATA_training_sample_count_pos]
+
+    # do we want to take samples only from the centre of the example image or randomly?
+ # No - for background -ve images (first class)
+ # Yes - for object samples +ve images (second class)
+
+ sample_from_centre = [False, True];
+
+ # perform image loading
+
+ imgs_data = load_images(paths, class_names, sampling_sizes, sample_from_centre,
+ params.DATA_WINDOW_OFFSET_FOR_TRAINING_SAMPLES, params.DATA_WINDOW_SIZE);
+
+ print(("Loaded {} image(s)".format(len(imgs_data))))
+ print_duration(start)
+
+ ############################################################################
+ # perform bag of visual words feature construction
+
+ print("Computing descriptors...") # for each training image
+ start = cv2.getTickCount()
+ [img_data.compute_bow_descriptors() for img_data in imgs_data]
+ print_duration(start)
+
+ print("Clustering...") # over all images to generate dictionary code book/words
+ start = cv2.getTickCount()
+ dictionary = generate_dictionary(imgs_data, params.BOW_dictionary_size)
+ print_duration(start)
+
+ print("Generating histograms...") # for each training image
+ start = cv2.getTickCount()
+ [img_data.generate_bow_hist(dictionary) for img_data in imgs_data]
+ print_duration(start)
+
+ ############################################################################
+    # train an SVM based on these BoW histogram features
+
+ print("Training SVM...")
+ start = cv2.getTickCount()
+
+ # define SVM parameters
+
+ svm = cv2.ml.SVM_create()
+ svm.setType(cv2.ml.SVM_C_SVC) # change this for multi-class
+    svm.setKernel(params.BOW_SVM_kernel) # use specific kernel type (alternatives exist)
+
+ # compile samples (i.e. visual word histograms) for each training image
+
+ samples = get_bow_histograms(imgs_data)
+
+ # get class label for each training image
+
+ class_labels = get_class_labels(imgs_data);
+
+ # specify the termination criteria for the SVM training
+
+ svm.setTermCriteria((cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_COUNT, params.BOW_SVM_max_training_iterations, 1.e-06))
+
+ # perform auto training for the SVM which will essentially perform grid
+ # search over the set of parameters for the chosen kernel and the penalty
+ # cost term, C (N.B. trainAuto() syntax is correct as of OpenCV 3.4.x)
+
+ svm.trainAuto(samples, cv2.ml.ROW_SAMPLE, class_labels, kFold = 10, balanced = True);
+
+    # save the trained SVM to file so that we can load it again for testing / detection
+
+ svm.save(params.BOW_SVM_PATH)
+
+ ############################################################################
+ # measure performance of the SVM trained on the bag of visual word features
+
+ # perform prediction over the set of examples we trained over
+
+ output = svm.predict(samples)[1].ravel()
+ error = (np.absolute(class_labels.ravel() - output).sum()) / float(output.shape[0])
+
+    # we are successful if our prediction error is lower than random chance
+    # e.g. for 2 class labels random chance is 1/2 = 0.5 (i.e. 50% error)
+
+ if error < (1.0 / len(params.DATA_CLASS_NAMES)):
+ print("Trained SVM obtained {}% training set error".format(round(error * 100,2)))
+        print("-- meaning the SVM got {}% of the training examples correct!".format(round((1.0 - error) * 100,2)))
+ else:
+ print("Failed to train SVM. {}% error".format(round(error * 100,2)))
+
+ print_duration(start)
+
+ print(("Finished training BOW detector. {}".format(format_time(get_elapsed_time(program_start)))))
+
+################################################################################
+
+if __name__ == '__main__':
+ main()
+
+################################################################################
diff --git a/bowutils.py b/bowutils.py
deleted file mode 100644
index eefef73..0000000
--- a/bowutils.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import os
-import numpy as np
-import cv2
-import params
-
-
-def get_elapsed_time(start):
- """ Helper function for timing code execution"""
- return (cv2.getTickCount() - start) / cv2.getTickFrequency()
-
-
-def format_time(time):
- time_str = ""
- if time < 60.0:
- time_str = "{}s".format(round(time, 1))
- elif time > 60.0:
- minutes = time / 60.0
- time_str = "{}m : {}s".format(int(minutes), round(time % 60, 2))
- return time_str
-
-
-def print_duration(start):
- time = get_elapsed_time(start)
- print("Took {}".format(format_time(time)))
-
-
-def resize_img(img, width=-1, height=-1):
- if height == -1 and width == -1:
- raise TypeError("Invalid arguments. Width or height must be provided.")
- h = img.shape[0]
- w = img.shape[1]
- if height == -1:
- aspect_ratio = float(w) / h
- new_height = int(width / aspect_ratio)
- return cv2.resize(img, (width, new_height))
- elif width == -1:
- aspect_ratio = h / float(w)
- new_width = int(height / aspect_ratio)
- return cv2.resize(img, (new_width, height))
-
-
-def imreads(path):
- """
- This reads all the images in a given folder and returns the results
- """
- images_path = [os.path.join(path, f) for f in os.listdir(path)]
- images = []
- for image_path in images_path:
- img = cv2.imread(image_path)
- images.append(img)
- return images
-
-
-def stack_array(arr):
- stacked_arr = np.array([])
- for item in arr:
- # Only stack if it is not empty
- if len(item) > 0:
- if len(stacked_arr) == 0:
- stacked_arr = np.array(item)
- else:
- stacked_arr = np.vstack((stacked_arr, item))
- return stacked_arr
-
-
-def get_descriptors(img):
- # returns descriptors of an image
- return params.DETECTOR.detectAndCompute(img, None)[1]
-
-
-def get_class_code(class_name):
- return params.CLASS_NAMES.get(class_name, 0)
-
-
-def get_class_name(class_code):
- for name, code in params.CLASS_NAMES.iteritems():
- if code == class_code:
- return name
-
-
-class ImageData(object):
- def __init__(self, img):
- self.img = img
- self.class_name = ""
- self.response = None
- self.descriptors = np.array([])
-
- def set_class(self, class_name):
- self.class_name = class_name
- self.response = get_class_code(self.class_name)
-
- def compute_descriptors(self):
- self.descriptors = get_descriptors(self.img)
- if self.descriptors is None:
- self.descriptors = np.array([])
-
- def hog(self):
- gx = cv2.Sobel(self.img, cv2.CV_32F, 1, 0)
- gy = cv2.Sobel(self.img, cv2.CV_32F, 0, 1)
- mag, ang = cv2.cartToPolar(gx, gy)
- bins = np.int32(params.HOG_BIN_N * ang / (2 * np.pi)) # quantizing binvalues in (0...16)
- bin_cells = bins[:10, :10], bins[10:, :10], bins[:10, 10:], bins[10:, 10:]
- mag_cells = mag[:10, :10], mag[10:, :10], mag[:10, 10:], mag[10:, 10:]
- hists = [np.bincount(b.ravel(), m.ravel(), params.HOG_BIN_N) for b, m in zip(bin_cells, mag_cells)]
- hist = np.hstack(hists) # hist is a 64 bit vector
- return hist
-
- def generate_bow_hist(self, dictionary):
- self.features = np.zeros((len(dictionary), 1))
- # FLANN matcher needs descriptors to be type32
- matches = params.MATCHER.match(np.float32(self.descriptors), dictionary)
- for match in matches:
- # Get which visual word this descriptor matches in the dictionary
- # match.trainIdx is the visual_word
- # Increase count for this visual word in histogram
- self.features[match.trainIdx] += 1
-
-
-def add_to_imgs_data(path, class_name, imgs_data):
- imgs = imreads(path)
-
- img_count = len(imgs_data)
- for img in imgs:
- if img.shape[0] > params.MAX_IMG_WIDTH:
- img = resize_img(img, params.MAX_IMG_WIDTH)
- img_data = ImageData(img)
- img_data.set_class(class_name)
- imgs_data.insert(img_count, img_data)
- img_count += 1
-
- return imgs_data
-
-
-def get_imgs_data(paths, class_names, dictionary=None):
- imgs_data = [] # type: list[ImageData]
-
- for path, class_name in zip(paths, class_names):
- add_to_imgs_data(path, class_name, imgs_data)
-
- [img_data.compute_descriptors() for img_data in imgs_data]
- if dictionary is not None:
- [img_data.generate_bow_hist(dictionary) for img_data in imgs_data]
-
- return imgs_data
-
-
-def get_samples(imgs_data):
- # Important! Normalize histograms to remove bias for number of descriptors
- norm_features = [cv2.normalize(img_data.features, None, 0, len(img_data.features), cv2.NORM_MINMAX) for img_data in
- imgs_data]
- samples = stack_array([[feature] for feature in norm_features])
- # samples = stack_array([[img_data.features] for img_data in imgs_data])
- return np.float32(samples)
-
-
-def get_responses(imgs_data):
- responses = [img_data.response for img_data in imgs_data]
- return np.int32(responses)
diff --git a/classify.py b/classify.py
deleted file mode 100644
index 83dd066..0000000
--- a/classify.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import numpy as np
-import cv2
-from bowutils import *
-from matplotlib import pyplot as plt
-
-def main():
- # Load up the dictionary
- dictionary = np.load(params.DICT_PATH)
-
- paths = ["test/pos", "test/neg"]
- class_names = ["pos", "neg"]
- imgs_data = get_imgs_data(paths, class_names, dictionary)
-
- samples, responses = get_samples(imgs_data), get_responses(imgs_data)
-
- svm = cv2.ml.SVM_load(params.SVM_PATH)
- results = svm.predict(samples)
- output = results[1].ravel()
-
- error = ((np.absolute(responses.ravel() - output).sum()) / float(output.shape[0])) * 100
- print "Error in test data: {}%".format(error)
-
- for i in xrange(len(imgs_data)):
- img_data = imgs_data[i]
-
- title = "Prediction: {0}".format(output[i])
- plt.axis("off")
- plt.imshow(cv2.cvtColor(img_data.img, cv2.COLOR_BGR2RGB))
- plt.suptitle(title)
- plt.draw()
- plt.waitforbuttonpress(0) # this will wait for indefinite time
- plt.clf()
-
-
-if __name__ == "__main__":
- main()
diff --git a/download-data.sh b/download-data.sh
new file mode 100644
index 0000000..69cf111
--- /dev/null
+++ b/download-data.sh
@@ -0,0 +1,86 @@
+################################################################################
+
+# simple data downloader / unpacker - (c) 2018 Toby Breckon, Durham University, UK
+
+################################################################################
+
+# set this script to fail on error
+
+set -e
+
+# check for required commands to download and md5 check
+
+(command -v curl | grep curl > /dev/null) ||
+  (echo "Error: curl command not found, cannot download!"; exit 1)
+
+(command -v md5sum | grep md5sum > /dev/null) ||
+ (echo "Error: md5sum command not found, md5sum check will fail!")
+
+################################################################################
+
+STARTING_DIR=`pwd`
+
+################################################################################
+
+## INRIA Pedestrian Dataset
+
+################################################################################
+
+URL_PERSON=ftp://ftp.inrialpes.fr/pub/lear/douze/data/INRIAPerson.tar
+
+DIR_LOCAL_TARGET_PERSON=/tmp/pedestrian
+
+PERSON_FILE_NAME=INRIAPerson.tar
+
+DIR_NAME_UNPACKED=INRIAPerson
+PERSON_MD5_SUM=6af009c6386c86f78f77e81003df84dc
+
+################################################################################
+
+# perform download
+
+echo "Downloading pedestrian data set models..."
+
+mkdir -p $DIR_LOCAL_TARGET_PERSON
+
+TARGET=$DIR_LOCAL_TARGET_PERSON/$PERSON_FILE_NAME
+
+curl --progress-bar $URL_PERSON > $TARGET
+
+################################################################################
+
+# perform md5 check and move to required local target directory
+
+cd $DIR_LOCAL_TARGET_PERSON
+
+echo "checking the MD5 checksum for downloaded data ..."
+
+CHECK_SUM_CHECKPOINTS="$PERSON_MD5_SUM $PERSON_FILE_NAME"
+
+echo $CHECK_SUM_CHECKPOINTS | md5sum -c
+
+echo "Unpacking the tar file..."
+
+tar -xvf $PERSON_FILE_NAME
+
+chmod -R +w $DIR_LOCAL_TARGET_PERSON
+
+echo "Tidying up..."
+
+ln -s $DIR_LOCAL_TARGET_PERSON $STARTING_DIR/pedestrian
+
+# mv $DIR_NAME_UNPACKED/* .
+
+rm $TARGET # && rm -r $DIR_NAME_UNPACKED
+
+################################################################################
+
+echo "... completed -> required pedestrian data in $DIR_LOCAL_TARGET_PERSON/"
+
+################################################################################
+
+# reset
+
+cd $STARTING_DIR
+
+################################################################################
diff --git a/fix-pngs.sh b/fix-pngs.sh
new file mode 100644
index 0000000..7fc0868
--- /dev/null
+++ b/fix-pngs.sh
@@ -0,0 +1,41 @@
+
+################################################################################
+
+# simple png file fixer script using pngcrush
+
+# (c) 2018 Toby Breckon, Durham University, UK
+
+################################################################################
+# check for command line argument
+
+if (test $# -ne 1)
+then
+  echo "usage: sh ./fix-pngs.sh /path/to/dataset/files"
+ exit 1
+fi
+
+################################################################################
+
+# set this script to fail on error
+
+set -e
+
+################################################################################
+# check for required commands to perform fix
+
+(command -v pngcrush | grep pngcrush > /dev/null) ||
+ (echo "Error: pngcrush command not found, cannot fix!";
+ echo "install from your package manager or from https://pmt.sourceforge.io/pngcrush/";
+ exit 1)
+
+################################################################################
+# go the right place to write
+
+cd $1
+
+################################################################################
+# perform fix in place
+
+for i in `find * | grep png`; do pngcrush -fix -force $i tmp.png; mv tmp.png $i;done
+
+################################################################################
diff --git a/hog_detector.py b/hog_detector.py
new file mode 100644
index 0000000..5573f87
--- /dev/null
+++ b/hog_detector.py
@@ -0,0 +1,162 @@
+################################################################################
+
+# functionality: perform detection based on HOG feature descriptor / SVM classification
+# using a very basic multi-scale, sliding window (exhaustive search) approach
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Minor portions: based on fork from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import cv2
+import os
+import numpy as np
+import math
+import params
+from utils import *
+from sliding_window import *
+
+################################################################################
+
+directory_to_cycle = "pedestrian/INRIAPerson/Test/pos/";
+
+show_scan_window_process = True;
+
+################################################################################
+
+# load SVM from file
+
+try:
+ svm = cv2.ml.SVM_load(params.HOG_SVM_PATH)
+except:
+ print("Missing files - SVM!");
+ print("-- have you performed training to produce these files ?");
+ exit();
+
+# print some checks
+
+print("svm size : ", len(svm.getSupportVectors()))
+print("svm var count : ", svm.getVarCount())
+
+################################################################################
+
+# process all images in directory (sorted by filename)
+
+for filename in sorted(os.listdir(directory_to_cycle)):
+
+ # if it is a PNG file
+
+ if '.png' in filename:
+ print(os.path.join(directory_to_cycle, filename));
+
+ # read image data
+
+ img = cv2.imread(os.path.join(directory_to_cycle, filename), cv2.IMREAD_COLOR)
+
+ # make a copy for drawing the output
+
+ output_img = img.copy();
+
+ # for a range of different image scales in an image pyramid
+
+ current_scale = -1
+ detections = []
+ rescaling_factor = 1.25
+
+ ################################ for each re-scale of the image
+
+ for resized in pyramid(img, scale=rescaling_factor):
+
+ # at the start our scale = 1, because we catch the flag value -1
+
+ if current_scale == -1:
+ current_scale = 1
+
+ # after this rescale downwards each time (division by re-scale factor)
+
+ else:
+ current_scale /= rescaling_factor
+
+ rect_img = resized.copy()
+
+ # if we want to see progress show each scale
+
+ if (show_scan_window_process):
+ cv2.imshow('current scale',rect_img)
+ cv2.waitKey(10);
+
+ # loop over the sliding window for each layer of the pyramid (re-sized image)
+
+ window_size = params.DATA_WINDOW_SIZE
+ step = math.floor(resized.shape[0] / 16)
+
+ if step > 0:
+
+ ############################# for each scan window
+
+ for (x, y, window) in sliding_window(resized, window_size, step_size=step):
+
+ # if we want to see progress show each scan window
+
+ if (show_scan_window_process):
+ cv2.imshow('current window',window)
+ key = cv2.waitKey(10) # wait 10ms
+
+                # for each window region compute the HOG feature descriptor
+
+ img_data = ImageData(window)
+ img_data.compute_hog_descriptor();
+
+                # classify each window by passing its HOG descriptor
+                # through the SVM classifier
+
+ if img_data.hog_descriptor is not None:
+
+ print("detecting with SVM ...")
+
+ retval, [result] = svm.predict(np.float32([img_data.hog_descriptor]))
+
+ print(result)
+
+ # if we get a detection, then record it
+
+ if result[0] == params.DATA_CLASS_NAMES["pedestrian"]:
+
+ # store rect as (x1, y1) (x2,y2) pair
+
+ rect = np.float32([x, y, x + window_size[0], y + window_size[1]])
+
+ # if we want to see progress show each detection, at each scale
+
+ if (show_scan_window_process):
+ cv2.rectangle(rect_img, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 2)
+ cv2.imshow('current scale',rect_img)
+ cv2.waitKey(40)
+
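+                            # rescale the detection window back into the
+                            # co-ordinate frame of the original (un-resized) image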
+ rect *= (1.0 / current_scale)
+ detections.append(rect)
+
+ ########################################################
+
+ # For the overall set of detections (over all scales) perform
+ # non maximal suppression (i.e. remove overlapping boxes etc).
+
+ detections = non_max_suppression_fast(np.int32(detections), 0.4)
+
+ # finally draw all the detection on the original image
+
+ for rect in detections:
+ cv2.rectangle(output_img, (rect[0], rect[1]), (rect[2], rect[3]), (0, 0, 255), 2)
+
+ cv2.imshow('detected objects',output_img)
+ key = cv2.waitKey(200) # wait 200ms
+ if (key == ord('x')):
+ break
+
+# close all windows
+
+cv2.destroyAllWindows()
+
+#####################################################################
diff --git a/hog_testing.py b/hog_testing.py
new file mode 100644
index 0000000..2930208
--- /dev/null
+++ b/hog_testing.py
@@ -0,0 +1,70 @@
+################################################################################
+
+# functionality: perform HOG/SVM testing over a specified dataset and compute the
+# resulting prediction/classification error over that same dataset, using
+# the pre-saved SVM model trained on HOG feature descriptors
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Minor portions: based on fork from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import numpy as np
+import cv2
+from utils import *
+
+################################################################################
+
+def main():
+
+ # load up the SVM stored from prior training
+
+ try:
+ svm = cv2.ml.SVM_load(params.HOG_SVM_PATH)
+ except:
+        print("Missing file - SVM!");
+ print("-- have you performed training to produce this file ?");
+ exit();
+
+ # load ** testing ** data sets in the same class order as training
+ # (here we perform patch sampling only from the centre of the +ve
+ # class and only a single sample is taken
+ # hence [0,0] sample sizes and [False,True] centre weighting flags)
+
+ print("Loading test data as a batch ...")
+
+ paths = [params.DATA_testing_path_neg, params.DATA_testing_path_pos]
+ use_centre_weighting = [False, True];
+ class_names = params.DATA_CLASS_NAMES
+ imgs_data = load_images(paths, class_names, [0,0], use_centre_weighting)
+
+ print("Computing HOG descriptors...") # for each testing image
+ start = cv2.getTickCount()
+ [img_data.compute_hog_descriptor() for img_data in imgs_data]
+ print_duration(start)
+
+ # get the example/sample HOG descriptors and class labels
+
+ samples, class_labels = get_hog_descriptors(imgs_data), get_class_labels(imgs_data)
+
+ # perform batch SVM classification over the whole set
+
+ print("Performing batch SVM classification over all data ...")
+
+ results = svm.predict(samples)
+ output = results[1].ravel()
+
+ # compute and report the error over the whole set
+
+ error = ((np.absolute(class_labels.ravel() - output).sum()) / float(output.shape[0]))
+    print("Trained SVM obtained {}% testing set error".format(round(error * 100,2)))
+    print("-- meaning the SVM got {}% of the testing examples correct!".format(round((1.0 - error) * 100,2)))
+
+################################################################################
+
+if __name__ == "__main__":
+ main()
+
+################################################################################
diff --git a/hog_training.py b/hog_training.py
new file mode 100644
index 0000000..c7fdc24
--- /dev/null
+++ b/hog_training.py
@@ -0,0 +1,125 @@
+################################################################################
+
+# functionality: perform all stages of HOG/SVM training over
+# a specified dataset and compute the resulting prediction/classification error
+# over that same dataset, having saved the SVM model to file for subsequent re-use
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Minor portions: based on fork from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import cv2
+from utils import *
+
+################################################################################
+
+def main():
+
+ ############################################################################
+ # load our training data set of images examples
+
+ program_start = cv2.getTickCount()
+
+ print("Loading images...")
+ start = cv2.getTickCount()
+
+ # N.B. specify data path names in same order as class names (neg, pos)
+
+ paths = [params.DATA_training_path_neg, params.DATA_training_path_pos]
+
+    # build a list of class names automatically from our dictionary of class (name,number) pairs
+
+ class_names = [get_class_name(class_number) for class_number in range(len(params.DATA_CLASS_NAMES))]
+
+ # specify number of sub-window samples to take from each positive and negative
+ # example image in the data set
+ # N.B. specify in same order as class names (neg, pos) - again
+
+ sampling_sizes = [params.DATA_training_sample_count_neg, params.DATA_training_sample_count_pos]
+
+    # do we want to take samples only from the centre of the example image or randomly?
+ # No - for background -ve images (first class)
+ # Yes - for object samples +ve images (second class)
+
+ sample_from_centre = [False, True];
+
+ # perform image loading
+
+ imgs_data = load_images(paths, class_names, sampling_sizes, sample_from_centre,
+ params.DATA_WINDOW_OFFSET_FOR_TRAINING_SAMPLES, params.DATA_WINDOW_SIZE);
+
+ print(("Loaded {} image(s)".format(len(imgs_data))))
+ print_duration(start)
+
+ ############################################################################
+ # perform HOG feature extraction
+
+ print("Computing HOG descriptors...") # for each training image
+ start = cv2.getTickCount()
+ [img_data.compute_hog_descriptor() for img_data in imgs_data]
+ print_duration(start)
+
+ ############################################################################
+    # train an SVM based on these HOG features
+
+ print("Training SVM...")
+ start = cv2.getTickCount()
+
+ # define SVM parameters
+
+ svm = cv2.ml.SVM_create()
+ svm.setType(cv2.ml.SVM_C_SVC) # change this for multi-class
+    svm.setKernel(params.HOG_SVM_kernel) # use specific kernel type (alternatives exist)
+
+    # compile samples (i.e. HOG feature descriptors) for each training image
+
+ samples = get_hog_descriptors(imgs_data)
+
+ # get class label for each training image
+
+ class_labels = get_class_labels(imgs_data);
+
+ # specify the termination criteria for the SVM training
+
+ svm.setTermCriteria((cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_COUNT, params.HOG_SVM_max_training_iterations, 1.e-06))
+
+ # perform auto training for the SVM which will essentially perform grid
+ # search over the set of parameters for the chosen kernel and the penalty
+ # cost term, C (N.B. trainAuto() syntax is correct as of OpenCV 3.4.x)
+
+ svm.trainAuto(samples, cv2.ml.ROW_SAMPLE, class_labels, kFold = 10, balanced = True);
+
+    # save the trained SVM to file so that we can load it again for testing / detection
+
+ svm.save(params.HOG_SVM_PATH)
+
+ ############################################################################
+    # measure performance of the SVM trained on the HOG features
+
+ # perform prediction over the set of examples we trained over
+
+ output = svm.predict(samples)[1].ravel()
+ error = (np.absolute(class_labels.ravel() - output).sum()) / float(output.shape[0])
+
+    # we are successful if our prediction error is lower than random chance
+    # e.g. for 2 class labels random chance is 1/2 = 0.5 (i.e. 50% error)
+
+ if error < (1.0 / len(params.DATA_CLASS_NAMES)):
+ print("Trained SVM obtained {}% training set error".format(round(error * 100,2)))
+        print("-- meaning the SVM got {}% of the training examples correct!".format(round((1.0 - error) * 100,2)))
+ else:
+ print("Failed to train SVM. {}% error".format(round(error * 100,2)))
+
+ print_duration(start)
+
+    print(("Finished training HOG detector. {}".format(format_time(get_elapsed_time(program_start)))))
+
+################################################################################
+
+if __name__ == '__main__':
+ main()
+
+################################################################################
diff --git a/label_histogram.png b/label_histogram.png
deleted file mode 100644
index 5c4f4cb..0000000
Binary files a/label_histogram.png and /dev/null differ
diff --git a/params.py b/params.py
index 2f87baa..d4bcfd3 100644
--- a/params.py
+++ b/params.py
@@ -1,20 +1,127 @@
+################################################################################
+
+# functionality: parameter settings for detection algorithm training/testing
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
import cv2
+import os
+
+################################################################################
+# settings for datasets in general
+
+master_path_to_dataset = "/tmp/pedestrian"; # ** need to edit this **
+
+# data location - training examples
+
+DATA_training_path_neg = os.path.join(master_path_to_dataset,"INRIAPerson/Train/neg/");
+DATA_training_path_pos = os.path.join(master_path_to_dataset,"INRIAPerson/train_64x128_H96/pos/");
+
+# data location - testing examples
+
+DATA_testing_path_neg = os.path.join(master_path_to_dataset,"INRIAPerson/Test/neg/");
+DATA_testing_path_pos = os.path.join(master_path_to_dataset,"INRIAPerson/test_64x128_H96/pos/");
-MAX_IMG_WIDTH = 320
-SVM_PATH = "ml/svm.xml"
-DICT_PATH = "ml/dictionary.npy"
-CLASS_NAMES = {
- "pos": 0,
- "neg": 1
+# size of the sliding window patch / image patch to be used for classification
+# (for larger windows sizes, for example from selective search - resize the
+# window to this size before feature descriptor extraction / classification)
+
+DATA_WINDOW_SIZE = [64, 128];
+
+# the maximum left/right, up/down offset to use when generating samples for training
+# that are centred around the centre of the image
+
+DATA_WINDOW_OFFSET_FOR_TRAINING_SAMPLES = 3;
+
+# number of sample patches to extract from each negative training example
+
+DATA_training_sample_count_neg = 10;
+
+# number of sample patches to extract from each positive training example
+
+DATA_training_sample_count_pos = 5;
+
+# class names - N.B. ordering of 0, 1 for neg/pos = order of paths
+
+DATA_CLASS_NAMES = {
+ "other": 0,
+ "pedestrian": 1
}
-# algorithm = FLANN_INDEX_KDTREE
-_index_params = dict(algorithm=0, trees=5)
-_search_params = dict(checks=50)
+################################################################################
+# settings for BOW - Bag of (visual) Word - approaches
+
+BOW_SVM_PATH = "svm_bow.xml"
+BOW_DICT_PATH = "bow_dictionary.npy"
+
+BOW_dictionary_size = 512; # in general, larger = better performance, but potentially slower
+BOW_SVM_kernel = cv2.ml.SVM_RBF; # see opencv manual for other options
+BOW_SVM_max_training_iterations = 500; # stop training after max iterations
+
+BOW_clustering_iterations = 20; # reduce to improve speed, reduce quality
+
+BOW_fixed_feature_per_image_to_use = 100; # reduce to improve speed, set to 0 for variable number
+
+# specify the type of feature points to use
+# -- refer to the OpenCV manual for options here, by default this is set to work on
+# --- all systems "out of the box" rather than using the best available option
+
+BOW_use_ORB_always = False; # set to True to always use ORB over SIFT where available
+
+try:
+
+ if BOW_use_ORB_always:
+        print("Forced use of ORB features, not SIFT")
+ raise Exception('force use of ORB')
+
+ DETECTOR = cv2.xfeatures2d.SIFT_create(nfeatures=BOW_fixed_feature_per_image_to_use) # -- requires extra modules and non-free build flag
+ # DETECTOR = cv2.xfeatures2d.SURF_create(nfeatures=BOW_fixed_feature_per_image_to_use) # -- requires extra modules and non-free build flag
+
+ # as SIFT/SURF feature descriptors are floating point use KD_TREE approach
+
+ _algorithm = 0 # FLANN_INDEX_KDTREE
+ _index_params = dict(algorithm=_algorithm, trees=5)
+ _search_params = dict(checks=50)
+
+except:
+
+ DETECTOR = cv2.ORB_create(nfeatures=BOW_fixed_feature_per_image_to_use) # check these params
+
+ #if using ORB points
+ # taken from: https://docs.opencv.org/3.3.0/dc/dc3/tutorial_py_matcher.html
+ # N.B. "commented values are recommended as per the docs,
+ # but it didn't provide required results in some cases"
+
+    # as ORB feature descriptors are binary, use (LSH) HASHING approach
+
+ _algorithm = 6 # FLANN_INDEX_LSH
+ _index_params= dict(algorithm = _algorithm,
+ table_number = 6, # 12
+ key_size = 12, # 20
+ multi_probe_level = 1) #2
+ _search_params = dict(checks=50)
+
+ if (not(BOW_use_ORB_always)):
+ print("Falling back to using features: ", DETECTOR.__class__())
+        BOW_use_ORB_always = True; # set this as a flag we can check later for which data type to use
+
+print("For BOW - features in use are: ", DETECTOR.__class__(), "(ignore for HOG)")
+
+# based on the choice and availability of feature points, set up the FLANN matcher (KD-tree or LSH index)
MATCHER = cv2.FlannBasedMatcher(_index_params, _search_params)
-DETECTOR = cv2.AKAZE_create()
-#DETECTOR = cv2.KAZE_create()
-#DETECTOR = cv2.ORB_create(nfeatures=100000, scoreType=cv2.ORB_FAST_SCORE)
-HOG_BIN_N = 16
+################################################################################
+# settings for HOG approaches
+
+HOG_SVM_PATH = "svm_hog.xml"
+
+HOG_SVM_kernel = cv2.ml.SVM_LINEAR; # see opencv manual for other options
+HOG_SVM_max_training_iterations = 500; # stop training after max iterations
+
+################################################################################
diff --git a/selective_search.py b/selective_search.py
new file mode 100644
index 0000000..fd0a218
--- /dev/null
+++ b/selective_search.py
@@ -0,0 +1,122 @@
+#####################################################################
+
+# Example : performs selective search bounding box identification
+
+# Author : Toby Breckon, toby.breckon@durham.ac.uk
+# Copyright (c) 2018 Department of Computer Science, Durham University, UK
+
+# License: MIT License
+
+# acknowledgements: based on the code and examples presented at:
+# https://www.learnopencv.com/selective-search-for-object-detection-cpp-python/
+
+#####################################################################
+
+import cv2
+import os
+import sys
+import math
+import numpy as np
+
+#####################################################################
+
+# press all the go-faster buttons - i.e. speed-up using multithreads
+
+cv2.setUseOptimized(True);
+cv2.setNumThreads(4);
+
+#####################################################################
+
+directory_to_cycle = "pedestrian/INRIAPerson/Test/pos/" # edit this
+
+#####################################################################
+
+# create Selective Search Segmentation Object using default parameters
+
+ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
+
+#####################################################################
+
+# loop all images in directory (sorted by filename)
+
+for filename in sorted(os.listdir(directory_to_cycle)):
+
+ # if it is a PNG file
+
+ if '.png' in filename:
+ print(os.path.join(directory_to_cycle, filename));
+
+ # read image from file
+
+ frame = cv2.imread(os.path.join(directory_to_cycle, filename), cv2.IMREAD_COLOR)
+
+ # start a timer (to see how long processing and display takes)
+
+ start_t = cv2.getTickCount();
+
+ # set input image on which we will run segmentation
+
+ ss.setBaseImage(frame)
+
+ # Switch to fast but low recall Selective Search method
+ ss.switchToSelectiveSearchFast()
+
+ # Switch to high recall but slow Selective Search method (slower)
+ # ss.switchToSelectiveSearchQuality()
+
+ # run selective search segmentation on input image
+ rects = ss.process()
+ print('Total Number of Region Proposals: {}'.format(len(rects)))
+
+ # number of region proposals to show
+ numShowRects = 100
+
+ # iterate over all the region proposals
+ for i, rect in enumerate(rects):
+ # draw rectangle for region proposal till numShowRects
+ if (i < numShowRects):
+ x, y, w, h = rect
+ cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 1, cv2.LINE_AA)
+ else:
+ break
+
+ # display image
+
+ cv2.imshow('Selective Search - Object Region Proposals', frame);
+
+ # stop the timer and convert to ms. (to see how long processing and display takes)
+
+ stop_t = ((cv2.getTickCount() - start_t)/cv2.getTickFrequency()) * 1000;
+
+ print('Processing time (ms): {}'.format(stop_t))
+ print()
+
+ # start the event loop - essential
+
+ # cv2.waitKey() is a keyboard binding function (argument is the time in milliseconds).
+ # It waits for specified milliseconds for any keyboard event.
+ # If you press any key in that time, the program continues.
+ # If 0 is passed, it waits indefinitely for a key stroke.
+ # (bitwise and with 0xFF to extract least significant byte of multi-byte response)
+ # here we use a wait time in ms. that takes account of processing time already used in the loop
+
+ # wait 40ms or less depending on processing time taken (i.e. 1000ms / 25 fps = 40 ms)
+
+ key = cv2.waitKey(max(40, 40 - int(math.ceil(stop_t)))) & 0xFF;
+
+ # It can also be set to detect specific key strokes by recording which key is pressed
+
+ # e.g. if user presses "x" then exit / press "f" for fullscreen
+
+ if (key == ord('x')):
+ break
+ elif (key == ord('f')):
+            cv2.setWindowProperty('Selective Search - Object Region Proposals', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN);
+
+ ss.clearImages()
+
+# close all windows
+
+cv2.destroyAllWindows()
+
+#####################################################################
diff --git a/objdetector.py b/sliding_window.py
similarity index 57%
rename from objdetector.py
rename to sliding_window.py
index b06e80b..4985659 100644
--- a/objdetector.py
+++ b/sliding_window.py
@@ -1,9 +1,40 @@
+################################################################################
+
+# functionality: functions for multi-scale sliding window (exhaustive) search
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
import numpy as np
import cv2
-from bowutils import resize_img
-from bowutils import ImageData
-import params
+################################################################################
+
+# re-size an image with respect to its aspect ratio if needed.
+# used in the multi-scale image pyramid approach
+
+def resize_img(img, width=-1, height=-1):
+ if height == -1 and width == -1:
+ raise TypeError("Invalid arguments. Width or height must be provided.")
+ h = img.shape[0]
+ w = img.shape[1]
+ if height == -1:
+ aspect_ratio = float(w) / h
+ new_height = int(width / aspect_ratio)
+ return cv2.resize(img, (width, new_height))
+ elif width == -1:
+ aspect_ratio = h / float(w)
+ new_width = int(height / aspect_ratio)
+ return cv2.resize(img, (new_width, height))
+
+################################################################################
+
+# a very basic approach to produce an image at multiple scales (i.e. a range
+# of re-sized resolutions) - an image pyramid
def pyramid(img, scale=1.5, min_size=(30, 30)):
# yield the original image
@@ -23,16 +54,22 @@ def pyramid(img, scale=1.5, min_size=(30, 30)):
# yield the next image in the pyramid
yield img
+################################################################################
+
+# generate a set of sliding window locations across the image
def sliding_window(image, window_size, step_size=8):
# slide a window across the image
- for y in xrange(0, image.shape[0], step_size):
- for x in xrange(0, image.shape[1], step_size):
+ for y in range(0, image.shape[0], step_size):
+ for x in range(0, image.shape[1], step_size):
# yield the current window
window = image[y:y + window_size[1], x:x + window_size[0]]
if not (window.shape[0] != window_size[1] or window.shape[1] != window_size[0]):
yield (x, y, window)
+################################################################################
+
+# perform basic non-maximal suppression of overlapping object detections
def non_max_suppression_fast(boxes, overlapThresh):
# if there are no boxes, return an empty list
@@ -82,7 +119,7 @@ def non_max_suppression_fast(boxes, overlapThresh):
# compute the ratio of overlap
overlap = (w * h) / area[idxs[:last]]
- # delete all indexes from the index list that have
+ # delete all indexes from the index list that have a significant overlap
idxs = np.delete(idxs, np.concatenate(([last],
np.where(overlap > overlapThresh)[0])))
@@ -90,55 +127,4 @@ def non_max_suppression_fast(boxes, overlapThresh):
# integer data type
return boxes[pick].astype("int")
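+
+# example usage (a minimal sketch - boxes are [x1, y1, x2, y2]; the two boxes
+# below overlap heavily, so only one should survive the suppression):
+#
+#   boxes = np.int32([[10, 10, 60, 60], [12, 12, 62, 62]])
+#   remaining = non_max_suppression_fast(boxes, overlapThresh=0.4)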
-
-image = cv2.imread("test/pos/test17.jpg")
-window_size = (640, 480)
-
-dictionary = np.load(params.DICT_PATH)
-svm = cv2.ml.SVM_load(params.SVM_PATH)
-
-detections = []
-current_scale = -1
-for resized in pyramid(image, scale=1.25):
- if current_scale == -1:
- current_scale = 1
- else:
- current_scale /= 1.25
- rect_img = resized.copy()
- # loop over the sliding window for each layer of the pyramid
- #step = (resized.shape[0] / window_size[0]) * 32
- step = resized.shape[0] / 16
- if step > 0:
- for (x, y, window) in sliding_window(resized, window_size, step_size=step):
-
- img_data = ImageData(window)
- img_data.compute_descriptors()
-
- if img_data.descriptors is not None:
- img_data.generate_bow_hist(dictionary)
-
- results = svm.predict(np.float32([img_data.features]))
- output = results[1].ravel()[0]
-
- if output == 0.0:
- rect = np.float32([x, y, x + window_size[0], y + window_size[1]])
- rect *= (1.0 / current_scale)
- detections.append(rect)
- cv2.rectangle(rect_img, (x, y), (x + window_size[0], y + window_size[1]), (0, 0, 255), 2)
-
- clone = rect_img.copy()
- cv2.rectangle(clone, (x, y), (x + window_size[0], y + window_size[1]), (0, 255, 0), 2)
- """if clone.shape[0] > params.MAX_IMG_WIDTH:
- clone = resize_img(clone, width=640)"""
- cv2.imshow("Window", clone)
- cv2.waitKey(1)
-
-detections = non_max_suppression_fast(np.int32(detections), 0.4)
-detections = np.int32(detections)
-rect_img = image.copy()
-for rect in detections:
- cv2.rectangle(rect_img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 0, 255), 2)
-
-if len(detections)>0:
- cv2.imshow("Window", resize_img(rect_img, 640))
- cv2.waitKey(0)
+################################################################################
diff --git a/train.py b/train.py
deleted file mode 100644
index 1a3aa86..0000000
--- a/train.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import cv2
-from bowutils import *
-
-
-def generate_dictionary(imgs_data, dictionary_size):
- # Extracting descriptors
- desc = stack_array([img_data.descriptors for img_data in imgs_data])
- # important, cv2.kmeans only accepts type32 descriptors
- desc = np.float32(desc)
-
- # Clustering
- criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.01)
- flags = cv2.KMEANS_PP_CENTERS
- # desc is a type32 numpy array of vstacked descriptors
- compactness, labels, dictionary = cv2.kmeans(desc, dictionary_size, None, criteria, 1, flags)
- np.save(params.DICT_PATH, dictionary)
-
- return dictionary
-
-
-def main():
- dictionary_size = 512
- # Loading images
- """imgs_data = [] # type: list[ImageData]
-
- pos_imgs_path = "train/pos"
- neg_imgs_path = "train/neg"
-
- print("Loading images...")
-
- # imreads returns a list of all images in that directory
- pos_imgs = imreads(pos_imgs_path)
- neg_imgs = imreads(neg_imgs_path)
-
- img_count = 0
- for img in pos_imgs:
- img_data = ImageData(img)
- img_data.set_class("pos")
- imgs_data.insert(img_count, img_data)
- img_count += 1
-
- for img in neg_imgs:
- img_data = ImageData(img)
- img_data.set_class("neg")
- imgs_data.insert(img_count, img_data)
- img_count += 1"""
-
- program_start = cv2.getTickCount()
-
- print("Loading images...")
- start = cv2.getTickCount()
- paths = ["train/pos", "train/neg"]
- class_names = ["pos", "neg"]
- imgs_data = get_imgs_data(paths, class_names)
- print("Loaded {} image(s)".format(len(imgs_data)))
- print_duration(start)
-
- print("Computing descriptors...")
- start = cv2.getTickCount()
- [img_data.compute_descriptors() for img_data in imgs_data]
- print_duration(start)
-
- print("Clustering...")
- start = cv2.getTickCount()
- dictionary = generate_dictionary(imgs_data, dictionary_size)
- print_duration(start)
-
- print("Generating histograms...")
- start = cv2.getTickCount()
- [img_data.generate_bow_hist(dictionary) for img_data in imgs_data]
- print_duration(start)
-
- print imgs_data[0].hog().shape
- print imgs_data[0].features.shape
-
- print("Training SVM...")
- start = cv2.getTickCount()
- # Begin training SVM
- svm = cv2.ml.SVM_create()
- svm.setType(cv2.ml.SVM_C_SVC)
- svm.setKernel(cv2.ml.SVM_LINEAR)
- svm.setC(2.67)
- svm.setGamma(5.383)
-
- # Compile samples
- samples = get_samples(imgs_data)
- responses = np.int32([img_data.response for img_data in imgs_data])
-
- svm.setTermCriteria((cv2.TERM_CRITERIA_COUNT, 1000, 1.e-06))
- svm.train(samples, cv2.ml.ROW_SAMPLE, responses)
- svm.save(params.SVM_PATH)
-
- output = svm.predict(samples)[1].ravel()
- error = (np.absolute(responses.ravel() - output).sum()) / float(output.shape[0])
-
- if error < 0.2:
- print "Successfully trained SVM with {}% error".format(error * 100)
- else:
- print "Failed to train SVM. {}% error".format(error * 100)
- print_duration(start)
-
- print("Finished training BOW detector. {}".format(format_time(get_elapsed_time(program_start))))
-
-
-if __name__ == '__main__':
- main()
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..4de51d1
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,331 @@
+################################################################################
+
+# functionality: utility functions for BOW and HOG detection algorithms
+
+# This version: (c) 2018 Toby Breckon, Dept. Computer Science, Durham University, UK
+# License: MIT License
+
+# Origin acknowledgements: forked from https://github.com/siphomateke/PyBOW
+
+################################################################################
+
+import os
+import numpy as np
+import cv2
+import params
+import math
+import random
+
+################################################################################
+# global flags to facilitate output of additional info per stage/function
+
+show_additional_process_information = False;
+show_images_as_they_are_loaded = False;
+show_images_as_they_are_sampled = False;
+
+################################################################################
+
+# timing information - for training
+# - helper functions for timing code execution
+
+def get_elapsed_time(start):
+ return (cv2.getTickCount() - start) / cv2.getTickFrequency()
+
+
+def format_time(time):
+    if time < 60.0:
+        time_str = "{}s".format(round(time, 1))
+    else:
+        minutes = int(time / 60.0)
+        time_str = "{}m : {}s".format(minutes, round(time % 60, 2))
+    return time_str
+
+
+def print_duration(start):
+ time = get_elapsed_time(start)
+ print(("Took {}".format(format_time(time))))
+
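+# example usage (a minimal sketch):
+#
+#   start = cv2.getTickCount()
+#   ...                       # some processing here
+#   print_duration(start)     # prints e.g. "Took 3.2s"
+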
+################################################################################
+
+# reads all the images in a given folder path and returns them as a list
+
+# for obvious reasons this will break with a very large dataset as you will run
+# out of memory - so an alternative approach may be required in that case
+
+def read_all_images(path):
+ images_path = [os.path.join(path, f) for f in os.listdir(path)]
+ images = []
+ for image_path in images_path:
+
+        # add in a check to skip non jpg or png (lower case) named files
+        # as some OSs helpfully create hidden extra files (e.g. .DS_Store on
+        # macOS, Thumbs.db on Windows) when you browse image folders - which
+        # are then not images when we try to load them
+
+        if (image_path.endswith('.png') or image_path.endswith('.jpg')):
+ img = cv2.imread(image_path)
+ images.append(img)
+ if show_additional_process_information:
+ print("loading file - ", image_path);
+ else:
+ if show_additional_process_information:
+ print("skipping non PNG/JPG file - ", image_path);
+
+ return images
+
+################################################################################
+
+# stack an array of items via basic Python data manipulation
+
+def stack_array(arr):
+ stacked_arr = np.array([])
+ for item in arr:
+ # Only stack if it is not empty
+ if len(item) > 0:
+ if len(stacked_arr) == 0:
+ stacked_arr = np.array(item)
+ else:
+ stacked_arr = np.vstack((stacked_arr, item))
+ return stacked_arr
+
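+# example usage (a minimal sketch - empty items are skipped when stacking):
+#
+#   stack_array([np.ones((2, 3)), np.array([]), np.zeros((1, 3))])
+#   # -> a single 3 x 3 stacked array
+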
+################################################################################
+
+# transform between class numbers (i.e. codes) - {0, 1, 2, ...N} and
+# names {dog, cat, cow, ...} - used in training and testing
+
+def get_class_number(class_name):
+ return params.DATA_CLASS_NAMES.get(class_name, 0)
+
+def get_class_name(class_code):
+ for name, code in params.DATA_CLASS_NAMES.items():
+ if code == class_code:
+ return name
+
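+
+# example usage (a minimal sketch - assumes, for illustration only, that
+# params.DATA_CLASS_NAMES = {"pos": 1, "neg": 0} - see params.py):
+#
+#   get_class_number("pos")   # -> 1
+#   get_class_name(0)         # -> "neg"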
+################################################################################
+
+# image data class object that contains a single image, its descriptors and
+# its bag of (visual) words histogram
+
+class ImageData(object):
+ def __init__(self, img):
+ self.img = img
+ self.class_name = ""
+ self.class_number = None
+
+ # use default parameters for construction of HOG
+ # examples of non-default parameter use here:
+ # https://www.programcreek.com/python/example/84776/cv2.HOGDescriptor
+
+ self.hog = cv2.HOGDescriptor(); # default is 64 x 128
+ self.hog_descriptor = np.array([])
+ self.bow_descriptors = np.array([])
+
+
+ def set_class(self, class_name):
+ self.class_name = class_name
+ self.class_number = get_class_number(self.class_name)
+ if show_additional_process_information:
+ print("class name : ", class_name, " - ", self.class_number);
+
+ def compute_hog_descriptor(self):
+
+ # generate the HOG descriptors for a given image
+
+ img_hog = cv2.resize(self.img, (params.DATA_WINDOW_SIZE[0], params.DATA_WINDOW_SIZE[1]), interpolation = cv2.INTER_AREA)
+
+ self.hog_descriptor = self.hog.compute(img_hog)
+
+ if self.hog_descriptor is None:
+ self.hog_descriptor = np.array([])
+
+ if show_additional_process_information:
+ print("HOG descriptor computed - dimension: ", self.hog_descriptor.shape);
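+
+        # note: for the (default) 64 x 128 HOG window this descriptor is a
+        # 3780 x 1 vector (7 x 15 blocks x 4 cells x 9 orientation bins),
+        # assuming params.DATA_WINDOW_SIZE matches that 64 x 128 default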
+
+ def compute_bow_descriptors(self):
+
+ # generate the feature descriptors for a given image
+
+ self.bow_descriptors = params.DETECTOR.detectAndCompute(self.img, None)[1]
+
+ if self.bow_descriptors is None:
+ self.bow_descriptors = np.array([])
+
+ if show_additional_process_information:
+ print("# feature descriptors computed - ", len(self.bow_descriptors));
+
+ def generate_bow_hist(self, dictionary):
+ self.bow_histogram = np.zeros((len(dictionary), 1))
+
+        # generate the bow histogram of feature occurrence from the descriptors
+
+ if (params.BOW_use_ORB_always):
+ # FLANN matcher with ORB needs dictionary to be uint8
+ matches = params.MATCHER.match(self.bow_descriptors, np.uint8(dictionary));
+ else:
+ # FLANN matcher with SIFT/SURF needs descriptors to be type32
+ matches = params.MATCHER.match(np.float32(self.bow_descriptors), dictionary)
+
+ for match in matches:
+ # Get which visual word this descriptor matches in the dictionary
+ # match.trainIdx is the visual_word
+ # Increase count for this visual word in histogram (known as hard assignment)
+ self.bow_histogram[match.trainIdx] += 1
+
+ # Important! - normalize the histogram to L1 to remove bias for number
+ # of descriptors per image or class (could use L2?)
+
+ self.bow_histogram = cv2.normalize(self.bow_histogram, None, alpha=1, beta=0, norm_type=cv2.NORM_L1);
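+
+        # (after L1 normalisation each histogram bin holds the fraction of
+        # this image's descriptors assigned to that visual word - bins sum to 1)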
+
+################################################################################
+
+# generates a set of random sample patches of a specified size from a given
+# image, with an optional flag to sample only patches centred around the
+# centre of the image (+/- a given offset)
+
+def generate_patches(img, sample_patches_to_generate=0, centre_weighted=False,
+ centre_sampling_offset=10, patch_size=(64,128)):
+
+ patches = [];
+
+    # if no patches are specified just return the original image
+
+ if (sample_patches_to_generate == 0):
+ return [img];
+
+ # otherwise generate N sub patches
+
+ else:
+
+ # get all heights and widths
+
+ img_height, img_width, _ = img.shape;
+ patch_height = patch_size[1];
+ patch_width = patch_size[0];
+
+ # iterate to find up to N patches (0 -> N-1)
+
+ for patch_count in range(sample_patches_to_generate):
+
+ # if we are using centre weighted patches, first grab the centre patch
+ # from the image as the first sample then take the rest around centre
+
+ if (centre_weighted):
+
+                # compute a patch location centred on the centre of the image
+
+ patch_start_h = math.floor(img_height / 2) - math.floor(patch_height / 2);
+ patch_start_w = math.floor(img_width / 2) - math.floor(patch_width / 2);
+
+ # for the first sample we'll just keep the centre one, for any
+ # others take them from the centre position +/- centre_sampling_offset
+ # in both height and width position
+
+ if (patch_count > 0):
+ patch_start_h = random.randint(patch_start_h - centre_sampling_offset, patch_start_h + centre_sampling_offset);
+ patch_start_w = random.randint(patch_start_w - centre_sampling_offset, patch_start_w + centre_sampling_offset);
+
+ # print("centred weighted path")
+
+            # else get patches randomly from anywhere in the image
+
+ else:
+
+ # print("non centred weighted path")
+
+ # randomly select a patch, ensuring we stay inside the image
+
+ patch_start_h = random.randint(0, (img_height - patch_height));
+ patch_start_w = random.randint(0, (img_width - patch_width));
+
+ # add the patch to the list of patches
+
+ patch = img[patch_start_h:patch_start_h + patch_height, patch_start_w:patch_start_w + patch_width]
+
+ if (show_images_as_they_are_sampled):
+ cv2.imshow("patch", patch);
+ cv2.waitKey(5);
+
+ patches.insert(patch_count, patch);
+
+ return patches;
+
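+# example usage (a minimal sketch - img is a loaded image larger than the
+# 64 x 128 patch size; takes 5 patches around the image centre, +/- 10 pixels):
+#
+#   patches = generate_patches(img, sample_patches_to_generate=5,
+#                              centre_weighted=True, centre_sampling_offset=10)
+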
+################################################################################
+
+# add images from a specified path to the dataset, labelling them with the
+# appropriate class/type name; optionally sample up to N patches of a specified
+# size from each image, with a flag for taking them from the centre of the
+# image only (+/- an offset in pixels)
+
+def load_image_path(path, class_name, imgs_data, samples=0, centre_weighting=False, centre_sampling_offset=10, patch_size=(64,128)):
+
+ # read all images at location
+
+ imgs = read_all_images(path)
+
+ img_count = len(imgs_data)
+ for img in imgs:
+
+ if (show_images_as_they_are_loaded):
+ cv2.imshow("example", img);
+ cv2.waitKey(5);
+
+ # generate up to N sample patches for each sample image
+ # if zero samples is specified then generate_patches just returns
+ # the original image (unchanged, unsampled) as [img]
+
+ for img_patch in generate_patches(img, samples, centre_weighting, centre_sampling_offset, patch_size):
+
+ if show_additional_process_information:
+ print("path: ", path, "class_name: ", class_name, "patch #: ", img_count)
+ print("patch: ", patch_size, "from centre: ", centre_weighting, "with offset: ", centre_sampling_offset)
+
+ # add each image patch to the data set
+
+ img_data = ImageData(img_patch)
+ img_data.set_class(class_name)
+ imgs_data.insert(img_count, img_data)
+ img_count += 1
+
+ return imgs_data
+
+################################################################################
+
+# load image data from specified paths
+
+def load_images(paths, class_names, sample_set_sizes, use_centre_weighting_flags, centre_sampling_offset=10, patch_size=(64,128)):
+ imgs_data = [] # type: list[ImageData]
+
+ # for each specified path and corresponding class_name and required number
+ # of samples - add them to the data set
+
+ for path, class_name, sample_count, centre_weighting in zip(paths, class_names, sample_set_sizes, use_centre_weighting_flags):
+ load_image_path(path, class_name, imgs_data, sample_count, centre_weighting, centre_sampling_offset, patch_size)
+
+ return imgs_data
+
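+# example usage (a minimal sketch - the paths, class names and sample counts
+# are illustrative assumptions: 5 centre-weighted patches per "pos" image,
+# each "neg" image used whole since 0 samples means "use the image as-is"):
+#
+#   imgs_data = load_images(["train/pos", "train/neg"], ["pos", "neg"],
+#                           [5, 0], [True, False])
+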
+################################################################################
+
+# return the global set of bow histograms for the data set of images
+
+def get_bow_histograms(imgs_data):
+
+ samples = stack_array([[img_data.bow_histogram] for img_data in imgs_data])
+ return np.float32(samples)
+
+################################################################################
+
+# return the global set of hog descriptors for the data set of images
+
+def get_hog_descriptors(imgs_data):
+
+ samples = stack_array([[img_data.hog_descriptor] for img_data in imgs_data])
+ return np.float32(samples)
+
+################################################################################
+
+# return the global set of numerical class labels for the data set of images
+
+def get_class_labels(imgs_data):
+ class_labels = [img_data.class_number for img_data in imgs_data]
+ return np.int32(class_labels)
+
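+# example usage (a minimal sketch of feeding these into cv2.ml SVM training,
+# assuming imgs_data has been loaded and its histograms / descriptors computed):
+#
+#   svm = cv2.ml.SVM_create()
+#   samples = get_bow_histograms(imgs_data)   # or get_hog_descriptors(imgs_data)
+#   labels = get_class_labels(imgs_data)
+#   svm.train(samples, cv2.ml.ROW_SAMPLE, labels)
+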
+################################################################################