#!/usr/bin/env python
import os
import cv
import numpy as np
import pyflann as flann
import unittest
import re
import makeMaze as mm
#rootFolder = "/Users/alansaul/Work/CompSci/SURE/hclearn_alan/"
rootFolder = "/Users/mathew/work/hclearn/"
#This is the folder used by makeSURFRepresentation to create the SURF features for learnWeights
prefixFolder = rootFolder + "DCSCourtyard/"
class SURFExtractor(object):
directions = ['N','E','S','W']
#Stores the files, descriptors and featuresDescDict dictionaries
#Given a folder it will extract descriptors, merge them and generate the featureVectors for each image, and store them
#Requires the folder name
def __init__(self, folderName, maxFeaturesForMerging=10, maxFeaturesForMatching=20, mergeThreshold=0.15, matchThreshold=0.2):
self.folder = folderName
self.maxFeaturesForMerging = maxFeaturesForMerging
self.maxFeaturesForMatching = maxFeaturesForMatching
self.mergeThreshold = mergeThreshold
self.matchThreshold = matchThreshold
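    #A minimal usage sketch (hypothetical path; assumes images laid out as <folder>/<location>/<N|E|S|W>/*.jpg):
    #    se = SURFExtractor("/path/to/images/")
    #    se.generateFeatureRepresentations()        #extract, merge and match in one call
    #    vec = se.featuresDescDict[(0, 'N')][0]     #boolean feature vector of the first image at location 0 facing North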
#Extract files by name prefix, store in dictionary
def extractFilesByPrefix(self, folder):
self.files = {}
#Key should be of form ((x,y),dir)
if os.path.exists(folder):
for file in os.listdir(folder):
parts = re.split("[-,\.]", file)
                #Test that it is (NUM-NUM-DIRECTION-whatever); an example parse follows this method
                if len(parts) >= 3 and parts[0].isdigit() and parts[1].isdigit() and parts[2].isalpha() and len(parts[2]) == 1:
                    if parts[2] in self.directions:
                        key = ((int(parts[0]), int(parts[1])), parts[2])
                        #If it doesn't already exist, make this key
                        if key not in self.files:
                            self.files[key] = []
                        fullFilePath = os.path.join(folder, file)
                        #Add the new file onto the end of the key's list (since there can be multiple images for one direction)
                        self.files[key].append(fullFilePath)
                    else:
                        raise NameError("Heading is: %s\nit should be N, S, E or W" % parts[2])
                else:
                    print(folder)
                    print(file)
                    #raise NameError("File: %s\ndoes not fit naming convention INT-INT-HEADING" % file)
        else:
            raise NameError("Folder does not exist")
#Extract files by folders subfolders, store in dictionary
def extractFilesByFolder(self, folder):
#for subdir in os.listdir(folder):
self.files = {}
#Since each location is named after a number we can do this
locCount = 0
#For each location incrementally (locations cannot have gaps!) check to see if it exists
while os.path.exists(os.path.join(folder,str(locCount))):
#If it does exist then make this path
locsubdir = os.path.join(folder,str(locCount))
#For each direction (N,E,S,W) check to see if the subdirectory exists and whether it has any images in it
for direction in self.directions:
dirsubdir = os.path.join(locsubdir,direction)
if os.path.exists(dirsubdir) and (len(os.listdir(dirsubdir)) > 0):
self.files[(locCount,direction)] = []
#For each file in the direction subdirectory, add a tuple key (locationNum, 'Direction') with the file as the value
for file in os.listdir(dirsubdir):
fullFilePath = os.path.join(dirsubdir,file)
self.files[(locCount,direction)].append(fullFilePath)
#Go onto the next location, (they must run incrementally)
locCount += 1
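    #Example layout (matching the test fixtures below): <folder>/0/N/Dark.jpg and <folder>/2/E/Day.jpg
    #produce keys (0,'N') and (2,'E'), each holding a list of full file paths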
    #Extract the maxNumOfDescriptors best descriptors per image and store them in a dictionary
def extractDescriptors(self, files, maxNumOfDescriptors):
self.descriptors = {}
for (loc, dir) in files.keys():
#print("\n%d, %s key" % (loc,dir))
self.descriptors[(loc,dir)] = []
for image in files[(loc,dir)]:
cvIm = cv.LoadImageM(image, cv.CV_LOAD_IMAGE_GRAYSCALE)
imFeatures = extractSURFFeatures(cvIm,0,maxNumOfDescriptors)
self.descriptors[(loc,dir)].append(imFeatures)
    #Merge the recently extracted descriptors down into a smaller subset that describes many of them
def mergeFeatures(self, random=0):
#Make a subset of all the descriptors
if random:
#TODO: Add random selection of descriptors rather than just first images of each direction
#Randomly choose 'random' features from the dictionary
pass
else:
#Take the first picture from each loc, dir pair
imageDescs = self.getFirstDescs()
print("Before:\n%d,%d" % (imageDescs.shape))
#merge the features within range
self.mergedFeatures = mergeFeatures(imageDescs, self.mergeThreshold)
print("After:\n%d,%d" % (self.mergedFeatures.shape))
    #Get all of the descriptors for the first image of each (loc, dir) key
    def getFirstDescs(self):
        #Check that the dictionary isn't empty...
        if self.descriptors:
            #Stack the first image's descriptors from every key into a single array
            return np.vstack([descs[0] for descs in self.descriptors.values() if len(descs) > 0])
        else:
            raise NameError('Dictionary of descriptors is empty; it must contain items in order to get the first ones!')
#Go through each location, then each picture, and generate their boolean feature vector
def generateFeatureVectors(self):
#Given all the real descriptors, calculate their boolean vector form
self.flann = trainFLANN(self.mergedFeatures)
numOfFeatures = len(self.mergedFeatures)
self.featuresDescDict = {}
for (loc, dir) in self.descriptors.keys():
self.featuresDescDict[(loc,dir)] = []
for imageDescs in self.descriptors[(loc,dir)]:
#Calculate the feature vector for this image and add it to the list for this key
featureVec = calculateFeatureVector(self.flann, imageDescs, self.matchThreshold, numOfFeatures)
self.featuresDescDict[(loc,dir)].append(featureVec)
#Generate featureVectors
def generateFeatureRepresentations(self, byFolder=1):
if self.folder:
#First extract the files
if byFolder:
self.extractFilesByFolder(self.folder)
else:
self.extractFilesByPrefix(self.folder)
#Extract the descriptors of all top X features
self.extractDescriptors(self.files, self.maxFeaturesForMerging)
#print("Descriptors before merge:\n%s" % self.descriptors)
print("Merge threshold:\n%s" % self.mergeThreshold)
#Merge features so that we only have a small subset which are used to describe each image
#Unless otherwise stated this will use the FIRST image of each direction ONLY to train with
self.mergeFeatures()
#print("Merged features:\n%s" % self.mergedFeatures)
#Generate features for each image
#Get more features than we used for merging to be used for matching:
self.extractDescriptors(self.files, self.maxFeaturesForMatching)
self.generateFeatureVectors()
else:
raise NameError("Folder to select features from has not been provided")
class TestExtractor(unittest.TestCase):
def setUp(self):
self.folder = rootFolder + "folderTest/"
self.prefixFolder = rootFolder + "prefixFolderTest/"
self.fileDict = {(0,'N'): [self.folder+"0/N/Dark.jpg"], \
(0,'S'): [self.folder+"0/S/Light.jpg"], \
(1,'E'): [self.folder+"1/E/Dark.jpg"], \
(2,'E'): [self.folder+"2/E/Dark.jpg", self.folder+"2/E/Day.jpg"], \
(2,'S'): [self.folder+"2/S/Night.jpg"]}
self.pfileDict = {
((3,4),'S'): [self.prefixFolder+"3-4-S.jpg"], \
((2,3),'W'): [self.prefixFolder+"2-3-W-Dark.jpg", self.prefixFolder+"2-3-W-Light.jpg"], \
((3,5),'S'): [self.prefixFolder+"3-5-S.jpg"], \
((5,3),'E'): [self.prefixFolder+"5-3-E.jpg"], \
((3,3),'N'): [self.prefixFolder+"3-3-N.jpg"], \
((3,0),'S'): [self.prefixFolder+"3-0-S.jpg"], \
((0,3),'E'): [self.prefixFolder+"0-3-E-Dark.jpg", self.prefixFolder+"0-3-E-Light.jpg"], \
}
self.extractor = SURFExtractor(self.folder)
def test_extractByPrefix(self):
self.extractor.extractFilesByPrefix(self.prefixFolder)
self.assertTrue(compareDicts(self.extractor.files, self.pfileDict))
self.assertEqual(self.extractor.files, self.pfileDict)
def test_extractByFolder(self):
self.extractor.extractFilesByFolder(self.folder)
self.assertTrue(compareDicts(self.extractor.files, self.fileDict))
self.assertEqual(self.extractor.files, self.fileDict)
def test_extractDescriptors(self):
numOfFeaturesToExtract = 10
        #Difficult to test as I can't hand-write descriptors!
self.extractor.extractDescriptors(self.fileDict, numOfFeaturesToExtract)
#Structure of dictionary should be
# im1d1 im1d2 im1d3 im1d4 im2d1 im2d2 im2d3 im2d4
#{(2,'E'): [[sjefn,senjf,sefee,sjenfe],[sjenf,sefee,sjenf,sjenf]],
#...
#Less of a test more of a sanity check
self.assertEqual(len(self.fileDict.keys()), len(self.extractor.descriptors.keys()))
#Check length and type of descriptors
descriptor0N = self.extractor.descriptors[(0,'N')]
self.assertEqual(len(descriptor0N), 1)
self.assertEqual(len(descriptor0N[0]), numOfFeaturesToExtract)
self.assertEqual(len(descriptor0N[0][0]), 64)
descriptor2E = self.extractor.descriptors[(2,'E')]
self.assertEqual(len(descriptor2E), 2)
self.assertEqual(len(descriptor2E[0]), numOfFeaturesToExtract)
self.assertEqual(len(descriptor2E[0][0]), 64)
self.assertEqual(len(descriptor2E[1]), numOfFeaturesToExtract)
self.assertEqual(len(descriptor2E[1][5]), 64)
def setupMerging(self):
self.extractor.descriptors = {(0,'N'): [np.array([0,0], dtype=np.float32)], \
(0,'S'): [np.array([0,1], dtype=np.float32)], \
(1,'E'): [np.array([0,3], dtype=np.float32)], \
(2,'E'): [np.array([1,1], dtype=np.float32), np.array([1.2, 1.2], dtype=np.float32)], \
(2,'S'): [np.array([0,3.3], dtype=np.float32)]}
#With a threshold of 0.3, we should get descriptors [0,0],[0,1],[0,3.15],[1.1,1.1] if all images are used,
#and the same but [1.1,1.1] being [1,1] if only the first images are used
self.featuresDescDict = {(0,'N'): [np.array([0,0,0,1,0], dtype=np.int8)], \
(0,'S'): [np.array([0,0,1,0,0], dtype=np.int8)], \
(1,'E'): [np.array([1,0,0,0,0], dtype=np.int8)], \
(2,'E'): [np.array([0,0,0,0,1], dtype=np.int8), np.array([0,0,0,0,1], dtype=np.int8)], \
(2,'S'): [np.array([0,1,0,0,0], dtype=np.int8)]}
def test_getFirstDescs(self):
self.setupMerging()
firstDescs = self.extractor.getFirstDescs()
        #Careful, dictionaries aren't ordered...
stackedDescs = np.array([[0,3],[0,3.3],[0,1],[0,0],[1,1]])
#print("firstDescs:\n%s" % firstDescs)
#print("stackedDescs:\n%s" % stackedDescs)
self.assertTrue(np.allclose(stackedDescs, firstDescs))
def test_mergeFeatures(self):
self.setupMerging()
self.extractor.mergeThreshold = 0.3
self.extractor.mergeFeatures()
        #FLANN returns squared distances: points 0.3 apart are 0.09 apart in squared distance, which is what the threshold is compared against
correctMergedFeatures = np.array([[0,3.15],[0,1],[0,0],[1,1]])
self.assertTrue(np.allclose(self.extractor.mergedFeatures, correctMergedFeatures))
        #[0,3] and [0,3.3] are no longer within range, so they should not be merged
newCorrectMergedFeatures = np.array([[0,3],[0,3.3],[0,1],[0,0],[1,1]])
self.extractor.mergeThreshold = 0.01
self.extractor.mergeFeatures()
self.assertTrue(np.allclose(self.extractor.mergedFeatures, newCorrectMergedFeatures))
def test_generateFeatureVectors(self):
self.setupMerging()
self.extractor.mergedFeatures = self.extractor.getFirstDescs()
self.extractor.matchThreshold = 0.3
self.extractor.generateFeatureVectors()
self.assertTrue(compareDicts(self.extractor.featuresDescDict, self.featuresDescDict))
self.extractor.matchThreshold = 0.01
self.extractor.generateFeatureVectors()
        #Should have been some failures due to matches not being made (match threshold too low, so they're not counted as a match)
self.assertFalse(compareDicts(self.extractor.featuresDescDict, self.featuresDescDict))
def test_generateFeatureRepresentations(self):
self.setupMerging()
#self.folder = self.prefixFolder
newExtractor = SURFExtractor(self.folder)
#FIX: If two photos are exactly the same, this will fail as they will be merged!
newExtractor.mergeThreshold = 0
newExtractor.matchThreshold = 0.01
newExtractor.generateFeatureRepresentations()
"""
#Count how many images there are...
numOfImages = 0
for value in newExtractor.featuresDescDict.values():
numOfImages += len(value)
"""
#If we are using firstDesc, it only gets the first images of each key!
numOfImages = len(newExtractor.featuresDescDict.keys())
#Since none are merged, there should be numOfImages*numOfFeaturesForMerge
numOfFeaturesWithoutMerge = newExtractor.maxFeaturesForMerging*numOfImages
        #Test this via the length of the first image's feature vector for the first two keys
numOfFeaturesFirstKey = len(newExtractor.featuresDescDict[newExtractor.featuresDescDict.keys()[0]][0])
numOfFeaturesSecondKey = len(newExtractor.featuresDescDict[newExtractor.featuresDescDict.keys()[1]][0])
#Number of features going into merge
self.assertEqual(numOfFeaturesFirstKey, numOfFeaturesWithoutMerge)
self.assertEqual(numOfFeaturesSecondKey, numOfFeaturesWithoutMerge)
newExtractor = SURFExtractor(self.folder)
newExtractor.mergeThreshold = 0.05
newExtractor.matchThreshold = 0.05
newExtractor.generateFeatureRepresentations()
        #Test this via the length of the first image's feature vector for the first two keys
numOfFeaturesFirstKey = len(newExtractor.featuresDescDict[newExtractor.featuresDescDict.keys()[0]][0])
numOfFeaturesSecondKey = len(newExtractor.featuresDescDict[newExtractor.featuresDescDict.keys()[1]][0])
#Number of features going into merge
self.assertLess(numOfFeaturesFirstKey, numOfFeaturesWithoutMerge)
self.assertLess(numOfFeaturesSecondKey, numOfFeaturesWithoutMerge)
def test_mergeQuality(self):
newExtractor = SURFExtractor(self.folder)
newExtractor.mergeThreshold = 0.15
newExtractor.matchThreshold = 0.35
newExtractor.generateFeatureRepresentations()
def test_merge_senses_and_features(self):
N_mazeSize = 3
[dictSenses, dictAvailableActions, dictNext] = mm.makeMaze(N_mazeSize) #make maze, including ideal percepts at each place
print("TESTING\n%s"%dictSenses)
def test_makeSURFRepresentation(self):
#Should simply call the generate feature representation method!
sdict = makeSURFRepresentation()
print("Extracted features per image:\n")
for key in sdict.keys():
for featureVec in sdict[key]:
print("image key %s, features:\n%s" % ((key),featureVec))
def test_findSurfs(self):
#Test that given an x y and direction from a dictionary it is possible to find the surf feature!
adict = {((1,4),'N'): [np.array([1,0])], ((3,4),'E'): [np.array([1,1]), np.array([0,1])]}
x = 3
y = 4
ith = 0
sf34E = mm.findSurfs(x,y,ith,adict)
self.assertTrue(np.all(sf34E == np.array([1,1])))
x = 1
y = 4
ith = 1
sf14N = mm.findSurfs(x,y,ith,adict)
self.assertTrue(np.all(sf14N == np.array([1,0])))
x = 1
y = 3
ith = 1
sf13N = mm.findSurfs(x,y,ith,adict)
self.assertTrue(np.all(sf13N == np.array([0,0])))
#self.assertRaises(NameError, findSurfs, x,y,ith,adict)
def test_Senses_init(self):
#Create a Sense with a known dictionary and see if the results are right
pass
def makeSURFRepresentation():
#Make all things SURFy (i.e. the feature dictionary) and give it back to makeMaze
se = SURFExtractor(prefixFolder)
#FIX: If two photos are exactly the same, this will fail as they will be merged!
se.mergeThreshold = 0.06 #0.05
se.matchThreshold = 0.2
se.generateFeatureRepresentations(0)
print("FEATUREDESCDICT: %s" % se.featuresDescDict)
return se.featuresDescDict
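#Hedged usage sketch: the maze code is expected to look the returned features up by location, e.g.
#    sdict = makeSURFRepresentation()
#    surfs = mm.findSurfs(3, 4, 0, sdict)    #as exercised in test_findSurfs above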
def compareDicts(dict1,dict2):
correctMatches=[]
for key in dict1:
for itemnum, image in enumerate(dict1[key]):
correctMatches.append(np.all(image == dict2[key][itemnum]))
return np.all(np.array(correctMatches))
def extractSURFFeatures(image,draw, N=7):
    #Extract SURF features; the params tuple is (extended, hessianThreshold, nOctaves, nOctaveLayers)
(keypoints, descriptors) = cv.ExtractSURF(image, None, cv.CreateMemStorage(), (0, 100, 3, 2) )
    #Keep only the descriptors whose keypoints have the largest hessian response (keypoint[4] is the hessian)
    sortedDescriptorListPairs = [descriptor for keypoint, descriptor in sorted(zip(keypoints, descriptors), key=lambda pair: pair[0][4], reverse=True)]
#print("Num of keypoints: %d Num of descriptors: %d " % (len(keypoints), len(descriptors)))
if draw:
for ((x, y), laplacian, size, dir, hessian) in keypoints:
#print "x=%d y=%d laplacian=%d size=%d dir=%f hessian=%f" % (x, y, laplacian, size, dir, hessian)
#For each feature draw a circle around it
#Careful! Drawing on the images changes the images!!!
cv.Circle(image, (int(x),int(y)), size, (255.0, 0.0, 0.0, 0.0), 2)
#return np.array(descriptors)
return np.array(sortedDescriptorListPairs[0:N])
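#Example (hypothetical filename): keep the 5 strongest descriptors of a greyscale image:
#    im = cv.LoadImageM("example.jpg", cv.CV_LOAD_IMAGE_GRAYSCALE)
#    descs = extractSURFFeatures(im, 0, 5)    #shape (5, 64): non-extended SURF descriptors are 64-D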
def computeDescriptorCloseness(image1,image2,draw):
image2Descs = extractSURFFeatures(image2,draw)
image1Descs = extractSURFFeatures(image1,draw)
return compareDescriptors(image1Descs, image2Descs)[0]
def compareDescriptors(indexDescriptors, imageDescs, threshold=0.05):
#print(image1Descs[0])
resultInds, distances = findClosestMatchingFeaturesPairs(indexDescriptors, 1, imageDescs)
#Calculate the average distance from a feature in the test to its nearest neighbor in the training data
averageDistance = np.average(distances)
featuresWithinThreshold = findMatchingFeatures(resultInds, distances, threshold)
#print("Average distance: %f" % averageDistance)
    #Out of how many matches could be found (imageDescs is our test data, indexDescriptors is our training data)
#How many matches were below the threshold distance?
#print("%d matches out of %d potential matches" % (len(featuresWithinThreshold), len(indexDescriptors)))
    #Get unique values (shouldn't be used multiple times?)
#uniqueMatches = set([val for (x, val, y) in featuresWithinThreshold])
#print [val for (x, val, y) in featuresWithinThreshold] #uniqueMatchesd
    #Bit of a hack but seems to work: having only 2 features match doesn't mean that, if those two are correct, you are a perfect match
    #Getting 98 out of 100 should be better than 2 out of 2
    percentageClose = 2*(float(len(featuresWithinThreshold))/(len(imageDescs)+len(indexDescriptors)))
#percentageClose = 2*(float(len(featuresWithinThreshold))/(len(indexDescriptors)))
return percentageClose, featuresWithinThreshold, averageDistance
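#Worked example of the score above: 10 index descriptors, 20 image descriptors and 6 matches within
#the threshold give percentageClose = 2*6/(10+20) = 0.4, a Dice-style overlap coefficient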
def findClosestMatchingFeaturesPairs(trainingData, k, testData=None):
duplicatesIncluded = False
    if testData is None:
        #If we are using the training data as the test data, every point's nearest neighbour is itself (distance 0), so we need k > 1
        assert(k > 1)
duplicatesIncluded = True
testData = trainingData
    #Set up a FLANN index (FLANN: Fast Library for Approximate Nearest Neighbors)
f=trainFLANN(trainingData)
#print("Params used to find nearest neighbours: ", params)
#Try and match all the features found in the second image with those stored in the k nearest neighbor
results, dists = f.nn_index(testData, k)
#print("Distances to nearest neighbour: ", dists)
    #If we are using the training data as the test data then the closest match is always the point itself, so strip that first column
if duplicatesIncluded:
dists = dists[:,1:]
results = results[:,1:]
#results, dists = results[:][1:], dists[:][1:]
return results, dists
def trainFLANN(trainingData):
    #Set up a FLANN index (FLANN: Fast Library for Approximate Nearest Neighbors)
    f = flann.FLANN()
    #Set the first image as the base (in the future this will be the "generalised" feature matrix)
f.build_index(trainingData)
return f
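#Minimal round-trip sketch (float32 descriptors, as used elsewhere in this file):
#    f = trainFLANN(np.array([[0, 0], [0, 1]], dtype=np.float32))
#    inds, dists = f.nn_index(np.array([[0, 0.9]], dtype=np.float32), 1)    #inds -> [1]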
def findNearestFeatures(flann, featureDescs):
results, dists = flann.nn_index(featureDescs, 1)
return results, dists
def calculateFeatureVector(flann, featureDescs, matchThreshold, sizeOfFeatureVector):
results, dists = findNearestFeatures(flann, featureDescs)
    #FIX: THIS IS WHERE THE PROBLEM IS! IT'S CUTTING OFF THINGS BELOW A THRESHOLD, SO THEY NO LONGER EXIST
featuresWithinThreshold = findMatchingFeatures(results, dists, matchThreshold)
featureVector = findBooleanFeatureVector(sizeOfFeatureVector, featuresWithinThreshold)
return featureVector
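#Pipeline sketch: given merged features, an image's boolean vector is one call away:
#    vec = calculateFeatureVector(trainFLANN(merged), imageDescs, 0.2, len(merged))
#    #vec[i] == 1 iff some descriptor of the image matched merged feature i within the threshold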
#Find features which are within a threshold
def findMatchingFeatures(resultInds, dists, threshold):
    #Only take into account matches below a certain threshold distance
    #Build a list of the matching features: the test-feature index, the index of its closest training feature, and the distance between them for reference
    #FIX: THIS IS WHERE THE PROBLEM IS! IT'S CUTTING OFF THINGS BELOW A THRESHOLD, SO THEY NO LONGER EXIST
featuresWithinThreshold = [(featureInd, int(closestInd), float(dist)) for featureInd, closestInd, dist in zip(range(len(resultInds)), resultInds, dists) if abs(dist) < threshold]
sortedFeaturesWithinThreshold = sorted(featuresWithinThreshold, key=lambda feature: feature[2])
#print("Thresholded has %(number)d pairs within the threshold\n" % {"number": len(featuresWithinThreshold)})
return sortedFeaturesWithinThreshold
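#Worked example: resultInds=[2,0,1], dists=[0.01,0.5,0.04] and threshold=0.05 give
#[(0, 2, 0.01), (2, 1, 0.04)] sorted by distance; the pair at distance 0.5 is discarded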
def mergeFeatures(trainingSet, threshold=0.05):
finishedMerging = False
while not finishedMerging:
#Generate distance to nearest features in test data (all other images features)
resultInds, distances = findClosestMatchingFeaturesPairs(trainingSet, 2)
#Get the subset of matches such that distance < tolerance
featuresWithinThreshold = findMatchingFeatures(resultInds, distances, threshold)
#Strip distances which are equal to 0 as they are effectively already merged
featuresWithinThreshold = [(featureInd, closestInd, dist) for featureInd, closestInd, dist in featuresWithinThreshold if abs(dist) > 0]
#print("Average distance between features: %f" % averageDist)
#ON print("%d features still need merging" % len(featuresWithinThreshold))
#ON print("trainingSet length: %d" % len(trainingSet))
#If subset is > 0
if len(featuresWithinThreshold) > 0:
#ON print "attempting to merge"
newTrainingSet = []
trainingIndicesMerged = set()
#Merge the matches and add to a new training set
for (featureInd, closestFeatureInd, distance) in featuresWithinThreshold:
#Can only merge with one feature at a time! Since they are ordered closest features get preference
if (featureInd not in trainingIndicesMerged) and (closestFeatureInd not in trainingIndicesMerged):
#ON print("FeatureInd: %d closestFeatureInd %d distance: %f" % (featureInd, closestFeatureInd, distance))
newGeneralFeature = mergeFeaturePair(trainingSet[featureInd], trainingSet[closestFeatureInd])
newTrainingSet.append(list(newGeneralFeature))
#newTrainingSet = newTrainingSet.vstack((newTrainingSet, newGeneralFeature))
#print("Merged feature:")
#print newGeneralFeature
#ON print("New training set length: %d" % len(newTrainingSet))
#Keep track of the indices merged so we can remove them later
trainingIndicesMerged.add(featureInd)
trainingIndicesMerged.add(closestFeatureInd)
            #Add the remaining, unmerged training features onto the new training set
allTrainingIndices = set(range(len(trainingSet)))
trainingIndicesReused = allTrainingIndices.difference(trainingIndicesMerged)
newTrainingSet = np.concatenate((np.array(newTrainingSet), np.array(trainingSet[list(trainingIndicesReused)])))
#ON print("training set length at end of iteration: %d" % len(newTrainingSet))
trainingSet = newTrainingSet
#Else there is no more to merge so we are finished
else:
finishedMerging = True
#ON print("final training set length: %d" % len(trainingSet))
return trainingSet
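#Illustrative trace (1-D features for brevity, squared distances assumed): [[0],[0.04],[1]] with
#threshold 0.05 merges the first pair into [0.02] on the first pass, then stops: [[0.02],[1]]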
def mergeFeaturePair(feature1, feature2):
    #Merge the pair by averaging their descriptors
    newFeature = (np.array(feature1) + np.array(feature2)) / 2.0
return newFeature
def findBooleanFeatureVector(totalNumberOfFeatures, featuresWithinThreshold):
#Make a feature vector
featureVector = np.zeros(totalNumberOfFeatures, np.int8)
    #Get all closest indices, removing duplicates to save time
    indices = set([feature[1] for feature in featuresWithinThreshold])
    #print("indices being accessed: %s" % indices)
    #Turn the matched indices on in the feature vector
if len(indices) > 0:
featureVector[list(indices)] = 1
return featureVector
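#Worked example: totalNumberOfFeatures=5 and matches [(0, 2, 0.01), (3, 2, 0.02), (1, 4, 0.03)]
#activate indices {2, 4}, giving featureVector [0, 0, 1, 0, 1]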
def calculateSharedFeatures(featureVector1, featureVector2):
sharedVector = np.bitwise_and(featureVector1, featureVector2)
return sharedVector
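#Example: calculateSharedFeatures([1,0,1,1], [0,0,1,1]) -> [0,0,1,1]; summing the result counts the shared features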
class TestComparisons(unittest.TestCase):
def setUp(self):
self.regentImages = [prefixFolder + "3-1-S-Midday.jpg", prefixFolder + "4-3-E-Midday.jpg"]
self.images= ["room1.jpg", "room2.jpg", "window.jpg", "labs1.jpg", "labs2.jpg", "bottle.jpg"]
self.images = [ rootFolder + im for im in self.images ]
#Load the images
        #Multiple versions of some as the SURF drawer draws on them, thus corrupting the next extraction
self.imsMat = []
for image in self.images:
self.imsMat.append(cv.LoadImageM(image, cv.CV_LOAD_IMAGE_GRAYSCALE))
self.imThresholded = np.zeros((len(self.images),len(self.images)))
#Careful again you keep drawing on the same image!
for x in range(len(self.images)):
for y in range(len(self.images)):
self.imThresholded[x][y] = computeDescriptorCloseness(self.imsMat[x], self.imsMat[y], 0)
#@unittest.skip("Saving time whilst testing other")
def test_same(self):
#Matching with itself should be 1
self.assertAlmostEqual(self.imThresholded[0][0], 1)
#@unittest.skip("Saving time whilst testing other")
def test_similar_greater_than_dissimilar(self):
"""
for x in range(len(self.images)):
for y in range(len(self.images)):
print("Im%d%d number of close matches: %f" % (x,y,self.imThresholded[x][y]))
"""
        #rooms 1 and 2 are more similar to each other than either room is to the window
self.assertGreater(self.imThresholded[0][1], self.imThresholded[0][2])
self.assertGreater(self.imThresholded[0][1], self.imThresholded[1][2])
        #labs 1 and 2 are more similar to each other than either lab is to the window
self.assertLess(self.imThresholded[1][2], self.imThresholded[3][4])
self.assertLess(self.imThresholded[1][2], self.imThresholded[4][3])
#@unittest.skip("Saving time whilst testing other")
def test_drawing(self):
im1DrawnOn = cv.LoadImageM(self.regentImages[0], cv.CV_LOAD_IMAGE_GRAYSCALE)
im2DrawnOn = cv.LoadImageM(self.regentImages[1], cv.CV_LOAD_IMAGE_GRAYSCALE)
extractSURFFeatures(im1DrawnOn, 1, 10)
extractSURFFeatures(im2DrawnOn, 1, 10)
cv.NamedWindow("SURFFeatures 1", cv.CV_WINDOW_AUTOSIZE)
cv.ShowImage("im1DrawnOn", im1DrawnOn)
cv.NamedWindow("SURFFeatures 2", cv.CV_WINDOW_AUTOSIZE)
cv.ShowImage("im2DrawnOn", im2DrawnOn)
cv.WaitKey(0)
self.assertTrue(True)
#@unittest.skip("Saving time whilst testing other")
def test_featureMerging(self):
feature1=[1,2,3]
feature2=[2,3,4]
newFeature = mergeFeaturePair(feature1,feature2)
expectedFeature=[1.5, 2.5, 3.5]
        for i in range(len(expectedFeature)):
self.assertAlmostEqual(newFeature[i], expectedFeature[i])
#@unittest.skip("Saving time whilst testing other")
def mergeSetup(self):
self.imsDescs = [0]*len(self.images)
for im in range(len(self.images)):
self.imsDescs[im] = extractSURFFeatures(self.imsMat[im], 0)
self.allFeatures = np.vstack((self.imsDescs[0], self.imsDescs[1], self.imsDescs[2], self.imsDescs[3], self.imsDescs[5]))
#mergeThreshold=0.5
mergeThreshold=0.08
self.mergedFeatures = mergeFeatures(self.allFeatures, mergeThreshold)
#@unittest.skip("Saving time whilst testing other")
def test_mergeFeatureSets(self):
self.mergeSetup()
beforeFeatureCount = np.shape(self.allFeatures)[0]
afterFeatureCount = len(self.mergedFeatures)
#ON print("Had %d features, now have %d feature" % (beforeFeatureCount, afterFeatureCount))
self.assertGreater(beforeFeatureCount, afterFeatureCount)
#@unittest.skip("Saving time whilst testing other")
def test_overlap_of_feature_matches(self):
self.mergeSetup()
#matchThreshold=0.4
matchThreshold=0.1
imFeaturesWithinThreshold = [0]*len(self.images)
newPercentageDifferenceims = [0]*len(self.images)
imFeatureVector = [0]*len(self.images)
for i in range(len(self.images)):
#Calculate the features that are within the threshold of the merged feature set
            newPercentageDifferenceims[i], imFeaturesWithinThreshold[i], averageDistance = compareDescriptors(self.mergedFeatures, self.imsDescs[i], matchThreshold)
#Make these features into a boolean feature vector
imFeatureVector[i] = findBooleanFeatureVector(len(self.mergedFeatures), imFeaturesWithinThreshold[i])
#ON print("image has %d active features in the merged feature vector:\n %s" % (sum(imFeatureVector[i]), imFeatureVector[i]))
            #Each vector should have at least one feature active
self.assertGreater(sum(imFeatureVector[i]), 0)
#Make a matrix of how features overlap
overlap = np.ndarray((len(self.images), len(self.images)), np.object)
for compareImage in range(len(self.images)):
overlap[0][compareImage] = calculateSharedFeatures(imFeatureVector[0], imFeatureVector[compareImage])
#ON print("overlap %d%d shares: %d features, feature vector:\n %s \n\n\n\n" % (0, compareImage, sum(overlap[0][compareImage]), overlap[0][compareImage]))
#self.assertGreater(sum(overlap[0][1]), sum(overlap[0][2]))
        #If the feature vectors are exclusive, no vector should be a superset of another; this should hold once they have been made more compact
nzIndices = [0]*len(imFeatureVector)
for featureVInd in range(len(imFeatureVector)):
nzIndices[featureVInd] = set(np.nonzero(imFeatureVector[featureVInd])[0])
        #Build each row separately; [[False]*n]*n would alias a single row n times
        supersets = [[False]*len(imFeatureVector) for _ in range(len(imFeatureVector))]
#print supersets
for x in range(len(imFeatureVector)):
for y in range(len(imFeatureVector)):
if x != y:
supersets[x][y] = nzIndices[x].issuperset(nzIndices[y])
#flatten the list of lists
supersetF = [superset for sublist in supersets for superset in sublist]
#Unless the same picture is shown twice, no set of features should be a superset of any others
#Is this the case or should they just not have the same features active?
self.assertFalse(any(supersetF), "One feature vector is a superset of the other, thus one image cannot be uniquely described")
#@unittest.skip("Doesnt work yet")
def test_recognition(self):
#Given six images, each with their own location (1,2 or 3) train on three and merge features
#[(image1, loc1), (image2, loc2), (image3, loc3), (image4, loc1), (image5, loc2), (image6, loc3)]
imagesCat = [('room1.jpg',1), ('room2.jpg',1), ('labs1.jpg', 2), ('labs2.jpg', 2), ('outside1.jpg', 3), ('outside2.jpg', 3)]#, ('labs3.jpg', 2)]
imagesCat = [ (rootFolder+tup[0], tup[1]) for tup in imagesCat ]
#Calculate closeness for remaining features
imsStored = [0, 2, 4]
imsTesting = [1, 3, 5]#, 6]
imsCatMat = []
for image in imagesCat:
imsCatMat.append(cv.LoadImageM(image[0], cv.CV_LOAD_IMAGE_GRAYSCALE))
#With the remaining three images, see if their location can be determined purely from the number of matches between them and the three images
imsCatDescs = [0]*len(imsCatMat)
#Could use more features
for im in range(len(imsCatMat)):
if im in imsStored:
#ON print("%d is stored" % im)
imsCatDescs[im] = extractSURFFeatures(imsCatMat[im], 0)
else:
#ON print("%d is testing" % im)
imsCatDescs[im] = extractSURFFeatures(imsCatMat[im], 0, 10)
#ON print("%d has %d features" % (im, len(imsCatDescs[im])))
allCatFeatures = np.vstack((imsCatDescs[0], imsCatDescs[2], imsCatDescs[4]))
mergeThreshold=0.1
mergedCatFeatures = mergeFeatures(allCatFeatures, mergeThreshold)
f = trainFLANN(mergedCatFeatures)
matchThreshold=0.1
featureVecs = np.ndarray((len(imsCatMat),len(mergedCatFeatures)), np.int8)
for i in range(len(imsCatMat)):
featureVecs[i,:] = calculateFeatureVector(f, imsCatDescs[i], matchThreshold, len(mergedCatFeatures))
#ON print("Feature vectors for recognition are:\n%s" % featureVecs)
        closestImages = []
        print(featureVecs)
for im in imsTesting:
#closestImage = (indexOfImage, featuresInCommon)
closestImage = (-1, -1)
for testIm in imsStored:
overlap = np.sum(calculateSharedFeatures(featureVecs[im,:], featureVecs[testIm,:]))
#ON print("im %d and im %d overlap %d features" % (im, testIm, overlap))
if overlap > closestImage[1]:
closestImage = (testIm, overlap)
print("closest image ind %d, with %d overlapping features" % (closestImage[0], closestImage[1]))
closestImages.append((imagesCat[im][1], imagesCat[closestImage[0]][1]))
#ON print closestImages
similarity = [ loc1 == loc2 for (loc1, loc2) in closestImages ]
        print(similarity)
#The classifier is correct if the locations match for all image pairs
self.assertTrue(all(similarity))
#ON print("\n\n\n\n")
if __name__ == '__main__':
#unittest.main()
comparisonsSuite = unittest.TestLoader().loadTestsFromTestCase(TestComparisons)
extractSuite = unittest.TestLoader().loadTestsFromTestCase(TestExtractor)
#allSuites = [extractSuite]
#allSuites = [extractSuite, comparisonsSuite]
allSuites = comparisonsSuite
completeSuite = unittest.TestSuite(allSuites)
unittest.TextTestRunner(verbosity=2).run(completeSuite)
"""
#suite.addTest(TestComparisons(unittest
#unittest.getTestCaseNames(testCaseThing)
#suite = unittest.TestLoader().loadTestsFromTestCase(TestComparisons)
unittest.TextTestRunner(verbosity=2).run(suite)
class (unittest.TestCase):
class TestExtractor(unittest.TestCase):
"""