'''
Control script for experiment utilising the CPU optimised DNN class
TODO: Plot classification accuracies of each digit through training
'''
from dnn import *
import multiprocessing as mp
from timeit import default_timer as timer
import matplotlib.pyplot as plt
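# NOTE (assumption): `np` (numpy) and `npr` (numpy.random) are expected to be
# re-exported by dnn's star import; they are imported explicitly here so the
# script stands alone
import numpy as np
import numpy.random as npr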
# Running mean function used for plotting
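# (uses the cumulative-sum identity: the difference of cumulative sums N apart
# is the sum over a sliding window of length N, so e.g.
# runningMean([1, 2, 3, 4], 2) returns array([1.5, 2.5, 3.5]))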
def runningMean(x, N):
    cumsum = np.cumsum(np.insert(x, 0, 0))
    return (cumsum[N:] - cumsum[:-N]) / float(N)
from importMNIST import imp
dat = imp()
# Flags to load and save the model (the structure defined below must match the
# loaded model's)
loadModel = False
modelDir = 'models/DNN/800-42.20'
saveModel = False
# Model structure parameters
hiddenLayers = [400,200]
inputNodes = 784
outputNodes = 10
# Import data normalised to [0, 1], or to [-1, 1] for a vanishing mean
vanishingMean = False
# Flag to disable node biases during testing
testBiases = False
# Set number of training epochs and the reinitialisation checkpoint epoch
epochs = 64
check = int(epochs/8)
# TODO: Rename the 'momentum metric' as momentum is already a concept in ML
# Window length of the 'momentum metric': the mean of the latest momLen
# accuracies is compared against the mean of the preceding momLen accuracies
momLen = 5
# Set length of rolling averages, for plots, to the momentum window length
N = momLen
# Set learning rate magnitude and calculate learning rate
magLrnRate = -3
stochasticLearningRate = 10 ** magLrnRate
targetMagLrnRate = -5
targetStochasticLearningRate = 10 ** targetMagLrnRate
# Set absolute size of training data set and the batch multiplier
absoluteSize = 60000
batchMult = 20
# Set number of worker processes in the pool (mp.Pool uses processes, not threads)
threadNum = 5
# Calculate batch and thread size
batchSize = batchMult * threadNum
threadSize = int(batchSize/threadNum)
# Calculate learning rate based on batch size
learningRate = batchSize * stochasticLearningRate
targetLearningRate = batchSize * targetStochasticLearningRate
learningRateDelta = (targetLearningRate - learningRate) / epochs
# Calculate number of runs per epoch based on batch size and worker pool
runs = int((absoluteSize / threadSize) / threadNum)
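# With the defaults above: batchSize = 20 * 5 = 100, threadSize = 100 / 5 = 20,
# learningRate = 100 * 1e-3 = 0.1 annealed to 100 * 1e-5 = 0.001 over 64 epochs
# (learningRateDelta ~ -0.0015 per epoch), and runs = (60000 / 20) / 5 = 600
# minibatches per epoch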
print(hiddenLayers)
input('Waiting for execution consent...')
# Initialise the neural network with node numbers, learning rate
# and batch size
net = DNN(hiddenLayers, inputNodes, outputNodes, learningRate, batchSize)
# Activation functions for hidden and output layers of model
hiddenActivations = [net.sigmoid] * len(hiddenLayers)
outputActivation = net.softmax
# Set the activation functions to be used in the model class
net.setActivation(hiddenActivations, outputActivation)
# Create lists to unpack deltas and errors from pool workers
dW = [None] * threadNum
dB = [None] * threadNum
e = [None] * threadNum
# Create array to store errors for each run
errors = np.zeros(batchSize)
# Create array for recording mean error per run
runErr = np.zeros((epochs, runs))
# Create trace array for recording performance through training process
trace = np.zeros(epochs)
traceVal = np.zeros(epochs)
traceAcc = np.zeros(epochs)
# Import training data and get length of dataset
imTr, labTr = dat.importTrain(vanishingMean)
rng = len(labTr)
# Import test set (also used as the validation set during training)
imTs, labTs = dat.importTest(vanishingMean)
rngTs = len(labTs)
# Load model weights and biases from file
if loadModel: net.modelL(modelDir)
execStart = timer()
workableInit = False
while not workableInit:
    # Iterate through each epoch
    for i in range(epochs):
        epStart = timer()
        # Create pool of workers
        pool = mp.Pool(threadNum)
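        # NOTE (assumption): creating the pool from module-level code relies on
        # fork-style process start; under the 'spawn' start method (Windows,
        # newer macOS) the script body would need an
        # `if __name__ == '__main__':` guard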
        # Create randomised arrays of indexes for the training and validation data
        ind = npr.permutation(rng)
        indVl = npr.permutation(rngTs)
        # Slice randomised indexes into the minibatches for each thread and run
        thrInd = np.split(ind, runs * threadNum)
        thrIndVl = np.split(indVl, threadNum)
        # Iterate through each run
        for j in range(runs):
            # Create list of variables to be loaded into each worker
            runVars = []
            for k in range(threadNum):
                # Index of the minibatch slice for run j, thread k
                r = (j * threadNum) + k
                # Create list of variables for worker
                inputs = []
                # Add thread id and thread size
                inputs.append(k)
                inputs.append(threadSize)
                # Add input images and output labels (using sliced indexes)
                inputs.append(imTr[thrInd[r]])
                inputs.append(labTr[thrInd[r]])
                # Append worker arguments to list
                runVars.append(inputs)
            # Pass arguments to worker pool and wait for results
            e = pool.starmap(net.run, runVars)
            # Iteratively write the unpacked results into corresponding arrays
            for t in range(threadNum):
                # Calculate starting and ending indexes for thread size
                start = t * threadSize
                end = start + threadSize
                # Write errors into array
                errors[start:end] = e[t][0][:]
                # TODO: Move these variables to memory shared by worker pool
                # Write deltas for each run into NN object for update step
                for l in range(net.totL):
                    net.dW[l][:, start:end], net.dB[l][:, start:end] = e[t][1][l], e[t][2][l]
            # Perform update for this batch
            net.update()
            # Record mean error for batch
            runErr[i][j] = np.mean(errors)
        # Create list of variables to be loaded into each validation worker
        runVars = []
        for k in range(threadNum):
            # Create list of variables for worker
            inputs = []
            inputs.append(imTs[thrIndVl[k]])
            inputs.append(labTs[thrIndVl[k]])
            # Append worker arguments to list
            runVars.append(inputs)
        e = pool.starmap(net.validate, runVars)
        costVal, scoreVal, vals = 0, 0, []
        for x in e:
            costVal += x[0]
            scoreVal += x[1]
            vals += x[2]
        # Count correct classifications of each digit
        corr = [0] * 10
        for n in range(rngTs):
            if vals[n][0] == vals[n][1]: corr[vals[n][0]] += 1
        traceVal[i] = costVal / threadNum
        traceAcc[i] = scoreVal / rngTs
        # Calculate momentum: mean of the latest momLen accuracies minus the
        # mean of the preceding momLen (positive while accuracy is improving)
        if i >= 2 * momLen:
            diff = np.mean(traceAcc[i-momLen:i]) - np.mean(traceAcc[i-2*momLen:i-momLen])
        else:
            diff = float('nan')
        # Close worker pool once epoch is complete
        pool.close()
        epEnd = timer()
        # Step the learning rate linearly towards the target
        net.lrnR += learningRateDelta
        # Record trace of performance per epoch and report, e.g.
        # `0   trn: 0.34 - val: 0.37 0.10 mom: 2.32 - ep end: 14.24`
        trace[i] = np.mean(runErr[i])
        print(i, '\ttrn: %.2f' % trace[i], '-',
              'val: %.2f %.2f' % (traceVal[i], traceAcc[i]),
              'mom: %.2f' % diff, '-',
              'ep end: %.2f' % (epEnd - epStart))
        #print([0 if c == 0 else 1 for c in corr])
        # Reinitialise the network if any digit has never been classified
        # correctly by the checkpoint epoch (a symptom of a bad initialisation)
        if not workableInit and i == check and any([c == 0 for c in corr]):
            net.reinitialise()
            net.lrnR = learningRate
            print('reinitialising...', [c == 0 for c in corr])
            break
        if not workableInit and i > check: workableInit = True
# Set score counter to record number of successful classifications
score = 0
# Create list to record predicted and real classifications
vals = [None] * rngTs
# Create list for node values and initialise the input layer array
val = [None] * net.totV
val[0] = np.zeros(net.inpN)
# Create list to store weighted sum inputs in feedforward step
inp = [None] * net.totL
# Create list of arrays for deltas of this backward pass
dW = [None] * net.totL
dB = [None] * net.totL
# Iteratively populate lists with arrays for each layer
for l in range(net.totL):
    i = l + 1
    val[i] = np.zeros(net.nod[i])
    inp[l] = np.zeros(net.nod[i])
    dW[l] = np.zeros(net.nod[i])
    dB[l] = np.zeros(net.nod[i])
# Create array for expected outputs
exp = np.zeros(net.outN, dtype=int)
# Do not use biases for testing if flag is set
if testBiases: net.biases = False
# Run the test data through the network
for n in range(rngTs):
    val, exp = net.importDat(imTs[n], labTs[n], val, exp)
    inp, val = net.feedForward(inp, val)
    # Get predicted and real classifications and record
    yHat = np.argmax(val[net.totL])
    y = np.argmax(exp)
    vals[n] = [yHat, y]
    # Check if classification was correct and update score
    if yHat == y: score += 1
# Populate list with the counts of correct classifications of each digit
corr = [0] * 10
for n in range(rngTs):
    if vals[n][0] == vals[n][1]: corr[vals[n][0]] += 1
execEnd = timer()
execTime = execEnd - execStart
if execTime <= 60: unit = 's'
elif execTime <= 3600: execTime, unit = execTime/60, 'm'
else: execTime, unit = (execTime/60)/60, 'h'
print('Execution time: %.2f%s'% (execTime, unit))
# Report results from validation
# Minimum and final losses and accuracies from training
print('Trn loss: minimum = %.5f final = %.5f'% (trace.min(), trace[-1]))
print('Val loss: minimum = %.5f final = %.5f'% (traceVal.min(), traceVal[-1]))
print('Accuracy: maximum = %.5f final = %.5f'% (traceAcc.max(), traceAcc[-1]))
# Test accuracies
print('Test acc: ', score, '/', rngTs, '\t', (score/rngTs))
testCounts = np.load('testSamplesDigitCounts.npy')
print('Digit classification accuracies (0-9):\n',
[round(c/testCounts[i], 4) for i, c in enumerate(corr)])
# Network parameters and architecture
print('Learning rate =', stochasticLearningRate, 'to',
targetStochasticLearningRate, 'Batch size =', batchSize)
net.dim()
# Plot smoothed loss and accuracy curves from the training process
plt.plot(runningMean(trace, N), label='training loss')
plt.plot(runningMean(traceVal, N), label='validation loss')
plt.plot(runningMean(traceAcc, N), label='validation accuracy')
plt.legend()
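# Note: runningMean returns len(x) - N + 1 points, so the smoothed curves are
# slightly shorter than the number of epochs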
y = input('Show training profile? `y`')
if y == 'y': plt.show()
# Save model weights and biases to file
if saveModel: net.modelS()
else:
    y = input('Save model? `y`')
    if y == 'y': net.modelS()