UTF-8 support #113

Open · wants to merge 5 commits into base: master
158 changes: 158 additions & 0 deletions copy.lua
@@ -0,0 +1,158 @@
--[[

Training an NTM to memorize its input.

The current version seems to work, giving good output after 5000 iterations
or so. Proper initialization of the read/write weights seems to be crucial
here.

--]]
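-- Illustrative encoding sketch (derived from the code below, not part of the
-- original script): with input_dim = 10, columns 1-2 of every vector are the
-- start/end delimiter channels and columns 3-10 carry the random bits, so a
-- length-2 episode is presented roughly as
--   start_symbol -> [1 0 | 0 0 0 0 0 0 0 0]
--   seq[1]       -> [0 0 | b b b b b b b b]   (each b sampled from {0, 1})
--   seq[2]       -> [0 0 | b b b b b b b b]
--   end_symbol   -> [0 1 | 0 0 0 0 0 0 0 0]
-- followed by len all-zero query vectors whose outputs are trained to
-- reproduce seq.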

require('../')
require('./util')
require('optim')
require('sys')

torch.manualSeed(0)

-- NTM config
local config = {
  input_dim = 10,
  output_dim = 10,
  mem_rows = 128,
  mem_cols = 20,
  cont_dim = 100
}

local input_dim = config.input_dim
local start_symbol = torch.zeros(input_dim)
start_symbol[1] = 1
local end_symbol = torch.zeros(input_dim)
end_symbol[2] = 1

function generate_sequence(len, bits)
  local seq = torch.zeros(len, bits + 2)
  for i = 1, len do
    seq[{i, {3, bits + 2}}] = torch.rand(bits):round()
  end
  return seq
end

function forward(model, seq, print_flag)
  local len = seq:size(1)
  local loss = 0

  -- present start symbol
  model:forward(start_symbol)

  -- present inputs
  if print_flag then print('write head max') end
  for j = 1, len do
    model:forward(seq[j])
    if print_flag then print_write_max(model) end
  end

  -- present end symbol
  model:forward(end_symbol)

  -- present targets
  local zeros = torch.zeros(input_dim)
  local outputs = torch.Tensor(len, input_dim)
  local criteria = {}
  if print_flag then print('read head max') end
  for j = 1, len do
    criteria[j] = nn.BCECriterion()
    outputs[j] = model:forward(zeros)
    loss = loss + criteria[j]:forward(outputs[j], seq[j]) * input_dim
    if print_flag then print_read_max(model) end
  end
  return outputs, criteria, loss
end

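-- Backpropagation runs in reverse presentation order: first through the target
-- (read) steps with the scaled BCE gradients, then the end symbol, the written
-- inputs, and finally the start symbol, all of which receive zero output
-- gradients since only the read phase is penalized.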
function backward(model, seq, outputs, criteria)
  local len = seq:size(1)
  local zeros = torch.zeros(input_dim)
  for j = len, 1, -1 do
    model:backward(
      zeros,
      criteria[j]
        :backward(outputs[j], seq[j])
        :mul(input_dim)
    )
  end

  model:backward(end_symbol, zeros)
  for j = len, 1, -1 do
    model:backward(seq[j], zeros)
  end
  model:backward(start_symbol, zeros)
end

local model = ntm.NTM(config)
local params, grads = model:getParameters()

local num_iters = 10000
local start = sys.clock()
local print_interval = 25
local min_len = 1
local max_len = 20

print(string.rep('=', 80))
print("NTM copy task")
print('training up to ' .. num_iters .. ' iteration(s)')
print('min sequence length = ' .. min_len)
print('max sequence length = ' .. max_len)
print(string.rep('=', 80))
print('num params: ' .. params:size(1))

local rmsprop_state = {
  learningRate = 1e-4,
  momentum = 0.9,
  decay = 0.95
}

-- local adagrad_state = {
--   learningRate = 1e-3
-- }

-- train
for iter = 1, num_iters do
  local print_flag = (iter % print_interval == 0)
  local feval = function(x)
    if print_flag then
      print(string.rep('-', 80))
      print('iter = ' .. iter)
      print('learn rate = ' .. rmsprop_state.learningRate)
      print('momentum = ' .. rmsprop_state.momentum)
      print('decay = ' .. rmsprop_state.decay)
      printf('t = %.1fs\n', sys.clock() - start)
    end

    local loss = 0
    grads:zero()

    local len = math.floor(torch.random(min_len, max_len))
    local seq = generate_sequence(len, input_dim - 2)
    local outputs, criteria, sample_loss = forward(model, seq, print_flag)
    loss = loss + sample_loss
    backward(model, seq, outputs, criteria)
    if print_flag then
      print("target:")
      print(seq)
      print("output:")
      print(outputs)
    end

    -- clip gradients
    grads:clamp(-10, 10)
    if print_flag then
      print('max grad = ' .. grads:max())
      print('min grad = ' .. grads:min())
      print('loss = ' .. loss)
    end
    return loss, grads
  end

  --optim.adagrad(feval, params, adagrad_state)
  ntm.rmsprop(feval, params, rmsprop_state)
end
4 changes: 4 additions & 0 deletions lr.lua
@@ -0,0 +1,4 @@
local function lr()
  return 0.008
end
return lr
96 changes: 96 additions & 0 deletions model/CircularConvolution.lua
@@ -0,0 +1,96 @@
--[[

Input: A table {x, k} of a vector x and a convolution kernel k.

Output: Circular convolution of x with k.

TODO: This module can probably be implemented more efficiently.

--]]
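-- Illustrative note (spelled out from the loops below, not part of the original
-- file): for the length-3 shift kernels used by the NTM addressing mechanism,
-- the forward pass reduces to
--   output[i] = k[1] * v[i + 1] + k[2] * v[i] + k[3] * v[i - 1]
-- with the indices of v wrapping around circularly.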

local CircularConvolution, parent = torch.class('nn.CircularConvolution', 'nn.Module')

function CircularConvolution:__init()
  parent.__init(self)
  self.gradInput = {}
end

function rotate_left(input, step)
  local output = input.new():resizeAs(input)
  local size = input:size(1)
  output[{{1, size - step}}] = input[{{step + 1, size}}]
  output[{{size - step + 1, size}}] = input[{{1, step}}]
  return output
end

function rotate_right(input, step)
  local output = input.new():resizeAs(input)
  local size = input:size(1)
  output[{{step + 1, size}}] = input[{{1, size - step}}]
  output[{{1, step}}] = input[{{size - step + 1, size}}]
  return output
end

-- function CircularConvolution:updateOutput_orig(input)
-- local a, b = unpack(input)
-- local size = a:size(1)
-- self.b = b:repeatTensor(1,2)
-- local circ = a.new():resize(size, size)
-- for i = 0, size - 1 do
-- circ[i + 1] = self.b:narrow(2, size - i + 1, size)
-- end
-- self.output:set(torch.mv(circ:t(), a))
-- return self.output
-- end

-- function CircularConvolution:updateGradInput_orig(input, gradOutput)
-- local a, b = unpack(input)
-- local size = a:size(1)
-- for i = 1, 2 do
-- self.gradInput[i] = self.gradInput[i] or input[1].new()
-- self.gradInput[i]:resize(size)
-- end

-- a = a:repeatTensor(1, 2)
-- for i = 0, size - 1 do
-- self.gradInput[1][i + 1] = gradOutput:dot(self.b:narrow(2, size - i + 1, size))
-- self.gradInput[2][i + 1] = gradOutput:dot(a:narrow(2, size - i + 1, size))
-- end
-- return self.gradInput
-- end

function CircularConvolution:updateOutput(input)
  local v, k = unpack(input)
  self.size = v:size(1)
  self.kernel_size = k:size(1)
  self.kernel_shift = math.floor(self.kernel_size / 2)
  self.output = v.new():resize(self.size):zero()
  for i = 1, self.size do
    for j = 1, self.kernel_size do
      local idx = i + self.kernel_shift - j + 1
      if idx < 1 then idx = idx + self.size end
      if idx > self.size then idx = idx - self.size end
      self.output[{{i}}]:add(k[j] * v[idx])
    end
  end
  return self.output
end

function CircularConvolution:updateGradInput(input, gradOutput)
  local v, k = unpack(input)
  self.gradInput[1] = self.gradInput[1] or v.new()
  self.gradInput[2] = self.gradInput[2] or k.new()
  self.gradInput[1]:resize(self.size)
  self.gradInput[2]:resize(self.kernel_size)

  local gradOutput2 = rotate_right(gradOutput:repeatTensor(1, 2):view(2 * self.size), self.kernel_shift)
  for i = 1, self.size do
    self.gradInput[1][i] = k:dot(gradOutput2:narrow(1, i, self.kernel_size))
  end

  local v2 = rotate_left(v:repeatTensor(1, 2):view(2 * self.size), self.kernel_shift + 1)
  for i = 1, self.kernel_size do
    self.gradInput[2][i] = gradOutput:dot(v2:narrow(1, self.size - i + 1, self.size))
  end
  return self.gradInput
end
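-- Usage sketch (illustrative only, not part of this patch): a centered delta
-- kernel puts all weight on the zero-shift position, so the circular
-- convolution should return the input vector unchanged.
local conv = nn.CircularConvolution()
local v = torch.rand(8)
local k = torch.Tensor{0, 1, 0}   -- all weight on the zero-shift position
local out = conv:forward({v, k})
print(torch.dist(out, v))         -- expected to be (numerically) 0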
11 changes: 6 additions & 5 deletions model/GRU.lua
@@ -6,7 +6,7 @@ Creates one timestep of one GRU
 Paper reference: http://arxiv.org/pdf/1412.3555v1.pdf
 ]]--
 function GRU.gru(input_size, rnn_size, n, dropout)
-  dropout = dropout or 0
+  dropout = dropout or 0
   -- there are n+1 inputs (hiddens on each layer and x)
   local inputs = {}
   table.insert(inputs, nn.Identity()()) -- x
@@ -26,11 +26,12 @@ function GRU.gru(input_size, rnn_size, n, dropout)
 
     local prev_h = inputs[L+1]
     -- the input to this layer
-    if L == 1 then
-      x = OneHot(input_size)(inputs[1])
+    if L == 1 then
+      print(input_size)
+      x = nn.LookupTable(input_size,rnn_size)(inputs[1])
       input_size_L = input_size
-    else
-      x = outputs[(L-1)]
+    else
+      x = outputs[(L-1)]
       if dropout > 0 then x = nn.Dropout(dropout)(x) end -- apply dropout, if any
       input_size_L = rnn_size
     end