diff --git a/CHANGELOG.md b/CHANGELOG.md index f2a4f04e..9784fe6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.5.4 + - Important correctness fix when storing very many equally sized objects +that may get GC'ed while storing is in progress! (#603) + ## 0.5.3 - Bugfix for `<: Function` structs diff --git a/Project.toml b/Project.toml index 9361c70c..caaac3c2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "JLD2" uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -version = "0.5.3" +version = "0.5.4" [deps] FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" diff --git a/src/JLD2.jl b/src/JLD2.jl index b76b1780..079d00c2 100644 --- a/src/JLD2.jl +++ b/src/JLD2.jl @@ -113,7 +113,7 @@ mutable struct JLDFile{T<:IO} disable_commit::Bool datatype_locations::OrderedDict{RelOffset,CommittedDatatype} datatypes::Vector{H5Datatype} - datatype_wsession::JLDWriteSession{Dict{UInt,RelOffset}} + datatype_wsession::JLDWriteSession{Dict{UInt,Tuple{RelOffset,WeakRef}}} typemap::Dict{String, Any} jlh5type::IdDict{Any,Any} h5jltype::IdDict{Any,Any} diff --git a/src/types.jl b/src/types.jl index c9e7ad77..04905020 100644 --- a/src/types.jl +++ b/src/types.jl @@ -112,25 +112,25 @@ A `JLDWriteSession` keeps track of references to serialized objects. If `T` is a written dataset. If it is `Union{}`, then references are not tracked, and objects referenced multiple times are written multiple times. """ -struct JLDWriteSession{T<:Union{Dict{UInt,RelOffset},Union{}}} +struct JLDWriteSession{T<:Union{Dict{UInt,Tuple{RelOffset,WeakRef}},Union{}}} h5offset::T - JLDWriteSession{T}() where T = new() - JLDWriteSession{T}(h5offset, objects) where T = new(h5offset) + JLDWriteSession{T}() where T = new{T}() + JLDWriteSession(h5offset::T) where T = new{T}(h5offset) end -JLDWriteSession() = JLDWriteSession{Dict{UInt,RelOffset}}(Dict{UInt,RelOffset}(), Any[]) +JLDWriteSession() = JLDWriteSession(Dict{UInt,Tuple{RelOffset,WeakRef}}()) track!(::JLDWriteSession{Union{}}, args...) = nothing function track!(s::JLDWriteSession, data, offset::RelOffset) if ismutabletype(typeof(data)) - s.h5offset[objectid(data)] = offset + s.h5offset[objectid(data)] = (offset, WeakRef(data)) end nothing end -get_tracked(wsession::JLDWriteSession{Union{}}, data) = UNDEFINED_ADDRESS +get_tracked(::JLDWriteSession{Union{}}, data) = UNDEFINED_ADDRESS function get_tracked(wsession::JLDWriteSession, data) - if ismutabletype(typeof(data)) - return get(wsession.h5offset, objectid(data), UNDEFINED_ADDRESS) - end - return UNDEFINED_ADDRESS + !ismutabletype(typeof(data)) && return UNDEFINED_ADDRESS + offset, wref = get(wsession.h5offset, objectid(data), (UNDEFINED_ADDRESS, WeakRef(nothing))) + isnothing(wref.value) && return UNDEFINED_ADDRESS + return offset end """ GlobalHeap diff --git a/test/loadsave.jl b/test/loadsave.jl index 30329062..4684601b 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -801,3 +801,40 @@ end loaded_foo = load(tempfile, "foo") @test loaded_foo isa Foo601 end + + +@testset "Issue #603 - reused objectids" begin + fn = joinpath(mktempdir(), "issue603_reused_objectids.jld2") + + function create_large_dict(levels::Int, items_per_level::Int, item_size::Int) + # Create a nested dictionary with the specified number of levels + function create_nested_dict(current_level, max_level) + if current_level > max_level + return Dict("x" => rand(Int, item_size)) + else + return Dict( + string(i) => create_nested_dict(current_level + 1, max_level) + for i in 1:items_per_level + ) + end + end + # Create the top-level dictionary + return create_nested_dict(1, levels) + end + + obj = create_large_dict(3, 30, 1); + jldsave(fn; obj) + res = load(fn, "obj") + function recursive_test(obj, res) + @test length(keys(obj)) == length(keys(res)) + for (k, v) in obj + @test haskey(res, k) + if isa(v, Dict) + recursive_test(v, res[k]) + else + @test v == res[k] + end + end + end + recursive_test(obj, res) +end \ No newline at end of file