
Migrate to DifferentiationInterface #98

Merged: 17 commits, Sep 30, 2024

Conversation

@Red-Portal (Member) commented Sep 27, 2024

This removes the local custom AD machinery in favor of DifferentiationInterface, which meets our needs as of v0.6, as noted in #85. It also does a few minor things (a sketch of the resulting DI call pattern follows the list):

  • Migrate to DifferentiationInterface
  • Update Tapir to Mooncake
  • Add an empty pipeline to stop Buildkite from complaining.
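
For illustration, here is a minimal sketch of the call pattern this migration moves to; the objective, input, and backend below are placeholders, not code from this PR:

using DifferentiationInterface, ADTypes
import ForwardDiff  # any DI-supported backend package; ForwardDiff is only an example

f(x) = sum(abs2, x)          # placeholder objective
x = randn(3)                 # placeholder parameters
backend = AutoForwardDiff()  # or AutoReverseDiff(), AutoEnzyme(), etc.

# A single uniform entry point across backends replaces the per-backend glue
# that the custom AD machinery used to provide:
y, g = DifferentiationInterface.value_and_gradient(f, backend, x)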

@Red-Portal (Member, Author) commented Sep 27, 2024

@wsmoses Hi Bill, it seems Enzyme is failing again:

  Got exception outside of a @test
  AssertionError: sz == sizeof(Int)
  Stacktrace:
    [1] should_recurse(typ2::Any, arg_t::LLVM.IntegerType, byref::GPUCompiler.ArgumentCC, dl::LLVM.DataLayout)
      @ Enzyme.Compiler ~/.julia/packages/Enzyme/ZRw0g/src/absint.jl:188
    [2] abs_typeof(arg::LLVM.LoadInst, partial::Bool)
      @ Enzyme.Compiler ~/.julia/packages/Enzyme/ZRw0g/src/absint.jl:441
    [3] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, toplevel::Bool, strip::Bool, validate::Bool, only_entry::Bool, parent_job::Nothing)
      @ Enzyme.Compiler ~/.julia/packages/Enzyme/ZRw0g/src/compiler.jl:8023
    [4] codegen
      @ ~/.julia/packages/Enzyme/ZRw0g/src/compiler.jl:7026 [inlined]
    [5] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams}, postopt::Bool)
      @ Enzyme.Compiler ~/.julia/packages/Enzyme/ZRw0g/src/compiler.jl:9292
    [6] _thunk
      @ ~/.julia/packages/Enzyme/ZRw0g/src/compiler.jl:9292 [inlined]
    [7] cached_compilation
      @ ~/.julia/packages/Enzyme/ZRw0g/src/compiler.jl:9333 [inlined]
    [8] thunkbase(ctx::LLVM.Context, mi::Core.MethodInstance, ::Val{0x0000000000007b9b}, ::Type{Const{typeof(AdvancedVI.estimate_repgradelbo_ad_forward)}}, ::Type{Duplicated{Any}}, tt::Type{Tuple{Duplicated{Vector{Float64}}, Const{@NamedTuple{rng::StableRNGs.LehmerRNG, adtype::AutoEnzyme{ReverseMode{true, true, FFIABI, false, false}, Nothing}, obj::RepGradELBO{StickingTheLandingEntropy}, problem::TestNormal{Fill{Float64, 1, Tuple{Base.OneTo{Int64}}}, PDMat{Float64, Matrix{Float64}}}, restructure::Optimisers.Restructure{MvLocationScale{LowerTriangular{Float64, Matrix{Float64}}, Normal{Float64}, Vector{Float64}, Float64}, @NamedTuple{location::Int64, scale::Int64}}, q_stop::MvLocationScale{LowerTriangular{Float64, Matrix{Float64}}, Normal{Float64}, Vector{Float64}, Float64}}}}}, ::Val{Enzyme.API.DEM_ReverseModeGradient}, ::Val{1}, ::Val{(false, false, false)}, ::Val{true}, ::Val{true}, ::Type{FFIABI}, ::Val{true}, ::Val{true})
      @ Enzyme.Compiler ~/.julia/packages/Enzyme/ZRw0g/src/compiler.jl:9465
    [9] #s2064#19047
      @ ~/.julia/packages/Enzyme/ZRw0g/src/compiler.jl:9602 [inlined]
   [10] var"#s2064#19047"(FA::Any, A::Any, TT::Any, Mode::Any, ModifiedBetween::Any, width::Any, ReturnPrimal::Any, ShadowInit::Any, World::Any, ABI::Any, ErrIfFuncWritten::Any, RuntimeActivity::Any, ::Any, ::Type, ::Type, ::Type, tt::Any, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Type, ::Any)
      @ Enzyme.Compiler ./none:0
   [11] (::Core.GeneratedFunctionStub)(::UInt64, ::LineNumberNode, ::Any, ::Vararg{Any})
      @ Core ./boot.jl:602
   [12] autodiff
      @ ~/.julia/packages/Enzyme/ZRw0g/src/Enzyme.jl:368 [inlined]
   [13] autodiff
      @ ~/.julia/packages/Enzyme/ZRw0g/src/Enzyme.jl:504 [inlined]
   [14] macro expansion
      @ ~/.julia/packages/Enzyme/ZRw0g/src/Enzyme.jl:1586 [inlined]
   [15] gradient
      @ ~/.julia/packages/Enzyme/ZRw0g/src/Enzyme.jl:1569 [inlined]
   [16] value_and_gradient(f::typeof(AdvancedVI.estimate_repgradelbo_ad_forward), ::DifferentiationInterface.NoGradientPrep, backend::AutoEnzyme{ReverseMode{true, true, FFIABI, false, false}, Nothing}, x::Vector{Float64}, contexts::Constant{@NamedTuple{rng::StableRNGs.LehmerRNG, adtype::AutoEnzyme{ReverseMode{true, true, FFIABI, false, false}, Nothing}, obj::RepGradELBO{StickingTheLandingEntropy}, problem::TestNormal{Fill{Float64, 1, Tuple{Base.OneTo{Int64}}}, PDMat{Float64, Matrix{Float64}}}, restructure::Optimisers.Restructure{MvLocationScale{LowerTriangular{Float64, Matrix{Float64}}, Normal{Float64}, Vector{Float64}, Float64}, @NamedTuple{location::Int64, scale::Int64}}, q_stop::MvLocationScale{LowerTriangular{Float64, Matrix{Float64}}, Normal{Float64}, Vector{Float64}, Float64}}})
      @ DifferentiationInterfaceEnzymeExt ~/.julia/packages/DifferentiationInterface/yXBFa/ext/DifferentiationInterfaceEnzymeExt/reverse_onearg.jl:266
   [17] value_and_gradient(f::typeof(AdvancedVI.estimate_repgradelbo_ad_forward), backend::AutoEnzyme{ReverseMode{true, true, FFIABI, false, false}, Nothing}, x::Vector{Float64}, contexts::Constant{@NamedTuple{rng::StableRNGs.LehmerRNG, adtype::AutoEnzyme{ReverseMode{true, true, FFIABI, false, false}, Nothing}, obj::RepGradELBO{StickingTheLandingEntropy}, problem::TestNormal{Fill{Float64, 1, Tuple{Base.OneTo{Int64}}}, PDMat{Float64, Matrix{Float64}}}, restructure::Optimisers.Restructure{MvLocationScale{LowerTriangular{Float64, Matrix{Float64}}, Normal{Float64}, Vector{Float64}, Float64}, @NamedTuple{location::Int64, scale::Int64}}, q_stop::MvLocationScale{LowerTriangular{Float64, Matrix{Float64}}, Normal{Float64}, Vector{Float64}, Float64}}})
      @ DifferentiationInterface ~/.julia/packages/DifferentiationInterface/yXBFa/src/fallbacks/no_prep.jl:24
   [18] estimate_gradient(rng::StableRNGs.LehmerRNG, obj::RepGradELBO{StickingTheLandingEntropy}, adtype::AutoEnzyme{ReverseMode{true, true, FFIABI, false, false}, Nothing}, prob::TestNormal{Fill{Float64, 1, Tuple{Base.OneTo{Int64}}}, PDMat{Float64, Matrix{Float64}}}, params::Vector{Float64}, restructure::Optimisers.Restructure{MvLocationScale{LowerTriangular{Float64, Matrix{Float64}}, Normal{Float64}, Vector{Float64}, Float64}, @NamedTuple{location::Int64, scale::Int64}}, state::Nothing)
      @ AdvancedVI ~/.julia/dev/AdvancedVI/src/objectives/elbo/repgradelbo.jl:122
   [19] optimize(::StableRNGs.LehmerRNG, ::TestNormal{Fill{Float64, 1, Tuple{Base.OneTo{Int64}}}, PDMat{Float64, Matrix{Float64}}}, ::RepGradELBO{StickingTheLandingEntropy}, ::MvLocationScale{LowerTriangular{Float64, Matrix{Float64}}, Normal{Float64}, Vector{Float64}, Float64}, ::Int64; adtype::AutoEnzyme{ReverseMode{true, true, FFIABI, false, false}, Nothing}, optimizer::Descent{Float64}, averager::NoAveraging, show_progress::Bool, state_init::@NamedTuple{}, callback::Nothing, prog::ProgressMeter.Progress)
      @ AdvancedVI ~/.julia/dev/AdvancedVI/src/optimize.jl:76
   [20] macro expansion
      @ ~/.julia/dev/AdvancedVI/test/inference/repgradelbo_locationscale.jl:53 [inlined]
   [21] macro expansion
      @ /usr/share/julia/stdlib/v1.10/Test/src/Test.jl:1577 [inlined]
   [22] macro expansion
      @ ~/.julia/dev/AdvancedVI/test/inference/repgradelbo_locationscale.jl:52 [inlined]
   [23] macro expansion
      @ /usr/share/julia/stdlib/v1.10/Test/src/Test.jl:1669 [inlined]
   [24] macro expansion
      @ ~/.julia/dev/AdvancedVI/test/inference/repgradelbo_locationscale.jl:17 [inlined]
   [25] macro expansion
      @ /usr/share/julia/stdlib/v1.10/Test/src/Test.jl:1577 [inlined]
   [26] top-level scope
      @ ~/.julia/dev/AdvancedVI/test/inference/repgradelbo_locationscale.jl:17
   [27] include(fname::String)
      @ Base.MainInclude ./client.jl:489
   [28] top-level scope
      @ ~/.julia/dev/AdvancedVI/test/runtests.jl:67
   [29] include(fname::String)
      @ Base.MainInclude ./client.jl:489
   [30] top-level scope
      @ none:6
   [31] eval
      @ ./boot.jl:385 [inlined]
   [32] exec_options(opts::Base.JLOptions)
      @ Base ./client.jl:291
   [33] _start()
      @ Base ./client.jl:552

This also happens with our old AD machinery.

@github-actions (bot) left a comment

Benchmark Results

Benchmark suite                              Current: 79e5c62    Previous: 90a865c    Ratio
normal + bijector/meanfield/ForwardDiff      491433178 ns        450838467.5 ns       1.09
normal + bijector/meanfield/ReverseDiff      197406284 ns        187788951 ns         1.05

This comment was automatically generated by a workflow using github-action-benchmark.

Red-Portal added the enhancement (New feature or request) label on Sep 27, 2024
yebai requested review from mhauru and willtebbutt and removed the review request for yebai on September 27, 2024, 08:58

@willtebbutt (Member) left a comment

From what I can see, this looks good. It'll be great once more packages move towards depending on DifferentiationInterface rather than on individual AD packages.

@wsmoses (Contributor) commented Sep 27, 2024

I'd probably recommend not removing the Enzyme extension for now, as doing so presently makes some code that could be differentiated directly with Enzyme either error or become significantly slower.

@wsmoses (Contributor) commented Sep 27, 2024

@Red-Portal can you open an MWE issue for the size mismatch?

@Red-Portal (Member, Author) commented

@yebai @willtebbutt Any thoughts on @wsmoses's advice to leave Enzyme here for now? Maybe I should keep the old AD interface as an indirection, so that we use Enzyme's interface for Enzyme and DI for the rest?

@yebai (Member) commented Sep 27, 2024

I'm happy to keep the Enzyme interface as is, but perhaps we should move its tests to a separate CI task.

src/optimize.jl: outdated review thread (resolved)

Red-Portal and others added 3 commits on September 30, 2024 at 14:23
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

@Red-Portal (Member, Author) commented

As suggested, I reverted to the old AD interface, which now indirects to DifferentiationInterface for every backend except Enzyme.
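
For illustration, a minimal sketch of that indirection, assuming a value_and_gradient! entry point dispatched on the ADTypes backend; the method bodies here are simplified placeholders rather than the code in this PR:

using ADTypes, DiffResults, DifferentiationInterface
import Enzyme

# Generic backends indirect to DifferentiationInterface.
function value_and_gradient!(
    ad::ADTypes.AbstractADType, f, x, aux, out::DiffResults.MutableDiffResult
)
    grad_buf = DiffResults.gradient(out)
    y, _ = DifferentiationInterface.value_and_gradient!(f, grad_buf, ad, x, Constant(aux))
    DiffResults.value!(out, y)
    return out
end

# Enzyme keeps its own method and calls Enzyme's API directly.
function value_and_gradient!(
    ad::ADTypes.AutoEnzyme, f, x, aux, out::DiffResults.MutableDiffResult
)
    grad_buf = DiffResults.gradient(out)
    fill!(grad_buf, 0)  # shadow buffers accumulate, so reset before differentiating
    _, y = Enzyme.autodiff(
        Enzyme.ReverseWithPrimal, f, Enzyme.Active,
        Enzyme.Duplicated(x, grad_buf), Enzyme.Const(aux),
    )
    DiffResults.value!(out, y)
    return out
end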

@@ -38,7 +39,14 @@ Evaluate the value and gradient of a function `f` at `x` using the automatic dif
- `aux`: Auxiliary input passed to `f`.
- `out::DiffResults.MutableDiffResult`: Buffer to contain the output gradient and function value.
"""
function value_and_gradient! end
function value_and_gradient!(

Review comment (Contributor):

This might be better as an extension; x-ref gdalle/DifferentiationInterface.jl#509

@gdalle commented Sep 30, 2024

> I'd probably recommend not removing the Enzyme extension for now, as doing so presently makes some code that could be differentiated directly with Enzyme either error or become significantly slower.

In this case, if I'm not mistaken, there is only a single differentiated argument? Can you give a concrete example of a significant slowdown or error?

@wsmoses (Contributor) commented Sep 30, 2024

Sure, here's a ~100x slowdown example with one argument:

using Enzyme, BenchmarkTools, ADTypes, DifferentiationInterface

struct T
   a::NTuple{10,Float64}
   b
end

a = T((ones(10)...,),nothing)

sumfirst(x) = sum(x.a)

@btime Enzyme.autodiff(Reverse, sumfirst, $(Active(a))) # 6.460 ns (0 allocations: 0 bytes)

function bench_di(backend, a)
   prep = DifferentiationInterface.prepare_gradient(sumfirst, backend, a)
   @btime DifferentiationInterface.gradient($sumfirst, $prep, $backend, $a)
   return nothing
end

bench_di(AutoEnzyme(), a) #     541.346 ns (13 allocations: 1.25 KiB)

@gdalle commented Sep 30, 2024

Many of the slowdown cases you present are cases where Enzyme.gradient itself does a bad job:

julia> @btime Enzyme.autodiff(Reverse, sumfirst, $(Active(a)));
  6.452 ns (0 allocations: 0 bytes)

julia> @btime Enzyme.gradient(Reverse, sumfirst, $a);
  805.476 ns (12 allocations: 1.23 KiB)

Maybe the conclusion is that DI.gradient should always use Enzyme.autodiff and just skip the high-level utilities?
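
For reference, a minimal sketch of what skipping the high-level utility could look like for a plain array input, using only the low-level autodiff entry point; this is illustrative, not DI's actual extension code:

using Enzyme

f(x) = sum(abs2, x)
x = randn(5)
dx = zero(x)  # shadow buffer that will accumulate the gradient

# Reverse-mode autodiff with an explicit shadow; this is the low-level path
# discussed above, bypassing the Enzyme.gradient convenience wrapper.
Enzyme.autodiff(Enzyme.Reverse, f, Enzyme.Active, Enzyme.Duplicated(x, dx))
# dx now holds the gradient of f at x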

@wsmoses (Contributor) commented Sep 30, 2024

Yeah, I just lightly modified the same two-argument example from Slack to make this. But sure, if you can do that, go ahead!

@gdalle commented Sep 30, 2024

Have opened gdalle/DifferentiationInterface.jl#512 to keep track; will stop derailing this PR ^^

ad::ADTypes.AbstractADType, f, x, aux, out::DiffResults.MutableDiffResult
)
grad_buf = DiffResults.gradient(out)
y, _ = DifferentiationInterface.value_and_gradient!(f, grad_buf, ad, x, Constant(aux))

Review comment:

You would benefit from DI's preparation mechanism, especially for backends like ForwardDiff and ReverseDiff

Review comment (Member):

Also for Mooncake.

Review comment (Member, Author):

@gdalle @willtebbutt In our case, upcoming features (like subsampling) will cause aux.prob to change at every iteration. Is it still valid to reuse the same prep object? Or is the idea that I should call DI.prepare_gradient every time before calling DI.value_and_gradient?

Review comment (Member):

Mooncake.jl ought to be fine provided that the type doesn't change. Am I correct in assuming that in the case of subsampling, you're just changing the indices of the data which are subsampled at each iteration?

Review comment (Contributor):

But @gdalle, is that true in general? For example, I imagine ReverseDiff or the upcoming Reactant support would result in an invalid prep object, no?

Review comment (@gdalle, Sep 30, 2024):

If the size of x and the size of the contents of aux.prob don't change, you should be good to go (except for ReverseDiff as explained above). That would require the minibatches to always have the same length I guess?

Review comment:

For most backends, what preparation does is allocate caches that correspond to the various differentiated inputs and contexts. These caches have a specific size, which determines whether preparation can be reused; inside your function you can then create more objects with arbitrary sizes, that's none of my business.
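
For illustration, a minimal sketch of this preparation-and-reuse pattern, with an invented objective, context, and ForwardDiff backend (none of these names come from the PR):

using DifferentiationInterface, ADTypes
import ForwardDiff

loss(x, aux) = sum(abs2, x .- aux.target)  # invented objective with a context argument

x = randn(4)
backend = AutoForwardDiff()

# Preparation allocates backend caches sized to `x` and the context contents.
prep = prepare_gradient(loss, backend, x, Constant((; target = ones(4))))

for _ in 1:3
    # The prep can be reused as long as the sizes of `x` and the context contents
    # stay the same; the context values themselves are free to change.
    aux = (; target = randn(4))
    y, g = value_and_gradient(loss, prep, backend, x, Constant(aux))
end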

Review comment (Member, Author):

> that's none of my business.

Haha fair enough.

> (except for ReverseDiff as explained above). That would require the minibatches to always have the same length I guess?

I think compiled tapes actually need everything except x to stay the same, no? In any case, I think the best course of action would be to expose control over whether to use preparation or not. But I feel that would be a little too complicated for this PR alone.

Review comment:

Oh yeah, I had forgotten about constants. As soon as you use constants, DI recompiles the tape every time anyway.

Review comment (Member, Author):

Ah I see. Okay, then I guess there's nothing preventing us from using prepare_gradient.

@Red-Portal (Member, Author) commented

In light of the discussion, I will merge this PR and maybe submit a separate PR that exposes prepare_gradient.

Red-Portal merged commit d0efe02 into master on Sep 30, 2024
11 checks passed
Red-Portal deleted the differentiation_interface branch on September 30, 2024 at 18:22

Labels: enhancement (New feature or request)
5 participants