From b657bb4fdfe8b02c387ab4aed5819974d0d179e4 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 26 Mar 2026 08:48:37 -0400 Subject: [PATCH 1/7] More tests for CUDA + MPS --- ext/MPSKitAdaptExt.jl | 2 + test/cuda/operators.jl | 157 ++++++++++++++++++++++++++++++++++++++++- test/cuda/states.jl | 40 +++++++++++ 3 files changed, 198 insertions(+), 1 deletion(-) diff --git a/ext/MPSKitAdaptExt.jl b/ext/MPSKitAdaptExt.jl index 6f0b0c7f2..78f4b41af 100644 --- a/ext/MPSKitAdaptExt.jl +++ b/ext/MPSKitAdaptExt.jl @@ -35,5 +35,7 @@ end MPSKit.JordanMPOTensor(space(W), adapt(to, W.A), adapt(to, W.B), adapt(to, W.C), adapt(to, W.D)) @inline Adapt.adapt_structure(to, mpo::MPOHamiltonian) = MPOHamiltonian(map(x -> adapt(to, x), mpo.W)) +@inline Adapt.adapt_structure(to, ml::MPSKit.Multiline) = MPSKit.Multiline(map(adapt(to), ml.data)) +@inline Adapt.adapt_structure(to, pa::MPSKit.PeriodicArray) = MPSKit.PeriodicArray(map(adapt(to), pa.data)) end diff --git a/test/cuda/operators.jl b/test/cuda/operators.jl index b3f03796c..47da1ed3e 100644 --- a/test/cuda/operators.jl +++ b/test/cuda/operators.jl @@ -5,12 +5,15 @@ using MPSKit: GeometryStyle, FiniteChainStyle, InfiniteChainStyle, OperatorStyle using TensorKit using MatrixAlgebraKit using TensorKit: ℙ, tensormaptype, TensorMapWithStorage -using Adapt, CUDA, cuTENSOR +using Adapt, CUDA, cuTENSOR, CUDA.CUBLAS # TODO revisit this once https://github.com/QuantumKitHub/MatrixAlgebraKit.jl/issues/176 # is resolved MPSKit.Defaults.alg_svd() = CUSOLVER_QRIteration() +pspaces = (ℙ^4, Rep[U₁](0 => 2), Rep[SU₂](1 => 1)) +vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 // 2 => 1)) + @testset "CuFiniteMPO" for V in (ℂ^2, U1Space(0 => 1, 1 => 1)) # start from random operators L = 4 @@ -87,3 +90,155 @@ MPSKit.Defaults.alg_svd() = CUSOLVER_QRIteration() @test dot(mpomps₁, mpomps₁) ≈ dot(mpo₁, mpo₁) end + +@testset "Finite CuMPOHamiltonian" begin + L = 3 + T = ComplexF64 + for T in (Float64, ComplexF64), V in (ℂ^2, U1Space(-1 => 1, 0 => 1, 1 => 1)) + lattice = fill(V, L) + O₁ = randn(T, V, V) + O₁ += O₁' + E = id(storagetype(O₁), domain(O₁)) + O₂ = randn(T, V^2 ← V^2) + O₂ += O₂' + + H1 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, i => O₁ for i in 1:L)) + H2 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, (i, i + 1) => O₂ for i in 1:(L - 1))) + H3 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 1 => O₁, (2, 3) => O₂, (1, 3) => O₂)) + @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} + @test TensorKit.storagetype(H2) == CuVector{T, CUDA.DeviceMemory} + @test TensorKit.storagetype(H3) == CuVector{T, CUDA.DeviceMemory} + + @test scalartype(H1) == scalartype(H2) == scalartype(H3) == T + #=if !(T <: Complex) + for H in (H1, H2, H3) + Hc = @constinferred complex(H) + @test scalartype(Hc) == complex(T) + @test TensorKit.storagetype(Hc) == CuVector{complex(T), CUDA.DeviceMemory} + end + end=# # TODO + + # check if constructor works by converting back to tensormap + #= H1_tm = convert(TensorMap, H1) + operators = vcat(fill(E, L - 1), O₁) + @test H1_tm ≈ mapreduce(+, 1:L) do i + return reduce(⊗, circshift(operators, i)) + end + operators = vcat(fill(E, L - 2), O₂) + @test convert(TensorMap, H2) ≈ mapreduce(+, 1:(L - 1)) do i + return reduce(⊗, circshift(operators, i)) + end + @test convert(TensorMap, H3) ≈ + O₁ ⊗ E ⊗ E + E ⊗ O₂ + permute(O₂ ⊗ E, ((1, 3, 2), (4, 6, 5))) =# # needs a fix in BlockTensorKit + + # check if adding terms on the same site works + single_terms = Iterators.flatten(Iterators.repeated((i => O₁ / 2 for i in 1:L), 2)) + H4 = adapt(CuArray, FiniteMPOHamiltonian(lattice, single_terms)) + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + @test H4 ≈ H1 atol = 1.0e-6 + double_terms = Iterators.flatten( + Iterators.repeated(((i, i + 1) => O₂ / 2 for i in 1:(L - 1)), 2) + ) + H5 = adapt(CuArray, FiniteMPOHamiltonian(lattice, double_terms)) + @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} + @test H5 ≈ H2 atol = 1.0e-6 + + # test linear algebra + @test H1 ≈ + adapt(CuArray, FiniteMPOHamiltonian(lattice, 1 => O₁)) + + adapt(CuArray, FiniteMPOHamiltonian(lattice, 2 => O₁)) + + adapt(CuArray, FiniteMPOHamiltonian(lattice, 3 => O₁)) + @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} + #@test 0.8 * H1 + 0.2 * H1 ≈ H1 atol = 1.0e-6 # broken due to JordanMPOTensorMap + #=@test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6 + H1_trunc = changebonds(H1, SvdCut(; trscheme = truncrank(0))) + @test H1_trunc ≈ H1 + @test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1))=# # needs fix in BlockTensorKit + + # test dot and application + state = rand(T, prod(lattice)) + mps = adapt(CuArray, FiniteMPS(state)) + + #=@test convert(TensorMap, H1 * mps) ≈ H1_tm * state # needs fix in BlockTensorKit + @test H1 * state ≈ H1_tm * state + @test dot(mps, H2, mps) ≈ dot(mps, H2 * mps)=# + + # test constructor from dictionary with mixed linear and Cartesian lattice indices as keys + grid = square = fill(V, 3, 3) + + local_operators = Dict((I,) => O₁ for I in eachindex(grid)) + I_vertical = CartesianIndex(1, 0) + vertical_operators = Dict( + (I, I + I_vertical) => O₂ for I in eachindex(IndexCartesian(), square) if I[1] < size(square, 1) + ) + I_horizontal = CartesianIndex(0, 1) + horizontal_operators = Dict( + (I, I + I_horizontal) => O₂ for I in eachindex(IndexCartesian(), square) if I[2] < size(square, 1) + ) + operators = merge(local_operators, vertical_operators, horizontal_operators) + H4 = adapt(CuArray, FiniteMPOHamiltonian(grid, operators)) + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + + @test H4 ≈ + adapt(CuArray, FiniteMPOHamiltonian(grid, local_operators)) + + adapt(CuArray, FiniteMPOHamiltonian(grid, vertical_operators)) + + adapt(CuArray, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4 + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + + #H4′= H4 / 3 + 2H4 / 3 + #@test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory} + #H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12))) + #@test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} # more problems with arithmetic operations... + #psi = adapt(CuArray, FiniteMPS(physicalspace(H5), V ⊕ rightunitspace(V))) + #@test expectation_value(psi, H4) ≈ expectation_value(psi, H5) + end +end + +@testset "CuInfiniteMPOHamiltonian $(sectortype(pspace))" for (pspace, Dspace) in zip(pspaces, vspaces) + # generate a 1-2-3 body interaction + operators = ntuple(3) do i + O = rand(ComplexF64, pspace^i, pspace^i) + return O += O' + end + + H1 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPOHamiltonian(operators[1])) + H2 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPOHamiltonian(operators[2])) + H3 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, repeat(InfiniteMPOHamiltonian(operators[3]), 2)) + + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test TensorKit.storagetype(H2) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test TensorKit.storagetype(H3) == CuVector{ComplexF64, CUDA.DeviceMemory} + # make a teststate to measure expectation values for + ψ1 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPS([pspace], [Dspace])) + ψ2 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPS([pspace, pspace], [Dspace, Dspace])) + @test TensorKit.storagetype(ψ1) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test TensorKit.storagetype(ψ2) == CuVector{ComplexF64, CUDA.DeviceMemory} + + #= + e1 = expectation_value(ψ1, H1) + e2 = expectation_value(ψ1, H2) + =# # broken due to BraidingTensor + + # H1 = 2 * H1 - CuArray([1]) # scalar indexing + # @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + # @test e1 * 2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 # broken due to BraidingTensor + + H1 = H1 + H2 + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + + # @test e1 * 2 + e2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 # broken due to BraidingTensor + + H1 = repeat(H1, 2) + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + + #=e1 = expectation_value(ψ2, H1) + e3 = expectation_value(ψ2, H3) + + @test e1 + e3 ≈ expectation_value(ψ2, H1 + H3) atol = 1.0e-10=# # broken due to BraidingTensor + + #H4 = H1 + H3 # broken due to BraidingTensor + #@test TensorKit.storagetype(H4) == CuVector{ComplexF64, CUDA.DeviceMemory} + #h4 = H4 * H4 + #@test TensorKit.storagetype(h4) == CuVector{ComplexF64, CUDA.DeviceMemory} + #@test real(expectation_value(ψ2, H4)) >= 0 # broken due to BraidingTensor +end diff --git a/test/cuda/states.jl b/test/cuda/states.jl index 39627ac41..7c036ea09 100644 --- a/test/cuda/states.jl +++ b/test/cuda/states.jl @@ -50,3 +50,43 @@ using Adapt, CUDA, cuTENSOR @test norm(2 * ψ + ψ - 3 * ψ) ≈ 0.0 atol = sqrt(eps(real(ComplexF64))) end + +@testset "CuMultilineMPS ($(sectortype(D)), $elt)" for (D, d, elt) in + [(ℙ^10, ℙ^2, ComplexF64), (Rep[U₁](1 => 3), Rep[U₁](0 => 1), ComplexF32)] + tol = Float64(eps(real(elt)) * 100) + ψ = adapt( + CuVector{elt, CUDA.DeviceMemory}, MultilineMPS( + [ + rand(elt, D * d, D) rand(elt, D * d, D) + rand(elt, D * d, D) rand(elt, D * d, D) + ]; tol + ) + ) + + @test GeometryStyle(typeof(ψ)) == InfiniteChainStyle() + @test GeometryStyle(ψ) == InfiniteChainStyle() + @test TensorKit.storagetype(ψ) == CuVector{elt, CUDA.DeviceMemory} + @test TensorKit.sectortype(ψ) == sectortype(D) + + @test !isfinite(typeof(ψ)) + + @test physicalspace(ψ) == fill(d, 2, 2) + @test all(x -> x ≾ D, left_virtualspace(ψ)) + @test all(x -> x ≾ D, right_virtualspace(ψ)) + + for i in 1:size(ψ, 1), j in 1:size(ψ, 2) + @plansor difference[-1 -2; -3] := ψ.AL[i, j][-1 -2; 1] * ψ.C[i, j][1; -3] - + ψ.C[i, j - 1][-1; 1] * ψ.AR[i, j][1 -2; -3] + @test norm(difference, Inf) < tol * 10 + + @test l_LL(ψ, i, j) * TransferMatrix(ψ.AL[i, j], ψ.AL[i, j]) ≈ l_LL(ψ, i, j + 1) + @test l_LR(ψ, i, j) * TransferMatrix(ψ.AL[i, j], ψ.AR[i, j]) ≈ l_LR(ψ, i, j + 1) + @test l_RL(ψ, i, j) * TransferMatrix(ψ.AR[i, j], ψ.AL[i, j]) ≈ l_RL(ψ, i, j + 1) + @test l_RR(ψ, i, j) * TransferMatrix(ψ.AR[i, j], ψ.AR[i, j]) ≈ l_RR(ψ, i, j + 1) + + @test TransferMatrix(ψ.AL[i, j], ψ.AL[i, j]) * r_LL(ψ, i, j) ≈ r_LL(ψ, i, j + 1) + @test TransferMatrix(ψ.AL[i, j], ψ.AR[i, j]) * r_LR(ψ, i, j) ≈ r_LR(ψ, i, j + 1) + @test TransferMatrix(ψ.AR[i, j], ψ.AL[i, j]) * r_RL(ψ, i, j) ≈ r_RL(ψ, i, j + 1) + @test TransferMatrix(ψ.AR[i, j], ψ.AR[i, j]) * r_RR(ψ, i, j) ≈ r_RR(ψ, i, j + 1) + end +end From 4a0d5055745397eeb7064014fd5bf0fa340cb29d Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 27 Mar 2026 05:57:50 -0400 Subject: [PATCH 2/7] Fix complex/real for JordanMPOTensor --- src/operators/jordanmpotensor.jl | 4 +++- test/cuda/operators.jl | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/operators/jordanmpotensor.jl b/src/operators/jordanmpotensor.jl index 04c4ee1cb..49704b048 100644 --- a/src/operators/jordanmpotensor.jl +++ b/src/operators/jordanmpotensor.jl @@ -205,7 +205,8 @@ end for f in (:real, :complex) @eval function Base.$f(W::JordanMPOTensor) E = $f(scalartype(W)) - W′ = JordanMPOTensor{E}(undef, space(W)) + TE = TensorKit.similarstoragetype(TensorKit.storagetype(W), E) + W′ = similar(W, TE) for (I, v) in nonzero_pairs(W.A) W′.A[I] = $f(v) end @@ -218,6 +219,7 @@ for f in (:real, :complex) for (I, v) in nonzero_pairs(W.D) W′.D[I] = $f(v) end + @show TensorKit.storagetype(W), TensorKit.storagetype(W′) return W′ end end diff --git a/test/cuda/operators.jl b/test/cuda/operators.jl index 47da1ed3e..f09cb2b4b 100644 --- a/test/cuda/operators.jl +++ b/test/cuda/operators.jl @@ -110,13 +110,13 @@ end @test TensorKit.storagetype(H3) == CuVector{T, CUDA.DeviceMemory} @test scalartype(H1) == scalartype(H2) == scalartype(H3) == T - #=if !(T <: Complex) + if !(T <: Complex) for H in (H1, H2, H3) Hc = @constinferred complex(H) @test scalartype(Hc) == complex(T) @test TensorKit.storagetype(Hc) == CuVector{complex(T), CUDA.DeviceMemory} end - end=# # TODO + end # check if constructor works by converting back to tensormap #= H1_tm = convert(TensorMap, H1) From 59dca6fa5b57eb16466012ceab4d7b2c6fa660e9 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 31 Mar 2026 08:34:35 -0400 Subject: [PATCH 3/7] Incremental --- src/operators/abstractmpo.jl | 2 +- src/operators/jordanmpotensor.jl | 9 ++++++--- src/operators/mpohamiltonian.jl | 14 ++++++-------- src/operators/ortho.jl | 17 +++++++++-------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/operators/abstractmpo.jl b/src/operators/abstractmpo.jl index 54ecf0358..01c49500c 100644 --- a/src/operators/abstractmpo.jl +++ b/src/operators/abstractmpo.jl @@ -99,7 +99,7 @@ Base.:\(α::Number, mpo::AbstractMPO) = scale(mpo, inv(α)) function VectorInterface.scale(mpo::AbstractMPO, α::Number) T = VectorInterface.promote_scale(scalartype(mpo), scalartype(α)) - dst = similar(mpo, T) + dst = similar(mpo, TensorKit.similarstoragetype(storagetype(mpo), T)) return scale!(dst, mpo, α) end diff --git a/src/operators/jordanmpotensor.jl b/src/operators/jordanmpotensor.jl index 49704b048..2563496de 100644 --- a/src/operators/jordanmpotensor.jl +++ b/src/operators/jordanmpotensor.jl @@ -135,6 +135,10 @@ function jordanmpotensortype(::Type{O}) where {O <: AbstractTensorMap} end Base.similar(W::JordanMPOTensor, ::Type{TorA}) where {TorA} = jordanmpotensortype(spacetype(W), TorA)(undef, space(W)) +Base.similar(W::JordanMPOTensor, V::VectorSpace) = + jordanmpotensortype(spacetype(V), storagetype(W))(undef, V) +Base.similar(W::JordanMPOTensor, V::VectorSpace, ::Type{TorA}) where {TorA} = + jordanmpotensortype(spacetype(V), TorA)(undef, V) # Properties # ---------- @@ -219,7 +223,6 @@ for f in (:real, :complex) for (I, v) in nonzero_pairs(W.D) W′.D[I] = $f(v) end - @show TensorKit.storagetype(W), TensorKit.storagetype(W′) return W′ end end @@ -244,7 +247,7 @@ end elseif 1 < i < size(W, 1) && 1 < j < size(W, 4) return W.A[i - 1, 1, 1, j - 1] else - return zeros(scalartype(W), eachspace(W)[i, 1, 1, j]) + return zeros(storagetype(W), eachspace(W)[i, 1, 1, j]) end end @@ -359,7 +362,7 @@ function add_physical_charge(O::JordanMPOTensor, charge::Sector) Vdst = left_virtualspace(O) ⊗ fuse(physicalspace(O), auxspace) ← fuse(physicalspace(O), auxspace) ⊗ right_virtualspace(O) - Odst = JordanMPOTensor{scalartype(O)}(undef, Vdst) + Odst = similar(O, Vdst) for (I, v) in nonzero_pairs(O) Odst[I] = add_physical_charge(v, charge) end diff --git a/src/operators/mpohamiltonian.jl b/src/operators/mpohamiltonian.jl index 1f2e7f980..bb544d0ae 100644 --- a/src/operators/mpohamiltonian.jl +++ b/src/operators/mpohamiltonian.jl @@ -503,10 +503,8 @@ function InfiniteMPOHamiltonian(lattice′::AbstractArray{<:VectorSpace}, local_ end # partial sort by interaction range - local_mpos = sort!( - map(Base.Fix1(instantiate_operator, lattice), collect(local_operators)); - by = x -> length(x[1]) - ) + unsorted_mpos = map(Base.Fix1(instantiate_operator, lattice), [local_operators...]) + local_mpos = sort!(unsorted_mpos; by = x -> length(x[1])) for (sites, local_mpo) in local_mpos local key_R # trick to define key_R before the first iteration @@ -701,8 +699,8 @@ end function Base.similar(H::MPOHamiltonian, ::Type{O}, L::Int) where {O <: MPOTensor} return MPOHamiltonian(similar(parent(H), O, L)) end -function Base.similar(H::MPOHamiltonian, ::Type{T}) where {T <: Number} - return MPOHamiltonian(similar.(parent(H), T)) +function Base.similar(H::MPOHamiltonian, ::Type{TorA}) where {TorA <: Union{Number, DenseVector}} + return MPOHamiltonian(similar.(parent(H), TorA)) end # Linear Algebra @@ -727,7 +725,7 @@ function Base.:+( ⊞(Vtriv, right_virtualspace(A), Vtriv) V = Vleft ⊗ physicalspace(A) ← physicalspace(A) ⊗ Vright - H[i] = eltype(H)(V, A, B, C, D) + H[i] = JordanMPOTensor(V, A, B, C, D) end return FiniteMPOHamiltonian(H) end @@ -749,7 +747,7 @@ function Base.:+( Vright = ⊞(Vtriv, right_virtualspace(A), Vtriv) V = Vleft ⊗ physicalspace(A) ← physicalspace(A) ⊗ Vright - H[i] = eltype(H)(V, A, B, C, D) + H[i] = JordanMPOTensor(V, A, B, C, D) end return InfiniteMPOHamiltonian(H) end diff --git a/src/operators/ortho.jl b/src/operators/ortho.jl index d21985797..bbcaa164d 100644 --- a/src/operators/ortho.jl +++ b/src/operators/ortho.jl @@ -11,7 +11,7 @@ function left_canonicalize!( d = sqrt(dim(P)) # orthogonalize second column against first - WI = removeunit(W[1, 1, 1, 1], 1) + WI = removeunit(W[1, 1, 1, 1], 1; storage_type = TensorKit.storagetype(H)) @plansor t[l; r] := conj(WI[p; p' l]) * W.C[p; p' r] # TODO: the following is currently broken due to a TensorKit bug # @plansor C′[p; p' r] := W.C[p; p' r] - WI[p; p' l] * t[l; r] @@ -36,7 +36,7 @@ function left_canonicalize!( end H[i] = JordanMPOTensor(codomain(W) ← physicalspace(W) ⊗ V, Q1, W.B, Q2, W.D) else - tmp = transpose(cat(insertleftunit(C′, 1), W.A; dims = 1), ((3, 1, 2), (4,))) + tmp = transpose(cat(insertleftunit(C′, 1; storage_type = TensorKit.storagetype(H)), W.A; dims = 1), ((3, 1, 2), (4,))) Q, R = left_orth!(tmp; alg) if dim(R) == 0 # fully truncated @@ -65,7 +65,7 @@ function left_canonicalize!( if size(W′, 4) > 1 @plansor C′[l p; p' r] := t[l; r'] * W′.A[r' p; p' r] - C′ = add!(removeunit(C′, 1), W′.C) + C′ = add!(removeunit(C′, 1; storage_type = TensorKit.storagetype(H)), W′.C) else C′ = W′.C # empty end @@ -77,7 +77,7 @@ function left_canonicalize!( end @plansor D′[l p; p'] := t[l; r] * W′.B[r p; p'] - D′ = add!(removeunit(D′, 1), W′.D) + D′ = add!(removeunit(D′, 1; storage_type = TensorKit.storagetype(H)), W′.D) H[i + 1] = JordanMPOTensor( right_virtualspace(H[i]) ⊗ physicalspace(W′) ← domain(W′), @@ -99,7 +99,7 @@ function right_canonicalize!( d = sqrt(dim(P)) # orthogonalize second row against last - WI = removeunit(W[end, 1, 1, end], 4) + WI = removeunit(W[end, 1, 1, end], 4; storage_type = TensorKit.storagetype(H)) @plansor t[l; r] := conj(WI[r p; p']) * W.B[l p; p'] # TODO: the following is currently broken due to a TensorKit bug # @plansor B′[l p; p'] := W.B[l p; p'] - WI[r p; p'] * t[l; r] @@ -124,7 +124,8 @@ function right_canonicalize!( end H[i] = JordanMPOTensor(V ⊗ P ← domain(W), Q1, Q2, W.C, W.D) else - tmp = transpose(cat(insertleftunit(B′, 4), W.A; dims = 4), ((1,), (3, 4, 2))) + B′′ = insertleftunit(B′, 4; storage_type = TensorKit.storagetype(H)) + tmp = transpose(cat(B′′, W.A; dims = 4), ((1,), (3, 4, 2))) R, Q = right_orth!(tmp; alg) if dim(R) == 0 V = _rightunit ⊞ _rightunit @@ -152,7 +153,7 @@ function right_canonicalize!( if size(W′, 1) > 1 @plansor B′[l p; p' r] := W′.A[l p; p' r'] * t[r'; r] - B′ = add!(removeunit(B′, 4), W′.B) + B′ = add!(removeunit(B′, 4; storage_type = TensorKit.storagetype(H)), W′.B) else B′ = W′.B end @@ -164,7 +165,7 @@ function right_canonicalize!( end @plansor D′[p; p' r] := W′.C[p; p' r'] * t[r'; r] - D′ = add!(removeunit(D′, 3), W′.D) + D′ = add!(removeunit(D′, 3; storage_type = TensorKit.storagetype(H)), W′.D) H[i - 1] = JordanMPOTensor(codomain(W′) ← physicalspace(W′) ⊗ V, A′, B′, C′, D′) return H end From b79ceef808b8043a226fbfccf99846aa57858c0a Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 31 Mar 2026 08:41:52 -0400 Subject: [PATCH 4/7] Use branch sources --- Project.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Project.toml b/Project.toml index 51e51efdf..a5320f170 100644 --- a/Project.toml +++ b/Project.toml @@ -76,3 +76,8 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] test = ["Aqua", "Adapt", "CUDA", "cuTENSOR", "Pkg", "Test", "TestExtras", "Plots", "Combinatorics", "ParallelTestRunner", "TensorKitTensors"] + +[sources] +TensorKit = {url = "https://github.com/QuantumKitHub/TensorKit.jl", rev = "ksh/cuda_tweaks"} +BlockTensorKit = {url = "https://github.com/lkdvos/BlockTensorKit.jl", rev = "ksh/gpu"} + From 13ac278012cba8b6bdbdc6d535c50826e16f132e Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 31 Mar 2026 08:43:35 -0400 Subject: [PATCH 5/7] Fix URL --- Project.toml | 2 +- test/cuda/operators.jl | 81 +++++++++++++++++++++++------------------- test/cuda/states.jl | 3 +- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/Project.toml b/Project.toml index a5320f170..41d87c327 100644 --- a/Project.toml +++ b/Project.toml @@ -79,5 +79,5 @@ test = ["Aqua", "Adapt", "CUDA", "cuTENSOR", "Pkg", "Test", "TestExtras", "Plots [sources] TensorKit = {url = "https://github.com/QuantumKitHub/TensorKit.jl", rev = "ksh/cuda_tweaks"} -BlockTensorKit = {url = "https://github.com/lkdvos/BlockTensorKit.jl", rev = "ksh/gpu"} +BlockTensorKit = {url = "https://github.com/kshyatt/BlockTensorKit.jl", rev = "ksh/gpu"} diff --git a/test/cuda/operators.jl b/test/cuda/operators.jl index f09cb2b4b..38d61eb53 100644 --- a/test/cuda/operators.jl +++ b/test/cuda/operators.jl @@ -10,10 +10,11 @@ using Adapt, CUDA, cuTENSOR, CUDA.CUBLAS # TODO revisit this once https://github.com/QuantumKitHub/MatrixAlgebraKit.jl/issues/176 # is resolved MPSKit.Defaults.alg_svd() = CUSOLVER_QRIteration() +MPSKit.Defaults.alg_lq() = LQViaTransposedQR(CUSOLVER_HouseholderQR()) pspaces = (ℙ^4, Rep[U₁](0 => 2), Rep[SU₂](1 => 1)) vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 // 2 => 1)) - +#= @testset "CuFiniteMPO" for V in (ℂ^2, U1Space(0 => 1, 1 => 1)) # start from random operators L = 4 @@ -90,7 +91,7 @@ vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 @test dot(mpomps₁, mpomps₁) ≈ dot(mpo₁, mpo₁) end - +=# @testset "Finite CuMPOHamiltonian" begin L = 3 T = ComplexF64 @@ -98,10 +99,12 @@ end lattice = fill(V, L) O₁ = randn(T, V, V) O₁ += O₁' - E = id(storagetype(O₁), domain(O₁)) + E = id(CuVector{T, CUDA.DeviceMemory}, domain(O₁)) O₂ = randn(T, V^2 ← V^2) O₂ += O₂' + dO₁ = adapt(CuVector{T, CUDA.DeviceMemory}, O₁) + dO₂ = adapt(CuVector{T, CUDA.DeviceMemory}, O₂) H1 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, i => O₁ for i in 1:L)) H2 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, (i, i + 1) => O₂ for i in 1:(L - 1))) H3 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 1 => O₁, (2, 3) => O₂, (1, 3) => O₂)) @@ -119,17 +122,17 @@ end end # check if constructor works by converting back to tensormap - #= H1_tm = convert(TensorMap, H1) - operators = vcat(fill(E, L - 1), O₁) + H1_tm = convert(TensorMap, H1) + operators = vcat(fill(E, L - 1), dO₁) @test H1_tm ≈ mapreduce(+, 1:L) do i return reduce(⊗, circshift(operators, i)) end - operators = vcat(fill(E, L - 2), O₂) + operators = vcat(fill(E, L - 2), dO₂) @test convert(TensorMap, H2) ≈ mapreduce(+, 1:(L - 1)) do i return reduce(⊗, circshift(operators, i)) end @test convert(TensorMap, H3) ≈ - O₁ ⊗ E ⊗ E + E ⊗ O₂ + permute(O₂ ⊗ E, ((1, 3, 2), (4, 6, 5))) =# # needs a fix in BlockTensorKit + dO₁ ⊗ E ⊗ E + E ⊗ dO₂ + permute(dO₂ ⊗ E, ((1, 3, 2), (4, 6, 5))) # check if adding terms on the same site works single_terms = Iterators.flatten(Iterators.repeated((i => O₁ / 2 for i in 1:L), 2)) @@ -149,20 +152,27 @@ end adapt(CuArray, FiniteMPOHamiltonian(lattice, 2 => O₁)) + adapt(CuArray, FiniteMPOHamiltonian(lattice, 3 => O₁)) @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} - #@test 0.8 * H1 + 0.2 * H1 ≈ H1 atol = 1.0e-6 # broken due to JordanMPOTensorMap - #=@test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6 + #= + H1′a = 0.8 * H1 + @test TensorKit.storagetype(H1′a) == CuVector{T, CUDA.DeviceMemory} + H1′b = 0.2 * H1 + @test TensorKit.storagetype(H1′b) == CuVector{T, CUDA.DeviceMemory} + H1′ = H1′a + H1′b + @test TensorKit.storagetype(H1′) == CuVector{T, CUDA.DeviceMemory} + @test H1′ ≈ H1 atol = 1.0e-6 + @test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6 H1_trunc = changebonds(H1, SvdCut(; trscheme = truncrank(0))) @test H1_trunc ≈ H1 - @test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1))=# # needs fix in BlockTensorKit + @test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1)) # test dot and application state = rand(T, prod(lattice)) mps = adapt(CuArray, FiniteMPS(state)) - #=@test convert(TensorMap, H1 * mps) ≈ H1_tm * state # needs fix in BlockTensorKit + @test convert(TensorMap, H1 * mps) ≈ H1_tm * state @test H1 * state ≈ H1_tm * state - @test dot(mps, H2, mps) ≈ dot(mps, H2 * mps)=# - + @test dot(mps, H2, mps) ≈ dot(mps, H2 * mps) + =# # test constructor from dictionary with mixed linear and Cartesian lattice indices as keys grid = square = fill(V, 3, 3) @@ -180,17 +190,17 @@ end @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} @test H4 ≈ - adapt(CuArray, FiniteMPOHamiltonian(grid, local_operators)) + - adapt(CuArray, FiniteMPOHamiltonian(grid, vertical_operators)) + - adapt(CuArray, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4 + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, local_operators)) + + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, vertical_operators)) + + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4 @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} - #H4′= H4 / 3 + 2H4 / 3 - #@test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory} - #H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12))) - #@test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} # more problems with arithmetic operations... - #psi = adapt(CuArray, FiniteMPS(physicalspace(H5), V ⊕ rightunitspace(V))) - #@test expectation_value(psi, H4) ≈ expectation_value(psi, H5) + #H4′= H4 / 3 + 2H4 / 3 # trouble here with cat_t + #=@test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory} + H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12))) + @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} + psi = adapt(CuArray, FiniteMPS(physicalspace(H5), V ⊕ rightunitspace(V))) + @test expectation_value(psi, H4) ≈ expectation_value(psi, H5)=# end end @@ -214,31 +224,30 @@ end @test TensorKit.storagetype(ψ1) == CuVector{ComplexF64, CUDA.DeviceMemory} @test TensorKit.storagetype(ψ2) == CuVector{ComplexF64, CUDA.DeviceMemory} - #= e1 = expectation_value(ψ1, H1) e2 = expectation_value(ψ1, H2) - =# # broken due to BraidingTensor - # H1 = 2 * H1 - CuArray([1]) # scalar indexing - # @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} - # @test e1 * 2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 # broken due to BraidingTensor + H1 = 2 * H1 + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + H1 -= [1] + @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test e1 * 2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 H1 = H1 + H2 @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} - - # @test e1 * 2 + e2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 # broken due to BraidingTensor + @test e1 * 2 + e2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10 H1 = repeat(H1, 2) @test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory} - #=e1 = expectation_value(ψ2, H1) + e1 = expectation_value(ψ2, H1) e3 = expectation_value(ψ2, H3) - @test e1 + e3 ≈ expectation_value(ψ2, H1 + H3) atol = 1.0e-10=# # broken due to BraidingTensor + @test e1 + e3 ≈ expectation_value(ψ2, H1 + H3) atol = 1.0e-10 - #H4 = H1 + H3 # broken due to BraidingTensor - #@test TensorKit.storagetype(H4) == CuVector{ComplexF64, CUDA.DeviceMemory} - #h4 = H4 * H4 - #@test TensorKit.storagetype(h4) == CuVector{ComplexF64, CUDA.DeviceMemory} - #@test real(expectation_value(ψ2, H4)) >= 0 # broken due to BraidingTensor + H4 = H1 + H3 + @test TensorKit.storagetype(H4) == CuVector{ComplexF64, CUDA.DeviceMemory} + h4 = H4 * H4 + @test TensorKit.storagetype(h4) == CuVector{ComplexF64, CUDA.DeviceMemory} + @test real(expectation_value(ψ2, H4)) >= 0 end diff --git a/test/cuda/states.jl b/test/cuda/states.jl index 7c036ea09..a37712209 100644 --- a/test/cuda/states.jl +++ b/test/cuda/states.jl @@ -4,7 +4,7 @@ using MPSKit: GeometryStyle, InfiniteChainStyle, TransferMatrix using TensorKit using TensorKit: ℙ using Adapt, CUDA, cuTENSOR - +#= @testset "CuMPS ($(sectortype(D)), $elt)" for (D, d, elt) in [(ℙ^10, ℙ^2, ComplexF64), (Rep[U₁](1 => 3), Rep[U₁](0 => 1), ComplexF64)] tol = Float64(eps(real(elt)) * 100) @@ -90,3 +90,4 @@ end @test TransferMatrix(ψ.AR[i, j], ψ.AR[i, j]) * r_RR(ψ, i, j) ≈ r_RR(ψ, i, j + 1) end end +=# From e3b0186990c07c131d249da48a14503712518f39 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 31 Mar 2026 13:42:15 -0400 Subject: [PATCH 6/7] Last fix? --- src/operators/abstractmpo.jl | 6 +++++- test/cuda/operators.jl | 16 ++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/operators/abstractmpo.jl b/src/operators/abstractmpo.jl index 01c49500c..4e021eae6 100644 --- a/src/operators/abstractmpo.jl +++ b/src/operators/abstractmpo.jl @@ -152,7 +152,11 @@ Compute the mpo tensor that arises from multiplying MPOs. """ function fuse_mul_mpo(O1, O2) TT = promote_type(scalartype(O1), scalartype(O2)) - T = TensorKit.similarstoragetype(storagetype(O1), TT) + T = if O1 isa BraidingTensor + TensorKit.similarstoragetype(storagetype(O2), TT) + else + TensorKit.similarstoragetype(storagetype(O1), TT) + end F_left = fuser(T, left_virtualspace(O2), left_virtualspace(O1)) F_right = fuser(T, right_virtualspace(O2), right_virtualspace(O1)) return _fuse_mpo_mpo(O1, O2, F_left, F_right) diff --git a/test/cuda/operators.jl b/test/cuda/operators.jl index 38d61eb53..facd99682 100644 --- a/test/cuda/operators.jl +++ b/test/cuda/operators.jl @@ -14,7 +14,7 @@ MPSKit.Defaults.alg_lq() = LQViaTransposedQR(CUSOLVER_HouseholderQR()) pspaces = (ℙ^4, Rep[U₁](0 => 2), Rep[SU₂](1 => 1)) vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 // 2 => 1)) -#= + @testset "CuFiniteMPO" for V in (ℂ^2, U1Space(0 => 1, 1 => 1)) # start from random operators L = 4 @@ -91,7 +91,7 @@ vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 @test dot(mpomps₁, mpomps₁) ≈ dot(mpo₁, mpo₁) end -=# + @testset "Finite CuMPOHamiltonian" begin L = 3 T = ComplexF64 @@ -152,7 +152,7 @@ end adapt(CuArray, FiniteMPOHamiltonian(lattice, 2 => O₁)) + adapt(CuArray, FiniteMPOHamiltonian(lattice, 3 => O₁)) @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} - #= + H1′a = 0.8 * H1 @test TensorKit.storagetype(H1′a) == CuVector{T, CUDA.DeviceMemory} H1′b = 0.2 * H1 @@ -172,7 +172,7 @@ end @test convert(TensorMap, H1 * mps) ≈ H1_tm * state @test H1 * state ≈ H1_tm * state @test dot(mps, H2, mps) ≈ dot(mps, H2 * mps) - =# + # test constructor from dictionary with mixed linear and Cartesian lattice indices as keys grid = square = fill(V, 3, 3) @@ -195,15 +195,15 @@ end adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4 @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} - #H4′= H4 / 3 + 2H4 / 3 # trouble here with cat_t - #=@test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory} + H4′= H4 / 3 + 2H4 / 3 + @test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory} H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12))) @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} psi = adapt(CuArray, FiniteMPS(physicalspace(H5), V ⊕ rightunitspace(V))) - @test expectation_value(psi, H4) ≈ expectation_value(psi, H5)=# + @test expectation_value(psi, H4) ≈ expectation_value(psi, H5) end end - +=# @testset "CuInfiniteMPOHamiltonian $(sectortype(pspace))" for (pspace, Dspace) in zip(pspaces, vspaces) # generate a 1-2-3 body interaction operators = ntuple(3) do i From 9b224da4fc67b7bd47b6dbf5a01116b65d896752 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Thu, 2 Apr 2026 06:35:39 -0400 Subject: [PATCH 7/7] Working around BraidingTensor --- src/operators/abstractmpo.jl | 8 +- src/operators/mpo.jl | 2 +- src/operators/mpohamiltonian.jl | 10 +- src/operators/ortho.jl | 30 +++-- test/cuda/operators.jl | 222 +++++++++++++++++--------------- 5 files changed, 151 insertions(+), 121 deletions(-) diff --git a/src/operators/abstractmpo.jl b/src/operators/abstractmpo.jl index 4e021eae6..85fb3e5a2 100644 --- a/src/operators/abstractmpo.jl +++ b/src/operators/abstractmpo.jl @@ -153,10 +153,10 @@ Compute the mpo tensor that arises from multiplying MPOs. function fuse_mul_mpo(O1, O2) TT = promote_type(scalartype(O1), scalartype(O2)) T = if O1 isa BraidingTensor - TensorKit.similarstoragetype(storagetype(O2), TT) - else - TensorKit.similarstoragetype(storagetype(O1), TT) - end + TensorKit.similarstoragetype(storagetype(O2), TT) + else + TensorKit.similarstoragetype(storagetype(O1), TT) + end F_left = fuser(T, left_virtualspace(O2), left_virtualspace(O1)) F_right = fuser(T, right_virtualspace(O2), right_virtualspace(O1)) return _fuse_mpo_mpo(O1, O2, F_left, F_right) diff --git a/src/operators/mpo.jl b/src/operators/mpo.jl index 8630a0d56..7bf0d16ca 100644 --- a/src/operators/mpo.jl +++ b/src/operators/mpo.jl @@ -62,7 +62,7 @@ eachsite(mpo::InfiniteMPO) = PeriodicArray(eachindex(mpo)) function Base.similar(mpo::MPO{<:MPOTensor}, ::Type{O}, L::Int) where {O <: MPOTensor} return MPO(similar(parent(mpo), O, L)) end -function Base.similar(mpo::MPO, ::Type{T}) where {T <: Number} +function Base.similar(mpo::MPO, ::Type{T}) where {T <: Union{Number, DenseVector}} return MPO(similar.(parent(mpo), T)) end diff --git a/src/operators/mpohamiltonian.jl b/src/operators/mpohamiltonian.jl index bb544d0ae..05fc36f1a 100644 --- a/src/operators/mpohamiltonian.jl +++ b/src/operators/mpohamiltonian.jl @@ -660,6 +660,8 @@ function isemptylevel(H::InfiniteMPOHamiltonian, i::Int) end function Base.convert(::Type{TensorMap}, H::FiniteMPOHamiltonian) + # TODO this FORCES the output tensor to have Vector storage + # in an uncontrollable way, should be fixed L = removeunit(H[1], 1) R = removeunit(H[end], 4) M = Tuple(H[2:(end - 1)]) @@ -853,7 +855,13 @@ end function Base.:*(H::FiniteMPOHamiltonian{<:MPOTensor}, x::AbstractTensorMap) @assert length(H) > 1 @assert numout(x) == length(H) - L = removeunit(H[1], 1) + if H[1] isa BraidingTensor + L′ = removeunit(H[1], 1) + L = similar(L′, TensorKit.storagetype(H)) + copy!(L, L′) + else + L = removeunit(H[1], 1) + end M = Tuple(H[2:(end - 1)]) R = removeunit(H[end], 4) return TensorMap(_apply_finitempo(x, L, M, R)) diff --git a/src/operators/ortho.jl b/src/operators/ortho.jl index bbcaa164d..88a37025d 100644 --- a/src/operators/ortho.jl +++ b/src/operators/ortho.jl @@ -11,7 +11,14 @@ function left_canonicalize!( d = sqrt(dim(P)) # orthogonalize second column against first - WI = removeunit(W[1, 1, 1, 1], 1; storage_type = TensorKit.storagetype(H)) + Wi = W[1, 1, 1, 1] + if Wi isa BraidingTensor + Wi′ = removeunit(Wi, 1) + WI = similar(Wi′, TensorKit.storagetype(H)) + copy!(WI, Wi′) + else + WI = removeunit(Wi, 1) + end @plansor t[l; r] := conj(WI[p; p' l]) * W.C[p; p' r] # TODO: the following is currently broken due to a TensorKit bug # @plansor C′[p; p' r] := W.C[p; p' r] - WI[p; p' l] * t[l; r] @@ -36,7 +43,7 @@ function left_canonicalize!( end H[i] = JordanMPOTensor(codomain(W) ← physicalspace(W) ⊗ V, Q1, W.B, Q2, W.D) else - tmp = transpose(cat(insertleftunit(C′, 1; storage_type = TensorKit.storagetype(H)), W.A; dims = 1), ((3, 1, 2), (4,))) + tmp = transpose(cat(insertleftunit(C′, 1), W.A; dims = 1), ((3, 1, 2), (4,))) Q, R = left_orth!(tmp; alg) if dim(R) == 0 # fully truncated @@ -65,7 +72,7 @@ function left_canonicalize!( if size(W′, 4) > 1 @plansor C′[l p; p' r] := t[l; r'] * W′.A[r' p; p' r] - C′ = add!(removeunit(C′, 1; storage_type = TensorKit.storagetype(H)), W′.C) + C′ = add!(removeunit(C′, 1), W′.C) else C′ = W′.C # empty end @@ -77,7 +84,7 @@ function left_canonicalize!( end @plansor D′[l p; p'] := t[l; r] * W′.B[r p; p'] - D′ = add!(removeunit(D′, 1; storage_type = TensorKit.storagetype(H)), W′.D) + D′ = add!(removeunit(D′, 1), W′.D) H[i + 1] = JordanMPOTensor( right_virtualspace(H[i]) ⊗ physicalspace(W′) ← domain(W′), @@ -99,7 +106,14 @@ function right_canonicalize!( d = sqrt(dim(P)) # orthogonalize second row against last - WI = removeunit(W[end, 1, 1, end], 4; storage_type = TensorKit.storagetype(H)) + Wi = W[end, 1, 1, end] + if Wi isa BraidingTensor + Wi′ = removeunit(Wi, 4) + WI = similar(Wi′, TensorKit.storagetype(H)) + copy!(WI, Wi′) + else + WI = removeunit(Wi, 4) + end @plansor t[l; r] := conj(WI[r p; p']) * W.B[l p; p'] # TODO: the following is currently broken due to a TensorKit bug # @plansor B′[l p; p'] := W.B[l p; p'] - WI[r p; p'] * t[l; r] @@ -124,7 +138,7 @@ function right_canonicalize!( end H[i] = JordanMPOTensor(V ⊗ P ← domain(W), Q1, Q2, W.C, W.D) else - B′′ = insertleftunit(B′, 4; storage_type = TensorKit.storagetype(H)) + B′′ = insertleftunit(B′, 4) tmp = transpose(cat(B′′, W.A; dims = 4), ((1,), (3, 4, 2))) R, Q = right_orth!(tmp; alg) if dim(R) == 0 @@ -153,7 +167,7 @@ function right_canonicalize!( if size(W′, 1) > 1 @plansor B′[l p; p' r] := W′.A[l p; p' r'] * t[r'; r] - B′ = add!(removeunit(B′, 4; storage_type = TensorKit.storagetype(H)), W′.B) + B′ = add!(removeunit(B′, 4), W′.B) else B′ = W′.B end @@ -165,7 +179,7 @@ function right_canonicalize!( end @plansor D′[p; p' r] := W′.C[p; p' r'] * t[r'; r] - D′ = add!(removeunit(D′, 3; storage_type = TensorKit.storagetype(H)), W′.D) + D′ = add!(removeunit(D′, 3), W′.D) H[i - 1] = JordanMPOTensor(codomain(W′) ← physicalspace(W′) ⊗ V, A′, B′, C′, D′) return H end diff --git a/test/cuda/operators.jl b/test/cuda/operators.jl index facd99682..1913866ef 100644 --- a/test/cuda/operators.jl +++ b/test/cuda/operators.jl @@ -92,118 +92,126 @@ vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 @test dot(mpomps₁, mpomps₁) ≈ dot(mpo₁, mpo₁) end -@testset "Finite CuMPOHamiltonian" begin +@testset "Finite CuMPOHamiltonian T ($T), V ($(spacetype(V)))" for T in (Float64, ComplexF64), V in (ℂ^2, U1Space(-1 => 1, 0 => 1, 1 => 1)) L = 3 - T = ComplexF64 - for T in (Float64, ComplexF64), V in (ℂ^2, U1Space(-1 => 1, 0 => 1, 1 => 1)) - lattice = fill(V, L) - O₁ = randn(T, V, V) - O₁ += O₁' - E = id(CuVector{T, CUDA.DeviceMemory}, domain(O₁)) - O₂ = randn(T, V^2 ← V^2) - O₂ += O₂' - - dO₁ = adapt(CuVector{T, CUDA.DeviceMemory}, O₁) - dO₂ = adapt(CuVector{T, CUDA.DeviceMemory}, O₂) - H1 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, i => O₁ for i in 1:L)) - H2 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, (i, i + 1) => O₂ for i in 1:(L - 1))) - H3 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 1 => O₁, (2, 3) => O₂, (1, 3) => O₂)) - @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} - @test TensorKit.storagetype(H2) == CuVector{T, CUDA.DeviceMemory} - @test TensorKit.storagetype(H3) == CuVector{T, CUDA.DeviceMemory} - - @test scalartype(H1) == scalartype(H2) == scalartype(H3) == T - if !(T <: Complex) - for H in (H1, H2, H3) - Hc = @constinferred complex(H) - @test scalartype(Hc) == complex(T) - @test TensorKit.storagetype(Hc) == CuVector{complex(T), CUDA.DeviceMemory} - end + lattice = fill(V, L) + O₁ = randn(T, V, V) + O₁ += O₁' + E = id(CuVector{T, CUDA.DeviceMemory}, domain(O₁)) + O₂ = randn(T, V^2 ← V^2) + O₂ += O₂' + + dO₁ = adapt(CuVector{T, CUDA.DeviceMemory}, O₁) + dO₂ = adapt(CuVector{T, CUDA.DeviceMemory}, O₂) + H1 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, i => O₁ for i in 1:L)) + H2 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, (i, i + 1) => O₂ for i in 1:(L - 1))) + H3 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 1 => O₁, (2, 3) => O₂, (1, 3) => O₂)) + @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} + @test TensorKit.storagetype(H2) == CuVector{T, CUDA.DeviceMemory} + @test TensorKit.storagetype(H3) == CuVector{T, CUDA.DeviceMemory} + + @test scalartype(H1) == scalartype(H2) == scalartype(H3) == T + if !(T <: Complex) + for H in (H1, H2, H3) + Hc = @constinferred complex(H) + @test scalartype(Hc) == complex(T) + @test TensorKit.storagetype(Hc) == CuVector{complex(T), CUDA.DeviceMemory} end + end - # check if constructor works by converting back to tensormap - H1_tm = convert(TensorMap, H1) - operators = vcat(fill(E, L - 1), dO₁) - @test H1_tm ≈ mapreduce(+, 1:L) do i - return reduce(⊗, circshift(operators, i)) - end - operators = vcat(fill(E, L - 2), dO₂) - @test convert(TensorMap, H2) ≈ mapreduce(+, 1:(L - 1)) do i - return reduce(⊗, circshift(operators, i)) - end - @test convert(TensorMap, H3) ≈ - dO₁ ⊗ E ⊗ E + E ⊗ dO₂ + permute(dO₂ ⊗ E, ((1, 3, 2), (4, 6, 5))) - - # check if adding terms on the same site works - single_terms = Iterators.flatten(Iterators.repeated((i => O₁ / 2 for i in 1:L), 2)) - H4 = adapt(CuArray, FiniteMPOHamiltonian(lattice, single_terms)) - @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} - @test H4 ≈ H1 atol = 1.0e-6 - double_terms = Iterators.flatten( - Iterators.repeated(((i, i + 1) => O₂ / 2 for i in 1:(L - 1)), 2) - ) - H5 = adapt(CuArray, FiniteMPOHamiltonian(lattice, double_terms)) - @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} - @test H5 ≈ H2 atol = 1.0e-6 - - # test linear algebra - @test H1 ≈ - adapt(CuArray, FiniteMPOHamiltonian(lattice, 1 => O₁)) + - adapt(CuArray, FiniteMPOHamiltonian(lattice, 2 => O₁)) + - adapt(CuArray, FiniteMPOHamiltonian(lattice, 3 => O₁)) - @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} - - H1′a = 0.8 * H1 - @test TensorKit.storagetype(H1′a) == CuVector{T, CUDA.DeviceMemory} - H1′b = 0.2 * H1 - @test TensorKit.storagetype(H1′b) == CuVector{T, CUDA.DeviceMemory} - H1′ = H1′a + H1′b - @test TensorKit.storagetype(H1′) == CuVector{T, CUDA.DeviceMemory} - @test H1′ ≈ H1 atol = 1.0e-6 - @test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6 - H1_trunc = changebonds(H1, SvdCut(; trscheme = truncrank(0))) - @test H1_trunc ≈ H1 - @test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1)) - - # test dot and application - state = rand(T, prod(lattice)) - mps = adapt(CuArray, FiniteMPS(state)) - - @test convert(TensorMap, H1 * mps) ≈ H1_tm * state - @test H1 * state ≈ H1_tm * state - @test dot(mps, H2, mps) ≈ dot(mps, H2 * mps) - - # test constructor from dictionary with mixed linear and Cartesian lattice indices as keys - grid = square = fill(V, 3, 3) - - local_operators = Dict((I,) => O₁ for I in eachindex(grid)) - I_vertical = CartesianIndex(1, 0) - vertical_operators = Dict( - (I, I + I_vertical) => O₂ for I in eachindex(IndexCartesian(), square) if I[1] < size(square, 1) - ) - I_horizontal = CartesianIndex(0, 1) - horizontal_operators = Dict( - (I, I + I_horizontal) => O₂ for I in eachindex(IndexCartesian(), square) if I[2] < size(square, 1) - ) - operators = merge(local_operators, vertical_operators, horizontal_operators) - H4 = adapt(CuArray, FiniteMPOHamiltonian(grid, operators)) - @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} - - @test H4 ≈ - adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, local_operators)) + - adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, vertical_operators)) + - adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4 - @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} - - H4′= H4 / 3 + 2H4 / 3 - @test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory} - H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12))) - @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} - psi = adapt(CuArray, FiniteMPS(physicalspace(H5), V ⊕ rightunitspace(V))) - @test expectation_value(psi, H4) ≈ expectation_value(psi, H5) + # check if constructor works by converting back to tensormap + H1_tm = convert(TensorMap, H1) + @test TensorKit.storagetype(H1_tm) == CuVector{T, CUDA.DeviceMemory} + operators = vcat(fill(E, L - 1), dO₁) + @test H1_tm ≈ mapreduce(+, 1:L) do i + return reduce(⊗, circshift(operators, i)) + end + operators = vcat(fill(E, L - 2), dO₂) + @test convert(TensorMap, H2) ≈ mapreduce(+, 1:(L - 1)) do i + return reduce(⊗, circshift(operators, i)) end + @test convert(TensorMap, H3) ≈ + dO₁ ⊗ E ⊗ E + E ⊗ dO₂ + permute(dO₂ ⊗ E, ((1, 3, 2), (4, 6, 5))) + + # check if adding terms on the same site works + single_terms = Iterators.flatten(Iterators.repeated((i => O₁ / 2 for i in 1:L), 2)) + H4 = adapt(CuArray, FiniteMPOHamiltonian(lattice, single_terms)) + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + @test H4 ≈ H1 atol = 1.0e-6 + double_terms = Iterators.flatten( + Iterators.repeated(((i, i + 1) => O₂ / 2 for i in 1:(L - 1)), 2) + ) + H5 = adapt(CuArray, FiniteMPOHamiltonian(lattice, double_terms)) + @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} + @test H5 ≈ H2 atol = 1.0e-6 + + # test linear algebra + @test H1 ≈ + adapt(CuArray, FiniteMPOHamiltonian(lattice, 1 => O₁)) + + adapt(CuArray, FiniteMPOHamiltonian(lattice, 2 => O₁)) + + adapt(CuArray, FiniteMPOHamiltonian(lattice, 3 => O₁)) + @test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} + + H1′a = 0.8 * H1 + @test TensorKit.storagetype(H1′a) == CuVector{T, CUDA.DeviceMemory} + H1′b = 0.2 * H1 + @test TensorKit.storagetype(H1′b) == CuVector{T, CUDA.DeviceMemory} + H1′ = H1′a + H1′b + @test TensorKit.storagetype(H1′) == CuVector{T, CUDA.DeviceMemory} + @test H1′ ≈ H1 atol = 1.0e-6 + @test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6 + H1_trunc = changebonds(H1, SvdCut(; trscheme = truncrank(0))) + @test H1_trunc ≈ H1 + @test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1)) + + # test dot and application + state = adapt(CuVector{T, CUDA.DeviceMemory}, rand(T, prod(lattice))) + @test TensorKit.storagetype(state) == CuVector{T, CUDA.DeviceMemory} + mps = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPS(state)) + @test TensorKit.storagetype(mps) == CuVector{T, CUDA.DeviceMemory} + + # TODO fix me! + #=@test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory} + H1mps = H1 * mps + @test TensorKit.storagetype(H1mps) == CuVector{T, CUDA.DeviceMemory} + H1tmstate = H1_tm * state + @test TensorKit.storagetype(H1tmstate) == CuVector{T, CUDA.DeviceMemory} + H1mpstm = convert(TensorMap, H1mps) + @test TensorKit.storagetype(H1mpstm) == CuVector{T, CUDA.DeviceMemory} + @test H1mpstm ≈ H1tmstate + @test H1 * state ≈ H1tmstate + @test dot(mps, H2, mps) ≈ dot(mps, H2 * mps)=# + + # test constructor from dictionary with mixed linear and Cartesian lattice indices as keys + grid = square = fill(V, 3, 3) + + local_operators = Dict((I,) => O₁ for I in eachindex(grid)) + I_vertical = CartesianIndex(1, 0) + vertical_operators = Dict( + (I, I + I_vertical) => O₂ for I in eachindex(IndexCartesian(), square) if I[1] < size(square, 1) + ) + I_horizontal = CartesianIndex(0, 1) + horizontal_operators = Dict( + (I, I + I_horizontal) => O₂ for I in eachindex(IndexCartesian(), square) if I[2] < size(square, 1) + ) + operators = merge(local_operators, vertical_operators, horizontal_operators) + H4 = adapt(CuArray, FiniteMPOHamiltonian(grid, operators)) + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + + @test H4 ≈ + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, local_operators)) + + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, vertical_operators)) + + adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4 + @test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory} + + H4′ = H4 / 3 + 2H4 / 3 + @test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory} + #=H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12))) + @test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory} + psi = adapt(CuArray, FiniteMPS(physicalspace(H5), V ⊕ rightunitspace(V))) + @test expectation_value(psi, H4) ≈ expectation_value(psi, H5)=# # TODO end -=# + @testset "CuInfiniteMPOHamiltonian $(sectortype(pspace))" for (pspace, Dspace) in zip(pspaces, vspaces) # generate a 1-2-3 body interaction operators = ntuple(3) do i