Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,8 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["Aqua", "Adapt", "CUDA", "cuTENSOR", "Pkg", "Test", "TestExtras", "Plots", "Combinatorics", "ParallelTestRunner", "TensorKitTensors"]

[sources]
TensorKit = {url = "https://github.com/QuantumKitHub/TensorKit.jl", rev = "ksh/cuda_tweaks"}
BlockTensorKit = {url = "https://github.com/kshyatt/BlockTensorKit.jl", rev = "ksh/gpu"}

2 changes: 2 additions & 0 deletions ext/MPSKitAdaptExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,7 @@ end
MPSKit.JordanMPOTensor(space(W), adapt(to, W.A), adapt(to, W.B), adapt(to, W.C), adapt(to, W.D))
@inline Adapt.adapt_structure(to, mpo::MPOHamiltonian) =
MPOHamiltonian(map(x -> adapt(to, x), mpo.W))
@inline Adapt.adapt_structure(to, ml::MPSKit.Multiline) = MPSKit.Multiline(map(adapt(to), ml.data))
@inline Adapt.adapt_structure(to, pa::MPSKit.PeriodicArray) = MPSKit.PeriodicArray(map(adapt(to), pa.data))

end
8 changes: 6 additions & 2 deletions src/operators/abstractmpo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ Base.:\(α::Number, mpo::AbstractMPO) = scale(mpo, inv(α))

function VectorInterface.scale(mpo::AbstractMPO, α::Number)
T = VectorInterface.promote_scale(scalartype(mpo), scalartype(α))
dst = similar(mpo, T)
dst = similar(mpo, TensorKit.similarstoragetype(storagetype(mpo), T))
return scale!(dst, mpo, α)
end

Expand Down Expand Up @@ -152,7 +152,11 @@ Compute the mpo tensor that arises from multiplying MPOs.
"""
function fuse_mul_mpo(O1, O2)
TT = promote_type(scalartype(O1), scalartype(O2))
T = TensorKit.similarstoragetype(storagetype(O1), TT)
T = if O1 isa BraidingTensor
TensorKit.similarstoragetype(storagetype(O2), TT)
else
TensorKit.similarstoragetype(storagetype(O1), TT)
end
F_left = fuser(T, left_virtualspace(O2), left_virtualspace(O1))
F_right = fuser(T, right_virtualspace(O2), right_virtualspace(O1))
return _fuse_mpo_mpo(O1, O2, F_left, F_right)
Expand Down
11 changes: 8 additions & 3 deletions src/operators/jordanmpotensor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ function jordanmpotensortype(::Type{O}) where {O <: AbstractTensorMap}
end
Base.similar(W::JordanMPOTensor, ::Type{TorA}) where {TorA} =
jordanmpotensortype(spacetype(W), TorA)(undef, space(W))
Base.similar(W::JordanMPOTensor, V::VectorSpace) =
jordanmpotensortype(spacetype(V), storagetype(W))(undef, V)
Base.similar(W::JordanMPOTensor, V::VectorSpace, ::Type{TorA}) where {TorA} =
jordanmpotensortype(spacetype(V), TorA)(undef, V)

# Properties
# ----------
Expand Down Expand Up @@ -205,7 +209,8 @@ end
for f in (:real, :complex)
@eval function Base.$f(W::JordanMPOTensor)
E = $f(scalartype(W))
W′ = JordanMPOTensor{E}(undef, space(W))
TE = TensorKit.similarstoragetype(TensorKit.storagetype(W), E)
W′ = similar(W, TE)
for (I, v) in nonzero_pairs(W.A)
W′.A[I] = $f(v)
end
Expand Down Expand Up @@ -242,7 +247,7 @@ end
elseif 1 < i < size(W, 1) && 1 < j < size(W, 4)
return W.A[i - 1, 1, 1, j - 1]
else
return zeros(scalartype(W), eachspace(W)[i, 1, 1, j])
return zeros(storagetype(W), eachspace(W)[i, 1, 1, j])
end
end

Expand Down Expand Up @@ -357,7 +362,7 @@ function add_physical_charge(O::JordanMPOTensor, charge::Sector)
Vdst = left_virtualspace(O) ⊗
fuse(physicalspace(O), auxspace) ←
fuse(physicalspace(O), auxspace) ⊗ right_virtualspace(O)
Odst = JordanMPOTensor{scalartype(O)}(undef, Vdst)
Odst = similar(O, Vdst)
for (I, v) in nonzero_pairs(O)
Odst[I] = add_physical_charge(v, charge)
end
Expand Down
2 changes: 1 addition & 1 deletion src/operators/mpo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ eachsite(mpo::InfiniteMPO) = PeriodicArray(eachindex(mpo))
function Base.similar(mpo::MPO{<:MPOTensor}, ::Type{O}, L::Int) where {O <: MPOTensor}
return MPO(similar(parent(mpo), O, L))
end
function Base.similar(mpo::MPO, ::Type{T}) where {T <: Number}
function Base.similar(mpo::MPO, ::Type{T}) where {T <: Union{Number, DenseVector}}
return MPO(similar.(parent(mpo), T))
end

Expand Down
24 changes: 15 additions & 9 deletions src/operators/mpohamiltonian.jl
Original file line number Diff line number Diff line change
Expand Up @@ -503,10 +503,8 @@ function InfiniteMPOHamiltonian(lattice′::AbstractArray{<:VectorSpace}, local_
end

# partial sort by interaction range
local_mpos = sort!(
map(Base.Fix1(instantiate_operator, lattice), collect(local_operators));
by = x -> length(x[1])
)
unsorted_mpos = map(Base.Fix1(instantiate_operator, lattice), [local_operators...])
local_mpos = sort!(unsorted_mpos; by = x -> length(x[1]))

for (sites, local_mpo) in local_mpos
local key_R # trick to define key_R before the first iteration
Expand Down Expand Up @@ -662,6 +660,8 @@ function isemptylevel(H::InfiniteMPOHamiltonian, i::Int)
end

function Base.convert(::Type{TensorMap}, H::FiniteMPOHamiltonian)
# TODO this FORCES the output tensor to have Vector storage
# in an uncontrollable way, should be fixed
L = removeunit(H[1], 1)
R = removeunit(H[end], 4)
M = Tuple(H[2:(end - 1)])
Expand Down Expand Up @@ -701,8 +701,8 @@ end
function Base.similar(H::MPOHamiltonian, ::Type{O}, L::Int) where {O <: MPOTensor}
return MPOHamiltonian(similar(parent(H), O, L))
end
function Base.similar(H::MPOHamiltonian, ::Type{T}) where {T <: Number}
return MPOHamiltonian(similar.(parent(H), T))
function Base.similar(H::MPOHamiltonian, ::Type{TorA}) where {TorA <: Union{Number, DenseVector}}
return MPOHamiltonian(similar.(parent(H), TorA))
end

# Linear Algebra
Expand All @@ -727,7 +727,7 @@ function Base.:+(
⊞(Vtriv, right_virtualspace(A), Vtriv)
V = Vleft ⊗ physicalspace(A) ← physicalspace(A) ⊗ Vright

H[i] = eltype(H)(V, A, B, C, D)
H[i] = JordanMPOTensor(V, A, B, C, D)
end
return FiniteMPOHamiltonian(H)
end
Expand All @@ -749,7 +749,7 @@ function Base.:+(
Vright = ⊞(Vtriv, right_virtualspace(A), Vtriv)
V = Vleft ⊗ physicalspace(A) ← physicalspace(A) ⊗ Vright

H[i] = eltype(H)(V, A, B, C, D)
H[i] = JordanMPOTensor(V, A, B, C, D)
end
return InfiniteMPOHamiltonian(H)
end
Expand Down Expand Up @@ -855,7 +855,13 @@ end
function Base.:*(H::FiniteMPOHamiltonian{<:MPOTensor}, x::AbstractTensorMap)
@assert length(H) > 1
@assert numout(x) == length(H)
L = removeunit(H[1], 1)
if H[1] isa BraidingTensor
L′ = removeunit(H[1], 1)
L = similar(L′, TensorKit.storagetype(H))
copy!(L, L′)
else
L = removeunit(H[1], 1)
end
M = Tuple(H[2:(end - 1)])
R = removeunit(H[end], 4)
return TensorMap(_apply_finitempo(x, L, M, R))
Expand Down
21 changes: 18 additions & 3 deletions src/operators/ortho.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@ function left_canonicalize!(
d = sqrt(dim(P))

# orthogonalize second column against first
WI = removeunit(W[1, 1, 1, 1], 1)
Wi = W[1, 1, 1, 1]
if Wi isa BraidingTensor
Wi′ = removeunit(Wi, 1)
WI = similar(Wi′, TensorKit.storagetype(H))
copy!(WI, Wi′)
else
WI = removeunit(Wi, 1)
end
@plansor t[l; r] := conj(WI[p; p' l]) * W.C[p; p' r]
# TODO: the following is currently broken due to a TensorKit bug
# @plansor C′[p; p' r] := W.C[p; p' r] - WI[p; p' l] * t[l; r]
Expand Down Expand Up @@ -99,7 +106,14 @@ function right_canonicalize!(
d = sqrt(dim(P))

# orthogonalize second row against last
WI = removeunit(W[end, 1, 1, end], 4)
Wi = W[end, 1, 1, end]
if Wi isa BraidingTensor
Wi′ = removeunit(Wi, 4)
WI = similar(Wi′, TensorKit.storagetype(H))
copy!(WI, Wi′)
else
WI = removeunit(Wi, 4)
end
@plansor t[l; r] := conj(WI[r p; p']) * W.B[l p; p']
# TODO: the following is currently broken due to a TensorKit bug
# @plansor B′[l p; p'] := W.B[l p; p'] - WI[r p; p'] * t[l; r]
Expand All @@ -124,7 +138,8 @@ function right_canonicalize!(
end
H[i] = JordanMPOTensor(V ⊗ P ← domain(W), Q1, Q2, W.C, W.D)
else
tmp = transpose(cat(insertleftunit(B′, 4), W.A; dims = 4), ((1,), (3, 4, 2)))
B′′ = insertleftunit(B′, 4)
tmp = transpose(cat(B′′, W.A; dims = 4), ((1,), (3, 4, 2)))
R, Q = right_orth!(tmp; alg)
if dim(R) == 0
V = _rightunit ⊞ _rightunit
Expand Down
174 changes: 173 additions & 1 deletion test/cuda/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@ using MPSKit: GeometryStyle, FiniteChainStyle, InfiniteChainStyle, OperatorStyle
using TensorKit
using MatrixAlgebraKit
using TensorKit: ℙ, tensormaptype, TensorMapWithStorage
using Adapt, CUDA, cuTENSOR
using Adapt, CUDA, cuTENSOR, CUDA.CUBLAS

# TODO revisit this once https://github.com/QuantumKitHub/MatrixAlgebraKit.jl/issues/176
# is resolved
MPSKit.Defaults.alg_svd() = CUSOLVER_QRIteration()
MPSKit.Defaults.alg_lq() = LQViaTransposedQR(CUSOLVER_HouseholderQR())

pspaces = (ℙ^4, Rep[U₁](0 => 2), Rep[SU₂](1 => 1))
vspaces = (ℙ^10, Rep[U₁]((0 => 20)), Rep[SU₂](1 // 2 => 10, 3 // 2 => 5, 5 // 2 => 1))

@testset "CuFiniteMPO" for V in (ℂ^2, U1Space(0 => 1, 1 => 1))
# start from random operators
Expand Down Expand Up @@ -87,3 +91,171 @@ MPSKit.Defaults.alg_svd() = CUSOLVER_QRIteration()

@test dot(mpomps₁, mpomps₁) ≈ dot(mpo₁, mpo₁)
end

@testset "Finite CuMPOHamiltonian T ($T), V ($(spacetype(V)))" for T in (Float64, ComplexF64), V in (ℂ^2, U1Space(-1 => 1, 0 => 1, 1 => 1))
L = 3
lattice = fill(V, L)
O₁ = randn(T, V, V)
O₁ += O₁'
E = id(CuVector{T, CUDA.DeviceMemory}, domain(O₁))
O₂ = randn(T, V^2 ← V^2)
O₂ += O₂'

dO₁ = adapt(CuVector{T, CUDA.DeviceMemory}, O₁)
dO₂ = adapt(CuVector{T, CUDA.DeviceMemory}, O₂)
H1 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, i => O₁ for i in 1:L))
H2 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, (i, i + 1) => O₂ for i in 1:(L - 1)))
H3 = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(lattice, 1 => O₁, (2, 3) => O₂, (1, 3) => O₂))
@test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory}
@test TensorKit.storagetype(H2) == CuVector{T, CUDA.DeviceMemory}
@test TensorKit.storagetype(H3) == CuVector{T, CUDA.DeviceMemory}

@test scalartype(H1) == scalartype(H2) == scalartype(H3) == T
if !(T <: Complex)
for H in (H1, H2, H3)
Hc = @constinferred complex(H)
@test scalartype(Hc) == complex(T)
@test TensorKit.storagetype(Hc) == CuVector{complex(T), CUDA.DeviceMemory}
end
end

# check if constructor works by converting back to tensormap
H1_tm = convert(TensorMap, H1)
@test TensorKit.storagetype(H1_tm) == CuVector{T, CUDA.DeviceMemory}
operators = vcat(fill(E, L - 1), dO₁)
@test H1_tm ≈ mapreduce(+, 1:L) do i
return reduce(⊗, circshift(operators, i))
end
operators = vcat(fill(E, L - 2), dO₂)
@test convert(TensorMap, H2) ≈ mapreduce(+, 1:(L - 1)) do i
return reduce(⊗, circshift(operators, i))
end
@test convert(TensorMap, H3) ≈
dO₁ ⊗ E ⊗ E + E ⊗ dO₂ + permute(dO₂ ⊗ E, ((1, 3, 2), (4, 6, 5)))

# check if adding terms on the same site works
single_terms = Iterators.flatten(Iterators.repeated((i => O₁ / 2 for i in 1:L), 2))
H4 = adapt(CuArray, FiniteMPOHamiltonian(lattice, single_terms))
@test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}
@test H4 ≈ H1 atol = 1.0e-6
double_terms = Iterators.flatten(
Iterators.repeated(((i, i + 1) => O₂ / 2 for i in 1:(L - 1)), 2)
)
H5 = adapt(CuArray, FiniteMPOHamiltonian(lattice, double_terms))
@test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory}
@test H5 ≈ H2 atol = 1.0e-6

# test linear algebra
@test H1 ≈
adapt(CuArray, FiniteMPOHamiltonian(lattice, 1 => O₁)) +
adapt(CuArray, FiniteMPOHamiltonian(lattice, 2 => O₁)) +
adapt(CuArray, FiniteMPOHamiltonian(lattice, 3 => O₁))
@test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory}

H1′a = 0.8 * H1
@test TensorKit.storagetype(H1′a) == CuVector{T, CUDA.DeviceMemory}
H1′b = 0.2 * H1
@test TensorKit.storagetype(H1′b) == CuVector{T, CUDA.DeviceMemory}
H1′ = H1′a + H1′b
@test TensorKit.storagetype(H1′) == CuVector{T, CUDA.DeviceMemory}
@test H1′ ≈ H1 atol = 1.0e-6
@test convert(TensorMap, H1 + H2) ≈ convert(TensorMap, H1) + convert(TensorMap, H2) atol = 1.0e-6
H1_trunc = changebonds(H1, SvdCut(; trscheme = truncrank(0)))
@test H1_trunc ≈ H1
@test all(left_virtualspace(H1_trunc) .== left_virtualspace(H1))

# test dot and application
state = adapt(CuVector{T, CUDA.DeviceMemory}, rand(T, prod(lattice)))
@test TensorKit.storagetype(state) == CuVector{T, CUDA.DeviceMemory}
mps = adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPS(state))
@test TensorKit.storagetype(mps) == CuVector{T, CUDA.DeviceMemory}

# TODO fix me!
#=@test TensorKit.storagetype(H1) == CuVector{T, CUDA.DeviceMemory}
H1mps = H1 * mps
@test TensorKit.storagetype(H1mps) == CuVector{T, CUDA.DeviceMemory}
H1tmstate = H1_tm * state
@test TensorKit.storagetype(H1tmstate) == CuVector{T, CUDA.DeviceMemory}
H1mpstm = convert(TensorMap, H1mps)
@test TensorKit.storagetype(H1mpstm) == CuVector{T, CUDA.DeviceMemory}
@test H1mpstm ≈ H1tmstate
@test H1 * state ≈ H1tmstate
@test dot(mps, H2, mps) ≈ dot(mps, H2 * mps)=#

# test constructor from dictionary with mixed linear and Cartesian lattice indices as keys
grid = square = fill(V, 3, 3)

local_operators = Dict((I,) => O₁ for I in eachindex(grid))
I_vertical = CartesianIndex(1, 0)
vertical_operators = Dict(
(I, I + I_vertical) => O₂ for I in eachindex(IndexCartesian(), square) if I[1] < size(square, 1)
)
I_horizontal = CartesianIndex(0, 1)
horizontal_operators = Dict(
(I, I + I_horizontal) => O₂ for I in eachindex(IndexCartesian(), square) if I[2] < size(square, 1)
)
operators = merge(local_operators, vertical_operators, horizontal_operators)
H4 = adapt(CuArray, FiniteMPOHamiltonian(grid, operators))
@test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}

@test H4 ≈
adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, local_operators)) +
adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, vertical_operators)) +
adapt(CuVector{T, CUDA.DeviceMemory}, FiniteMPOHamiltonian(grid, horizontal_operators)) atol = 1.0e-4
@test TensorKit.storagetype(H4) == CuVector{T, CUDA.DeviceMemory}

H4′ = H4 / 3 + 2H4 / 3
@test TensorKit.storagetype(H4′) == CuVector{T, CUDA.DeviceMemory}
#=H5 = changebonds(H4′, SvdCut(; trscheme = trunctol(; atol = 1.0e-12)))
@test TensorKit.storagetype(H5) == CuVector{T, CUDA.DeviceMemory}
psi = adapt(CuArray, FiniteMPS(physicalspace(H5), V ⊕ rightunitspace(V)))
@test expectation_value(psi, H4) ≈ expectation_value(psi, H5)=# # TODO
end

@testset "CuInfiniteMPOHamiltonian $(sectortype(pspace))" for (pspace, Dspace) in zip(pspaces, vspaces)
# generate a 1-2-3 body interaction
operators = ntuple(3) do i
O = rand(ComplexF64, pspace^i, pspace^i)
return O += O'
end

H1 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPOHamiltonian(operators[1]))
H2 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPOHamiltonian(operators[2]))
H3 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, repeat(InfiniteMPOHamiltonian(operators[3]), 2))

@test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
@test TensorKit.storagetype(H2) == CuVector{ComplexF64, CUDA.DeviceMemory}
@test TensorKit.storagetype(H3) == CuVector{ComplexF64, CUDA.DeviceMemory}
# make a teststate to measure expectation values for
ψ1 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPS([pspace], [Dspace]))
ψ2 = adapt(CuVector{ComplexF64, CUDA.DeviceMemory}, InfiniteMPS([pspace, pspace], [Dspace, Dspace]))
@test TensorKit.storagetype(ψ1) == CuVector{ComplexF64, CUDA.DeviceMemory}
@test TensorKit.storagetype(ψ2) == CuVector{ComplexF64, CUDA.DeviceMemory}

e1 = expectation_value(ψ1, H1)
e2 = expectation_value(ψ1, H2)

H1 = 2 * H1
@test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
H1 -= [1]
@test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
@test e1 * 2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10

H1 = H1 + H2
@test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}
@test e1 * 2 + e2 - 1 ≈ expectation_value(ψ1, H1) atol = 1.0e-10

H1 = repeat(H1, 2)
@test TensorKit.storagetype(H1) == CuVector{ComplexF64, CUDA.DeviceMemory}

e1 = expectation_value(ψ2, H1)
e3 = expectation_value(ψ2, H3)

@test e1 + e3 ≈ expectation_value(ψ2, H1 + H3) atol = 1.0e-10

H4 = H1 + H3
@test TensorKit.storagetype(H4) == CuVector{ComplexF64, CUDA.DeviceMemory}
h4 = H4 * H4
@test TensorKit.storagetype(h4) == CuVector{ComplexF64, CUDA.DeviceMemory}
@test real(expectation_value(ψ2, H4)) >= 0
end
Loading
Loading