diff --git a/Project.toml b/Project.toml index 607558519..a1f20ec60 100644 --- a/Project.toml +++ b/Project.toml @@ -55,7 +55,7 @@ Printf = "1" Random = "1" SafeTestsets = "0.1" ScopedValues = "1.3.0" -Strided = "2" +Strided = "2.3.4" TensorKitSectors = "0.3.6" TensorOperations = "5.1" Test = "1" @@ -89,3 +89,6 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] test = ["ArgParse", "Adapt", "Aqua", "AllocCheck", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "JET", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote", "Mooncake"] + +[sources] +Strided = {url = "https://github.com/QuantumKitHub/Strided.jl", rev = "ksh/copyto"} diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl index f5efb98bb..4ee4865f1 100644 --- a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -10,7 +10,7 @@ using TensorKit.Factorizations using TensorKit.Strided using TensorKit.Factorizations: AbstractAlgorithm using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check -import TensorKit: randisometry, rand, randn +import TensorKit: randisometry, rand, randn, _copyto!, _add_general_kernel_nonthreaded!, blocktype using TensorKit: MatrixAlgebraKit diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index f065c2ec1..a276cd1a8 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -6,6 +6,9 @@ const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A} return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t)) end +function TensorMap{T, S, N₁, N₂, DA}(t::TensorMap{T, S, N₁, N₂, HA}) where {T, S, N₁, N₂, DA <: CuArray{T}, HA <: Array{T}} + return CuTensorMap{T, S, N₁, N₂}(CuArray{T}(t.data), space(t)) +end # project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: CuVector{T}} @@ -101,18 +104,6 @@ function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S} return isempty(inds) ? zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)] end -function Base.convert( - TT::Type{CuTensorMap{T, S, N₁, N₂}}, - t::AbstractTensorMap{<:Any, S, N₁, N₂} - ) where {T, S, N₁, N₂} - if typeof(t) === TT - return t - else - tnew = TT(undef, space(t)) - return copy!(tnew, t) - end -end - function LinearAlgebra.isposdef(t::CuTensorMap) domain(t) == codomain(t) || throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) @@ -138,10 +129,9 @@ function Base.promote_rule( return CuTensorMap{T, S, N₁, N₂} end -TensorKit.promote_storage_rule(::Type{CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} = +TensorKit.promote_storage_rule(::Type{<:CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} = CuArray{T, N, CUDA.default_memory} - # CuTensorMap exponentation: function TensorKit.exp!(t::CuTensorMap) domain(t) == codomain(t) || @@ -168,3 +158,21 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth) return tf end end + +function TensorKit.add_kernel_nonthreaded!( + tdst::CuTensorMap, tsrc::CuTensorMap, p, transformer::TensorKit.GenericTreeTransformer, α, β, backend... + ) + # preallocate buffers + buffers = TensorKit.allocate_buffers(tdst, tsrc, transformer) + + for subtransformer in transformer.data + # Special case without intermediate buffers whenever there is only a single block + if length(subtransformer[1]) == 1 + TensorKit._add_transform_single!(tdst, tsrc, p, subtransformer, α, β, backend...) + else + cu_subtransformer = tuple(CUDA.adapt(CuArray, subtransformer[1]), subtransformer[2:end]...) + TensorKit._add_transform_multi!(tdst, tsrc, p, cu_subtransformer, buffers, α, β, backend...) + end + end + return nothing +end diff --git a/src/tensors/abstracttensor.jl b/src/tensors/abstracttensor.jl index d7d520b43..8f9d0d8a1 100644 --- a/src/tensors/abstracttensor.jl +++ b/src/tensors/abstracttensor.jl @@ -53,9 +53,11 @@ storagetype(t) = storagetype(typeof(t)) function storagetype(::Type{T}) where {T <: AbstractTensorMap} if T isa Union # attempt to be slightly more specific by promoting unions - Ma = storagetype(T.a) - Mb = storagetype(T.b) - return promote_storagetype(Ma, Mb) + return promote_storagetype(T.a, T.b) + elseif eltype(T) isa Union + # attempt to be slightly more specific by promoting unions + TU = eltype(T) + return promote_storagetype(TU.a, TU.b) else # fallback definition by using scalartype return similarstoragetype(scalartype(T)) @@ -103,11 +105,19 @@ similarstoragetype(X::Type, ::Type{T}) where {T <: Number} = # implement on tensors similarstoragetype(::Type{TT}) where {TT <: AbstractTensorMap} = similarstoragetype(storagetype(TT)) -similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number} = - similarstoragetype(storagetype(TT), T) +function similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number} + return similarstoragetype(storagetype(TT), T) +end +function similarstoragetype(::Type{<:AbstractTensorMap{T, S, N₁, N₂}}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂} + return similarstoragetype(TA, T) +end +function similarstoragetype(t::AbstractTensorMap{T, S, N₁, N₂}, ::Type{TA}) where {T <: Number, TA <: DenseVector, S, N₁, N₂} + return similarstoragetype(typeof(t), TA) +end # implement on arrays similarstoragetype(::Type{A}) where {A <: DenseVector{<:Number}} = A +similarstoragetype(::Type{A}, ::Type{A}) where {A <: DenseVector{<:Number}} = A Base.@assume_effects :foldable similarstoragetype(::Type{A}) where {A <: AbstractArray{<:Number}} = Core.Compiler.return_type(similar, Tuple{A, Int}) Base.@assume_effects :foldable similarstoragetype(::Type{A}, ::Type{T}) where {A <: AbstractArray, T <: Number} = diff --git a/src/tensors/adjoint.jl b/src/tensors/adjoint.jl index dfc1a4471..382f309b5 100644 --- a/src/tensors/adjoint.jl +++ b/src/tensors/adjoint.jl @@ -22,6 +22,8 @@ Base.adjoint(t::AbstractTensorMap) = AdjointTensorMap(t) space(t::AdjointTensorMap) = adjoint(space(parent(t))) dim(t::AdjointTensorMap) = dim(parent(t)) storagetype(::Type{AdjointTensorMap{T, S, N₁, N₂, TT}}) where {T, S, N₁, N₂, TT} = storagetype(TT) +similarstoragetype(::AdjointTensorMap{T, S, N₁, N₂, TT}, ::Type{T′}) where {T, S, N₁, N₂, TT, T′ <: Number} = similarstoragetype(TT, T′) +similarstoragetype(::AdjointTensorMap{T, S, N₁, N₂, TT}, ::Type{TA}) where {T, S, N₁, N₂, TT, TA <: DenseVector} = similarstoragetype(TT, TA) # Blocks and subblocks #---------------------- diff --git a/src/tensors/braidingtensor.jl b/src/tensors/braidingtensor.jl index 3ff8a9abf..8d45c7dc6 100644 --- a/src/tensors/braidingtensor.jl +++ b/src/tensors/braidingtensor.jl @@ -145,12 +145,10 @@ function block(b::BraidingTensor, s::Sector) # TODO: probably always square? m = blockdim(codomain(b), s) n = blockdim(domain(b), s) - data = Matrix{eltype(b)}(undef, (m, n)) + data = zeros(eltype(b), (m, n)) length(data) == 0 && return data # s ∉ blocksectors(b) - data = fill!(data, zero(eltype(b))) - V1, V2 = codomain(b) if sectortype(b) === Trivial d1, d2 = dim(V1), dim(V2) @@ -182,12 +180,15 @@ end has_shared_permute(t::BraidingTensor, ::Index2Tuple) = false function add_transform!( tdst::AbstractTensorMap, - tsrc::BraidingTensor, (p₁, p₂)::Index2Tuple, + tsrc::BraidingTensor{T, S}, + (p₁, p₂)::Index2Tuple, fusiontreetransform, α::Number, β::Number, backend::AbstractBackend... - ) + ) where {T, S} + tsrc_map = similar(tdst, storagetype(tdst), space(tsrc)) + copy!(tsrc_map, tsrc) return add_transform!( - tdst, TensorMap(tsrc), (p₁, p₂), fusiontreetransform, α, β, + tdst, tsrc_map, (p₁, p₂), fusiontreetransform, α, β, backend... ) end @@ -287,11 +288,15 @@ function planarcontract!( backend, allocator ) # special case only defined for contracting 2 indices - length(oindB) == length(cindB) == 2 || + if !(length(oindB) == length(cindB) == 2) + # horrible!!!!! + tB′ = TensorMap(B) + tB = TensorMapWithStorage{eltype(B), similarstoragetype(A, eltype(B)), spacetype(tB′), numout(tB′), numin(tB′)}(tB′) return planarcontract!( - C, A, (oindA, cindA), TensorMap(B), (cindB, oindB), (p1, p2), - α, β, backend, allocator - ) + C, A, (oindA, cindA), tB, (cindB, oindB), (p1, p2), + α, β, backend, allocator + ) + end codA, domA = codomainind(A), domainind(A) codB, domB = codomainind(B), domainind(B) diff --git a/src/tensors/diagonal.jl b/src/tensors/diagonal.jl index b2ac4134b..e73ad2787 100644 --- a/src/tensors/diagonal.jl +++ b/src/tensors/diagonal.jl @@ -280,7 +280,7 @@ end # ---------------- function TO.tensoradd_type(TC, A::DiagonalTensorMap, ::Index2Tuple{1, 1}, ::Bool) M = similarstoragetype(A, TC) - return DiagonalTensorMap{TC, spacetype(A), M} + return DiagonalTensorMap{scalartype(M), spacetype(A), M} end function TO.tensorcontract_type( diff --git a/src/tensors/indexmanipulations.jl b/src/tensors/indexmanipulations.jl index 3108abb17..e45789b44 100644 --- a/src/tensors/indexmanipulations.jl +++ b/src/tensors/indexmanipulations.jl @@ -17,6 +17,8 @@ for (operation, manipulation) in ( $promote_op(::Type{T}, ::Type{I}) where {T <: Number, I <: Sector} = sectorscalartype(I) <: Integer ? T : sectorscalartype(I) <: Real ? float(T) : complex(T) + $promote_op(::Type{TA}, ::Type{I}) where {TA <: DenseVector, I <: Sector} = + similarstoragetype(TA, $promote_op(eltype(TA), I)) # TODO: currently the manipulations all use sectorscalartype, change to: # $manipulation_scalartype(I) <: Integer ? T : # $manipulation_scalartype(I) <: Real ? float(T) : complex(T) @@ -342,11 +344,11 @@ See also [`insertrightunit`](@ref insertrightunit(::AbstractTensorMap, ::Val{i}) """ function insertleftunit( t::AbstractTensorMap, ::Val{i} = Val(numind(t) + 1); - copy::Bool = false, conj::Bool = false, dual::Bool = false + copy::Bool = false, conj::Bool = false, dual::Bool = false, ) where {i} W = insertleftunit(space(t), Val(i); conj, dual) if t isa TensorMap - return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W) + return TensorMapWithStorage{scalartype(t), storagetype(t)}(copy ? Base.copy(t.data) : t.data, W) else tdst = similar(t, W) for (c, b) in blocks(t) @@ -371,11 +373,11 @@ See also [`insertleftunit`](@ref insertleftunit(::AbstractTensorMap, ::Val{i}) w """ function insertrightunit( t::AbstractTensorMap, ::Val{i} = Val(numind(t)); - copy::Bool = false, conj::Bool = false, dual::Bool = false + copy::Bool = false, conj::Bool = false, dual::Bool = false, ) where {i} W = insertrightunit(space(t), Val(i); conj, dual) if t isa TensorMap - return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W) + return TensorMapWithStorage{scalartype(t), storagetype(t)}(copy ? Base.copy(t.data) : t.data, W) else tdst = similar(t, W) for (c, b) in blocks(t) @@ -400,7 +402,7 @@ and [`insertrightunit`](@ref insertrightunit(::AbstractTensorMap, ::Val{i}) wher function removeunit(t::AbstractTensorMap, ::Val{i}; copy::Bool = false) where {i} W = removeunit(space(t), Val(i)) if t isa TensorMap - return TensorMap{scalartype(t)}(copy ? Base.copy(t.data) : t.data, W) + return TensorMapWithStorage{scalartype(t), storagetype(t)}(copy ? Base.copy(t.data) : t.data, W) else tdst = similar(t, W) for (c, b) in blocks(t) diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl index 7bdd90f9d..90884f4d8 100644 --- a/test/cuda/tensors.jl +++ b/test/cuda/tensors.jl @@ -236,8 +236,8 @@ for V in spacelist α = rand(T) @test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2) @test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t)) - @test TensorKit.to_cpu(α * t) ≈ α * TensorKit.to_cpu(t) - @test TensorKit.to_cpu(t + t) ≈ 2 * TensorKit.to_cpu(t) + @test adapt(Vector{T}, (α * t)) ≈ α * adapt(Vector{T}, t) + @test adapt(Vector{T}, (t + t)) ≈ 2 * adapt(Vector{T}, t) end end @timedtestset "Real and imaginary parts" begin @@ -290,28 +290,29 @@ for V in spacelist @timedtestset "Permutations: test via inner product invariance" begin W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 t = CUDA.rand(ComplexF64, W) + ht = adapt(Vector{ComplexF64}, t) t′ = CUDA.randn!(similar(t)) + ht′ = adapt(Vector{ComplexF64}, t′) + dot_htt′ = dot(ht′, ht) + dot_tt′ = dot(t′, t) + @test dot_tt′ ≈ dot_htt′ + norm_t = norm(t) for k in 0:5 for p in permutations(1:5) p1 = ntuple(n -> p[n], k) p2 = ntuple(n -> p[k + n], 5 - k) - CUDA.@allowscalar begin - t2 = @constinferred permute(t, (p1, p2)) - t2 = permute(t, (p1, p2)) - @test norm(t2) ≈ norm(t) - t2′ = permute(t′, (p1, p2)) - @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) - end - end - - CUDA.@allowscalar begin - t3 = @constinferred repartition(t, $k) - t3 = repartition(t, k) - @test norm(t3) ≈ norm(t) - t3′ = @constinferred repartition!(similar(t3), t′) - @test norm(t3′) ≈ norm(t′) - @test dot(t′, t) ≈ dot(t3′, t3) + t2 = @constinferred permute(t, (p1, p2)) + t2′ = permute(t′, (p1, p2)) + @test norm(t2) ≈ norm_t + @test dot(t2′, t2) ≈ dot_tt′ + @test dot(transpose(t2′), transpose(t2)) ≈ dot_tt′ end + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3) ≈ norm(t) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) end end if BraidingStyle(I) isa SymmetricBraiding @@ -322,34 +323,35 @@ for V in spacelist for p in permutations(1:5) p1 = ntuple(n -> p[n], k) p2 = ntuple(n -> p[k + n], 5 - k) - dt2 = CUDA.@allowscalar permute(t, (p1, p2)) - ht2 = permute(TensorKit.to_cpu(t), (p1, p2)) - @test ht2 == TensorKit.to_cpu(dt2) + ht2 = permute(adapt(Vector{ComplexF64}, t), (p1, p2)) + dt2 = permute(t, (p1, p2)) + @test ht2 ≈ adapt(Vector{ComplexF64}, dt2) + ht3 = transpose(adapt(Vector{ComplexF64}, dt2)) + dt3 = transpose(dt2) + hht3 = adapt(Vector{ComplexF64}, dt3) + @test ht3 ≈ hht3 end - - dt3 = CUDA.@allowscalar repartition(t, k) - ht3 = repartition(TensorKit.to_cpu(t), k) - @test ht3 == TensorKit.to_cpu(dt3) + dt4 = repartition(t, k) + ht4 = repartition(adapt(Vector{ComplexF64}, t), k) + @test ht4 == adapt(Vector{ComplexF64}, dt4) end end end @timedtestset "Full trace: test self-consistency" begin t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') - CUDA.@allowscalar begin - t2 = permute(t, ((1, 2), (4, 3))) - s = @constinferred tr(t2) - @test conj(s) ≈ tr(t2') - if !isdual(V1) - t2 = twist!(t2, 1) - end - if isdual(V2) - t2 = twist!(t2, 2) - end - ss = tr(t2) - @tensor s2 = t[a, b, b, a] - @tensor t3[a, b] := t[a, c, c, b] - @tensor s3 = t3[a, a] + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] @test ss ≈ s2 @test ss ≈ s3 end @@ -363,24 +365,20 @@ for V in spacelist if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) @timedtestset "Trace: test via conversion" begin t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') - CUDA.@allowscalar begin - @tensor t2[a, b] := t[c, d, b, d, c, a] - @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] - end + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] @test t3 ≈ ad(t2) end end @timedtestset "Trace and contraction" begin t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') - CUDA.@allowscalar begin - t3 = t1 ⊗ t2 - @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] - @tensor tb[a, b] := t3[x, y, a, y, b, x] - end + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] @test ta ≈ tb end - #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) @timedtestset "Tensor contraction: test via CPU" begin dA1 = CUDA.randn(ComplexF64, V1' * V2', V3') dA2 = CUDA.randn(ComplexF64, V3 * V4, V5) @@ -395,45 +393,39 @@ for V in spacelist TensorKit.to_cpu(dH)[s1, s2, t1, t2] @test TensorKit.to_cpu(dHrA12) ≈ hHrA12 end - end=# # doesn't yet work because of AdjointTensor + end @timedtestset "Index flipping: test flipping inverse" begin t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) for i in 1:4 - CUDA.@allowscalar begin - @test t ≈ flip(flip(t, i), i; inv = true) - @test t ≈ flip(flip(t, i; inv = true), i) - end + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) end end - #=@timedtestset "Index flipping: test via explicit flip" begin + @timedtestset "Index flipping: test via explicit flip" begin t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) - F1 = unitary(flip(V1), V1) + F1 = adapt(CuArray{ComplexF64}, unitary(flip(V1), V1)) - CUDA.@allowscalar begin - @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] - @test flip(t, 1) ≈ tf - @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] - @test twist!(flip(t, 2), 2) ≈ tf - @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] - @test flip(t, 3) ≈ tf - @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] - @test twist!(flip(t, 4), 4) ≈ tf - end + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf end @timedtestset "Index flipping: test via contraction" begin t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) t2 = CUDA.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) - CUDA.@allowscalar begin - @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] - @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] - @test ta ≈ tb - @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] - @test ta ≈ tb - @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] - @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] - @test flip(ta, (1, 2)) ≈ tb - end - end=# # TODO + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end @timedtestset "Multiplication of isometries: test properties" begin W2 = V4 ⊗ V5 W1 = W2 ⊗ (oneunit(V1) ⊕ oneunit(V1)) @@ -567,9 +559,7 @@ for V in spacelist for T in (Float32, ComplexF64) t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) - CUDA.@allowscalar begin - t = @constinferred (t1 ⊗ t2) - end + t = @constinferred (t1 ⊗ t2) @test norm(t) ≈ norm(t1) * norm(t2) end end @@ -582,11 +572,9 @@ for V in spacelist d2 = dim(codomain(t2)) d3 = dim(domain(t1)) d4 = dim(domain(t2)) - CUDA.@allowscalar begin - t = @constinferred (t1 ⊗ t2) - At = ad(t) - @test ad(t) ≈ ad(t1) ⊗ ad(t2) - end + t = @constinferred (t1 ⊗ t2) + At = ad(t) + @test ad(t) ≈ ad(t1) ⊗ ad(t2) end end end @@ -594,11 +582,9 @@ for V in spacelist for T in (Float32, ComplexF64) t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1) t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3) - CUDA.@allowscalar begin - t = @constinferred (t1 ⊗ t2) - @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] - # @test t ≈ t′ # TODO broken for symmetry: Irrep[ℤ₃] - end + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + # @test t ≈ t′ # TODO broken for symmetry: Irrep[ℤ₃] end end end @@ -614,16 +600,12 @@ end for T in (Float32, ComplexF64) t1 = CUDA.rand(T, V1 ⊗ V2, V3' ⊗ V4) t2 = CUDA.rand(T, W2, W1 ⊗ W1') - CUDA.@allowscalar begin - t = @constinferred (t1 ⊠ t2) - end + t = @constinferred (t1 ⊠ t2) d1 = dim(codomain(t1)) d2 = dim(codomain(t2)) d3 = dim(domain(t1)) d4 = dim(domain(t2)) - CUDA.@allowscalar begin - @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) - end + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) end end end