@@ -7,6 +7,17 @@ function CuTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂,
77 return CuTensorMap {T, S, N₁, N₂} (CuArray {T} (t. data), space (t))
88end
99
10+ #= function TensorKit.TensorMap{T, S₁, N₁, N₂, A}(
11+ ::UndefInitializer, space::TensorMapSpace{S₂, N₁, N₂}
12+ ) where {T, S₁, S₂ <: TensorKit.ElementarySpace, N₁, N₂, A <: CuVector{T}}
13+ d = TensorKit.fusionblockstructure(space).totaldim
14+ data = A(undef, d)
15+ if !isbitstype(T)
16+ zerovector!(data)
17+ end
18+ return TensorKit.TensorMap{T, S₂, A}(data, space)
19+ end=#
20+
1021# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
1122function TensorKit. project_symmetric_and_check (:: Type{T} , :: Type{A} , data:: AbstractArray , V:: TensorMapSpace ; tol = sqrt (eps (real (float (eltype (data)))))) where {T, A <: CuVector{T} }
1223 h_t = TensorKit. TensorMapWithStorage {T, Vector{T}} (undef, V)
@@ -17,6 +28,10 @@ function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::Abstr
1728 return TensorKit. TensorMapWithStorage {T, A} (A (h_t. data), V)
1829end
1930
# Block storage of a `CuTensorMap`: each block is a 1-D `SubArray` view into the
# underlying `CuVector`, indexed by a `UnitRange` (the `true` parameter marks
# fast contiguous linear indexing).
# NOTE(review): the memory kind is hard-coded to `CUDA.DeviceMemory`, while
# other methods in this file use `CUDA.default_memory` — confirm this is intended.
TensorKit.blocktype(::Type{<:CuTensorMap{T, S}}) where {T, S} =
    SubArray{T, 1, CuVector{T, CUDA.DeviceMemory}, Tuple{UnitRange{Int}}, true}
34+
2035for (fname, felt) in ((:zeros , :zero ), (:ones , :one ))
2136 @eval begin
2237 function CUDA. $fname (
@@ -102,9 +117,21 @@ function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S}
102117end
103118
# Convert a `TensorMap` to one backed by CUDA storage `A <: CuArray{T}`.
# If `t` already has the requested concrete type it is returned unchanged;
# otherwise a fresh uninitialized tensor over `space(t)` is allocated and the
# data is copied over with `copy!`.
function Base.convert(
        TT::Type{TensorMap{T, S, N₁, N₂, A}},
        t::TensorMap{T, S, N₁, N₂, AA}
    ) where {T, S, N₁, N₂, A <: CuArray{T}, AA}
    # Fast path: nothing to do when the storage type already matches exactly.
    typeof(t) === TT && return t
    dst = TT(undef, space(t))
    return copy!(dst, t)
end
130+
131+ function Base. convert (
132+ TT:: Type{TensorMap{T, S, N₁, N₂, A}} ,
133+ t:: AdjointTensorMap
134+ ) where {T, S, N₁, N₂, A <: CuArray{T} }
108135 if typeof (t) === TT
109136 return t
110137 else
140167
# Storage-type promotion for CUDA arrays that differ only in their memory kind:
# the promoted storage is a `CuArray` backed by the default memory.
TensorKit.promote_storage_rule(::Type{CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} =
    CuArray{T, N, CUDA.default_memory}
TensorKit.promote_storage_rule(::Type{<:CuArray{T, N}}, ::Type{CuArray{T, N}}) where {T, N} =
    CuArray{T, N, CUDA.default_memory}
# Disambiguating method: the call `promote_storage_rule(CuArray{T, N}, CuArray{T, N})`
# matches both methods above, and each is more specific in a different argument,
# so without this method Julia would raise a method-ambiguity error.
TensorKit.promote_storage_rule(::Type{CuArray{T, N}}, ::Type{CuArray{T, N}}) where {T, N} =
    CuArray{T, N, CUDA.default_memory}
143172
144173
145174# CuTensorMap exponentation:
@@ -168,3 +197,21 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
168197 return tf
169198 end
170199end
200+
# Non-threaded generic add/transform kernel for CUDA-backed tensor maps:
# applies each subtransformer of the `GenericTreeTransformer` to `tsrc` and
# accumulates the result into `tdst` as `β * tdst + α * transform(tsrc)`.
function TensorKit._add_general_kernel_nonthreaded!(
        tdst::CuTensorMap, tsrc::CuTensorMap, p, transformer::TensorKit.GenericTreeTransformer, α, β, backend...
    )
    # Intermediate buffers, shared by all multi-block subtransformations below.
    buffers = TensorKit.allocate_buffers(tdst, tsrc, transformer)

    for sub in transformer.data
        # Special case: a single block needs no intermediate buffers.
        if length(sub[1]) == 1
            TensorKit._add_transform_single!(tdst, tsrc, p, sub, α, β, backend...)
            continue
        end
        # Adapt the first component to a CuArray so the multi-block transform
        # operates on device data; the remaining components pass through as-is.
        device_sub = (CUDA.adapt(CuArray, sub[1]), sub[2:end]...)
        TensorKit._add_transform_multi!(tdst, tsrc, p, device_sub, buffers, α, β, backend...)
    end
    return nothing
end
0 commit comments