@@ -500,6 +500,65 @@ TaskExecStatus CudaUploadFrame::Run() {
500500 return TASK_EXEC_SUCCESS;
501501}
502502
503+ namespace VPF {
504+ struct UploadBuffer_Impl {
505+ CUstream cuStream;
506+ CUcontext cuContext;
507+ CudaBuffer *pBuffer = nullptr ;
508+
509+ UploadBuffer_Impl () = delete ;
510+ UploadBuffer_Impl (const UploadBuffer_Impl &other) = delete ;
511+ UploadBuffer_Impl &operator =(const UploadBuffer_Impl &other) = delete ;
512+
513+ UploadBuffer_Impl (CUstream stream, CUcontext context,
514+ uint32_t elem_size, uint32_t num_elems)
515+ : cuStream(stream), cuContext(context) {
516+ pBuffer = CudaBuffer::Make (elem_size, num_elems, context);
517+ }
518+
519+ ~UploadBuffer_Impl () { delete pBuffer; }
520+ };
521+ } // namespace VPF
522+
523+ UploadBuffer *UploadBuffer::Make (CUstream cuStream, CUcontext cuContext,
524+ uint32_t elem_size, uint32_t num_elems) {
525+ return new UploadBuffer (cuStream, cuContext, elem_size, num_elems);
526+ }
527+
528+ UploadBuffer::UploadBuffer (CUstream cuStream, CUcontext cuContext,
529+ uint32_t elem_size, uint32_t num_elems)
530+ :
531+
532+ Task(" UploadBuffer" , UploadBuffer::numInputs,
533+ UploadBuffer::numOutputs, cuda_stream_sync, (void *)cuStream) {
534+ pImpl = new UploadBuffer_Impl (cuStream, cuContext, elem_size, num_elems);
535+ }
536+
537+ UploadBuffer::~UploadBuffer () { delete pImpl; }
538+
539+ TaskExecStatus UploadBuffer::Run () {
540+ NvtxMark tick (__FUNCTION__);
541+ if (!GetInput ()) {
542+ return TASK_EXEC_FAIL;
543+ }
544+
545+ ClearOutputs ();
546+
547+ auto stream = pImpl->cuStream ;
548+ auto context = pImpl->cuContext ;
549+ auto pBuffer = pImpl->pBuffer ;
550+ auto pSrcHost = ((Buffer *)GetInput ())->GetDataAs <void >();
551+
552+ CudaCtxPush lock (context);
553+ if (CUDA_SUCCESS != cuMemcpyHtoDAsync (pBuffer->GpuMem (), (const void *)pSrcHost,
554+ pBuffer->GetRawMemSize (), stream)) {
555+ return TASK_EXEC_FAIL;
556+ }
557+
558+ SetOutput (pBuffer, 0 );
559+ return TASK_EXEC_SUCCESS;
560+ }
561+
503562namespace VPF {
504563struct CudaDownloadSurface_Impl {
505564 CUstream cuStream;
@@ -538,6 +597,25 @@ struct CudaDownloadSurface_Impl {
538597
539598 ~CudaDownloadSurface_Impl () { delete pHostFrame; }
540599};
600+
601+ struct DownloadCudaBuffer_Impl {
602+ CUstream cuStream;
603+ CUcontext cuContext;
604+ Buffer *pHostBuffer = nullptr ;
605+
606+ DownloadCudaBuffer_Impl () = delete ;
607+ DownloadCudaBuffer_Impl (const DownloadCudaBuffer_Impl &other) = delete ;
608+ DownloadCudaBuffer_Impl &
609+ operator =(const DownloadCudaBuffer_Impl &other) = delete ;
610+
611+ DownloadCudaBuffer_Impl (CUstream stream, CUcontext context, uint32_t elem_size,
612+ uint32_t num_elems)
613+ : cuStream(stream), cuContext(context) {
614+ pHostBuffer = Buffer::MakeOwnMem (elem_size * num_elems, context);
615+ }
616+
617+ ~DownloadCudaBuffer_Impl () { delete pHostBuffer; }
618+ };
541619} // namespace VPF
542620
543621CudaDownloadSurface *CudaDownloadSurface::Make (CUstream cuStream,
@@ -601,6 +679,45 @@ TaskExecStatus CudaDownloadSurface::Run() {
601679 return TASK_EXEC_SUCCESS;
602680}
603681
682+ DownloadCudaBuffer *DownloadCudaBuffer::Make (CUstream cuStream, CUcontext cuContext,
683+ uint32_t elem_size, uint32_t num_elems) {
684+ return new DownloadCudaBuffer (cuStream, cuContext, elem_size, num_elems);
685+ }
686+
687+ DownloadCudaBuffer::DownloadCudaBuffer (CUstream cuStream, CUcontext cuContext,
688+ uint32_t elem_size, uint32_t num_elems) :
689+ Task(" DownloadCudaBuffer" , DownloadCudaBuffer::numInputs,
690+ DownloadCudaBuffer::numOutputs, cuda_stream_sync,
691+ (void *)cuStream) {
692+ pImpl = new DownloadCudaBuffer_Impl (cuStream, cuContext, elem_size, num_elems);
693+ }
694+
695+ DownloadCudaBuffer::~DownloadCudaBuffer () { delete pImpl; }
696+
697+ TaskExecStatus DownloadCudaBuffer::Run () {
698+ NvtxMark tick (__FUNCTION__);
699+
700+ if (!GetInput ()) {
701+ return TASK_EXEC_FAIL;
702+ }
703+
704+ ClearOutputs ();
705+
706+ auto stream = pImpl->cuStream ;
707+ auto context = pImpl->cuContext ;
708+ auto pCudaBuffer = (CudaBuffer *)GetInput ();
709+ auto pDstHost = ((Buffer *)pImpl->pHostBuffer )->GetDataAs <void >();
710+
711+ CudaCtxPush lock (context);
712+ if (CUDA_SUCCESS != cuMemcpyDtoHAsync (pDstHost, pCudaBuffer->GpuMem (),
713+ pCudaBuffer->GetRawMemSize (), stream)) {
714+ return TASK_EXEC_FAIL;
715+ }
716+
717+ SetOutput (pImpl->pHostBuffer , 0 );
718+ return TASK_EXEC_SUCCESS;
719+ }
720+
604721namespace VPF {
605722struct DemuxFrame_Impl {
606723 size_t videoBytes = 0U ;
0 commit comments