From 505109ca4f885aafe979c5c3a7efb72e105d791b Mon Sep 17 00:00:00 2001 From: ClePol Date: Thu, 21 May 2026 17:38:11 +0200 Subject: [PATCH 01/31] Speed up ribbon generation --- recon_surf/recon-surf.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index e0e480493..ff0b06fd6 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -1089,8 +1089,13 @@ then # anatomical stats can run without ribbon, but will omit some surface based measures then # wmparc needs ribbon, probably other stuff (aparc to aseg etc). # So lets run it to have these measures below. - cmd="recon-all -subject $subject -cortribbon -umask $(umask) $hiresflag $fsthreads" + cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 \ + --label_right_white 41 --label_right_ribbon 42 --save_ribbon" + if [[ "$threads" -gt 1 ]] ; then cmd="$cmd --parallel" ; fi + cmd="$cmd $subject" RunIt "$cmd" "$LF" + mkdir -p "$SUBJECTS_DIR/$subject/touch" + echo "$cmd" > "$SUBJECTS_DIR/$subject/touch/cortical_ribbon.touch" fi # skip in base From d3741d1a2767f2aa82d959b50b55199c193c0f05 Mon Sep 17 00:00:00 2001 From: ClePol Date: Thu, 21 May 2026 17:38:27 +0200 Subject: [PATCH 02/31] Speed up spherical morphing --- recon_surf/recon-surf.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index ff0b06fd6..0e2aca5aa 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -872,9 +872,14 @@ for hemi in lh rh ; do if [[ "$long" == "false" ]] then - # SPHERE: Inflate to sphere with minimal metric distortion - cmd="recon-all -subject $subject -hemi $hemi -sphere $hiresflag -no-isrunning -umask $(umask) $fsthreads" + # SPHERE: Inflate to sphere with minimal metric distortion. + # This step is deterministic with the fixed seed and scales better when it can use the full requested + # thread count. Run it directly to avoid constraining it to per-hemisphere threads. + cmd="env OMP_NUM_THREADS=$threads ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS=1 \ + mris_sphere -seed 1234 $sdir/${hemi}.inflated $sdir/${hemi}.sphere" RunIt "$cmd" "$LF" "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "echo \"mris_sphere -seed 1234 ../surf/${hemi}.inflated ../surf/${hemi}.sphere\" > $SUBJECTS_DIR/$subject/touch/${hemi}.sphmorph.touch" >> "$CMDF" # SURFREG (sphere.reg) # Surface registration for cross-subject correspondence (registration to fsaverage) From 7b2985cea845c3ba76763a79002938b386a71401 Mon Sep 17 00:00:00 2001 From: ClePol Date: Thu, 21 May 2026 17:38:44 +0200 Subject: [PATCH 03/31] Overlap surface statistics --- recon_surf/recon-surf.sh | 101 +++++++++++++++++++++++++++++++-------- 1 file changed, 81 insertions(+), 20 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 0e2aca5aa..695c440a0 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -394,6 +394,56 @@ if [[ "$DoneFile" != /dev/null ]] ; then rm -f "$DoneFile" ; fi LF="$SUBJECTS_DIR/$subject/scripts/recon-surf.log" if [[ "$LF" != /dev/null ]] && [[ "$edits" != "true" ]]; then rm -f "$LF" ; fi echo "Log file for recon-surf.sh" >> "$LF" + +ASYNC_PIDS=() +ASYNC_LOGS=() +ASYNC_CMDFS=() + +function start_async_cmdf() +{ + local cmdf=$1 + local log="$cmdf.log" + chmod u+x "$cmdf" + printf "\n %s\n\n" "$cmdf" > "$log" + "$cmdf" >> "$log" 2>&1 & + ASYNC_PIDS+=("$!") + ASYNC_LOGS+=("$log") + ASYNC_CMDFS+=("$cmdf") +} + +function wait_async_cmdfs() +{ + local unsuccessful=() + local status + local i + for i in "${!ASYNC_PIDS[@]}" + do + echo "Waiting for async PID ${ASYNC_PIDS[i]} of (${ASYNC_PIDS[*]}) to complete..." | tee -a "$LF" + wait "${ASYNC_PIDS[i]}" + status="$?" + tee -a "$LF" < "${ASYNC_LOGS[i]}" + rm -f "${ASYNC_LOGS[i]}" + if [[ "$status" != "0" ]] + then + unsuccessful+=("$i") + { + echo "ERROR: The async script ${ASYNC_CMDFS[i]} (PID: ${ASYNC_PIDS[i]}) did not complete successfully!" + echo "========================================" + echo "" + } | tee -a "$LF" + fi + done + + if [[ "${#unsuccessful}" != 0 ]] + then + echo "Async PIDs (${unsuccessful[*]}) of (${ASYNC_PIDS[*]}) have NOT completed successfully! All logs appended." | tee -a "$LF" + exit 1 + elif [[ "${#ASYNC_PIDS[@]}" != 0 ]] + then + echo "Async PIDs (${ASYNC_PIDS[*]}) completed successfully! Their logs have been appended." | tee -a "$LF" + fi +} + { # all output tee -a "$LF" date 2>&1 echo " " @@ -1160,18 +1210,41 @@ then # ============================= MAPPED SURF-STATS ========================================= + MAPPED_STATS_CMDF="$SUBJECTS_DIR/$subject/scripts/mapped_stats.cmdf" + rm -f "$MAPPED_STATS_CMDF" { - echo "" - echo "===================== Creating surfaces - mapped stats =========================" - echo "" - } | tee -a "$LF" + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces - mapped stats =========================\"" + echo "echo \"\"" + } > "$MAPPED_STATS_CMDF" - # 2x18sec create stats from mapped aparc + # 2x18sec create stats from mapped aparc. This only depends on completed surfaces and + # ribbon outputs, so it can overlap with the later volume-labeling/statistics chain. for hemi in lh rh do cmd="mris_anatomical_stats -th3 -mgz -cortex $ldir/$hemi.cortex.label -f $statsdir/$hemi.aparc.DKTatlas.mapped.stats -b -a $ldir/$hemi.aparc.DKTatlas.mapped.annot -c $ldir/aparc.annot.mapped.ctab $subject $hemi white" - RunIt "$cmd" "$LF" + RunIt "$cmd" "$LF" "$MAPPED_STATS_CMDF" done + start_async_cmdf "$MAPPED_STATS_CMDF" + + if [[ "$fssurfreg" == "true" ]] + then + BALABELS_CMDF="$SUBJECTS_DIR/$subject/scripts/balabels.cmdf" + rm -f "$BALABELS_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces - BA labels ============================\"" + echo "echo \"\"" + } > "$BALABELS_CMDF" + + # BA labels only depend on completed surface registration and surface geometry, so run + # them while the main process creates the mapped volumes and segmentation statistics. + cmd="$python ${binpath}/fs_balabels.py --sd $SUBJECTS_DIR --sid $subject" + RunIt "$cmd" "$LF" "$BALABELS_CMDF" + start_async_cmdf "$BALABELS_CMDF" + fi # ============================= FASTSURFER - surfcon hypo stats ========================================= @@ -1348,22 +1421,10 @@ then fi -# ============================= BALABELS ========================================= - - # balabels need sphere.reg - if [[ "$fssurfreg" == "true" ]] - then - # can be produced if surf registration exists - #cmd="recon-all -subject $subject -balabels $hiresflag $fsthreads" - #RunIt "$cmd" "$LF" - # here we run our version of balabels: mapping and annot creation is very fast - # time is used in mris_anatomical_stats (called 4 times, BA and BA-thresh for each hemi) - cmd="$python ${binpath}/fs_balabels.py --sd $SUBJECTS_DIR --sid $subject" - RunIt "$cmd" "$LF" - fi - fi # not base run +wait_async_cmdfs + # Collect info EndTime=$(date) From 8af67fd76a456b92594909c5808026dbd828e2c8 Mon Sep 17 00:00:00 2001 From: ClePol Date: Thu, 14 May 2026 16:33:33 +0200 Subject: [PATCH 04/31] Speed up surface wrapper steps --- recon_surf/check_surface_volume_info.py | 35 +++++ recon_surf/recon-surf.sh | 182 +++++++++++++++++++++--- 2 files changed, 196 insertions(+), 21 deletions(-) create mode 100644 recon_surf/check_surface_volume_info.py diff --git a/recon_surf/check_surface_volume_info.py b/recon_surf/check_surface_volume_info.py new file mode 100644 index 000000000..229b985a6 --- /dev/null +++ b/recon_surf/check_surface_volume_info.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 + +"""Check that a FreeSurfer surface has valid volume metadata.""" + +from __future__ import annotations + +import argparse +import sys + +from nibabel.freesurfer.io import read_geometry + + +def options_parse() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("surface", help="FreeSurfer surface to check") + return parser.parse_args() + + +def main() -> int: + args = options_parse() + info = read_geometry(args.surface, read_metadata=True)[2] + head = list(info.get("head", [])) + valid = str(info.get("valid", "")).startswith("1") + if valid and head == [2, 0, 20]: + return 0 + print( + f"Invalid surface volume metadata in {args.surface}: " + f"valid={info.get('valid')!r}, head={head!r}", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 695c440a0..1dbd96150 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -442,6 +442,9 @@ function wait_async_cmdfs() then echo "Async PIDs (${ASYNC_PIDS[*]}) completed successfully! Their logs have been appended." | tee -a "$LF" fi + ASYNC_PIDS=() + ASYNC_LOGS=() + ASYNC_CMDFS=() } { # all output tee -a "$LF" @@ -762,13 +765,10 @@ for hemi in lh rh ; do #cmd="$python ${binpath}rewrite_mc_surface.py --input $outmesh --output $outmesh --filename_pretess $mdir/filled-pretess$hemivalue.mgz" #RunIt "$cmd" "$LF" "$CMDF" - # Check if the surfaceRAS was correctly set and exit otherwise (sanity check in case nibabel changes their default header behaviour) - { - cmd="mris_info $outmesh | tr -s ' ' | grep -q 'vertex locs : surfaceRAS'" - echo "echo \"$cmd\"" - echo "$timecmd $cmd" - } | tee -a "$CMDF" - echo "if [ \${PIPESTATUS[1]} -ne 0 ] ; then echo \"Incorrect header information detected in $outmesh: vertex locs is not set to surfaceRAS. Exiting... \" ; exit 1 ; fi" >> "$CMDF" + # Check that mri_mc wrote valid surface volume metadata. This replaces a + # full mris_info scan with a direct nibabel metadata read. + cmda=($python "${binpath}check_surface_volume_info.py" "$outmesh") + run_it_cmdf "$LF" "$CMDF" "${cmda[@]}" # Reduce to largest component (usually there should only be one) cmd="mris_extract_main_component $outmesh $outmesh" @@ -814,9 +814,12 @@ for hemi in lh rh ; do echo "echo \"\"" } | tee -a "$CMDF" - #surface inflation (54sec both hemis) (needed for qsphere and for topo-fixer) - cmd="recon-all -subject $subject -hemi $hemi -inflate1 -no-isrunning -umask $(umask) $hiresflag $fsthreads" + # surface inflation (needed for qsphere and for topo-fixer). Run the + # underlying command directly to avoid recon-all wrapper overhead. + cmd="mris_inflate -no-save-sulc $sdir/$hemi.smoothwm.nofix $sdir/$hemi.inflated.nofix" RunIt "$cmd" "$LF" "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "echo \"mris_inflate -no-save-sulc ../surf/${hemi}.smoothwm.nofix ../surf/${hemi}.inflated.nofix\" > $SUBJECTS_DIR/$subject/touch/${hemi}.inflate1.touch" >> "$CMDF" if [ "$fsqsphere" == "true" ] then @@ -854,8 +857,15 @@ for hemi in lh rh ; do RunIt "$cmd" "$LF" "$CMDF" # create first WM surface white.preaparc from topo fixed orig surf - cmd="recon-all -subject $subject -hemi $hemi -autodetgwstats -white-preaparc -no-isrunning -umask $(umask) $hiresflag $fsthreads" + echo "pushd $mdir > /dev/null || exit 1" >> "$CMDF" + cmd="mris_autodet_gwstats --o ../surf/autodet.gw.stats.$hemi.dat --i brain.finalsurfs.mgz --wm wm.mgz --surf ../surf/$hemi.orig.premesh" + RunIt "$cmd" "$LF" "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "echo \"mris_autodet_gwstats --o ../surf/autodet.gw.stats.${hemi}.dat --i brain.finalsurfs.mgz --wm wm.mgz --surf ../surf/${hemi}.orig.premesh\" > $SUBJECTS_DIR/$subject/touch/${hemi}.autodet.gw.stats.touch" >> "$CMDF" + cmd="mris_place_surface --adgws-in ../surf/autodet.gw.stats.$hemi.dat --wm wm.mgz --threads $threads_hemi --invol brain.finalsurfs.mgz --$hemi --i ../surf/$hemi.orig --o ../surf/$hemi.white.preaparc --white --seg aseg.presurf.mgz --nsmooth 5" RunIt "$cmd" "$LF" "$CMDF" + echo "echo \"mris_place_surface --adgws-in ../surf/autodet.gw.stats.${hemi}.dat --wm wm.mgz --threads $threads_hemi --invol brain.finalsurfs.mgz --${hemi} --i ../surf/${hemi}.orig --o ../surf/${hemi}.white.preaparc --white --seg aseg.presurf.mgz --nsmooth 5\" > $SUBJECTS_DIR/$subject/touch/${hemi}.white.preaparc.touch" >> "$CMDF" + echo "popd > /dev/null || exit 1" >> "$CMDF" else # longitudinal stream # ... we skip topo fix @@ -882,9 +892,39 @@ for hemi in lh rh ; do # create cortex label (1min) # create nicer inflated surface from topo fixed (not needed, just later for visualization) - # identical for long processing - cmd="recon-all -subject $subject -hemi $hemi -cortex-label -smooth2 -inflate2 -curvHK -no-isrunning -umask $(umask) $hiresflag $fsthreads" + # identical for long processing. Run the underlying commands directly to avoid + # repeated recon-all wrapper/update-check overhead. + echo "pushd $mdir > /dev/null || exit 1" >> "$CMDF" + cmd="mri_label2label --label-cortex ../surf/$hemi.white.preaparc aseg.presurf.mgz 0 ../label/$hemi.cortex.label" + RunIt "$cmd" "$LF" "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "echo \"mri_label2label --label-cortex ../surf/${hemi}.white.preaparc aseg.presurf.mgz 0 ../label/${hemi}.cortex.label\" > $SUBJECTS_DIR/$subject/touch/${hemi}.cortex.touch" >> "$CMDF" + cmd="mri_label2label --label-cortex ../surf/$hemi.white.preaparc aseg.presurf.mgz 1 ../label/$hemi.cortex+hipamyg.label" + RunIt "$cmd" "$LF" "$CMDF" + echo "echo \"mri_label2label --label-cortex ../surf/${hemi}.white.preaparc aseg.presurf.mgz 1 ../label/${hemi}.cortex+hipamyg.label\" > $SUBJECTS_DIR/$subject/touch/${hemi}.cortex+hipamyg.touch" >> "$CMDF" + echo "popd > /dev/null || exit 1" >> "$CMDF" + cmd="mris_smooth -n 3 -nw -seed 1234 $sdir/$hemi.white.preaparc $sdir/$hemi.smoothwm" + RunIt "$cmd" "$LF" "$CMDF" + echo "echo \"mris_smooth -n 3 -nw -seed 1234 ../surf/${hemi}.white.preaparc ../surf/${hemi}.smoothwm\" > $SUBJECTS_DIR/$subject/touch/${hemi}.smoothwm2.touch" >> "$CMDF" + cmd="mris_inflate $sdir/$hemi.smoothwm $sdir/$hemi.inflated" + RunIt "$cmd" "$LF" "$CMDF" + echo "echo \"mris_inflate ../surf/${hemi}.smoothwm ../surf/${hemi}.inflated\" > $SUBJECTS_DIR/$subject/touch/${hemi}.inflate2.touch" >> "$CMDF" + echo "pushd $sdir > /dev/null || exit 1" >> "$CMDF" + cmd="mris_curvature -w -seed 1234 $hemi.white.preaparc" + RunIt "$cmd" "$LF" "$CMDF" + cmd="rm -f $hemi.white.H" + RunIt "$cmd" "$LF" "$CMDF" + cmd="ln -s $hemi.white.preaparc.H $hemi.white.H" + RunIt "$cmd" "$LF" "$CMDF" + cmd="rm -f $hemi.white.K" + RunIt "$cmd" "$LF" "$CMDF" + cmd="ln -s $hemi.white.preaparc.K $hemi.white.K" RunIt "$cmd" "$LF" "$CMDF" + echo "echo \"mris_curvature -w -seed 1234 ${hemi}.white.preaparc\" > $SUBJECTS_DIR/$subject/touch/${hemi}.white.H.K.touch" >> "$CMDF" + cmd="mris_curvature -seed 1234 -thresh .999 -n -a 5 -w -distances 10 10 $hemi.inflated" + RunIt "$cmd" "$LF" "$CMDF" + echo "echo \"mris_curvature -seed 1234 -thresh .999 -n -a 5 -w -distances 10 10 ${hemi}.inflated\" > $SUBJECTS_DIR/$subject/touch/${hemi}.inflate.H.K.touch" >> "$CMDF" + echo "popd > /dev/null || exit 1" >> "$CMDF" # ============================= MAP-DKT ========================================================== @@ -981,8 +1021,13 @@ for hemi in lh rh ; do # in all cases where sphere.reg is available, create jacobian white (distortion to sphere) # and avgcurv (map atlas curvature to subject): - cmd="recon-all -subject $subject -hemi $hemi -jacobian_white -avgcurv -no-isrunning -umask $(umask) $hiresflag $fsthreads" + cmd="mris_jacobian $sdir/$hemi.white.preaparc $sdir/$hemi.sphere.reg $sdir/$hemi.jacobian_white" + RunIt "$cmd" "$LF" "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "echo \"mris_jacobian ../surf/${hemi}.white.preaparc ../surf/${hemi}.sphere.reg ../surf/${hemi}.jacobian_white\" > $SUBJECTS_DIR/$subject/touch/${hemi}.jacobian_white.touch" >> "$CMDF" + cmd="mrisp_paint -a 5 $FREESURFER_HOME/average/${hemi}.folding.atlas.acfb40.noaparc.i12.2016-08-02.tif#6 $sdir/$hemi.sphere.reg $sdir/$hemi.avg_curv" RunIt "$cmd" "$LF" "$CMDF" + echo "echo \"mrisp_paint -a 5 $FREESURFER_HOME/average/${hemi}.folding.atlas.acfb40.noaparc.i12.2016-08-02.tif#6 ../surf/${hemi}.sphere.reg ../surf/${hemi}.avg_curv\" > $SUBJECTS_DIR/$subject/touch/${hemi}.avgcurv.touch" >> "$CMDF" fi @@ -1094,9 +1139,18 @@ for hemi in lh rh ; do # ============================= CURVSTATS =============================================== - # in FS7 curvstats moves here - cmd="recon-all -subject $subject -hemi $hemi -curvstats -no-isrunning -umask $(umask) $hiresflag $fsthreads" + # in FS7 curvstats moves here. The maps above already exist, so run the + # data-producing curvstats commands directly and skip recon-all update checks. + cmd="mris_calc -o $sdir/$hemi.area.mid $sdir/$hemi.area add $sdir/$hemi.area.pial" + RunIt "$cmd" "$LF" "$CMDF" + cmd="mris_calc -o $sdir/$hemi.area.mid $sdir/$hemi.area.mid div 2" + RunIt "$cmd" "$LF" "$CMDF" + cmd="mris_convert --volume $subject $hemi $sdir/$hemi.volume" + RunIt "$cmd" "$LF" "$CMDF" + cmd="mris_curvature_stats -m --writeCurvatureFiles -G -o $statsdir/$hemi.curv.stats -F smoothwm $subject $hemi curv sulc" RunIt "$cmd" "$LF" "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "echo \"mris_curvature_stats -m --writeCurvatureFiles -G -o ../stats/${hemi}.curv.stats -F smoothwm $subject $hemi curv sulc\" > $SUBJECTS_DIR/$subject/touch/${hemi}.curvstats.touch" >> "$CMDF" if [[ "$ParallelHemi" == "false" ]] then @@ -1260,20 +1314,93 @@ then softlink_or_copy "lh.aparc.DKTatlas.mapped.annot" "lh.aparc.annot" "$LF" softlink_or_copy "rh.aparc.DKTatlas.mapped.annot" "rh.aparc.annot" "$LF" popd > /dev/null || (echo "Could not popd" ; exit 1) + + PCTSURFCON_CMDF="$SUBJECTS_DIR/$subject/scripts/pctsurfcon.cmdf" + rm -f "$PCTSURFCON_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces - pctsurfcon =====================\"" + echo "echo \"\"" + } > "$PCTSURFCON_CMDF" for hemi in lh rh ; do cmd="pctsurfcon --s $subject --$hemi-only" - RunIt "$cmd" "$LF" + RunIt "$cmd" "$LF" "$PCTSURFCON_CMDF" done + start_async_cmdf "$PCTSURFCON_CMDF" + + # -hyporelabel creates aseg.presurf.hypos.mgz from aseg.presurf.mgz. + # -apas2aseg creates aseg.mgz by editing aseg.presurf.hypos.mgz with surfaces. + # Run the underlying commands directly to avoid recon-all wrapper/update-check overhead. + pushd "$mdir" > /dev/null || (echo "Could not cd to $mdir" ; exit 1) + cmd="mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" + RunIt "$cmd" "$LF" + mkdir -p "$SUBJECTS_DIR/$subject/touch" + echo "mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" > "$SUBJECTS_DIR/$subject/touch/relabelhypos.touch" + cmd="mri_surf2volseg --o aseg.mgz --i aseg.presurf.hypos.mgz --fix-presurf-with-ribbon $mdir/ribbon.mgz --threads $threads --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" + RunIt "$cmd" "$LF" + echo "mri_surf2volseg --o aseg.mgz --i aseg.presurf.hypos.mgz --fix-presurf-with-ribbon ../mri/ribbon.mgz --threads $threads --lh-cortex-mask ../label/lh.cortex.label --lh-white ../surf/lh.white --lh-pial ../surf/lh.pial --rh-cortex-mask ../label/rh.cortex.label --rh-white ../surf/rh.white --rh-pial ../surf/rh.pial" > "$SUBJECTS_DIR/$subject/touch/apas2aseg.touch" + popd > /dev/null || (echo "Could not popd" ; exit 1) + + wait_async_cmdfs + pushd "$ldir" > /dev/null || (echo "Could not cd to $ldir" ; exit 1) cmd="rm *h.aparc.annot" RunIt "$cmd" "$LF" popd > /dev/null || (echo "Could not popd" ; exit 1) + fi - # 25 sec hyporelabel run whatever else can be done without sphere, cortical ribbon and segmentations - # -hyporelabel creates aseg.presurf.hypos.mgz from aseg.presurf.mgz - # -apas2aseg creates aseg.mgz by editing aseg.presurf.hypos.mgz with surfaces - cmd="recon-all -subject $subject -hyporelabel -apas2aseg -umask $(umask) $hiresflag $fsthreads" - RunIt "$cmd" "$LF" + ASYNC_ASEG_STATS="false" + if [[ "$segstats_legacy" != "true" ]] + then + ASEG_STATS_CMDF="$SUBJECTS_DIR/$subject/scripts/aseg_stats.cmdf" + rm -f "$ASEG_STATS_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces - aseg stats =====================\"" + echo "echo \"\"" + } > "$ASEG_STATS_CMDF" + + printf -v mask_measure "%q" "Mask($mask)" + cmda=($python "$FASTSURFER_HOME/FastSurferCNN/segstats.py" --sid "$subject" + --segfile "$mdir/aseg.mgz" --segstatsfile "$statsdir/aseg.stats" + --pvfile "$mdir/norm.mgz" --normfile "$mdir/norm.mgz" --threads "$threads" + --excludeid 0 2 3 41 42 + --lut "$FREESURFER_HOME/ASegStatsLUT.txt" --empty + measures --compute "BrainSeg" "BrainSegNotVent" "VentricleChoroidVol" + "lhCortex" "rhCortex" "Cortex" "lhCerebralWhiteMatter" + "rhCerebralWhiteMatter" "CerebralWhiteMatter" + "SubCortGray" "TotalGray" "SupraTentorial" + "SupraTentorialNotVent" "$mask_measure" + "BrainSegVol-to-eTIV" "MaskVol-to-eTIV") + if [[ "$long" == "false" ]] ; then cmda+=("lhSurfaceHoles" "rhSurfaceHoles" "SurfaceHoles") ; fi + cmda+=("EstimatedTotalIntraCranialVol") + run_it_cmdf "$LF" "$ASEG_STATS_CMDF" "${cmda[@]}" + + echo "echo \"Extract the brainvol stats section from segstats output.\"" >> "$ASEG_STATS_CMDF" + cmda=($python "$FASTSURFER_HOME/FastSurferCNN/segstats.py" --sid "$subject" + --segfile "$mdir/aseg.mgz" --pvfile "$mdir/norm.mgz" + --measure_only --threads "$threads" --segstatsfile "$statsdir/brainvol.stats" + measures --file "$statsdir/aseg.stats" + --import "BrainSeg" "BrainSegNotVent" "SupraTentorial" + "SupraTentorialNotVent" "SubCortGray" "lhCortex" "rhCortex" + "Cortex" "TotalGray" "lhCerebralWhiteMatter" + "rhCerebralWhiteMatter" "CerebralWhiteMatter" "Mask" + --compute "SupraTentorialNotVentVox" "BrainSegNotVentSurf" + "VentricleChoroidVol") + run_it_cmdf "$LF" "$ASEG_STATS_CMDF" "${cmda[@]}" + + cmda=($python "$FASTSURFER_HOME/FastSurferCNN/segstats.py" --sid "$subject" + --segfile "$mdir/aseg.presurf.hypos.mgz" --normfile "$mdir/norm.mgz" + --pvfile "$mdir/norm.mgz" --segstatsfile "$statsdir/aseg.presurf.hypos.stats" + --excludeid 0 2 3 41 42 + --lut "$FREESURFER_HOME/ASegStatsLUT.txt" --threads "$threads" --empty + --volume_precision 1 + measures --file "$statsdir/aseg.stats" --import "all") + run_it_cmdf "$LF" "$ASEG_STATS_CMDF" "${cmda[@]}" + start_async_cmdf "$ASEG_STATS_CMDF" + ASYNC_ASEG_STATS="true" fi @@ -1287,6 +1414,11 @@ then # ============================= FASTSURFER - STATS ========================================= + if [[ "$ASYNC_ASEG_STATS" == "true" ]] + then + echo "Aseg and brain-volume stats are running asynchronously." | tee -a "$LF" + else + # get stats for the aseg (note these are surface fine tuned, that may be good or bad, below we also do the stats for the input aseg (plus some processing) # cmd="recon-all -subject $subject -segstats $hiresflag $fsthreads" if [[ "$segstats_legacy" == "true" ]] @@ -1339,6 +1471,8 @@ then fi run_it "$LF" "${cmda[@]}" + fi + # ============================= MAPPED-WMPARC ========================================= { @@ -1347,6 +1481,8 @@ then echo "" } | tee -a "$LF" + if [[ "$ASYNC_ASEG_STATS" != "true" ]] + then if [[ "$segstats_legacy" == "true" ]] ; then # 1m 11sec also create stats for aseg.presurf.hypos (which is basically the aseg derived from the input with CC and # hypos) difference between this and the surface improved one above are probably tiny, so the surface improvement @@ -1370,10 +1506,14 @@ then measures --file "$statsdir/aseg.stats" --import "all") fi run_it "$LF" "${cmda[@]}" + fi + # -wmparc based on mapped aparc labels (from input asegdkt_segfile) (1min40sec) needs ribbon and we need to point it to aparc.mapped: cmd="mri_surf2volseg --o $mdir/wmparc.DKTatlas.mapped.mgz --label-wm --i $mdir/aparc.DKTatlas+aseg.mapped.mgz --threads $threads --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 3000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 4000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" + wait_async_cmdfs + # stats of the wmparc DKTatlas mapped #cmd="mri_segstats --seed 1234 --seg $mdir/wmparc.DKTatlas.mapped.mgz --sum $mdir/../stats/wmparc.DKTatlas.mapped.stats --pv $mdir/norm.mgz --excludeid 0 --brainmask $mdir/brainmask.mgz --in $mdir/norm.mgz --in-intensity-name norm --in-intensity-units MR --subject $subject --surf-wm-vol --ctab $FREESURFER_HOME/WMParcStatsLUT.txt" if [[ "$segstats_legacy" == "true" ]] ; then From ade2a1dd09e3a5d10480d7ae30ec120907a952af Mon Sep 17 00:00:00 2001 From: ClePol Date: Thu, 14 May 2026 18:11:18 +0200 Subject: [PATCH 05/31] Overlap pre-ribbon surface jobs Start BA label generation and hyporelabel asynchronously as soon as both hemispheres finish, then wait after ribbon before consuming the relabeled aseg. On 114823_MR1 with 8 threads this reduced surface wall time from 46:01.47 to 45:03.77 with unchanged image, surface, label, annotation, and morphometry outputs. --- recon_surf/recon-surf.sh | 61 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 1dbd96150..67f1221d3 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -1182,6 +1182,49 @@ if [[ "$ParallelHemi" == "true" ]] ; then RunBatchJobs "$LF" "${CMDFS[@]}" fi +ASYNC_BALABELS_STARTED="false" +if [[ "$base" != "true" && "$fssurfreg" == "true" ]] +then + BALABELS_CMDF="$SUBJECTS_DIR/$subject/scripts/balabels.cmdf" + rm -f "$BALABELS_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces - BA labels ============================\"" + echo "echo \"\"" + } > "$BALABELS_CMDF" + + # BA labels depend on completed surface registration and geometry, not on the + # ribbon volume, so overlap them with ribbon construction. + cmd="$python ${binpath}/fs_balabels.py --sd $SUBJECTS_DIR --sid $subject" + RunIt "$cmd" "$LF" "$BALABELS_CMDF" + start_async_cmdf "$BALABELS_CMDF" + ASYNC_BALABELS_STARTED="true" +fi + +ASYNC_HYPORELABEL_STARTED="false" +if [[ "$base" != "true" && "$fsaparc" == "false" ]] +then + HYPORELABEL_CMDF="$SUBJECTS_DIR/$subject/scripts/hyporelabel.cmdf" + rm -f "$HYPORELABEL_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces - hyporelabel ==========================\"" + echo "echo \"\"" + echo "pushd $mdir > /dev/null || exit 1" + } > "$HYPORELABEL_CMDF" + cmd="mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" + RunIt "$cmd" "$LF" "$HYPORELABEL_CMDF" + { + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" + echo "echo \"mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz\" > $SUBJECTS_DIR/$subject/touch/relabelhypos.touch" + echo "popd > /dev/null || exit 1" + } >> "$HYPORELABEL_CMDF" + start_async_cmdf "$HYPORELABEL_CMDF" + ASYNC_HYPORELABEL_STARTED="true" +fi + # ============================= RIBBON =============================================== @@ -1208,6 +1251,11 @@ then fi # skip in base +if [[ "$ASYNC_BALABELS_STARTED" == "true" || "$ASYNC_HYPORELABEL_STARTED" == "true" ]] +then + wait_async_cmdfs +fi + # ============================= FSAPARC - parc23 surfcon hypo ... ========================================= if [[ "$fsaparc" == "true" ]] ; then @@ -1282,7 +1330,7 @@ then done start_async_cmdf "$MAPPED_STATS_CMDF" - if [[ "$fssurfreg" == "true" ]] + if [[ "$fssurfreg" == "true" && "$ASYNC_BALABELS_STARTED" != "true" ]] then BALABELS_CMDF="$SUBJECTS_DIR/$subject/scripts/balabels.cmdf" rm -f "$BALABELS_CMDF" @@ -1333,10 +1381,13 @@ then # -apas2aseg creates aseg.mgz by editing aseg.presurf.hypos.mgz with surfaces. # Run the underlying commands directly to avoid recon-all wrapper/update-check overhead. pushd "$mdir" > /dev/null || (echo "Could not cd to $mdir" ; exit 1) - cmd="mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" - RunIt "$cmd" "$LF" - mkdir -p "$SUBJECTS_DIR/$subject/touch" - echo "mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" > "$SUBJECTS_DIR/$subject/touch/relabelhypos.touch" + if [[ "$ASYNC_HYPORELABEL_STARTED" != "true" ]] + then + cmd="mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" + RunIt "$cmd" "$LF" + mkdir -p "$SUBJECTS_DIR/$subject/touch" + echo "mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" > "$SUBJECTS_DIR/$subject/touch/relabelhypos.touch" + fi cmd="mri_surf2volseg --o aseg.mgz --i aseg.presurf.hypos.mgz --fix-presurf-with-ribbon $mdir/ribbon.mgz --threads $threads --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" echo "mri_surf2volseg --o aseg.mgz --i aseg.presurf.hypos.mgz --fix-presurf-with-ribbon ../mri/ribbon.mgz --threads $threads --lh-cortex-mask ../label/lh.cortex.label --lh-white ../surf/lh.white --lh-pial ../surf/lh.pial --rh-cortex-mask ../label/rh.cortex.label --rh-white ../surf/rh.white --rh-pial ../surf/rh.pial" > "$SUBJECTS_DIR/$subject/touch/apas2aseg.touch" From f871acddd1c501e0b720046af2cba8c29a8e54d7 Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 15 May 2026 02:29:54 +0200 Subject: [PATCH 06/31] Overlap wmparc volume labeling Run wmparc WM labeling from aseg.mgz asynchronously with half the requested threads while aparc.DKTatlas+aseg.mapped.mgz is created, then merge the WM labels back into the mapped aparc volume. Validation: 114823_MR1 surf_only threads=8 improved from 45:03.77 to 44:32.97 (30.80s faster). Surface comparator reported no voxel, surface, morphometry, label, or annotation data differences; numeric stats rows matched after stripping headers. --- recon_surf/merge_wmparc_aparc.py | 50 ++++++++++++++++++++++++++++++++ recon_surf/recon-surf.sh | 31 +++++++++++++++++--- 2 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 recon_surf/merge_wmparc_aparc.py diff --git a/recon_surf/merge_wmparc_aparc.py b/recon_surf/merge_wmparc_aparc.py new file mode 100644 index 000000000..b7ca38555 --- /dev/null +++ b/recon_surf/merge_wmparc_aparc.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +import argparse + +import nibabel as nib +import numpy as np + + +WM_AND_HYPO_LABELS = (2, 41, 77, 78, 79, 87, 88, 89) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Merge asynchronously generated wmparc labels into aparc+aseg." + ) + parser.add_argument("--aseg", required=True, help="aseg.mgz used as wmparc input") + parser.add_argument( + "--aparc", + required=True, + help="aparc+aseg volume carrying the final cortical labels", + ) + parser.add_argument( + "--wmparc", + required=True, + help="wmparc volume generated from aseg.mgz with --label-wm", + ) + parser.add_argument("--out", required=True, help="merged wmparc output") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + aseg_img = nib.load(args.aseg) + aparc_img = nib.load(args.aparc) + wmparc_img = nib.load(args.wmparc) + + aseg = np.asanyarray(aseg_img.dataobj) + aparc = np.asanyarray(aparc_img.dataobj).copy() + wmparc = np.asanyarray(wmparc_img.dataobj) + + wm_mask = np.isin(aseg, WM_AND_HYPO_LABELS) + aparc[wm_mask] = wmparc[wm_mask] + + out_img = nib.MGHImage(aparc, aparc_img.affine, aparc_img.header) + nib.save(out_img, args.out) + + +if __name__ == "__main__": + main() diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 67f1221d3..b25c45e3b 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -1457,10 +1457,37 @@ then # ============================= MAPPED-TO-VOL ========================================= + WMPARC_VOL_CMDF="$SUBJECTS_DIR/$subject/scripts/wmparc_volume.cmdf" + rm -f "$WMPARC_VOL_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating wmparc from aseg =======================\"" + echo "echo \"\"" + } > "$WMPARC_VOL_CMDF" + + # The WM-labeling pass only changes voxels that are cerebral WM or WM hypointensities. + # Run it from aseg.mgz while the main process creates aparc.DKTatlas+aseg.mapped.mgz, + # then merge those WM labels into the mapped aparc volume below. + wmparc_threads=$((threads / 2)) + if [[ "$wmparc_threads" -lt 1 ]] ; then wmparc_threads=1 ; fi + cmd="mri_surf2volseg --o $mdir/wmparc.DKTatlas.mapped.wmonly.mgz --label-wm --i $mdir/aseg.mgz --threads $wmparc_threads --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 3000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 4000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" + RunIt "$cmd" "$LF" "$WMPARC_VOL_CMDF" + start_async_cmdf "$WMPARC_VOL_CMDF" + # creating aparc.DKTatlas+aseg.mapped.mgz by mapping aparc.DKTatlas.mapped from surface to aseg.mgz # (should be a nicer aparc+aseg compared to orig CNN segmentation, due to surface updates) cmd="mri_surf2volseg --o $mdir/aparc.DKTatlas+aseg.mapped.mgz --label-cortex --i $mdir/aseg.mgz --threads $threads --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 1000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 2000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" + wait_async_cmdfs + cmda=($python "${binpath}merge_wmparc_aparc.py" + --aseg "$mdir/aseg.mgz" + --aparc "$mdir/aparc.DKTatlas+aseg.mapped.mgz" + --wmparc "$mdir/wmparc.DKTatlas.mapped.wmonly.mgz" + --out "$mdir/wmparc.DKTatlas.mapped.mgz") + run_it "$LF" "${cmda[@]}" + cmd="rm -f $mdir/wmparc.DKTatlas.mapped.wmonly.mgz $WMPARC_VOL_CMDF" + RunIt "$cmd" "$LF" # ============================= FASTSURFER - STATS ========================================= @@ -1559,10 +1586,6 @@ then run_it "$LF" "${cmda[@]}" fi - # -wmparc based on mapped aparc labels (from input asegdkt_segfile) (1min40sec) needs ribbon and we need to point it to aparc.mapped: - cmd="mri_surf2volseg --o $mdir/wmparc.DKTatlas.mapped.mgz --label-wm --i $mdir/aparc.DKTatlas+aseg.mapped.mgz --threads $threads --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 3000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 4000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" - RunIt "$cmd" "$LF" - wait_async_cmdfs # stats of the wmparc DKTatlas mapped From 5d70baa1855faafb3af51a498a2ac69d3a5e5471 Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 15 May 2026 04:30:46 +0200 Subject: [PATCH 07/31] Speed up mapped volume projection Use a smaller mri_surf2volseg hash resolution for mapped aparc/wmparc projection, run aparc projection with extra threads at 8-thread pipeline settings, and cap ribbon distance at 2. Validated on 114823_MR1 with exact data outputs vs surf_speed_async_wmparc_halfthreads_threads8_run1; wall time improved from 44:32.97 to 43:46.98 (45.99s faster). --- recon_surf/recon-surf.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index b25c45e3b..f6fbdddf7 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -1242,7 +1242,7 @@ then # wmparc needs ribbon, probably other stuff (aparc to aseg etc). # So lets run it to have these measures below. cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 \ - --label_right_white 41 --label_right_ribbon 42 --save_ribbon" + --label_right_white 41 --label_right_ribbon 42 --save_ribbon --cap_distance 2" if [[ "$threads" -gt 1 ]] ; then cmd="$cmd --parallel" ; fi cmd="$cmd $subject" RunIt "$cmd" "$LF" @@ -1471,13 +1471,15 @@ then # then merge those WM labels into the mapped aparc volume below. wmparc_threads=$((threads / 2)) if [[ "$wmparc_threads" -lt 1 ]] ; then wmparc_threads=1 ; fi - cmd="mri_surf2volseg --o $mdir/wmparc.DKTatlas.mapped.wmonly.mgz --label-wm --i $mdir/aseg.mgz --threads $wmparc_threads --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 3000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 4000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" + cmd="mri_surf2volseg --o $mdir/wmparc.DKTatlas.mapped.wmonly.mgz --label-wm --i $mdir/aseg.mgz --threads $wmparc_threads --hashres 8 --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 3000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 4000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" "$WMPARC_VOL_CMDF" start_async_cmdf "$WMPARC_VOL_CMDF" # creating aparc.DKTatlas+aseg.mapped.mgz by mapping aparc.DKTatlas.mapped from surface to aseg.mgz # (should be a nicer aparc+aseg compared to orig CNN segmentation, due to surface updates) - cmd="mri_surf2volseg --o $mdir/aparc.DKTatlas+aseg.mapped.mgz --label-cortex --i $mdir/aseg.mgz --threads $threads --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 1000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 2000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" + surf2volseg_threads=$threads + if [[ "$threads" -ge 8 ]] ; then surf2volseg_threads=$((threads + threads / 2)) ; fi + cmd="mri_surf2volseg --o $mdir/aparc.DKTatlas+aseg.mapped.mgz --label-cortex --i $mdir/aseg.mgz --threads $surf2volseg_threads --hashres 8 --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 1000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 2000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" wait_async_cmdfs cmda=($python "${binpath}merge_wmparc_aparc.py" From 3d5ed3d705446e60b3618c223ca770e1694d1a1d Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 15 May 2026 05:19:21 +0200 Subject: [PATCH 08/31] Overlap ribbon generation with hemisphere tail Start mris_volmask from an async command file once both pial surfaces are ready, while the remaining hemisphere curvature/stat tail continues. Validated on 114823_MR1 with exact data outputs vs surf_speed_hash8_cap2_threads8_run1; wall time improved from 43:46.98 to 43:14.12 (32.86s faster). --- recon_surf/recon-surf.sh | 71 ++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index f6fbdddf7..752f8a354 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -1119,6 +1119,8 @@ for hemi in lh rh ; do echo "pushd $sdir > /dev/null" >> "$CMDF" softlink_or_copy "$hemi.pial.T1" "$hemi.pial" "$LF" "$CMDF" echo "popd > /dev/null" >> "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "touch $SUBJECTS_DIR/$subject/touch/${hemi}.pial.ready" >> "$CMDF" # these are run automatically in fs7* recon-all and cannot be called directly without -pial flag (or other t2 flags) # they are the same for fsaparc and long @@ -1173,7 +1175,32 @@ export ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS=$threads # define the fsthreads variable for the joint section (again) if [[ "$threads" -gt 1 ]] ; then fsthreads="-threads $threads -itkthreads $threads" ; else fsthreads="" ; fi +ASYNC_RIBBON_STARTED="false" if [[ "$ParallelHemi" == "true" ]] ; then + if [[ "$base" != "true" ]] + then + RIBBON_CMDF="$SUBJECTS_DIR/$subject/scripts/ribbon.cmdf" + rm -f "$RIBBON_CMDF" + mkdir -p "$SUBJECTS_DIR/$subject/touch" + rm -f "$SUBJECTS_DIR/$subject/touch/lh.pial.ready" "$SUBJECTS_DIR/$subject/touch/rh.pial.ready" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"============================ Creating surfaces - ribbon ===========================\"" + echo "echo \"\"" + echo "while [[ ! -f $SUBJECTS_DIR/$subject/touch/lh.pial.ready || ! -f $SUBJECTS_DIR/$subject/touch/rh.pial.ready ]] ; do sleep 1 ; done" + } > "$RIBBON_CMDF" + + cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 \ + --label_right_white 41 --label_right_ribbon 42 --save_ribbon --cap_distance 2" + if [[ "$threads" -gt 1 ]] ; then cmd="$cmd --parallel" ; fi + cmd="$cmd $subject" + RunIt "$cmd" "$LF" "$RIBBON_CMDF" + echo "echo \"$cmd\" > $SUBJECTS_DIR/$subject/touch/cortical_ribbon.touch" >> "$RIBBON_CMDF" + start_async_cmdf "$RIBBON_CMDF" + ASYNC_RIBBON_STARTED="true" + fi + { echo "" echo " RUNNING HEMIs in PARALLEL !!! " @@ -1232,29 +1259,39 @@ fi if [[ "$base" != "true" ]] then - { - echo "" - echo "============================ Creating surfaces - ribbon ===========================" - echo "" - } | tee -a "$LF" - # -cortribbon 4 minutes, ribbon is used in mris_anatomical stats to remove voxels from surface based volumes that should not be cortex - # anatomical stats can run without ribbon, but will omit some surface based measures then - # wmparc needs ribbon, probably other stuff (aparc to aseg etc). - # So lets run it to have these measures below. - cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 \ - --label_right_white 41 --label_right_ribbon 42 --save_ribbon --cap_distance 2" - if [[ "$threads" -gt 1 ]] ; then cmd="$cmd --parallel" ; fi - cmd="$cmd $subject" - RunIt "$cmd" "$LF" - mkdir -p "$SUBJECTS_DIR/$subject/touch" - echo "$cmd" > "$SUBJECTS_DIR/$subject/touch/cortical_ribbon.touch" + if [[ "$ASYNC_RIBBON_STARTED" != "true" ]] + then + { + echo "" + echo "============================ Creating surfaces - ribbon ===========================" + echo "" + } | tee -a "$LF" + # -cortribbon 4 minutes, ribbon is used in mris_anatomical stats to remove voxels from surface based volumes that should not be cortex + # anatomical stats can run without ribbon, but will omit some surface based measures then + # wmparc needs ribbon, probably other stuff (aparc to aseg etc). + # So lets run it to have these measures below. + cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 \ + --label_right_white 41 --label_right_ribbon 42 --save_ribbon --cap_distance 2" + if [[ "$threads" -gt 1 ]] ; then cmd="$cmd --parallel" ; fi + cmd="$cmd $subject" + RunIt "$cmd" "$LF" + mkdir -p "$SUBJECTS_DIR/$subject/touch" + echo "$cmd" > "$SUBJECTS_DIR/$subject/touch/cortical_ribbon.touch" + else + echo "Ribbon construction is already running asynchronously." | tee -a "$LF" + fi fi # skip in base -if [[ "$ASYNC_BALABELS_STARTED" == "true" || "$ASYNC_HYPORELABEL_STARTED" == "true" ]] +if [[ "$ASYNC_RIBBON_STARTED" == "true" || "$ASYNC_BALABELS_STARTED" == "true" || "$ASYNC_HYPORELABEL_STARTED" == "true" ]] then wait_async_cmdfs fi +if [[ "$ASYNC_RIBBON_STARTED" == "true" ]] +then + cmd="rm -f $RIBBON_CMDF $SUBJECTS_DIR/$subject/touch/lh.pial.ready $SUBJECTS_DIR/$subject/touch/rh.pial.ready" + RunIt "$cmd" "$LF" +fi # ============================= FSAPARC - parc23 surfcon hypo ... ========================================= From bda39045fdcb0e86278c9d4d34f68a66db6f36bc Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 15 May 2026 11:55:27 +0200 Subject: [PATCH 09/31] Speed up mapped volume tail Split async ribbon generation by hemisphere and merge the exact left/right masks before downstream consumers. Use lower exact hash resolutions for mapped-volume projections, cap concurrent mapped-volume threads to reduce contention, and only wait on async jobs when their outputs are required. Validated on 114823_MR1: surf_speed_async_ribbon_threads8_run1: 43:14.12 surf_speed_hash5_wmparc_hash4_t16_threads8_run1: 42:42.62 Speedup: 31.50s Output comparison: exact data outputs; stripped numeric stats match. --- recon_surf/merge_ribbon_hemis.py | 46 +++++++++++ recon_surf/recon-surf.sh | 126 +++++++++++++++++++++++++------ 2 files changed, 148 insertions(+), 24 deletions(-) create mode 100644 recon_surf/merge_ribbon_hemis.py diff --git a/recon_surf/merge_ribbon_hemis.py b/recon_surf/merge_ribbon_hemis.py new file mode 100644 index 000000000..8f622e876 --- /dev/null +++ b/recon_surf/merge_ribbon_hemis.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +"""Merge left/right mris_volmask one-hemi outputs.""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import nibabel as nib +import numpy as np + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("--lh", required=True, type=Path, help="Left-hemi full ribbon/WM mask.") + parser.add_argument("--rh", required=True, type=Path, help="Right-hemi full ribbon/WM mask.") + parser.add_argument("--out", required=True, type=Path, help="Merged ribbon/WM mask output.") + parser.add_argument("--lh-ribbon", required=True, type=Path, help="Binary left ribbon output.") + parser.add_argument("--rh-ribbon", required=True, type=Path, help="Binary right ribbon output.") + parser.add_argument("--left-ribbon-label", default=3, type=int) + parser.add_argument("--right-ribbon-label", default=42, type=int) + return parser.parse_args() + + +def save_like(reference: nib.spatialimages.SpatialImage, data: np.ndarray, path: Path) -> None: + image = nib.MGHImage(data.astype(np.uint8, copy=False), reference.affine, reference.header.copy()) + image.set_data_dtype(np.uint8) + nib.save(image, str(path)) + + +def main() -> int: + args = parse_args() + lh_img = nib.load(args.lh) + rh_img = nib.load(args.rh) + lh_data = np.asanyarray(lh_img.dataobj, dtype=np.uint8) + rh_data = np.asanyarray(rh_img.dataobj, dtype=np.uint8) + + merged = np.where(lh_data != 0, lh_data, rh_data) + save_like(lh_img, merged, args.out) + save_like(lh_img, (lh_data == args.left_ribbon_label).astype(np.uint8), args.lh_ribbon) + save_like(rh_img, (rh_data == args.right_ribbon_label).astype(np.uint8), args.rh_ribbon) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 752f8a354..815f53dc2 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -411,6 +411,57 @@ function start_async_cmdf() ASYNC_CMDFS+=("$cmdf") } +function wait_async_cmdf() +{ + local target=$1 + local unsuccessful=() + local found="false" + local status + local i + local next_pids=() + local next_logs=() + local next_cmdfs=() + + for i in "${!ASYNC_PIDS[@]}" + do + if [[ "${ASYNC_CMDFS[i]}" == "$target" ]] + then + found="true" + echo "Waiting for async PID ${ASYNC_PIDS[i]} of (${ASYNC_PIDS[*]}) to complete..." | tee -a "$LF" + wait "${ASYNC_PIDS[i]}" + status="$?" + tee -a "$LF" < "${ASYNC_LOGS[i]}" + rm -f "${ASYNC_LOGS[i]}" + if [[ "$status" != "0" ]] + then + unsuccessful+=("$i") + { + echo "ERROR: The async script ${ASYNC_CMDFS[i]} (PID: ${ASYNC_PIDS[i]}) did not complete successfully!" + echo "========================================" + echo "" + } | tee -a "$LF" + fi + else + next_pids+=("${ASYNC_PIDS[i]}") + next_logs+=("${ASYNC_LOGS[i]}") + next_cmdfs+=("${ASYNC_CMDFS[i]}") + fi + done + + if [[ "${#unsuccessful}" != 0 ]] + then + echo "Async PIDs (${unsuccessful[*]}) of (${ASYNC_PIDS[*]}) have NOT completed successfully! All logs appended." | tee -a "$LF" + exit 1 + elif [[ "$found" == "true" ]] + then + echo "Async command $target completed successfully! Its log has been appended." | tee -a "$LF" + fi + + ASYNC_PIDS=("${next_pids[@]}") + ASYNC_LOGS=("${next_logs[@]}") + ASYNC_CMDFS=("${next_cmdfs[@]}") +} + function wait_async_cmdfs() { local unsuccessful=() @@ -1179,25 +1230,37 @@ ASYNC_RIBBON_STARTED="false" if [[ "$ParallelHemi" == "true" ]] ; then if [[ "$base" != "true" ]] then - RIBBON_CMDF="$SUBJECTS_DIR/$subject/scripts/ribbon.cmdf" - rm -f "$RIBBON_CMDF" + RIBBON_LH_CMDF="$SUBJECTS_DIR/$subject/scripts/ribbon.lh.cmdf" + RIBBON_RH_CMDF="$SUBJECTS_DIR/$subject/scripts/ribbon.rh.cmdf" + rm -f "$RIBBON_LH_CMDF" "$RIBBON_RH_CMDF" mkdir -p "$SUBJECTS_DIR/$subject/touch" rm -f "$SUBJECTS_DIR/$subject/touch/lh.pial.ready" "$SUBJECTS_DIR/$subject/touch/rh.pial.ready" - { - echo "#!/bin/bash" - echo "echo \"\"" - echo "echo \"============================ Creating surfaces - ribbon ===========================\"" - echo "echo \"\"" - echo "while [[ ! -f $SUBJECTS_DIR/$subject/touch/lh.pial.ready || ! -f $SUBJECTS_DIR/$subject/touch/rh.pial.ready ]] ; do sleep 1 ; done" - } > "$RIBBON_CMDF" - cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 \ - --label_right_white 41 --label_right_ribbon 42 --save_ribbon --cap_distance 2" - if [[ "$threads" -gt 1 ]] ; then cmd="$cmd --parallel" ; fi - cmd="$cmd $subject" - RunIt "$cmd" "$LF" "$RIBBON_CMDF" - echo "echo \"$cmd\" > $SUBJECTS_DIR/$subject/touch/cortical_ribbon.touch" >> "$RIBBON_CMDF" - start_async_cmdf "$RIBBON_CMDF" + for hemi in lh rh ; do + if [[ "$hemi" == "lh" ]] + then + RIBBON_HEMI_CMDF="$RIBBON_LH_CMDF" + ribbon_only_flag="--lh-only" + ribbon_out_root="ribbon.lhonly" + else + RIBBON_HEMI_CMDF="$RIBBON_RH_CMDF" + ribbon_only_flag="--rh-only" + ribbon_out_root="ribbon.rhonly" + fi + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"============================ Creating surfaces $hemi - ribbon ===========================\"" + echo "echo \"\"" + echo "while [[ ! -f $SUBJECTS_DIR/$subject/touch/${hemi}.pial.ready ]] ; do sleep 1 ; done" + } > "$RIBBON_HEMI_CMDF" + + cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 \ + --label_right_white 41 --label_right_ribbon 42 --save_ribbon --cap_distance 2 \ + --out_root $ribbon_out_root $ribbon_only_flag $subject" + RunIt "$cmd" "$LF" "$RIBBON_HEMI_CMDF" + start_async_cmdf "$RIBBON_HEMI_CMDF" + done ASYNC_RIBBON_STARTED="true" fi @@ -1283,13 +1346,22 @@ then fi # skip in base -if [[ "$ASYNC_RIBBON_STARTED" == "true" || "$ASYNC_BALABELS_STARTED" == "true" || "$ASYNC_HYPORELABEL_STARTED" == "true" ]] -then - wait_async_cmdfs -fi if [[ "$ASYNC_RIBBON_STARTED" == "true" ]] then - cmd="rm -f $RIBBON_CMDF $SUBJECTS_DIR/$subject/touch/lh.pial.ready $SUBJECTS_DIR/$subject/touch/rh.pial.ready" + wait_async_cmdf "$RIBBON_LH_CMDF" + wait_async_cmdf "$RIBBON_RH_CMDF" + cmda=($python "${binpath}merge_ribbon_hemis.py" + --lh "$mdir/ribbon.lhonly.mgz" + --rh "$mdir/ribbon.rhonly.mgz" + --out "$mdir/ribbon.mgz" + --lh-ribbon "$mdir/lh.ribbon.mgz" + --rh-ribbon "$mdir/rh.ribbon.mgz") + run_it "$LF" "${cmda[@]}" + cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 --label_right_white 41 --label_right_ribbon 42 --save_ribbon --cap_distance 2" + if [[ "$threads" -gt 1 ]] ; then cmd="$cmd --parallel" ; fi + cmd="$cmd $subject" + echo "$cmd" > "$SUBJECTS_DIR/$subject/touch/cortical_ribbon.touch" + cmd="rm -f $RIBBON_LH_CMDF $RIBBON_RH_CMDF $mdir/ribbon.lhonly.mgz $mdir/ribbon.rhonly.mgz $mdir/lh.ribbon.lhonly.mgz $mdir/rh.ribbon.rhonly.mgz $SUBJECTS_DIR/$subject/touch/lh.pial.ready $SUBJECTS_DIR/$subject/touch/rh.pial.ready" RunIt "$cmd" "$LF" fi @@ -1424,6 +1496,8 @@ then RunIt "$cmd" "$LF" mkdir -p "$SUBJECTS_DIR/$subject/touch" echo "mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" > "$SUBJECTS_DIR/$subject/touch/relabelhypos.touch" + else + wait_async_cmdf "$HYPORELABEL_CMDF" fi cmd="mri_surf2volseg --o aseg.mgz --i aseg.presurf.hypos.mgz --fix-presurf-with-ribbon $mdir/ribbon.mgz --threads $threads --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" @@ -1507,16 +1581,20 @@ then # Run it from aseg.mgz while the main process creates aparc.DKTatlas+aseg.mapped.mgz, # then merge those WM labels into the mapped aparc volume below. wmparc_threads=$((threads / 2)) + if [[ "$wmparc_threads" -gt 2 ]] ; then wmparc_threads=2 ; fi if [[ "$wmparc_threads" -lt 1 ]] ; then wmparc_threads=1 ; fi - cmd="mri_surf2volseg --o $mdir/wmparc.DKTatlas.mapped.wmonly.mgz --label-wm --i $mdir/aseg.mgz --threads $wmparc_threads --hashres 8 --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 3000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 4000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" + cmd="mri_surf2volseg --o $mdir/wmparc.DKTatlas.mapped.wmonly.mgz --label-wm --i $mdir/aseg.mgz --threads $wmparc_threads --hashres 5 --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 3000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 4000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" "$WMPARC_VOL_CMDF" start_async_cmdf "$WMPARC_VOL_CMDF" # creating aparc.DKTatlas+aseg.mapped.mgz by mapping aparc.DKTatlas.mapped from surface to aseg.mgz # (should be a nicer aparc+aseg compared to orig CNN segmentation, due to surface updates) surf2volseg_threads=$threads - if [[ "$threads" -ge 8 ]] ; then surf2volseg_threads=$((threads + threads / 2)) ; fi - cmd="mri_surf2volseg --o $mdir/aparc.DKTatlas+aseg.mapped.mgz --label-cortex --i $mdir/aseg.mgz --threads $surf2volseg_threads --hashres 8 --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 1000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 2000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" + if [[ "$threads" -ge 8 ]] ; then + surf2volseg_threads=$((threads * 2)) + if [[ "$surf2volseg_threads" -gt 16 ]] ; then surf2volseg_threads=16 ; fi + fi + cmd="mri_surf2volseg --o $mdir/aparc.DKTatlas+aseg.mapped.mgz --label-cortex --i $mdir/aseg.mgz --threads $surf2volseg_threads --hashres 4 --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 1000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 2000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" wait_async_cmdfs cmda=($python "${binpath}merge_wmparc_aparc.py" From c182ba836f3703da64be867daff48364ba9504f2 Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 15 May 2026 18:40:48 +0200 Subject: [PATCH 10/31] Speed up inflated curvature computation Skip distance-neighborhood curvature sampling for inflated.H/K generation. On 114823_MR1 this reduces the two inflated mris_curvature steps from 78.52s to 3.83s, saving 74.69s in that component. Validated against surf_speed_fast_fill_threads8_run2: final white/pial/sphere.reg vertices and faces, thickness, curv, aseg, ribbon, aparc+aseg, and wmparc outputs are unchanged. Intermediate inflated.H/K files differ. The full benchmark run was slower overall due heavier system load (47:17.43 vs prior 40:32.44), so the speedup is based on paired command timings. --- recon_surf/recon-surf.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 815f53dc2..00e30baa6 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -972,9 +972,9 @@ for hemi in lh rh ; do cmd="ln -s $hemi.white.preaparc.K $hemi.white.K" RunIt "$cmd" "$LF" "$CMDF" echo "echo \"mris_curvature -w -seed 1234 ${hemi}.white.preaparc\" > $SUBJECTS_DIR/$subject/touch/${hemi}.white.H.K.touch" >> "$CMDF" - cmd="mris_curvature -seed 1234 -thresh .999 -n -a 5 -w -distances 10 10 $hemi.inflated" + cmd="mris_curvature -seed 1234 -thresh .999 -n -a 5 -w $hemi.inflated" RunIt "$cmd" "$LF" "$CMDF" - echo "echo \"mris_curvature -seed 1234 -thresh .999 -n -a 5 -w -distances 10 10 ${hemi}.inflated\" > $SUBJECTS_DIR/$subject/touch/${hemi}.inflate.H.K.touch" >> "$CMDF" + echo "echo \"mris_curvature -seed 1234 -thresh .999 -n -a 5 -w ${hemi}.inflated\" > $SUBJECTS_DIR/$subject/touch/${hemi}.inflate.H.K.touch" >> "$CMDF" echo "popd > /dev/null || exit 1" >> "$CMDF" From cf8aa1c25ea1c93fe35905dfb22fb3a9c039ac51 Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 15 May 2026 19:33:34 +0200 Subject: [PATCH 11/31] Overlap ribbon with surface registration Defer sphere/surfreg until after white/pial generation for the default FastSurfer DKT path, keeping --fsaparc behavior unchanged because it needs sphere.reg before aparc labeling. On 114823_MR1 with the current curvature optimization, wall time improved from 47:17.43 to 43:59.74 under similar load, a 197.69s speedup. Validation found unchanged white/pial/sphere.reg vertices and faces, thickness, curv, avg_curv, jacobian_white, aseg, ribbon, aparc+aseg, and wmparc outputs. --- recon_surf/recon-surf.sh | 54 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 00e30baa6..7c3a14df7 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -1001,8 +1001,10 @@ for hemi in lh rh ; do # ============================= SPHERE - SURFREG (optional) ============================================== - # if we segment with FS or if surface registration is requested do it here: - if [[ "$fsaparc" == "true" ]] || [[ "$fssurfreg" == "true" ]] + # If FreeSurfer aparc is requested, sphere.reg is needed before surface placement. + # The default FastSurfer DKT path does not consume sphere.reg for white/pial + # placement, so it is deferred below to overlap with ribbon construction. + if [[ "$fsaparc" == "true" ]] then { echo "echo \" \"" @@ -1205,6 +1207,54 @@ for hemi in lh rh ; do echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" echo "echo \"mris_curvature_stats -m --writeCurvatureFiles -G -o ../stats/${hemi}.curv.stats -F smoothwm $subject $hemi curv sulc\" > $SUBJECTS_DIR/$subject/touch/${hemi}.curvstats.touch" >> "$CMDF" + if [[ "$fsaparc" == "false" && "$fssurfreg" == "true" ]] + then + { + echo "echo \" \"" + echo "echo \"============ Creating surfaces $hemi - FS sphere, surfreg ===============\"" + echo "echo \" \"" + } | tee -a "$CMDF" + + if [[ "$long" == "false" ]] + then + cmd="env OMP_NUM_THREADS=$threads ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS=1 \ + mris_sphere -seed 1234 $sdir/${hemi}.inflated $sdir/${hemi}.sphere" + RunIt "$cmd" "$LF" "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "echo \"mris_sphere -seed 1234 ../surf/${hemi}.inflated ../surf/${hemi}.sphere\" > $SUBJECTS_DIR/$subject/touch/${hemi}.sphmorph.touch" >> "$CMDF" + + cmd="$python ${binpath}/rotate_sphere.py \ + --srcsphere $sdir/${hemi}.sphere \ + --srcaparc $ldir/$hemi.aparc.DKTatlas.mapped.annot \ + --trgsphere $FREESURFER_HOME/subjects/fsaverage/surf/${hemi}.sphere \ + --trgaparc $FREESURFER_HOME/subjects/fsaverage/label/${hemi}.aparc.annot \ + --out $sdir/${hemi}.angles.txt" + RunIt "$cmd" "$LF" "$CMDF" + cmd="mris_register -curv -norot -rotate \`cat $sdir/${hemi}.angles.txt\` \ + $sdir/${hemi}.sphere \ + $FREESURFER_HOME/average/${hemi}.folding.atlas.acfb40.noaparc.i12.2016-08-02.tif \ + $sdir/${hemi}.sphere.reg" + RunIt "$cmd" "$LF" "$CMDF" + else + cmd="cp $basedir/surf/$hemi.sphere $sdir/$hemi.sphere" + RunIt "$cmd" "$LF" "$CMDF" + cmd="mris_register -curv -nosulc -norot \ + -threads $threads_hemi \ + $basedir/surf/${hemi}.sphere.reg \ + $FREESURFER_HOME/average/${hemi}.folding.atlas.acfb40.noaparc.i12.2016-08-02.tif \ + $sdir/${hemi}.sphere.reg" + RunIt "$cmd" "$LF" "$CMDF" + fi + + cmd="mris_jacobian $sdir/$hemi.white.preaparc $sdir/$hemi.sphere.reg $sdir/$hemi.jacobian_white" + RunIt "$cmd" "$LF" "$CMDF" + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" + echo "echo \"mris_jacobian ../surf/${hemi}.white.preaparc ../surf/${hemi}.sphere.reg ../surf/${hemi}.jacobian_white\" > $SUBJECTS_DIR/$subject/touch/${hemi}.jacobian_white.touch" >> "$CMDF" + cmd="mrisp_paint -a 5 $FREESURFER_HOME/average/${hemi}.folding.atlas.acfb40.noaparc.i12.2016-08-02.tif#6 $sdir/$hemi.sphere.reg $sdir/$hemi.avg_curv" + RunIt "$cmd" "$LF" "$CMDF" + echo "echo \"mrisp_paint -a 5 $FREESURFER_HOME/average/${hemi}.folding.atlas.acfb40.noaparc.i12.2016-08-02.tif#6 ../surf/${hemi}.sphere.reg ../surf/${hemi}.avg_curv\" > $SUBJECTS_DIR/$subject/touch/${hemi}.avgcurv.touch" >> "$CMDF" + fi + if [[ "$ParallelHemi" == "false" ]] then { From c81f0c7456c040ee00f8a561fc9d3cc0575ba3b6 Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 15 May 2026 21:08:51 +0200 Subject: [PATCH 12/31] Skip legacy T1 normalization by default Default surface reconstruction now uses norm.mgz directly for brainmask.mgz and skips the FreeSurfer-style T1.mgz normalization unless --fs_T1 is requested. On 114823_MR1 this reduced wall time from 43:59.74 to 40:22.01 (-217.73s) versus the committed defer-surfreg reference. Checked final MRI volumes, white/pial/sphere.reg surfaces, and key morph data were unchanged; the legacy auxiliary T1.mgz output is no longer produced by default. --- recon_surf/recon-surf.sh | 11 +++++++---- run_fastsurfer.sh | 10 ++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 7c3a14df7..83b413977 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -39,7 +39,7 @@ baseid="" # baseid for longitudinal time point run # Dev flags default check_version="true" # Check for supported FreeSurfer version (terminate if not detected) -get_t1="true" # Generate T1.mgz from nu.mgz and brainmask from it (default) +get_t1="false" # Skip FreeSurfer T1.mgz normalization by default; --fs_T1 restores it. hires_voxsize_threshold=0.999 # Threshold below which the hires options are passed if [[ -z "$FASTSURFER_HOME" ]] @@ -122,9 +122,11 @@ Dev Flags: --ignore_fs_version Switch on to avoid check for FreeSurfer version. Program will otherwise terminate if $FS_VERSION_SUPPORT is not sourced. Can be used for testing dev versions. - --no_fs_T1 Do not generate T1.mgz (normalized nu.mgz included in - standard FreeSurfer output) and create brainmask.mgz - directly from norm.mgz instead. Saves 1:30 min. + --fs_T1 Generate FreeSurfer-style T1.mgz from nu.mgz and use it + for brainmask.mgz. Slower, but preserves the legacy + auxiliary T1.mgz output. + --no_fs_T1 Do not generate T1.mgz and create brainmask.mgz directly + from norm.mgz instead (default). --no_surfreg Do not run Surface registration with FreeSurfer (for cross-subject correspondence). Not recommended, but speeds up processing if you just need the stats and @@ -204,6 +206,7 @@ case $key in shift # past value ;; --ignore_fs_version) check_version="false" ;; + --fs_t1 ) get_t1="true" ;; --no_fs_t1 ) get_t1="false" ;; --base) base="true" ;; --long) long="true" ; baseid="$1" ; shift ;; diff --git a/run_fastsurfer.sh b/run_fastsurfer.sh index 2d79e7129..6f276d4a9 100755 --- a/run_fastsurfer.sh +++ b/run_fastsurfer.sh @@ -289,9 +289,11 @@ Resource Options: --fsaparc Additionally create FS aparc segmentations and ribbon. Skipped by default (--> DL prediction is used which is faster, and usually these mapped ones are fine). - --no_fs_T1 Do not generate T1.mgz (normalized nu.mgz included in - standard FreeSurfer output) and create brainmask.mgz - directly from norm.mgz instead. Saves 1:30 min. + --fs_T1 Generate FreeSurfer-style T1.mgz from nu.mgz and use it + for brainmask.mgz. Slower, but preserves the legacy + auxiliary T1.mgz output. + --no_fs_T1 Do not generate T1.mgz and create brainmask.mgz directly + from norm.mgz instead (default). --no_surfreg Do not run Surface registration with FreeSurfer (for cross-subject correspondence), Not recommended, but speeds up processing if you e.g. just need the @@ -533,7 +535,7 @@ case $key in ############################################################## --seg_only) run_surf_pipeline="false" ;; # several flag options that are *just* passed through to recon-surf.sh - --fstess|--fsqsphere|--fsaparc|--no_surfreg|--ignore_fs_version) surf_flags+=("$key") ;; + --fstess|--fsqsphere|--fsaparc|--no_surfreg|--ignore_fs_version|--fs_t1) surf_flags+=("$key") ;; --parallel) legacy_parallel_hemi="true" ;; --no_fs_t1) surf_flags+=("--no_fs_T1") ;; From 3f8f1fb867520a379a5c8460e3a0bf4929cbc3d9 Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 15 May 2026 22:06:44 +0200 Subject: [PATCH 13/31] Speed up aparc smoothing mode filter Replace per-row scipy.stats.mode calls in sample_parc smoothing with np.bincount().argmax(), preserving scipy's smallest-label tie behavior for non-negative labels. Validation on 114823_MR1 against surf_speed_no_fs_t1_threads8_run1: mapped annotations byte-identical; final MRI volumes have zero voxel changes; white/pial/sphere.reg surfaces and morphometry have zero changes. sample_parc.py dropped from 61.57s total to 8.59s total; full wall time changed from 40:22.01 to 39:58.11. --- recon_surf/smooth_aparc.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/recon_surf/smooth_aparc.py b/recon_surf/smooth_aparc.py index b1f1437e1..ca77bafbf 100644 --- a/recon_surf/smooth_aparc.py +++ b/recon_surf/smooth_aparc.py @@ -204,8 +204,6 @@ def mode_filter( # create sparse matrix with labels at neighbors nlabels = sparse.csr_matrix((labels[JJ], (II, JJ))) # print("nlabels: {}".format(nlabels)) - from scipy.stats import mode - if not isinstance(nlabels, sparse.csr_matrix): raise ValueError("Matrix must be CSR format.") # novote = [-1,0,fillonlylabel] @@ -227,19 +225,16 @@ def mode_filter( rr = np.isin(nlabels.data, novote) nlabels.data[rr] = 0 nlabels.eliminate_zeros() - # run over all rows and compute mode (maybe vectorize later) + # Run over all rows and compute mode. The labels are non-negative at + # this point; bincount().argmax() matches scipy.stats.mode's smallest-value + # tie behavior without the heavy per-row SciPy dispatch. rempty = 0 for row in rows: rvals = nlabels.data[nlabels.indptr[row] : nlabels.indptr[row + 1]] if rvals.size == 0: rempty += 1 continue - # print(str(rvals)) - mvals = mode(rvals, keepdims=True)[0] - # print(str(mvals)) - if mvals.size != 0: - # print(str(row)+' '+str(ids[row])+' '+str(mvals[0])) - labels_new[ids[row]] = mvals[0] + labels_new[ids[row]] = np.bincount(rvals).argmax() if rempty > 0: # should not happen print("WARNING: row empty: " + str(rempty)) From 34c841fd0452315501fa52cda7224af7317e773a Mon Sep 17 00:00:00 2001 From: ClePol Date: Sat, 16 May 2026 02:43:14 +0200 Subject: [PATCH 14/31] Speed up N4 bias correction Use shrink factor 5 for T1 N4 bias correction in run_fastsurfer.sh and recon-surf.sh. Isolated 114823_MR1 N4 timing: default shrink 4 took 1:44.76, shrink 5 took 0:50.24 (54.52s faster). In full seg validation under high host load, N4 module time was 40.05s vs 62.65s in the reference run. Validation against seg_speed_torchcap_threads8_run1: primary segmentation outputs, CC outputs, aseg.auto, mask, and CerebNet segmentation were voxel-identical. orig_nu changed by max 3 UCHAR (mean abs 0.099; p99.9 2). HypVINN changed 200 label voxels and 999 mask voxels. --- recon_surf/recon-surf.sh | 2 +- run_fastsurfer.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 83b413977..4803cd1fe 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -667,7 +667,7 @@ if [[ ! -f "$mdir/orig_nu.mgz" ]] ; then # stream can be changed to avoid it. pushd "$mdir" > /dev/null || ( echo "Cannot change to $mdir" ; exit 1 ) #cmd="mri_nu_correct.mni --no-rescale --i $mdir/orig.mgz --o $mdir/orig_nu.mgz --n 1 --proto-iters 1000 --distance 50 --mask $mdir/mask.mgz" - cmd="$python ${binpath}/N4_bias_correct.py --in $mdir/orig.mgz --rescale $mdir/orig_nu.mgz --aseg $mdir/aparc.DKTatlas+aseg.orig.mgz --threads $threads" + cmd="$python ${binpath}/N4_bias_correct.py --in $mdir/orig.mgz --rescale $mdir/orig_nu.mgz --aseg $mdir/aparc.DKTatlas+aseg.orig.mgz --threads $threads --shrink 5" RunIt "$cmd" "$LF" popd > /dev/null || (echo "Could not popd" ; exit 1) fi diff --git a/run_fastsurfer.sh b/run_fastsurfer.sh index 6f276d4a9..34b64156e 100755 --- a/run_fastsurfer.sh +++ b/run_fastsurfer.sh @@ -1141,7 +1141,7 @@ then { # this will always run, since norm_name is set to subject_dir/mri/orig_nu.mgz, if it is not passed/empty cmd=($python "${reconsurfdir}/N4_bias_correct.py" "--in" "$conformed_name" --rescale "$norm_name" - --aseg "$aseg_segfile" --threads "$threads_seg") + --aseg "$aseg_segfile" --threads "$threads_seg" --shrink 5) echo "INFO: Running N4 bias-field correction..." echo_quoted "${cmd[@]}" "${wrap[@]}" "${cmd[@]}" 2>&1 From 4c996fdc905460c71d5dec0fb1fbc3448007273f Mon Sep 17 00:00:00 2001 From: ClePol Date: Sat, 16 May 2026 03:40:53 +0200 Subject: [PATCH 15/31] Overlap CerebNet with HypVINN Run CerebNet asynchronously when HypVINN is also enabled, using temporary CerebNet-specific segmentation and timing logs that are appended after the background process completes. Validation on 114823_MR1 seg_only threads=8: seg_speed_async_cereb_n4_threads8_run1 completed in 18:57.99. CerebNet took 60.60s and completed fully under HypVINN, which took 585.46s, hiding the CerebNet module runtime. Compared against seg_speed_n4_shrink5_threads8_run1: checked mri/orig.mgz, aparc.DKTatlas+aseg.deep.mgz, mask.mgz, aseg.auto_noCCseg.mgz, orig_nu.mgz, callosum.CC.orig.mgz, aparc.DKTatlas+aseg.deep.withCC.mgz, aseg.auto.mgz, cerebellum.CerebNet.nii.gz, hypothalamus.HypVINN.nii.gz, and hypothalamus_mask.HypVINN.nii.gz; all were voxel-identical. Stats data rows were identical; stats files differed only in metadata headers such as paths and hostname. --- run_fastsurfer.sh | 54 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/run_fastsurfer.sh b/run_fastsurfer.sh index 34b64156e..ddd7ad1cf 100755 --- a/run_fastsurfer.sh +++ b/run_fastsurfer.sh @@ -1373,9 +1373,12 @@ then fi fi + cerebnet_pid="" + cerebnet_async_log="" + cerebnet_async_exec_log="" + if [[ "$run_cereb_module" == "true" ]] then - echo "MODULE: CerebNet cerebellum segmentation" >> "$exec_time_log" if [[ "$run_biasfield" == "true" ]] then cereb_flags+=(--norm_name "$norm_name" --cereb_statsfile "$cereb_statsfile") @@ -1392,12 +1395,30 @@ then # specify the subject dir $sd, if cereb_segfile explicitly starts with it if [[ "$sd" == "${cereb_segfile:0:${#sd}}" ]] ; then cmd+=(--sd "$sd"); fi if [[ "$native_image" != "false" ]] ; then cmd+=(--orientation native --image_size fov --vox_size none) ; fi - echo_quoted "${cmd[@]}" | tee -a "$seg_log" - "${wrap[@]}" "${cmd[@]}" # no tee, directly logging to $seg_log - if [[ "${PIPESTATUS[0]}" != 0 ]] + if [[ "$run_hypvinn_module" == "true" ]] then - echo "ERROR: Cerebellum Segmentation failed!" | tee -a "$seg_log" - exit 1 + cerebnet_async_log="${seg_log%.log}.cerebnet.async.log" + cerebnet_async_exec_log="${exec_time_log%.log}.cerebnet.async.log" + : > "$cerebnet_async_log" + : > "$cerebnet_async_exec_log" + cmd_async=("${cmd[@]}") + for idx in "${!cmd_async[@]}"; do + if [[ "${cmd_async[$idx]}" == "$seg_log" ]]; then cmd_async[$idx]="$cerebnet_async_log"; fi + done + echo "INFO: Running CerebNet asynchronously with HypVINN..." | tee -a "$seg_log" + echo_quoted "${cmd_async[@]}" | tee -a "$seg_log" + echo "MODULE: CerebNet cerebellum segmentation" >> "$cerebnet_async_exec_log" + time_it "$cerebnet_async_exec_log" "${cmd_async[@]}" & + cerebnet_pid="$!" + else + echo "MODULE: CerebNet cerebellum segmentation" >> "$exec_time_log" + echo_quoted "${cmd[@]}" | tee -a "$seg_log" + "${wrap[@]}" "${cmd[@]}" # no tee, directly logging to $seg_log + if [[ "${PIPESTATUS[0]}" != 0 ]] + then + echo "ERROR: Cerebellum Segmentation failed!" | tee -a "$seg_log" + exit 1 + fi fi fi @@ -1429,6 +1450,27 @@ then fi fi + if [[ -n "$cerebnet_pid" ]] + then + wait "$cerebnet_pid" + cerebnet_exit="$?" + if [[ -f "$cerebnet_async_log" ]] + then + cat "$cerebnet_async_log" >> "$seg_log" + rm -f "$cerebnet_async_log" + fi + if [[ -f "$cerebnet_async_exec_log" ]] + then + cat "$cerebnet_async_exec_log" >> "$exec_time_log" + rm -f "$cerebnet_async_exec_log" + fi + if [[ "$cerebnet_exit" != 0 ]] + then + echo "ERROR: Cerebellum Segmentation failed!" | tee -a "$seg_log" + exit 1 + fi + fi + # if [[ ! -f "$merged_segfile" ]] # then # ln -s -r "$asegdkt_segfile" "$merged_segfile" From 05e6056a78ce6742664acf9d1b69ffe700a0f2e3 Mon Sep 17 00:00:00 2001 From: ClePol Date: Sat, 16 May 2026 05:06:07 +0200 Subject: [PATCH 16/31] Trace HypVINN CPU inference Trace the shape-stable batch-1 CPU HypVINN model after the first batch so repeated slice inference runs through TorchScript. The optimization is limited to the default no-output-scale CPU path and can be disabled with FASTSURFER_HYPVINN_TRACE=0. Validation on 114823_MR1 with --seg_only --device cpu --threads 8: wall time improved from 18:57.99 in seg_speed_async_cereb_n4_threads8_run1 to 17:39.52 in seg_speed_hyp_trace_threads8_run1, a 78.47 second speedup. Isolated HypVINN improved from 11:20.59 to 8:22.56. Key image outputs were voxel-identical and non-comment stats rows matched exactly. --- HypVINN/inference.py | 45 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/HypVINN/inference.py b/HypVINN/inference.py index 8c19ac6d4..b568dc48c 100644 --- a/HypVINN/inference.py +++ b/HypVINN/inference.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os +import warnings from time import time from typing import Optional @@ -33,6 +35,22 @@ logger = logging.get_logger(__name__) +class _HypVINNTraceWrapper(torch.nn.Module): + """Trace adapter for the common no-output-scale inference path.""" + + def __init__(self, model: torch.nn.Module): + super().__init__() + self.model = model + + def forward( + self, + images: torch.Tensor, + scale_factors: torch.Tensor, + weight_factors: torch.Tensor, + ) -> torch.Tensor: + return self.model(images, scale_factors, weight_factors, None) + + class Inference: """ Class for running inference on a single subject. @@ -314,6 +332,13 @@ def eval(self, val_loader: DataLoader, pred_prob: torch.Tensor, out_scale: float The updated prediction probabilities. """ self.model.eval() + trace_model = ( + out_scale is None + and self.device.type == "cpu" + and self.cfg.TEST.BATCH_SIZE == 1 + and os.environ.get("FASTSURFER_HYPVINN_TRACE", "1") != "0" + ) + traced_model = False start_index = 0 for _batch_idx, batch in tqdm(enumerate(val_loader), total=len(val_loader)): @@ -322,7 +347,25 @@ def eval(self, val_loader: DataLoader, pred_prob: torch.Tensor, out_scale: float scale_factors = batch["scale_factor"].to(self.device) weight_factors = batch["weight_factor"].to(self.device, dtype=torch.float32) - pred = self.model(images, scale_factors, weight_factors, out_scale) + if trace_model and _batch_idx == 0: + trace_start = time() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) + self.model = torch.jit.trace( + _HypVINNTraceWrapper(self.model), + (images, scale_factors, weight_factors), + check_trace=False, + ) + self.model.eval() + traced_model = True + logger.info( + f"Traced {self.cfg.DATA.PLANE} model in {time() - trace_start:0.4f} seconds" + ) + + if traced_model: + pred = self.model(images, scale_factors, weight_factors) + else: + pred = self.model(images, scale_factors, weight_factors, out_scale) if self.cfg.DATA.PLANE == "axial": pred = pred.permute((2, 3, 0, 1)).to(self.viewagg_device) From 6e885b4bdf7d3cfd78b770f3c992791b5558f469 Mon Sep 17 00:00:00 2001 From: ClePol Date: Sat, 16 May 2026 05:32:47 +0200 Subject: [PATCH 17/31] Trace FastSurferVINN CPU inference Trace the shape-stable batch-1 CPU FastSurferVINN model after the first batch so repeated slice inference uses TorchScript. The optimization is limited to the default no-output-scale CPU path and can be disabled with FASTSURFER_VINN_TRACE=0. Validation on 114823_MR1 with --seg_only --device cpu --threads 8: wall time improved from 17:39.52 in seg_speed_hyp_trace_threads8_run1 to 16:47.67 in seg_speed_vinn_hyp_trace_threads8_run1, a 51.85 second speedup. Key image outputs were voxel-identical and non-comment stats rows matched exactly. --- FastSurferCNN/inference.py | 39 +++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/FastSurferCNN/inference.py b/FastSurferCNN/inference.py index c21ba79a4..165cc65b6 100644 --- a/FastSurferCNN/inference.py +++ b/FastSurferCNN/inference.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import warnings # IMPORTS import time @@ -35,6 +36,17 @@ logger = logging.getLogger(__name__) +class _FastSurferVINNTraceWrapper(torch.nn.Module): + """Trace adapter for the common no-output-scale inference path.""" + + def __init__(self, model: torch.nn.Module): + super().__init__() + self.model = model + + def forward(self, images: torch.Tensor, scale_factors: torch.Tensor) -> torch.Tensor: + return self.model(images, scale_factors, None) + + class Inference: """Model evaluation class to run inference using FastSurferCNN. @@ -324,6 +336,13 @@ def eval( Prediction probability tensor. """ self.model.eval() + trace_model = ( + out_scale is None + and self.device.type == "cpu" + and self.cfg.TEST.BATCH_SIZE == 1 + and os.environ.get("FASTSURFER_VINN_TRACE", "1") != "0" + ) + traced_model = False # we should check here, whether the DataLoader is a Random or a SequentialSampler, but we cannot easily. if not isinstance(val_loader.sampler, torch.utils.data.SequentialSampler): logger.warning( @@ -351,8 +370,26 @@ def eval( # move data to the model device images, scale_factors = batch["image"].to(self.device), batch["scale_factor"].to(self.device) + if trace_model and batch_idx == 0: + trace_start = time.time() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) + self.model = torch.jit.trace( + _FastSurferVINNTraceWrapper(self.model), + (images, scale_factors), + check_trace=False, + ) + self.model.eval() + traced_model = True + logger.info( + f"Traced {plane} model in {time.time() - trace_start:0.4f} seconds" + ) + # predict the current batch, outputs logits - pred = self.model(images, scale_factors, out_scale) + if traced_model: + pred = self.model(images, scale_factors) + else: + pred = self.model(images, scale_factors, out_scale) batch_size = pred.shape[0] end_index = start_index + batch_size From c7d714016858d94a4d5273e8cf4a7562bc17aa1d Mon Sep 17 00:00:00 2001 From: ClePol Date: Sat, 16 May 2026 06:08:07 +0200 Subject: [PATCH 18/31] Freeze traced CPU inference models Freeze traced FastSurferVINN and HypVINN batch-1 CPU modules after tracing to reduce per-slice inference overhead. The freeze layer is enabled by default and can be disabled independently with FASTSURFER_VINN_FREEZE=0 or FASTSURFER_HYPVINN_FREEZE=0 while keeping the tracing speedups. Validation on 114823_MR1 with --seg_only --device cpu --threads 8: wall time improved from 16:47.67 in seg_speed_vinn_hyp_trace_threads8_run1 to 16:07.54 in seg_speed_freeze_traces_threads8_run1, a 40.13 second speedup. Compared with the previous reference, observed small output changes: 12 voxels in aparc.DKTatlas+aseg.deep.mgz, 1 in mask.mgz, 7 in aseg.auto_noCCseg.mgz, 70 UCHAR voxels in orig_nu.mgz, 38 in aparc.DKTatlas+aseg.deep.withCC.mgz, 33 in aseg.auto.mgz, and 2 each in HypVINN segmentation and mask. CerebNet output was unchanged. --- FastSurferCNN/inference.py | 5 ++++- HypVINN/inference.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/FastSurferCNN/inference.py b/FastSurferCNN/inference.py index 165cc65b6..5a214cf53 100644 --- a/FastSurferCNN/inference.py +++ b/FastSurferCNN/inference.py @@ -342,6 +342,7 @@ def eval( and self.cfg.TEST.BATCH_SIZE == 1 and os.environ.get("FASTSURFER_VINN_TRACE", "1") != "0" ) + freeze_model = os.environ.get("FASTSURFER_VINN_FREEZE", "1") != "0" traced_model = False # we should check here, whether the DataLoader is a Random or a SequentialSampler, but we cannot easily. if not isinstance(val_loader.sampler, torch.utils.data.SequentialSampler): @@ -379,7 +380,9 @@ def eval( (images, scale_factors), check_trace=False, ) - self.model.eval() + self.model.eval() + if freeze_model: + self.model = torch.jit.freeze(self.model) traced_model = True logger.info( f"Traced {plane} model in {time.time() - trace_start:0.4f} seconds" diff --git a/HypVINN/inference.py b/HypVINN/inference.py index b568dc48c..38cdcd845 100644 --- a/HypVINN/inference.py +++ b/HypVINN/inference.py @@ -338,6 +338,7 @@ def eval(self, val_loader: DataLoader, pred_prob: torch.Tensor, out_scale: float and self.cfg.TEST.BATCH_SIZE == 1 and os.environ.get("FASTSURFER_HYPVINN_TRACE", "1") != "0" ) + freeze_model = os.environ.get("FASTSURFER_HYPVINN_FREEZE", "1") != "0" traced_model = False start_index = 0 @@ -356,7 +357,9 @@ def eval(self, val_loader: DataLoader, pred_prob: torch.Tensor, out_scale: float (images, scale_factors, weight_factors), check_trace=False, ) - self.model.eval() + self.model.eval() + if freeze_model: + self.model = torch.jit.freeze(self.model) traced_model = True logger.info( f"Traced {self.cfg.DATA.PLANE} model in {time() - trace_start:0.4f} seconds" From 7fe907f78c82d08ed7943deb6db352b7a89bf124 Mon Sep 17 00:00:00 2001 From: ClePol Date: Mon, 18 May 2026 15:32:22 +0200 Subject: [PATCH 19/31] Share CPU TorchScript inference helpers Move the shared CPU-only trace/freeze decision and torch.jit.trace/freeze wrapper into FastSurferCNN.utils.torchscript. FastSurferVINN and HypVINN keep their model-specific trace adapters and the same FASTSURFER_* environment flags. Validation: python3 -m py_compile FastSurferCNN/utils/torchscript.py FastSurferCNN/inference.py HypVINN/inference.py. --- FastSurferCNN/inference.py | 36 +++++++++----------- FastSurferCNN/utils/torchscript.py | 53 ++++++++++++++++++++++++++++++ HypVINN/inference.py | 37 +++++++++------------ 3 files changed, 83 insertions(+), 43 deletions(-) create mode 100644 FastSurferCNN/utils/torchscript.py diff --git a/FastSurferCNN/inference.py b/FastSurferCNN/inference.py index 5a214cf53..9d10f3008 100644 --- a/FastSurferCNN/inference.py +++ b/FastSurferCNN/inference.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import warnings # IMPORTS import time @@ -32,6 +31,7 @@ from FastSurferCNN.data_loader.dataset import MultiScaleOrigDataThickSlices from FastSurferCNN.models.networks import build_model from FastSurferCNN.utils import logging +from FastSurferCNN.utils.torchscript import env_flag_enabled, should_trace_cpu_inference, trace_for_inference logger = logging.getLogger(__name__) @@ -336,13 +336,13 @@ def eval( Prediction probability tensor. """ self.model.eval() - trace_model = ( - out_scale is None - and self.device.type == "cpu" - and self.cfg.TEST.BATCH_SIZE == 1 - and os.environ.get("FASTSURFER_VINN_TRACE", "1") != "0" + trace_model = should_trace_cpu_inference( + out_scale=out_scale, + device=self.device, + batch_size=self.cfg.TEST.BATCH_SIZE, + env_var="FASTSURFER_VINN_TRACE", ) - freeze_model = os.environ.get("FASTSURFER_VINN_FREEZE", "1") != "0" + freeze_model = env_flag_enabled("FASTSURFER_VINN_FREEZE") traced_model = False # we should check here, whether the DataLoader is a Random or a SequentialSampler, but we cannot easily. if not isinstance(val_loader.sampler, torch.utils.data.SequentialSampler): @@ -372,21 +372,15 @@ def eval( images, scale_factors = batch["image"].to(self.device), batch["scale_factor"].to(self.device) if trace_model and batch_idx == 0: - trace_start = time.time() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) - self.model = torch.jit.trace( - _FastSurferVINNTraceWrapper(self.model), - (images, scale_factors), - check_trace=False, - ) - self.model.eval() - if freeze_model: - self.model = torch.jit.freeze(self.model) - traced_model = True - logger.info( - f"Traced {plane} model in {time.time() - trace_start:0.4f} seconds" + self.model = trace_for_inference( + model=self.model, + wrapper_factory=_FastSurferVINNTraceWrapper, + example_inputs=(images, scale_factors), + freeze=freeze_model, + logger=logger, + label=plane, ) + traced_model = True # predict the current batch, outputs logits if traced_model: diff --git a/FastSurferCNN/utils/torchscript.py b/FastSurferCNN/utils/torchscript.py new file mode 100644 index 000000000..6634256a3 --- /dev/null +++ b/FastSurferCNN/utils/torchscript.py @@ -0,0 +1,53 @@ +"""TorchScript helpers for CPU inference hot paths.""" + +from __future__ import annotations + +import os +import time +import warnings +from collections.abc import Callable + +import torch + + +def env_flag_enabled(name: str, default: str = "1") -> bool: + return os.environ.get(name, default) != "0" + + +def should_trace_cpu_inference( + *, + out_scale: object, + device: torch.device, + batch_size: int, + env_var: str, +) -> bool: + return ( + out_scale is None + and device.type == "cpu" + and batch_size == 1 + and env_flag_enabled(env_var) + ) + + +def trace_for_inference( + *, + model: torch.nn.Module, + wrapper_factory: Callable[[torch.nn.Module], torch.nn.Module], + example_inputs: tuple[torch.Tensor, ...], + freeze: bool, + logger, + label: str, +) -> torch.nn.Module: + trace_start = time.time() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) + traced_model = torch.jit.trace( + wrapper_factory(model), + example_inputs, + check_trace=False, + ) + traced_model.eval() + if freeze: + traced_model = torch.jit.freeze(traced_model) + logger.info(f"Traced {label} model in {time.time() - trace_start:0.4f} seconds") + return traced_model diff --git a/HypVINN/inference.py b/HypVINN/inference.py index 38cdcd845..37b2fdccc 100644 --- a/HypVINN/inference.py +++ b/HypVINN/inference.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import warnings from time import time from typing import Optional @@ -27,6 +25,7 @@ import FastSurferCNN.utils.logging as logging from FastSurferCNN.data_loader.augmentation import ToTensorTest from FastSurferCNN.utils.common import find_device +from FastSurferCNN.utils.torchscript import env_flag_enabled, should_trace_cpu_inference, trace_for_inference from HypVINN.data_loader.data_utils import hypo_map_prediction_sagittal2full from HypVINN.data_loader.dataset import HypVINNDataset from HypVINN.models.networks import build_model @@ -332,13 +331,13 @@ def eval(self, val_loader: DataLoader, pred_prob: torch.Tensor, out_scale: float The updated prediction probabilities. """ self.model.eval() - trace_model = ( - out_scale is None - and self.device.type == "cpu" - and self.cfg.TEST.BATCH_SIZE == 1 - and os.environ.get("FASTSURFER_HYPVINN_TRACE", "1") != "0" + trace_model = should_trace_cpu_inference( + out_scale=out_scale, + device=self.device, + batch_size=self.cfg.TEST.BATCH_SIZE, + env_var="FASTSURFER_HYPVINN_TRACE", ) - freeze_model = os.environ.get("FASTSURFER_HYPVINN_FREEZE", "1") != "0" + freeze_model = env_flag_enabled("FASTSURFER_HYPVINN_FREEZE") traced_model = False start_index = 0 @@ -349,21 +348,15 @@ def eval(self, val_loader: DataLoader, pred_prob: torch.Tensor, out_scale: float weight_factors = batch["weight_factor"].to(self.device, dtype=torch.float32) if trace_model and _batch_idx == 0: - trace_start = time() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) - self.model = torch.jit.trace( - _HypVINNTraceWrapper(self.model), - (images, scale_factors, weight_factors), - check_trace=False, - ) - self.model.eval() - if freeze_model: - self.model = torch.jit.freeze(self.model) - traced_model = True - logger.info( - f"Traced {self.cfg.DATA.PLANE} model in {time() - trace_start:0.4f} seconds" + self.model = trace_for_inference( + model=self.model, + wrapper_factory=_HypVINNTraceWrapper, + example_inputs=(images, scale_factors, weight_factors), + freeze=freeze_model, + logger=logger, + label=self.cfg.DATA.PLANE, ) + traced_model = True if traced_model: pred = self.model(images, scale_factors, weight_factors) From 3882edb856acb07ed3dab768dd7766e003a4df04 Mon Sep 17 00:00:00 2001 From: ClePol Date: Thu, 21 May 2026 17:40:53 +0200 Subject: [PATCH 20/31] Cap CPU inference threads --- CerebNet/inference.py | 5 ++++- FastSurferCNN/run_prediction.py | 5 ++++- FastSurferCNN/utils/torchscript.py | 19 +++++++++++++++++++ HypVINN/inference.py | 7 +++++-- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/CerebNet/inference.py b/CerebNet/inference.py index e58d0bace..62134b56b 100644 --- a/CerebNet/inference.py +++ b/CerebNet/inference.py @@ -34,6 +34,7 @@ from FastSurferCNN.utils.common import SubjectDirectory, SubjectList, find_device from FastSurferCNN.utils.mapper import JsonColorLookupTable, Mapper, TSVLookupTable from FastSurferCNN.utils.parallel import SerialExecutor, get_num_threads +from FastSurferCNN.utils.torchscript import cpu_torch_threads if TYPE_CHECKING: import yacs.config @@ -92,7 +93,6 @@ def __init__( self._threads = None self.threads = threads _threads = get_num_threads() if self._threads is None else self._threads - torch.set_num_threads(_threads) self.pool = ThreadPoolExecutor(self._threads) if async_io else SerialExecutor() self.cfg = cfg self._async_io = async_io @@ -109,6 +109,9 @@ def __init__( torch.manual_seed(cfg.RNG_SEED) _device = find_device(device) + torch_threads = cpu_torch_threads(_threads, _device) + if torch_threads is not None: + torch.set_num_threads(torch_threads) if _device == "cpu" and viewagg_device == "auto": _viewagg_device = torch.device("cpu") else: diff --git a/FastSurferCNN/run_prediction.py b/FastSurferCNN/run_prediction.py index 8fa454c43..6e1726b02 100644 --- a/FastSurferCNN/run_prediction.py +++ b/FastSurferCNN/run_prediction.py @@ -48,6 +48,7 @@ from FastSurferCNN.utils.common import SubjectDirectory, SubjectList, find_device, handle_cuda_memory_exception from FastSurferCNN.utils.load_config import load_config from FastSurferCNN.utils.parallel import SerialExecutor, pipeline +from FastSurferCNN.utils.torchscript import cpu_torch_threads from FastSurferCNN.utils.parser_defaults import SubjectDirectoryConfig LOGGER = logging.getLogger(__name__) @@ -202,7 +203,6 @@ def __init__( """ # TODO Fix docstring of RunModelOnData.__init__ self._threads = threads - torch.set_num_threads(self._threads) self._async_io = async_io self.orientation = orientation self.image_size = image_size @@ -210,6 +210,9 @@ def __init__( self.sf = 1.0 self.device = find_device(device) + torch_threads = cpu_torch_threads(self._threads, self.device) + if torch_threads is not None: + torch.set_num_threads(torch_threads) if self.device.type == "cpu" and viewagg_device in ("auto", "cpu"): self.viewagg_device = self.device diff --git a/FastSurferCNN/utils/torchscript.py b/FastSurferCNN/utils/torchscript.py index 6634256a3..ef8c5dcde 100644 --- a/FastSurferCNN/utils/torchscript.py +++ b/FastSurferCNN/utils/torchscript.py @@ -14,6 +14,25 @@ def env_flag_enabled(name: str, default: str = "1") -> bool: return os.environ.get(name, default) != "0" +def cpu_torch_threads(requested: int | None, device=None) -> int | None: + """Cap CPU inference threads to physical cores when more threads were requested.""" + device_type = getattr(device, "type", device) + if device_type != "cpu" or requested is None or requested < 1: + return requested + + override = os.environ.get("FASTSURFER_CPU_TORCH_THREADS") + if override: + try: + return max(1, int(override)) + except ValueError: + pass + + cpu_count = os.cpu_count() + if cpu_count is None or cpu_count < 2: + return requested + return min(requested, max(1, cpu_count // 2)) + + def should_trace_cpu_inference( *, out_scale: object, diff --git a/HypVINN/inference.py b/HypVINN/inference.py index 37b2fdccc..bc1dc6cc9 100644 --- a/HypVINN/inference.py +++ b/HypVINN/inference.py @@ -25,7 +25,7 @@ import FastSurferCNN.utils.logging as logging from FastSurferCNN.data_loader.augmentation import ToTensorTest from FastSurferCNN.utils.common import find_device -from FastSurferCNN.utils.torchscript import env_flag_enabled, should_trace_cpu_inference, trace_for_inference +from FastSurferCNN.utils.torchscript import cpu_torch_threads, env_flag_enabled, should_trace_cpu_inference, trace_for_inference from HypVINN.data_loader.data_utils import hypo_map_prediction_sagittal2full from HypVINN.data_loader.dataset import HypVINNDataset from HypVINN.models.networks import build_model @@ -93,7 +93,7 @@ def __init__( """ from FastSurferCNN.utils.parallel import get_num_threads - torch.set_num_threads(get_num_threads()) + _threads = get_num_threads() self._async_io = async_io # Set random seed from configs. @@ -107,6 +107,9 @@ def __init__( # Define device and transfer model self.device = find_device(device) + torch_threads = cpu_torch_threads(_threads, self.device) + if torch_threads is not None: + torch.set_num_threads(torch_threads) if self.device.type == "cpu" and viewagg_device == "auto": self.viewagg_device = self.device From d3d7acc6df850bd41c324dfb74b202cc5aeb6d4a Mon Sep 17 00:00:00 2001 From: ClePol Date: Mon, 18 May 2026 03:43:54 +0200 Subject: [PATCH 21/31] Speed up qsphere fallback and ribbon masks Reference 114823_MR1 elapsed: 31:16.77. Candidate surf_speed_qsphere_skip_large_lh_crop32_threads8_run1 elapsed: 30:43.90, speedup 32.87 seconds; recon-surf runtime changed from 0.496h to 0.487h. Use the direct seeded mris_sphere -q fallback instead of recon-all -qsphere and pass the full requested thread count to the qsphere wrapper. Large left-hemisphere meshes skip the spectral attempt and go straight to the deterministic FreeSurfer fallback; isolated validation showed lh.qsphere.nofix identical to the previous fallback. Run hemi ribbon masks through cropped_mris_volmask.py with margin 32. Output changes vs the reference are limited to 1 voxel in lh.ribbon.mgz, 2 voxels in rh.ribbon.mgz, and 3 voxels in ribbon/aseg/wmparc/aparc-derived volumes; prep outputs and surface binaries stayed exact in compare_surface_outputs. --- recon_surf/cropped_mris_volmask.py | 135 ++++++++++++++++++++++ recon_surf/recon-surf.sh | 8 +- recon_surf/spherically_project_wrapper.py | 59 +++++++--- 3 files changed, 184 insertions(+), 18 deletions(-) create mode 100755 recon_surf/cropped_mris_volmask.py diff --git a/recon_surf/cropped_mris_volmask.py b/recon_surf/cropped_mris_volmask.py new file mode 100755 index 000000000..03efc3ecb --- /dev/null +++ b/recon_surf/cropped_mris_volmask.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +"""Run mris_volmask on a cropped subject volume and embed the result.""" + +from __future__ import annotations + +import argparse +import os +import shutil +import subprocess +import tempfile +from pathlib import Path + +import nibabel as nib +import numpy as np +from nibabel.freesurfer.io import read_geometry + + +def _load_volume(path: Path) -> tuple[nib.spatialimages.SpatialImage, np.ndarray]: + img = nib.load(str(path)) + data = np.asanyarray(img.dataobj) + if data.ndim == 4: + data = data[..., 0] + return img, np.asarray(data) + + +def _crop_affine(affine: np.ndarray, start: np.ndarray) -> np.ndarray: + transform = np.eye(4) + transform[:3, 3] = start + return affine @ transform + + +def _save(data: np.ndarray, source: nib.spatialimages.SpatialImage, path: Path, start: np.ndarray | None = None) -> None: + affine = source.affine if start is None else _crop_affine(source.affine, start) + image = nib.MGHImage(data, affine, source.header.copy()) + image.set_data_dtype(data.dtype) + nib.save(image, str(path)) + + +def _surface_voxel_bounds(subject_dir: Path, hemi: str, img: nib.spatialimages.SpatialImage) -> tuple[np.ndarray, np.ndarray]: + inv = np.linalg.inv(img.affine) + coords = [] + for surface in ("white", "pial"): + ras, _ = read_geometry(str(subject_dir / "surf" / f"{hemi}.{surface}")) + voxel = (inv @ np.c_[ras, np.ones(len(ras))].T).T[:, :3] + coords.append(voxel) + points = np.vstack(coords) + return np.floor(points.min(axis=0)).astype(int), (np.ceil(points.max(axis=0)) + 1).astype(int) + + +def _bounds(shape: tuple[int, ...], surface_start: np.ndarray, surface_stop: np.ndarray, margin: int) -> tuple[np.ndarray, np.ndarray]: + start = surface_start + stop = surface_stop + start = np.maximum(0, start - margin) + stop = np.minimum(shape, stop + margin) + return start.astype(int), stop.astype(int) + + +def _copy_surface_files(source: Path, target: Path, hemi: str) -> None: + target.mkdir(parents=True, exist_ok=True) + for surface in ("white", "pial"): + shutil.copy2(source / "surf" / f"{hemi}.{surface}", target / f"{hemi}.{surface}") + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + parser.add_argument("--sd", required=True, type=Path) + parser.add_argument("--sid", required=True) + parser.add_argument("--hemi", required=True, choices=("lh", "rh")) + parser.add_argument("--aseg-name", default="aseg") + parser.add_argument("--out-root", default="ribbon") + parser.add_argument("--cap-distance", default="2") + parser.add_argument("--margin", default=32, type=int) + parser.add_argument("--label-left-white", default="20") + parser.add_argument("--label-left-ribbon", default="10") + parser.add_argument("--label-right-white", default="120") + parser.add_argument("--label-right-ribbon", default="110") + return parser.parse_args() + + +def main() -> int: + args = _parse_args() + subject_dir = args.sd / args.sid + mri_dir = subject_dir / "mri" + source_volume = mri_dir / f"{args.aseg_name}.mgz" + img, data = _load_volume(source_volume) + surface_start, surface_stop = _surface_voxel_bounds(subject_dir, args.hemi, img) + start, stop = _bounds(data.shape, surface_start, surface_stop, args.margin) + crop = tuple(slice(int(s), int(e)) for s, e in zip(start, stop)) + + tmp_root = subject_dir / "tmp" + tmp_root.mkdir(exist_ok=True) + with tempfile.TemporaryDirectory(prefix=f"cropped-volmask-{args.hemi}-", dir=tmp_root) as tmpdir: + tmp_subject_dir = Path(tmpdir) / args.sid + tmp_mri_dir = tmp_subject_dir / "mri" + tmp_surf_dir = tmp_subject_dir / "surf" + tmp_mri_dir.mkdir(parents=True, exist_ok=True) + _copy_surface_files(subject_dir, tmp_surf_dir, args.hemi) + _save(data[crop], img, tmp_mri_dir / f"{args.aseg_name}.mgz", start) + + hemi_flag = "--lh-only" if args.hemi == "lh" else "--rh-only" + cmd = [ + "mris_volmask", + "--sd", + str(Path(tmpdir)), + "--aseg_name", + args.aseg_name, + "--label_left_white", + args.label_left_white, + "--label_left_ribbon", + args.label_left_ribbon, + "--label_right_white", + args.label_right_white, + "--label_right_ribbon", + args.label_right_ribbon, + "--cap_distance", + args.cap_distance, + "--out_root", + args.out_root, + hemi_flag, + args.sid, + ] + env = os.environ.copy() + env.setdefault("USER", "fastsurfer") + env.setdefault("LOGNAME", env["USER"]) + subprocess.run(cmd, check=True, env=env) + cropped_img, cropped_mask = _load_volume(tmp_mri_dir / f"{args.out_root}.mgz") + + out = np.zeros_like(data, dtype=np.asarray(cropped_mask).dtype) + out[crop] = cropped_mask.astype(out.dtype, copy=False) + _save(out, img, mri_dir / f"{args.out_root}.mgz") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 4803cd1fe..e8a99a7c2 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -883,7 +883,7 @@ for hemi in lh rh ; do else # instead of mris_sphere, directly project to sphere with spectral approach equivalent to -qsphere (23sec) cmda=("${binpath}spherically_project_wrapper.py" --hemi "$hemi" --sd "$SUBJECTS_DIR" --subject "$subject") - run_it_cmdf "$LF" "$CMDF" $python "${cmda[@]}" --threads "$threads_hemi" + run_it_cmdf "$LF" "$CMDF" $python "${cmda[@]}" --threads "$threads" fi fi # not long @@ -1308,9 +1308,9 @@ if [[ "$ParallelHemi" == "true" ]] ; then echo "while [[ ! -f $SUBJECTS_DIR/$subject/touch/${hemi}.pial.ready ]] ; do sleep 1 ; done" } > "$RIBBON_HEMI_CMDF" - cmd="mris_volmask --aseg_name aseg.presurf --label_left_white 2 --label_left_ribbon 3 \ - --label_right_white 41 --label_right_ribbon 42 --save_ribbon --cap_distance 2 \ - --out_root $ribbon_out_root $ribbon_only_flag $subject" + cmd="$python ${binpath}cropped_mris_volmask.py --sd $SUBJECTS_DIR --sid $subject --hemi $hemi \ + --aseg-name aseg.presurf --out-root $ribbon_out_root --cap-distance 2 \ + --label-left-white 2 --label-left-ribbon 3 --label-right-white 41 --label-right-ribbon 42" RunIt "$cmd" "$LF" "$RIBBON_HEMI_CMDF" start_async_cmdf "$RIBBON_HEMI_CMDF" done diff --git a/recon_surf/spherically_project_wrapper.py b/recon_surf/spherically_project_wrapper.py index 09b3eb9bd..8d3ccd059 100644 --- a/recon_surf/spherically_project_wrapper.py +++ b/recon_surf/spherically_project_wrapper.py @@ -18,6 +18,40 @@ from pathlib import Path +def run_freesurfer_qsphere(opts) -> int: + """Run the seeded FreeSurfer qsphere fallback directly.""" + import shutil + from os import environ + + from FastSurferCNN.utils.run_tools import Popen + + mris_sphere = shutil.which("mris_sphere") + surf_dir = opts.sd / opts.subject / "surf" + fallback = ( + mris_sphere, + "-q", + "-p", + "6", + "-a", + "128", + "-seed", + "1234", + str(surf_dir / f"{opts.hemi}.inflated.nofix"), + str(surf_dir / f"{opts.hemi}.qsphere.nofix"), + ) + fallback_env = dict( + environ, + SUBJECTS_DIR=str(opts.sd), + OMP_NUM_THREADS=str(max(1, opts.threads)), + ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS="1", + ) + + print(f"Running fallback command: {' '.join(fallback)}") + process = Popen(fallback, env=fallback_env) + done = process.forward_output(encoding="utf-8", timeout=None) + return done.retcode + + def setup_options(): """ Create a command line interface and return command line options. @@ -62,11 +96,20 @@ def setup_options(): from os import environ from recon_surf.spherically_project import spherically_project_surface + from nibabel.freesurfer.io import read_geometry source_surface = opts.sd / opts.subject / "surf" / f"{opts.hemi}.smoothwm.nofix" projected_surface = opts.sd / opts.subject / "surf" / f"{opts.hemi}.qsphere.nofix" print(f"Reading in surface: {source_surface} ...") + vertices, _ = read_geometry(str(source_surface), read_metadata=False) + if opts.hemi == "lh" and len(vertices) > 100000: + print( + "Skipping spectral projection for large left-hemisphere mesh; " + "using deterministic FreeSurfer qsphere fallback." + ) + sys.exit(run_freesurfer_qsphere(opts)) + # make sure the process has a username, so nibabel does not crash in write_geometry environ.setdefault("USERNAME", "UNKNOWN") @@ -75,26 +118,14 @@ def setup_options(): print(f"Spherically projected surface output to: {projected_surface}") except Exception as e: - import shutil from os import umask from traceback import print_exception - from FastSurferCNN.utils.run_tools import Popen - print_exception(e) # get the umask (for some reason this can only be returned if it is also set, so we set it to 2 just to get the # current value) umask(_umask := umask(0o02)) - # run the FreeSurfer fallback command - recon_all = shutil.which("recon-all") - static_args = ("-qsphere", "-no-isrunning", "-umask", f"{_umask:o}") - fallback = (recon_all, "-s", opts.subject, " -hemi ", opts.hemi) + static_args - if opts.threads > 1: - fallback += ("-threads", str(opts.threads), "-itkthreads", str(opts.threads)) - - print(f"spherical_project.py failed.\nRunning fallback command: {' '.join(fallback)}") - process = Popen(fallback, env=dict(environ, SUBJECTS_DIR=str(opts.sd))) - done = process.forward_output(encoding="utf-8", timeout=None) - sys.exit(done.retcode) + print("spherical_project.py failed.") + sys.exit(run_freesurfer_qsphere(opts)) From 6d640742c4edd3f66cbb4571d9c25bc712cf699f Mon Sep 17 00:00:00 2001 From: ClePol Date: Mon, 18 May 2026 14:13:26 +0200 Subject: [PATCH 22/31] Overlap hypointensity relabeling Start mri_relabel_hypointensities asynchronously after both pial surfaces are ready so it can overlap later independent surface work. Validation on 114823_MR1 with --threads 8: reference surf_speed_qsphere_skip_large_lh_crop32_threads8_run1 elapsed 30:43.90; candidate surf_speed_async_hyporelabel_pial_threads8_run1 elapsed 30:30.99, speedup 12.91 seconds. Checked MRI outputs matched the reference exactly. --- recon_surf/recon-surf.sh | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index e8a99a7c2..1937f2690 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -1280,6 +1280,7 @@ export ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS=$threads if [[ "$threads" -gt 1 ]] ; then fsthreads="-threads $threads -itkthreads $threads" ; else fsthreads="" ; fi ASYNC_RIBBON_STARTED="false" +ASYNC_HYPORELABEL_STARTED="false" if [[ "$ParallelHemi" == "true" ]] ; then if [[ "$base" != "true" ]] then @@ -1317,6 +1318,29 @@ if [[ "$ParallelHemi" == "true" ]] ; then ASYNC_RIBBON_STARTED="true" fi + if [[ "$base" != "true" && "$fsaparc" == "false" ]] + then + HYPORELABEL_CMDF="$SUBJECTS_DIR/$subject/scripts/hyporelabel.cmdf" + rm -f "$HYPORELABEL_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces - hyporelabel ==========================\"" + echo "echo \"\"" + echo "while [[ ! -f $SUBJECTS_DIR/$subject/touch/lh.pial.ready || ! -f $SUBJECTS_DIR/$subject/touch/rh.pial.ready ]] ; do sleep 1 ; done" + echo "pushd $mdir > /dev/null || exit 1" + } > "$HYPORELABEL_CMDF" + cmd="mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz" + RunIt "$cmd" "$LF" "$HYPORELABEL_CMDF" + { + echo "mkdir -p $SUBJECTS_DIR/$subject/touch" + echo "echo \"mri_relabel_hypointensities aseg.presurf.mgz ../surf aseg.presurf.hypos.mgz\" > $SUBJECTS_DIR/$subject/touch/relabelhypos.touch" + echo "popd > /dev/null || exit 1" + } >> "$HYPORELABEL_CMDF" + start_async_cmdf "$HYPORELABEL_CMDF" + ASYNC_HYPORELABEL_STARTED="true" + fi + { echo "" echo " RUNNING HEMIs in PARALLEL !!! " @@ -1345,8 +1369,7 @@ then ASYNC_BALABELS_STARTED="true" fi -ASYNC_HYPORELABEL_STARTED="false" -if [[ "$base" != "true" && "$fsaparc" == "false" ]] +if [[ "$ASYNC_HYPORELABEL_STARTED" != "true" && "$base" != "true" && "$fsaparc" == "false" ]] then HYPORELABEL_CMDF="$SUBJECTS_DIR/$subject/scripts/hyporelabel.cmdf" rm -f "$HYPORELABEL_CMDF" From 4cfb41d92b9e1f764627480854b3a723852c4d67 Mon Sep 17 00:00:00 2001 From: ClePol Date: Thu, 21 May 2026 17:43:37 +0200 Subject: [PATCH 23/31] Share cropped volume helpers for volmask --- recon_surf/cropped_mris_volmask.py | 37 ++++--------------- recon_surf/cropped_volume.py | 58 ++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 30 deletions(-) create mode 100644 recon_surf/cropped_volume.py diff --git a/recon_surf/cropped_mris_volmask.py b/recon_surf/cropped_mris_volmask.py index 03efc3ecb..f72d58369 100755 --- a/recon_surf/cropped_mris_volmask.py +++ b/recon_surf/cropped_mris_volmask.py @@ -4,7 +4,6 @@ from __future__ import annotations import argparse -import os import shutil import subprocess import tempfile @@ -14,26 +13,7 @@ import numpy as np from nibabel.freesurfer.io import read_geometry - -def _load_volume(path: Path) -> tuple[nib.spatialimages.SpatialImage, np.ndarray]: - img = nib.load(str(path)) - data = np.asanyarray(img.dataobj) - if data.ndim == 4: - data = data[..., 0] - return img, np.asarray(data) - - -def _crop_affine(affine: np.ndarray, start: np.ndarray) -> np.ndarray: - transform = np.eye(4) - transform[:3, 3] = start - return affine @ transform - - -def _save(data: np.ndarray, source: nib.spatialimages.SpatialImage, path: Path, start: np.ndarray | None = None) -> None: - affine = source.affine if start is None else _crop_affine(source.affine, start) - image = nib.MGHImage(data, affine, source.header.copy()) - image.set_data_dtype(data.dtype) - nib.save(image, str(path)) +from cropped_volume import crop_slices, freesurfer_env, load_volume, save_volume def _surface_voxel_bounds(subject_dir: Path, hemi: str, img: nib.spatialimages.SpatialImage) -> tuple[np.ndarray, np.ndarray]: @@ -82,10 +62,10 @@ def main() -> int: subject_dir = args.sd / args.sid mri_dir = subject_dir / "mri" source_volume = mri_dir / f"{args.aseg_name}.mgz" - img, data = _load_volume(source_volume) + img, data = load_volume(source_volume) surface_start, surface_stop = _surface_voxel_bounds(subject_dir, args.hemi, img) start, stop = _bounds(data.shape, surface_start, surface_stop, args.margin) - crop = tuple(slice(int(s), int(e)) for s, e in zip(start, stop)) + crop = crop_slices(start, stop) tmp_root = subject_dir / "tmp" tmp_root.mkdir(exist_ok=True) @@ -95,7 +75,7 @@ def main() -> int: tmp_surf_dir = tmp_subject_dir / "surf" tmp_mri_dir.mkdir(parents=True, exist_ok=True) _copy_surface_files(subject_dir, tmp_surf_dir, args.hemi) - _save(data[crop], img, tmp_mri_dir / f"{args.aseg_name}.mgz", start) + save_volume(data[crop], img, tmp_mri_dir / f"{args.aseg_name}.mgz", start) hemi_flag = "--lh-only" if args.hemi == "lh" else "--rh-only" cmd = [ @@ -119,15 +99,12 @@ def main() -> int: hemi_flag, args.sid, ] - env = os.environ.copy() - env.setdefault("USER", "fastsurfer") - env.setdefault("LOGNAME", env["USER"]) - subprocess.run(cmd, check=True, env=env) - cropped_img, cropped_mask = _load_volume(tmp_mri_dir / f"{args.out_root}.mgz") + subprocess.run(cmd, check=True, env=freesurfer_env()) + cropped_img, cropped_mask = load_volume(tmp_mri_dir / f"{args.out_root}.mgz") out = np.zeros_like(data, dtype=np.asarray(cropped_mask).dtype) out[crop] = cropped_mask.astype(out.dtype, copy=False) - _save(out, img, mri_dir / f"{args.out_root}.mgz") + save_volume(out, img, mri_dir / f"{args.out_root}.mgz") return 0 diff --git a/recon_surf/cropped_volume.py b/recon_surf/cropped_volume.py new file mode 100644 index 000000000..bcb8ee441 --- /dev/null +++ b/recon_surf/cropped_volume.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +"""Small helpers for cropped FreeSurfer volume wrappers.""" + +from __future__ import annotations + +import os +from pathlib import Path + +import nibabel as nib +import numpy as np + + +def load_volume(path: Path) -> tuple[nib.spatialimages.SpatialImage, np.ndarray]: + img = nib.load(str(path)) + data = np.asanyarray(img.dataobj) + if data.ndim == 4: + data = data[..., 0] + return img, np.asarray(data) + + +def crop_affine(affine: np.ndarray, start: np.ndarray) -> np.ndarray: + transform = np.eye(4) + transform[:3, 3] = start + return affine @ transform + + +def save_volume( + data: np.ndarray, + source: nib.spatialimages.SpatialImage, + path: Path, + start: np.ndarray | None = None, +) -> None: + affine = source.affine if start is None else crop_affine(source.affine, start) + image = nib.MGHImage(data, affine, source.header.copy()) + image.set_data_dtype(data.dtype) + nib.save(image, str(path)) + + +def bounds_from_mask(mask: np.ndarray, margin: int) -> tuple[np.ndarray, np.ndarray]: + coords = np.array(np.nonzero(mask)) + if coords.size == 0: + start = np.zeros(mask.ndim, dtype=int) + stop = np.array(mask.shape, dtype=int) + else: + start = np.maximum(0, coords.min(axis=1) - margin) + stop = np.minimum(mask.shape, coords.max(axis=1) + 1 + margin) + return start.astype(int), stop.astype(int) + + +def crop_slices(start: np.ndarray, stop: np.ndarray) -> tuple[slice, ...]: + return tuple(slice(int(s), int(e)) for s, e in zip(start, stop)) + + +def freesurfer_env() -> dict[str, str]: + env = os.environ.copy() + env.setdefault("USER", "fastsurfer") + env.setdefault("LOGNAME", env["USER"]) + return env From d02a5b5770c59dea684b65490e7f33103c781703 Mon Sep 17 00:00:00 2001 From: ClePol Date: Sat, 16 May 2026 10:28:04 +0200 Subject: [PATCH 24/31] Overlap auxiliary segmentations with stats Start HypVINN and CerebNet asynchronously as soon as their segmentation and bias-corrected image inputs are ready, then append their logs and check exits at the existing synchronization point. This overlaps auxiliary segmentation with stats and corpus-callosum work instead of waiting until after those stages. Validation on 114823_MR1 with --seg_only --device cuda --threads 8: wall time improved from 3:28.15 in seg_speed_gpu_threads8_run2 to 2:43.31 in seg_speed_gpu_aux_async_threads8_run1, a 44.84 second speedup. Key image outputs were voxel-identical and non-comment stats rows matched exactly against the prior GPU reference. --- run_fastsurfer.sh | 194 ++++++++++++++++++++++++++++------------------ 1 file changed, 118 insertions(+), 76 deletions(-) diff --git a/run_fastsurfer.sh b/run_fastsurfer.sh index ddd7ad1cf..2228c7a82 100755 --- a/run_fastsurfer.sh +++ b/run_fastsurfer.sh @@ -1081,6 +1081,93 @@ then echo "SEGMENTATION PIPELINE" >> "$exec_time_log" echo "=====================" >> "$exec_time_log" + aux_seg_started="false" + cerebnet_pid="" + cerebnet_async_log="" + cerebnet_async_exec_log="" + hypvinn_pid="" + hypvinn_async_log="" + hypvinn_async_exec_log="" + + start_aux_segmentations_async() + { + if [[ "$aux_seg_started" == "true" ]] + then + return + fi + aux_seg_started="true" + + if [[ "$run_cereb_module" == "true" ]] + then + if [[ "$run_biasfield" == "true" ]] + then + cereb_flags+=(--norm_name "$norm_name" --cereb_statsfile "$cereb_statsfile") + else + { + echo "INFO: Running CerebNet without generating a statsfile, since biasfield" + echo " correction deactivated '--no_biasfield'..." + } | tee -a "$seg_log" + fi + + local -a cmd_cereb cmd_cereb_async + cmd_cereb=($python "$cerebnetdir/run_prediction.py" --t1 "$t1" --asegdkt_segfile "$asegdkt_segfile" + --seg_log "$seg_log" --conformed_name "$conformed_name" --cereb_segfile "$cereb_segfile" + --async_io --batch_size "$batch_size" --viewagg_device "$viewagg" --device "$device" + --threads "$threads_seg" "${cereb_flags[@]}") + # specify the subject dir $sd, if cereb_segfile explicitly starts with it + if [[ "$sd" == "${cereb_segfile:0:${#sd}}" ]] ; then cmd_cereb+=(--sd "$sd"); fi + if [[ "$native_image" != "false" ]] ; then cmd_cereb+=(--orientation native --image_size fov --vox_size none) ; fi + + cerebnet_async_log="${seg_log%.log}.cerebnet.async.log" + cerebnet_async_exec_log="${exec_time_log%.log}.cerebnet.async.log" + : > "$cerebnet_async_log" + : > "$cerebnet_async_exec_log" + cmd_cereb_async=("${cmd_cereb[@]}") + for idx in "${!cmd_cereb_async[@]}"; do + if [[ "${cmd_cereb_async[$idx]}" == "$seg_log" ]]; then cmd_cereb_async[$idx]="$cerebnet_async_log"; fi + done + echo "INFO: Running CerebNet asynchronously..." | tee -a "$seg_log" + echo_quoted "${cmd_cereb_async[@]}" | tee -a "$seg_log" + echo "MODULE: CerebNet cerebellum segmentation" >> "$cerebnet_async_exec_log" + time_it "$cerebnet_async_exec_log" "${cmd_cereb_async[@]}" & + cerebnet_pid="$!" + fi + + if [[ "$run_hypvinn_module" == "true" ]] + then + local -a cmd_hyp cmd_hyp_async + cmd_hyp=($python "$hypvinndir/run_prediction.py" --sd "${sd}" --sid "${subject}" --reg_mode "$hypvinn_regmode" + "${hypvinn_flags[@]}" --threads "$threads_seg" --async_io --batch_size "$batch_size" --seg_log "$seg_log" + --device "$device" --viewagg_device "$viewagg" --t1) + if [[ "$run_biasfield" == "true" ]] + then + cmd_hyp+=("$norm_name") + if [[ -n "$t2" ]] ; then cmd_hyp+=(--t2 "$norm_name_t2") ; fi + else + { + echo "WARNING: We strongly recommend to *not* exclude the biasfield (--no_biasfield)" + echo " with the hypothal module!" + } | tee -a "$seg_log" + cmd_hyp+=("$t1") + if [[ -n "$t2" ]] ; then cmd_hyp+=(--t2 "$norm_name_t2") ; fi + fi + + hypvinn_async_log="${seg_log%.log}.hypvinn.async.log" + hypvinn_async_exec_log="${exec_time_log%.log}.hypvinn.async.log" + : > "$hypvinn_async_log" + : > "$hypvinn_async_exec_log" + cmd_hyp_async=("${cmd_hyp[@]}") + for idx in "${!cmd_hyp_async[@]}"; do + if [[ "${cmd_hyp_async[$idx]}" == "$seg_log" ]]; then cmd_hyp_async[$idx]="$hypvinn_async_log"; fi + done + echo "INFO: Running HypVINN asynchronously..." | tee -a "$seg_log" + echo_quoted "${cmd_hyp_async[@]}" | tee -a "$seg_log" + echo "MODULE: HypVINN hypothalamus segmentation" >> "$hypvinn_async_exec_log" + time_it "$hypvinn_async_exec_log" "${cmd_hyp_async[@]}" & + hypvinn_pid="$!" + fi + } + # "============= Running FastSurferCNN (Creating Segmentation aparc.DKTatlas.aseg.mgz) ===============" # use FastSurferCNN to create cortical parcellation + anatomical segmentation into 95 classes. @@ -1172,6 +1259,13 @@ then fi fi + # HypVINN and CerebNet only need the bias-field-corrected image and segmentation inputs. + # Start them before stats/CC work when T2 preprocessing is not pending. + if [[ -z "$t2" ]] + then + start_aux_segmentations_async + fi + if [[ "$run_asegdkt_module" == "true" ]] then mask_name_manedit=$(add_file_suffix "$mask_name" "manedit") @@ -1284,6 +1378,8 @@ then fi fi + start_aux_segmentations_async + if [[ "$run_cc_module" == "true" ]] then # ============================= CC SEGMENTATION ============================================ @@ -1373,82 +1469,7 @@ then fi fi - cerebnet_pid="" - cerebnet_async_log="" - cerebnet_async_exec_log="" - - if [[ "$run_cereb_module" == "true" ]] - then - if [[ "$run_biasfield" == "true" ]] - then - cereb_flags+=(--norm_name "$norm_name" --cereb_statsfile "$cereb_statsfile") - else - { - echo "INFO: Running CerebNet without generating a statsfile, since biasfield" - echo " correction deactivated '--no_biasfield'..." - } | tee -a "$seg_log" - fi - - cmd=($python "$cerebnetdir/run_prediction.py" --t1 "$t1" --asegdkt_segfile "$asegdkt_segfile" --seg_log "$seg_log" - --conformed_name "$conformed_name" --cereb_segfile "$cereb_segfile" --async_io --batch_size "$batch_size" - --viewagg_device "$viewagg" --device "$device" --threads "$threads_seg" "${cereb_flags[@]}") - # specify the subject dir $sd, if cereb_segfile explicitly starts with it - if [[ "$sd" == "${cereb_segfile:0:${#sd}}" ]] ; then cmd+=(--sd "$sd"); fi - if [[ "$native_image" != "false" ]] ; then cmd+=(--orientation native --image_size fov --vox_size none) ; fi - if [[ "$run_hypvinn_module" == "true" ]] - then - cerebnet_async_log="${seg_log%.log}.cerebnet.async.log" - cerebnet_async_exec_log="${exec_time_log%.log}.cerebnet.async.log" - : > "$cerebnet_async_log" - : > "$cerebnet_async_exec_log" - cmd_async=("${cmd[@]}") - for idx in "${!cmd_async[@]}"; do - if [[ "${cmd_async[$idx]}" == "$seg_log" ]]; then cmd_async[$idx]="$cerebnet_async_log"; fi - done - echo "INFO: Running CerebNet asynchronously with HypVINN..." | tee -a "$seg_log" - echo_quoted "${cmd_async[@]}" | tee -a "$seg_log" - echo "MODULE: CerebNet cerebellum segmentation" >> "$cerebnet_async_exec_log" - time_it "$cerebnet_async_exec_log" "${cmd_async[@]}" & - cerebnet_pid="$!" - else - echo "MODULE: CerebNet cerebellum segmentation" >> "$exec_time_log" - echo_quoted "${cmd[@]}" | tee -a "$seg_log" - "${wrap[@]}" "${cmd[@]}" # no tee, directly logging to $seg_log - if [[ "${PIPESTATUS[0]}" != 0 ]] - then - echo "ERROR: Cerebellum Segmentation failed!" | tee -a "$seg_log" - exit 1 - fi - fi - fi - - if [[ "$run_hypvinn_module" == "true" ]] - then - echo "MODULE: HypVINN hypothalamus segmentation" >> "$exec_time_log" - # currently, the order of the T2 preprocessing only is registration to T1w - cmd=($python "$hypvinndir/run_prediction.py" --sd "${sd}" --sid "${subject}" --reg_mode "$hypvinn_regmode" - "${hypvinn_flags[@]}" --threads "$threads_seg" --async_io --batch_size "$batch_size" --seg_log "$seg_log" - --device "$device" --viewagg_device "$viewagg" --t1) - if [[ "$run_biasfield" == "true" ]] - then - cmd+=("$norm_name") - if [[ -n "$t2" ]] ; then cmd+=(--t2 "$norm_name_t2") ; fi - else - { - echo "WARNING: We strongly recommend to *not* exclude the biasfield (--no_biasfield)" - echo " with the hypothal module!" - } | tee -a "$seg_log" - cmd+=("$t1") - if [[ -n "$t2" ]] ; then cmd+=(--t2 "$norm_name_t2") ; fi - fi - echo_quoted "${cmd[@]}" | tee -a "$seg_log" - "${wrap[@]}" "${cmd[@]}" # no tee, directly logging to $seg_log - if [[ "${PIPESTATUS[0]}" != 0 ]] - then - echo "ERROR: Hypothalamus Segmentation failed!" | tee -a "$seg_log" - exit 1 - fi - fi + start_aux_segmentations_async if [[ -n "$cerebnet_pid" ]] then @@ -1471,6 +1492,27 @@ then fi fi + if [[ -n "$hypvinn_pid" ]] + then + wait "$hypvinn_pid" + hypvinn_exit="$?" + if [[ -f "$hypvinn_async_log" ]] + then + cat "$hypvinn_async_log" >> "$seg_log" + rm -f "$hypvinn_async_log" + fi + if [[ -f "$hypvinn_async_exec_log" ]] + then + cat "$hypvinn_async_exec_log" >> "$exec_time_log" + rm -f "$hypvinn_async_exec_log" + fi + if [[ "$hypvinn_exit" != 0 ]] + then + echo "ERROR: Hypothalamus Segmentation failed!" | tee -a "$seg_log" + exit 1 + fi + fi + # if [[ ! -f "$merged_segfile" ]] # then # ln -s -r "$asegdkt_segfile" "$merged_segfile" From cf57f025b294d8a4c6721af673ddabd49df63bc5 Mon Sep 17 00:00:00 2001 From: ClePol Date: Sat, 16 May 2026 10:49:14 +0200 Subject: [PATCH 25/31] Speed up GPU segmentation tail Overlap FastSurfer-CC generation/inpainting with N4 and early stats, and raise T1 N4 shrink from 5 to 6. Validation on 114823_MR1 seg_only cuda threads=8: seg_speed_gpu_aux_async_threads8_run1 2:43.31 -> seg_speed_gpu_cc_async_n4s6_threads8_run1 2:16.02, speedup 47.29s. Output comparison against the prior GPU reference: main aseg+DKT, mask, aseg auto_noCC, CC, withCC, aseg.auto, and CerebNet segmentations were voxel-identical. orig_nu changed in 3,544,455 voxels with max absolute UCHAR delta 5 and mean absolute delta 0.130. HypVINN changed 195 segmentation voxels and 918 mask voxels. --- run_fastsurfer.sh | 95 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/run_fastsurfer.sh b/run_fastsurfer.sh index 2228c7a82..bd19fd80a 100755 --- a/run_fastsurfer.sh +++ b/run_fastsurfer.sh @@ -1088,6 +1088,51 @@ then hypvinn_pid="" hypvinn_async_log="" hypvinn_async_exec_log="" + cc_started="false" + cc_pid="" + cc_async_log="" + cc_async_exec_log="" + asegdkt_withcc_segfile="$(add_file_suffix "$asegdkt_segfile" "withCC")" + asegdkt_withcc_vinn_statsfile="$(add_file_suffix "$asegdkt_vinn_statsfile" "withCC")" + aseg_auto_statsfile="$(dirname "$aseg_vinn_statsfile")/aseg.auto.mgz" + callosum_seg_manedit="$(add_file_suffix "$callosum_seg" "manedit")" + + start_cc_inpaint_async() + { + if [[ "$run_cc_module" != "true" ]] || [[ "$cc_started" == "true" ]] + then + return + fi + cc_started="true" + cc_async_log="${seg_log%.log}.cc.async.log" + cc_async_exec_log="${exec_time_log%.log}.cc.async.log" + : > "$cc_async_log" + : > "$cc_async_exec_log" + echo "INFO: Running FastSurfer-CC asynchronously..." | tee -a "$seg_log" + ( + echo "MODULE: FastSurfer-CC Corpus Callosum processing" >> "$cc_async_exec_log" + local_callosum_seg="$callosum_seg" + cmd=($python "$CorpusCallosumDir/fastsurfer_cc.py" --sd "$sd" --sid "$subject" + "--threads" "$threads_seg" "--conformed_name" "$conformed_name" "--aseg_name" "$aseg_segfile" + "--segmentation_in_orig" "$callosum_seg" "${cc_flags[@]}") + echo_quoted "${cmd[@]}" + time_it "$cc_async_exec_log" "${cmd[@]}" + exit_code="$?" + if [[ "$exit_code" != 0 ]] ; then + echo "ERROR: FastSurferCC corpus callosum analysis failed!" + exit "$exit_code" + fi + if [[ "$edits" == "true" ]] && [[ -f "$callosum_seg_manedit" ]] ; then local_callosum_seg="$callosum_seg_manedit" ; fi + + cmd=($python "$CorpusCallosumDir/paint_cc_into_pred.py" -in_cc "$local_callosum_seg" -in_pred "$asegdkt_segfile" + "-out" "$asegdkt_withcc_segfile" "-aseg" "$aseg_auto_segfile") + if [[ "$native_image" != "false" ]] ; then cmd+=(--keepgeom) ; fi + echo_quoted "${cmd[@]}" + time_it "$cc_async_exec_log" "${cmd[@]}" + if [[ "$?" != 0 ]] ; then echo "ERROR: asegdkt cc inpainting failed!" ; exit 1 ; fi + ) > "$cc_async_log" 2>&1 & + cc_pid="$!" + } start_aux_segmentations_async() { @@ -1222,13 +1267,15 @@ then fi fi + start_cc_inpaint_async + if [[ "$run_biasfield" == "true" ]] then echo "MODULE: Biasfield correction" >> "$exec_time_log" { # this will always run, since norm_name is set to subject_dir/mri/orig_nu.mgz, if it is not passed/empty cmd=($python "${reconsurfdir}/N4_bias_correct.py" "--in" "$conformed_name" --rescale "$norm_name" - --aseg "$aseg_segfile" --threads "$threads_seg" --shrink 5) + --aseg "$aseg_segfile" --threads "$threads_seg" --shrink 6) echo "INFO: Running N4 bias-field correction..." echo_quoted "${cmd[@]}" "${wrap[@]}" "${cmd[@]}" 2>&1 @@ -1384,34 +1431,28 @@ then then # ============================= CC SEGMENTATION ============================================ - echo "MODULE: FastSurfer-CC Corpus Callosum processing" >> "$exec_time_log" - # generate file names of for the analysis - asegdkt_withcc_segfile="$(add_file_suffix "$asegdkt_segfile" "withCC")" - asegdkt_withcc_vinn_statsfile="$(add_file_suffix "$asegdkt_vinn_statsfile" "withCC")" - aseg_auto_statsfile="$(dirname "$aseg_vinn_statsfile")/aseg.auto.mgz" - # note: callosum manedit currently only affects inpainting and not internal FastSurferCC processing (surfaces etc) - callosum_seg_manedit="$(add_file_suffix "$callosum_seg" "manedit")" - # generate callosum segmentation, mesh, shape and downstream measure files - cmd=($python "$CorpusCallosumDir/fastsurfer_cc.py" --sd "$sd" --sid "$subject" - "--threads" "$threads_seg" "--conformed_name" "$conformed_name" "--aseg_name" "$aseg_segfile" - "--segmentation_in_orig" "$callosum_seg" "${cc_flags[@]}") - echo_quoted "${cmd[@]}" | tee -a "$seg_log" - "${wrap[@]}" "${cmd[@]}" 2>&1 | tee -a "$seg_log" - exit_code=${PIPESTATUS[0]} - if [[ "$exit_code" != 0 ]] ; then - echo "ERROR: FastSurferCC corpus callosum analysis failed!" | tee -a "$seg_log" - exit "$exit_code" + if [[ -n "$cc_pid" ]] + then + wait "$cc_pid" + cc_exit="$?" + if [[ -f "$cc_async_log" ]] + then + cat "$cc_async_log" >> "$seg_log" + rm -f "$cc_async_log" + fi + if [[ -f "$cc_async_exec_log" ]] + then + cat "$cc_async_exec_log" >> "$exec_time_log" + rm -f "$cc_async_exec_log" + fi + if [[ "$cc_exit" != 0 ]] + then + echo "ERROR: FastSurferCC corpus callosum analysis failed!" | tee -a "$seg_log" + exit "$cc_exit" + fi fi - if [[ "$edits" == "true" ]] && [[ -f "$callosum_seg_manedit" ]] ; then callosum_seg="$callosum_seg_manedit" ; fi - { - # add CC into aparc.DKTatlas+aseg.deep.mgz and aseg.auto.mgz as mri_cc did before. - cmd=($python "$CorpusCallosumDir/paint_cc_into_pred.py" -in_cc "$callosum_seg" -in_pred "$asegdkt_segfile" - "-out" "$asegdkt_withcc_segfile" "-aseg" "$aseg_auto_segfile") - if [[ "$native_image" != "false" ]] ; then cmd+=(--keepgeom) ; fi - echo_quoted "${cmd[@]}" - "${wrap[@]}" "${cmd[@]}" - if [[ "${PIPESTATUS[0]}" != 0 ]] ; then echo "ERROR: asegdkt cc inpainting failed!" ; exit 1 ; fi + { if [[ "$run_biasfield" == "true" ]] then cmd=($python "${fastsurfercnndir}/segstats.py" --segfile "$asegdkt_withcc_segfile" --normfile "$norm_name" From 8a8975b0f9b4c28144f891dc4ae85ddff944f520 Mon Sep 17 00:00:00 2001 From: ClePol Date: Sat, 16 May 2026 11:05:10 +0200 Subject: [PATCH 26/31] Tune GPU auxiliary segmentation tail Use 40 N4 iterations after shrink=6 and use HypVINN batch 4 for the default CUDA batch=1 path. CerebNet remains on the requested global batch size to avoid the batch-size output drift seen with global batch 4. Validation on 114823_MR1 seg_only cuda threads=8: seg_speed_gpu_cc_async_n4s6_threads8_run1 2:16.02 -> seg_speed_gpu_hypbatch4_n4iter40_threads8_run1 2:08.61, speedup 7.41s. Output comparison against the previous reference: main aseg+DKT, mask, aseg.auto_noCCseg, CC, withCC, aseg.auto, and CerebNet segmentations were voxel-identical. orig_nu changed in 2,913,929 voxels with max absolute UCHAR delta 4 and mean absolute delta 0.104. HypVINN changed 153 segmentation voxels and 730 mask voxels. --- run_fastsurfer.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/run_fastsurfer.sh b/run_fastsurfer.sh index bd19fd80a..e0813addb 100755 --- a/run_fastsurfer.sh +++ b/run_fastsurfer.sh @@ -1088,6 +1088,11 @@ then hypvinn_pid="" hypvinn_async_log="" hypvinn_async_exec_log="" + hypvinn_batch_size="${FASTSURFER_HYPVINN_BATCH_SIZE:-$batch_size}" + if [[ -z "${FASTSURFER_HYPVINN_BATCH_SIZE+x}" ]] && [[ "$batch_size" == "1" ]] && [[ "$device" == cuda* ]] + then + hypvinn_batch_size="4" + fi cc_started="false" cc_pid="" cc_async_log="" @@ -1182,7 +1187,7 @@ then then local -a cmd_hyp cmd_hyp_async cmd_hyp=($python "$hypvinndir/run_prediction.py" --sd "${sd}" --sid "${subject}" --reg_mode "$hypvinn_regmode" - "${hypvinn_flags[@]}" --threads "$threads_seg" --async_io --batch_size "$batch_size" --seg_log "$seg_log" + "${hypvinn_flags[@]}" --threads "$threads_seg" --async_io --batch_size "$hypvinn_batch_size" --seg_log "$seg_log" --device "$device" --viewagg_device "$viewagg" --t1) if [[ "$run_biasfield" == "true" ]] then @@ -1275,7 +1280,7 @@ then { # this will always run, since norm_name is set to subject_dir/mri/orig_nu.mgz, if it is not passed/empty cmd=($python "${reconsurfdir}/N4_bias_correct.py" "--in" "$conformed_name" --rescale "$norm_name" - --aseg "$aseg_segfile" --threads "$threads_seg" --shrink 6) + --aseg "$aseg_segfile" --threads "$threads_seg" --shrink 6 --numiter 40) echo "INFO: Running N4 bias-field correction..." echo_quoted "${cmd[@]}" "${wrap[@]}" "${cmd[@]}" 2>&1 From 611e5b4f7a79df6fad812ca6fb71b64056734fe6 Mon Sep 17 00:00:00 2001 From: ClePol Date: Sun, 17 May 2026 01:52:42 +0200 Subject: [PATCH 27/31] Overlap surface reconstruction with segmentation tail Start recon-surf as soon as CC inpainting has produced aseg.auto.mgz, while the remaining segmentation stats and auxiliary HypVINN/CerebNet outputs continue in parallel. 114823_MR1 validation: 34:26.62 vs 35:04.86 reference, speedup 38.24s. Output comparison showed no voxel differences, no normal surface geometry/value differences, identical callosum.surf geometry, and identical .stats data bodies; remaining differences are path/timestamp/header metadata. --- run_fastsurfer.sh | 87 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/run_fastsurfer.sh b/run_fastsurfer.sh index e0813addb..fc9492bbe 100755 --- a/run_fastsurfer.sh +++ b/run_fastsurfer.sh @@ -1101,6 +1101,48 @@ then asegdkt_withcc_vinn_statsfile="$(add_file_suffix "$asegdkt_vinn_statsfile" "withCC")" aseg_auto_statsfile="$(dirname "$aseg_vinn_statsfile")/aseg.auto.mgz" callosum_seg_manedit="$(add_file_suffix "$callosum_seg" "manedit")" + surf_async_started="false" + surf_pid="" + + start_surface_recon_async() + { + if [[ "$run_surf_pipeline" != "true" ]] || [[ "$surf_async_started" == "true" ]] + then + return + fi + surf_async_started="true" + if [[ "$threads_surf" == "max" ]]; then threads_surf="$(nproc)" ; fi + if [[ "$threads_surf" == "0" ]]; then threads_surf=1 ; fi + echo "SURFACE RECONSTRUCTION PIPELINE" >> "$exec_time_log" + echo "===============================" >> "$exec_time_log" + cmd=("./recon-surf.sh" --sid "$subject" --sd "$sd" --t1 "$conformed_name" --mask_name "$mask_name" + --asegdkt_segfile "$asegdkt_segfile" --threads "$threads_surf" --py "$python" "${surf_flags[@]}") + { + echo "cd $reconsurfdir" + echo_quoted "${cmd[@]}" + } | tee -a "$seg_log" + ( + pushd "$reconsurfdir" > /dev/null || exit 1 + "${wrap[@]}" "${cmd[@]}" + ) & + surf_pid="$!" + } + + wait_surface_recon_async() + { + if [[ "$surf_async_started" != "true" ]] + then + return + fi + wait "$surf_pid" + surf_exit="$?" + if [[ "$surf_exit" != 0 ]] + then + echo "ERROR: Surface reconstruction failed! See recon-surf log: $subject_dir/scripts/recon-surf.log" | \ + tee -a "$seg_log" + exit "$surf_exit" + fi + } start_cc_inpaint_async() { @@ -1457,6 +1499,10 @@ then fi fi + # recon-surf only depends on the completed aseg.auto.mgz and not on the + # segmentation stats or auxiliary HypVINN/CerebNet outputs below. + start_surface_recon_async + { if [[ "$run_biasfield" == "true" ]] then @@ -1572,26 +1618,31 @@ fi if [[ "$run_surf_pipeline" == "true" ]] then - echo "SURFACE RECONSTRUCTION PIPELINE" >> "$exec_time_log" - echo "===============================" >> "$exec_time_log" - - if [[ "$threads_surf" == "max" ]]; then threads_surf="$(nproc)" ; fi - if [[ "$threads_surf" == "0" ]]; then threads_surf=1 ; fi - # ============= Running recon-surf (surfaces, thickness etc.) =============== - # use recon-surf to create surface models based on the FastSurferCNN segmentation. - pushd "$reconsurfdir" > /dev/null || exit 1 - echo "cd $reconsurfdir" | tee -a "$seg_log" - cmd=("./recon-surf.sh" --sid "$subject" --sd "$sd" --t1 "$conformed_name" --mask_name "$mask_name" - --asegdkt_segfile "$asegdkt_segfile" --threads "$threads_surf" --py "$python" "${surf_flags[@]}") - echo_quoted "${cmd[@]}" | tee -a "$seg_log" - "${wrap[@]}" "${cmd[@]}" # no tee, this gets logged to recon-surf.log from inside recon-surf.sh - if [[ "${PIPESTATUS[0]}" != 0 ]] + if [[ "${surf_async_started:-false}" == "true" ]] then - echo "ERROR: Surface reconstruction failed! See recon-surf log: $subject_dir/scripts/recon-surf.log" | \ - tee -a "$seg_log" - exit 1 + wait_surface_recon_async + else + echo "SURFACE RECONSTRUCTION PIPELINE" >> "$exec_time_log" + echo "===============================" >> "$exec_time_log" + + if [[ "$threads_surf" == "max" ]]; then threads_surf="$(nproc)" ; fi + if [[ "$threads_surf" == "0" ]]; then threads_surf=1 ; fi + # ============= Running recon-surf (surfaces, thickness etc.) =============== + # use recon-surf to create surface models based on the FastSurferCNN segmentation. + pushd "$reconsurfdir" > /dev/null || exit 1 + echo "cd $reconsurfdir" | tee -a "$seg_log" + cmd=("./recon-surf.sh" --sid "$subject" --sd "$sd" --t1 "$conformed_name" --mask_name "$mask_name" + --asegdkt_segfile "$asegdkt_segfile" --threads "$threads_surf" --py "$python" "${surf_flags[@]}") + echo_quoted "${cmd[@]}" | tee -a "$seg_log" + "${wrap[@]}" "${cmd[@]}" # no tee, this gets logged to recon-surf.log from inside recon-surf.sh + if [[ "${PIPESTATUS[0]}" != 0 ]] + then + echo "ERROR: Surface reconstruction failed! See recon-surf log: $subject_dir/scripts/recon-surf.log" | \ + tee -a "$seg_log" + exit 1 + fi + popd > /dev/null || return fi - popd > /dev/null || return fi # ============= Running LIT Postprocessing ==================================== From b64278cc2d6d25a91e238e04c7514e633eb631e1 Mon Sep 17 00:00:00 2001 From: ClePol Date: Sun, 17 May 2026 03:51:22 +0200 Subject: [PATCH 28/31] Handle embedded timing markers in recon-surf logs The surface scheduling work can place FreeSurfer progress text before @#@FSTIME markers on the same physical log line. Anchor timing parsing on the marker token so recon-surf_times.yaml is still generated. Validation: reran extract_recon_surf_time_info.py on surf_speed_surface_tail_overlap_threads8_run1/114823_MR1; output YAML was generated successfully. No processing output changes. --- recon_surf/utils/extract_recon_surf_time_info.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/recon_surf/utils/extract_recon_surf_time_info.py b/recon_surf/utils/extract_recon_surf_time_info.py index aebee1709..8654561e6 100644 --- a/recon_surf/utils/extract_recon_surf_time_info.py +++ b/recon_surf/utils/extract_recon_surf_time_info.py @@ -142,17 +142,18 @@ def get_recon_all_stage_duration(line: str, previous_datetime_str: str) -> float ## Parse out cmd name, start time, and duration: entry_dict = {} - cmd_name = line_parts[2] + timestamp_index = line_parts.index(timestamp_feature) + cmd_name = line_parts[timestamp_index + 2] if cmd_name in filtered_cmds: continue - date_time_str = line_parts[1] + date_time_str = line_parts[timestamp_index + 1] start_time = date_time_str[11:] start_date_time = datetime.strptime( date_time_str, "%Y:%m:%d:%H:%M:%S" ) - assert line_parts[5] == "e" - cmd_duration = float(line_parts[6]) + assert line_parts[timestamp_index + 5] == "e" + cmd_duration = float(line_parts[timestamp_index + 6]) end_date_time = start_date_time + timedelta(0, float(cmd_duration)) end_date_time_str = end_date_time.strftime("%Y:%m:%d:%H:%M:%S") From f3691cb62307127c3550acfc8a231783bbd577c6 Mon Sep 17 00:00:00 2001 From: ClePol Date: Sun, 17 May 2026 03:51:40 +0200 Subject: [PATCH 29/31] Overlap independent surface tail work Overlap cortex+hipamyg label creation, inflate/curvature products, mapped anatomical stats, and pctsurfcon work with independent downstream surface steps. This keeps the same FreeSurfer commands and waits at the points where their outputs are needed. Validation on 114823_MR1 (--threads 8, cuda): - reference surf_speed_early_surface_after_cc_clean_threads8_run1: 34:26.62 - optimized surf_speed_surface_tail_overlap_threads8_run1: 33:52.75 - speedup: 33.87 seconds - compare_surface_outputs: no surface geometry or morph value differences; volume outputs unchanged by the comparator. Differences are logs/scripts/path metadata/stats headers plus callosum.surf byte hash. - stats data bodies are identical after ignoring comment/header lines. - callosum.surf geometry is identical: coords_equal=True, faces_equal=True, coord_max_abs=0.0. --- recon_surf/recon-surf.sh | 64 +++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 1937f2690..51b338d80 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -944,19 +944,25 @@ for hemi in lh rh ; do echo "echo \"\"" } | tee -a "$CMDF" - # create cortex label (1min) - # create nicer inflated surface from topo fixed (not needed, just later for visualization) - # identical for long processing. Run the underlying commands directly to avoid - # repeated recon-all wrapper/update-check overhead. + # Create the cortex labels and the visualization/surfreg inflated surface. + # Only the cortex label is needed before sampling the DKT annotation. The + # cortex+hipamyg label is needed later for pial placement, and the inflated + # products are needed later for curvstats/sphere, so overlap those independent + # commands with sample_parc and white placement. echo "pushd $mdir > /dev/null || exit 1" >> "$CMDF" cmd="mri_label2label --label-cortex ../surf/$hemi.white.preaparc aseg.presurf.mgz 0 ../label/$hemi.cortex.label" RunIt "$cmd" "$LF" "$CMDF" echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" echo "echo \"mri_label2label --label-cortex ../surf/${hemi}.white.preaparc aseg.presurf.mgz 0 ../label/${hemi}.cortex.label\" > $SUBJECTS_DIR/$subject/touch/${hemi}.cortex.touch" >> "$CMDF" + echo "popd > /dev/null || exit 1" >> "$CMDF" + echo "(" >> "$CMDF" + echo "pushd $mdir > /dev/null || exit 1" >> "$CMDF" cmd="mri_label2label --label-cortex ../surf/$hemi.white.preaparc aseg.presurf.mgz 1 ../label/$hemi.cortex+hipamyg.label" RunIt "$cmd" "$LF" "$CMDF" echo "echo \"mri_label2label --label-cortex ../surf/${hemi}.white.preaparc aseg.presurf.mgz 1 ../label/${hemi}.cortex+hipamyg.label\" > $SUBJECTS_DIR/$subject/touch/${hemi}.cortex+hipamyg.touch" >> "$CMDF" echo "popd > /dev/null || exit 1" >> "$CMDF" + echo ") & cortex_hipamyg_pid=\$!" >> "$CMDF" + echo "(" >> "$CMDF" cmd="mris_smooth -n 3 -nw -seed 1234 $sdir/$hemi.white.preaparc $sdir/$hemi.smoothwm" RunIt "$cmd" "$LF" "$CMDF" echo "echo \"mris_smooth -n 3 -nw -seed 1234 ../surf/${hemi}.white.preaparc ../surf/${hemi}.smoothwm\" > $SUBJECTS_DIR/$subject/touch/${hemi}.smoothwm2.touch" >> "$CMDF" @@ -979,6 +985,7 @@ for hemi in lh rh ; do RunIt "$cmd" "$LF" "$CMDF" echo "echo \"mris_curvature -seed 1234 -thresh .999 -n -a 5 -w ${hemi}.inflated\" > $SUBJECTS_DIR/$subject/touch/${hemi}.inflate.H.K.touch" >> "$CMDF" echo "popd > /dev/null || exit 1" >> "$CMDF" + echo ") & inflate_curv_pid=\$!" >> "$CMDF" # ============================= MAP-DKT ========================================================== @@ -1009,6 +1016,11 @@ for hemi in lh rh ; do # placement, so it is deferred below to overlap with ribbon construction. if [[ "$fsaparc" == "true" ]] then + echo "if [[ -n \"\${inflate_curv_pid:-}\" ]] ; then" >> "$CMDF" + echo " wait \"\$inflate_curv_pid\"" >> "$CMDF" + echo " if [[ \$? != 0 ]] ; then exit 1 ; fi" >> "$CMDF" + echo " unset inflate_curv_pid" >> "$CMDF" + echo "fi" >> "$CMDF" { echo "echo \" \"" echo "echo \"============ Creating surfaces $hemi - FS sphere, surfreg ===============\"" @@ -1158,6 +1170,8 @@ for hemi in lh rh ; do # CREAT PIAL SURFACE # 4 min compute pial : + echo "wait \"\$cortex_hipamyg_pid\"" >> "$CMDF" + echo "if [[ \$? != 0 ]] ; then exit 1 ; fi" >> "$CMDF" cmd="mris_place_surface --adgws-in ../surf/autodet.gw.stats.${hemi}.dat --seg aseg.presurf.mgz \ --threads $threads_hemi --wm wm.mgz --invol brain.finalsurfs.mgz --$hemi --o ../surf/${hemi}.pial.T1 \ --pial --nsmooth 0 --rip-label ../label/${hemi}.cortex+hipamyg.label \ @@ -1205,6 +1219,11 @@ for hemi in lh rh ; do RunIt "$cmd" "$LF" "$CMDF" cmd="mris_convert --volume $subject $hemi $sdir/$hemi.volume" RunIt "$cmd" "$LF" "$CMDF" + echo "if [[ -n \"\${inflate_curv_pid:-}\" ]] ; then" >> "$CMDF" + echo " wait \"\$inflate_curv_pid\"" >> "$CMDF" + echo " if [[ \$? != 0 ]] ; then exit 1 ; fi" >> "$CMDF" + echo " unset inflate_curv_pid" >> "$CMDF" + echo "fi" >> "$CMDF" cmd="mris_curvature_stats -m --writeCurvatureFiles -G -o $statsdir/$hemi.curv.stats -F smoothwm $subject $hemi curv sulc" RunIt "$cmd" "$LF" "$CMDF" echo "mkdir -p $SUBJECTS_DIR/$subject/touch" >> "$CMDF" @@ -1497,23 +1516,22 @@ then # ============================= MAPPED SURF-STATS ========================================= - MAPPED_STATS_CMDF="$SUBJECTS_DIR/$subject/scripts/mapped_stats.cmdf" - rm -f "$MAPPED_STATS_CMDF" - { - echo "#!/bin/bash" - echo "echo \"\"" - echo "echo \"===================== Creating surfaces - mapped stats =========================\"" - echo "echo \"\"" - } > "$MAPPED_STATS_CMDF" - # 2x18sec create stats from mapped aparc. This only depends on completed surfaces and # ribbon outputs, so it can overlap with the later volume-labeling/statistics chain. for hemi in lh rh do + MAPPED_STATS_CMDF="$SUBJECTS_DIR/$subject/scripts/mapped_stats.$hemi.cmdf" + rm -f "$MAPPED_STATS_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces $hemi - mapped stats =========================\"" + echo "echo \"\"" + } > "$MAPPED_STATS_CMDF" cmd="mris_anatomical_stats -th3 -mgz -cortex $ldir/$hemi.cortex.label -f $statsdir/$hemi.aparc.DKTatlas.mapped.stats -b -a $ldir/$hemi.aparc.DKTatlas.mapped.annot -c $ldir/aparc.annot.mapped.ctab $subject $hemi white" RunIt "$cmd" "$LF" "$MAPPED_STATS_CMDF" + start_async_cmdf "$MAPPED_STATS_CMDF" done - start_async_cmdf "$MAPPED_STATS_CMDF" if [[ "$fssurfreg" == "true" && "$ASYNC_BALABELS_STARTED" != "true" ]] then @@ -1548,19 +1566,19 @@ then softlink_or_copy "rh.aparc.DKTatlas.mapped.annot" "rh.aparc.annot" "$LF" popd > /dev/null || (echo "Could not popd" ; exit 1) - PCTSURFCON_CMDF="$SUBJECTS_DIR/$subject/scripts/pctsurfcon.cmdf" - rm -f "$PCTSURFCON_CMDF" - { - echo "#!/bin/bash" - echo "echo \"\"" - echo "echo \"===================== Creating surfaces - pctsurfcon =====================\"" - echo "echo \"\"" - } > "$PCTSURFCON_CMDF" for hemi in lh rh ; do + PCTSURFCON_CMDF="$SUBJECTS_DIR/$subject/scripts/pctsurfcon.$hemi.cmdf" + rm -f "$PCTSURFCON_CMDF" + { + echo "#!/bin/bash" + echo "echo \"\"" + echo "echo \"===================== Creating surfaces $hemi - pctsurfcon =====================\"" + echo "echo \"\"" + } > "$PCTSURFCON_CMDF" cmd="pctsurfcon --s $subject --$hemi-only" RunIt "$cmd" "$LF" "$PCTSURFCON_CMDF" + start_async_cmdf "$PCTSURFCON_CMDF" done - start_async_cmdf "$PCTSURFCON_CMDF" # -hyporelabel creates aseg.presurf.hypos.mgz from aseg.presurf.mgz. # -apas2aseg creates aseg.mgz by editing aseg.presurf.hypos.mgz with surfaces. From 678a3271b18bd055d5f3616a63d4847fefb9321e Mon Sep 17 00:00:00 2001 From: ClePol Date: Sun, 17 May 2026 07:50:45 +0200 Subject: [PATCH 30/31] Overlap Talairach and defer tail wait Run Talairach registration asynchronously and use orig_nu.mgz as the normalization source while Talairach only updates transform metadata. Also defer the final async command-file wait and aparc cleanup until after the cortex-label surf2volseg pass so independent tail work can overlap. Validated on 114823_MR1 with Docker --user 4758:1001, --threads 8, --device cuda. Wall time improved from 33:52.75 to 33:04.89, a 47.86s speedup versus the previous optimized reference. Output comparison versus surf_speed_surface_tail_overlap_threads8_run1 showed no changed MRI volumes or primary surface geometry. Stats bodies are identical; callosum.surf byte hash differs but coordinates/faces are identical. Remaining differences are logs, command files, stats headers, run paths, and Talairach path/timestamp text. --- recon_surf/recon-surf.sh | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/recon_surf/recon-surf.sh b/recon_surf/recon-surf.sh index 51b338d80..4666e2b70 100755 --- a/recon_surf/recon-surf.sh +++ b/recon_surf/recon-surf.sh @@ -675,6 +675,9 @@ fi # ============================= TALAIRACH ============================================== +TALAIRACH_PID="" +TALAIRACH_ASYNC="false" +norm_source="$mdir/nu.mgz" if [[ ! -f "$mdir/transforms/talairach.lta" ]] || [[ ! -f "$mdir/transforms/talairach_with_skull.lta" ]] ; then # if talairach registration is missing, compute it here # this also creates talairach.auto.xfm and talairach.xfm and talairach.xfm.lta @@ -691,7 +694,13 @@ if [[ ! -f "$mdir/transforms/talairach.lta" ]] || [[ ! -f "$mdir/transforms/tala echo " " } | tee -a "$LF" echo_quoted "${cmda[@]}" - "${cmda[@]}" + "${cmda[@]}" & + TALAIRACH_PID=$! + TALAIRACH_ASYNC="true" + # mri_add_xform_to_header only changes transform metadata for nu.mgz. The + # following normalization/masking steps are voxel-identical when run from + # orig_nu.mgz, so overlap them with Talairach registration. + norm_source="$mdir/orig_nu.mgz" fi @@ -704,7 +713,7 @@ fi # the difference between nu and orig_nu is the fact that nu has the talairach-registration header # create norm by masking nu (supports manedit-ed mask) -cmda=(mri_mask "$mdir/nu.mgz" "$mask" "$mdir/norm.mgz") +cmda=(mri_mask "$norm_source" "$mask" "$mdir/norm.mgz") run_it "$LF" "${cmda[@]}" if [[ "$get_t1" == "true" ]] then @@ -757,6 +766,17 @@ else # cross and base RunIt "$cmd" "$LF" fi +if [[ "$TALAIRACH_ASYNC" == "true" ]] +then + echo "Waiting for async Talairach registration to complete." | tee -a "$LF" + wait "$TALAIRACH_PID" + if [[ $? != 0 ]] + then + echo "ERROR: Async Talairach registration failed!" | tee -a "$LF" + exit 1 + fi +fi + # ======= # ================================================== SURFACES ============================================================== @@ -1598,12 +1618,6 @@ then echo "mri_surf2volseg --o aseg.mgz --i aseg.presurf.hypos.mgz --fix-presurf-with-ribbon ../mri/ribbon.mgz --threads $threads --lh-cortex-mask ../label/lh.cortex.label --lh-white ../surf/lh.white --lh-pial ../surf/lh.pial --rh-cortex-mask ../label/rh.cortex.label --rh-white ../surf/rh.white --rh-pial ../surf/rh.pial" > "$SUBJECTS_DIR/$subject/touch/apas2aseg.touch" popd > /dev/null || (echo "Could not popd" ; exit 1) - wait_async_cmdfs - - pushd "$ldir" > /dev/null || (echo "Could not cd to $ldir" ; exit 1) - cmd="rm *h.aparc.annot" - RunIt "$cmd" "$LF" - popd > /dev/null || (echo "Could not popd" ; exit 1) fi ASYNC_ASEG_STATS="false" @@ -1691,6 +1705,13 @@ then cmd="mri_surf2volseg --o $mdir/aparc.DKTatlas+aseg.mapped.mgz --label-cortex --i $mdir/aseg.mgz --threads $surf2volseg_threads --hashres 4 --lh-annot $ldir/lh.aparc.DKTatlas.mapped.annot 1000 --lh-cortex-mask $ldir/lh.cortex.label --lh-white $sdir/lh.white --lh-pial $sdir/lh.pial --rh-annot $ldir/rh.aparc.DKTatlas.mapped.annot 2000 --rh-cortex-mask $ldir/rh.cortex.label --rh-white $sdir/rh.white --rh-pial $sdir/rh.pial" RunIt "$cmd" "$LF" wait_async_cmdfs + if [[ "$fsaparc" == "false" ]] + then + pushd "$ldir" > /dev/null || (echo "Could not cd to $ldir" ; exit 1) + cmd="rm *h.aparc.annot" + RunIt "$cmd" "$LF" + popd > /dev/null || (echo "Could not popd" ; exit 1) + fi cmda=($python "${binpath}merge_wmparc_aparc.py" --aseg "$mdir/aseg.mgz" --aparc "$mdir/aparc.DKTatlas+aseg.mapped.mgz" From 572fb9eee007e5fb65b38ced1982a87f950757ea Mon Sep 17 00:00:00 2001 From: ClePol Date: Fri, 22 May 2026 14:07:32 +0200 Subject: [PATCH 31/31] fix ruff error --- FastSurferCNN/run_prediction.py | 2 +- HypVINN/inference.py | 7 ++++++- recon_surf/cropped_mris_volmask.py | 11 +++++++---- recon_surf/cropped_volume.py | 2 +- recon_surf/merge_wmparc_aparc.py | 1 - recon_surf/spherically_project_wrapper.py | 3 ++- 6 files changed, 17 insertions(+), 9 deletions(-) diff --git a/FastSurferCNN/run_prediction.py b/FastSurferCNN/run_prediction.py index 6e1726b02..38d34c2e4 100644 --- a/FastSurferCNN/run_prediction.py +++ b/FastSurferCNN/run_prediction.py @@ -48,8 +48,8 @@ from FastSurferCNN.utils.common import SubjectDirectory, SubjectList, find_device, handle_cuda_memory_exception from FastSurferCNN.utils.load_config import load_config from FastSurferCNN.utils.parallel import SerialExecutor, pipeline -from FastSurferCNN.utils.torchscript import cpu_torch_threads from FastSurferCNN.utils.parser_defaults import SubjectDirectoryConfig +from FastSurferCNN.utils.torchscript import cpu_torch_threads LOGGER = logging.getLogger(__name__) diff --git a/HypVINN/inference.py b/HypVINN/inference.py index bc1dc6cc9..f7b4a1b8f 100644 --- a/HypVINN/inference.py +++ b/HypVINN/inference.py @@ -25,7 +25,12 @@ import FastSurferCNN.utils.logging as logging from FastSurferCNN.data_loader.augmentation import ToTensorTest from FastSurferCNN.utils.common import find_device -from FastSurferCNN.utils.torchscript import cpu_torch_threads, env_flag_enabled, should_trace_cpu_inference, trace_for_inference +from FastSurferCNN.utils.torchscript import ( + cpu_torch_threads, + env_flag_enabled, + should_trace_cpu_inference, + trace_for_inference, +) from HypVINN.data_loader.data_utils import hypo_map_prediction_sagittal2full from HypVINN.data_loader.dataset import HypVINNDataset from HypVINN.models.networks import build_model diff --git a/recon_surf/cropped_mris_volmask.py b/recon_surf/cropped_mris_volmask.py index f72d58369..e497a271a 100755 --- a/recon_surf/cropped_mris_volmask.py +++ b/recon_surf/cropped_mris_volmask.py @@ -11,12 +11,13 @@ import nibabel as nib import numpy as np -from nibabel.freesurfer.io import read_geometry - from cropped_volume import crop_slices, freesurfer_env, load_volume, save_volume +from nibabel.freesurfer.io import read_geometry -def _surface_voxel_bounds(subject_dir: Path, hemi: str, img: nib.spatialimages.SpatialImage) -> tuple[np.ndarray, np.ndarray]: +def _surface_voxel_bounds( + subject_dir: Path, hemi: str, img: nib.spatialimages.SpatialImage +) -> tuple[np.ndarray, np.ndarray]: inv = np.linalg.inv(img.affine) coords = [] for surface in ("white", "pial"): @@ -27,7 +28,9 @@ def _surface_voxel_bounds(subject_dir: Path, hemi: str, img: nib.spatialimages.S return np.floor(points.min(axis=0)).astype(int), (np.ceil(points.max(axis=0)) + 1).astype(int) -def _bounds(shape: tuple[int, ...], surface_start: np.ndarray, surface_stop: np.ndarray, margin: int) -> tuple[np.ndarray, np.ndarray]: +def _bounds( + shape: tuple[int, ...], surface_start: np.ndarray, surface_stop: np.ndarray, margin: int +) -> tuple[np.ndarray, np.ndarray]: start = surface_start stop = surface_stop start = np.maximum(0, start - margin) diff --git a/recon_surf/cropped_volume.py b/recon_surf/cropped_volume.py index bcb8ee441..eec774114 100644 --- a/recon_surf/cropped_volume.py +++ b/recon_surf/cropped_volume.py @@ -48,7 +48,7 @@ def bounds_from_mask(mask: np.ndarray, margin: int) -> tuple[np.ndarray, np.ndar def crop_slices(start: np.ndarray, stop: np.ndarray) -> tuple[slice, ...]: - return tuple(slice(int(s), int(e)) for s, e in zip(start, stop)) + return tuple(slice(int(s), int(e)) for s, e in zip(start, stop, strict=True)) def freesurfer_env() -> dict[str, str]: diff --git a/recon_surf/merge_wmparc_aparc.py b/recon_surf/merge_wmparc_aparc.py index b7ca38555..5fc85233d 100644 --- a/recon_surf/merge_wmparc_aparc.py +++ b/recon_surf/merge_wmparc_aparc.py @@ -5,7 +5,6 @@ import nibabel as nib import numpy as np - WM_AND_HYPO_LABELS = (2, 41, 77, 78, 79, 87, 88, 89) diff --git a/recon_surf/spherically_project_wrapper.py b/recon_surf/spherically_project_wrapper.py index 8d3ccd059..2e09353a2 100644 --- a/recon_surf/spherically_project_wrapper.py +++ b/recon_surf/spherically_project_wrapper.py @@ -95,9 +95,10 @@ def setup_options(): try: from os import environ - from recon_surf.spherically_project import spherically_project_surface from nibabel.freesurfer.io import read_geometry + from recon_surf.spherically_project import spherically_project_surface + source_surface = opts.sd / opts.subject / "surf" / f"{opts.hemi}.smoothwm.nofix" projected_surface = opts.sd / opts.subject / "surf" / f"{opts.hemi}.qsphere.nofix" print(f"Reading in surface: {source_surface} ...")