diff --git a/client/bundle.css b/client/bundle.css new file mode 100644 index 000000000..b2f0abd65 --- /dev/null +++ b/client/bundle.css @@ -0,0 +1,477 @@ +.event-chart { + position: relative; + height: calc(100% - 10px); + margin: 5px 0; + overflow-y: auto; + overflow-x: hidden; +} +.event-chart .tooltip { + position: absolute; + background: black; + border: 1px solid white; + padding: 0px 5px; + font-size: 14px; + z-index: 2; +} + +.controls { + position: absolute; +} + +.video-annotator { + position: relative; + left: 0; + right: 0; + top: 0; + bottom: 0; + z-index: 0; + display: flex; + flex-direction: column; +} +.video-annotator .geojs-map { + margin: 2px; +} +.video-annotator .geojs-map.geojs-map:focus { + outline: none; +} +.video-annotator .playback-container { + flex: 1; +} +.video-annotator .loadingSpinnerContainer { + z-index: 20; + margin: 0; + position: absolute; + top: 50%; + left: 50%; + -ms-transform: translate(-50%, -50%); + transform: translate(-50%, -50%); +} +.video-annotator .geojs-map.annotation-input { + cursor: inherit; +} + +.selected-camera { + box-sizing: content-box; +} +.selected-camera .geojs-map { + outline: 3px cyan dashed; +} +.selected-camera .geojs-map.geojs-map:focus { + outline: 3px cyan dashed; +} + +.imageCursor { + z-index: 10; + position: fixed; + backface-visibility: hidden; + top: 0; + left: 0; + pointer-events: none; +} + +.controls { + bottom: 0; +} + +.controls { + position: absolute; +} + +.video-annotator { + position: relative; + left: 0; + right: 0; + top: 0; + bottom: 0; + z-index: 0; + display: flex; + flex-direction: column; +} +.video-annotator .geojs-map { + margin: 2px; +} +.video-annotator .geojs-map.geojs-map:focus { + outline: none; +} +.video-annotator .playback-container { + flex: 1; +} +.video-annotator .loadingSpinnerContainer { + z-index: 20; + margin: 0; + position: absolute; + top: 50%; + left: 50%; + -ms-transform: translate(-50%, -50%); + transform: translate(-50%, -50%); +} +.video-annotator 
.geojs-map.annotation-input { + cursor: inherit; +} + +.selected-camera { + box-sizing: content-box; +} +.selected-camera .geojs-map { + outline: 3px cyan dashed; +} +.selected-camera .geojs-map.geojs-map:focus { + outline: 3px cyan dashed; +} + +.imageCursor { + z-index: 10; + position: fixed; + backface-visibility: hidden; + top: 0; + left: 0; + pointer-events: none; +} + +.controls { + bottom: 0; +} + +.border-radius[data-v-77dee125] { + border: 1px solid #888888; + padding: 2px 5px; + border-radius: 5px; +} + +.line-chart { + height: 100%; +} +.line-chart .line { + fill: none; + stroke-width: 1.5px; +} +.line-chart .axis-y { + font-size: 12px; +} +.line-chart .axis-y g:first-of-type, +.line-chart .axis-y g:last-of-type { + display: none; +} +.line-chart .tooltip { + position: absolute; + background: black; + border: 1px solid white; + padding: 0px 5px; + font-size: 14px; +} + +.timeline .tick { + shape-rendering: crispEdges; + font-size: 12px; + stroke-opacity: 0.5; + stroke-dasharray: 2, 2; +} + +.timeline[data-v-0d0fe2ba] { + min-height: 175px; + position: relative; + display: flex; + flex-direction: column; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] { + flex: 1; + position: relative; + overflow: hidden; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .hand[data-v-0d0fe2ba] { + position: absolute; + top: 0; + width: 0; + height: 100%; + border-left: 1px solid #299be3; + z-index: 10; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-line[data-v-0d0fe2ba] { + position: absolute; + top: 0; + width: 0; + height: 100%; + z-index: 2; + cursor: col-resize; + pointer-events: auto; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-tooltip[data-v-0d0fe2ba] { + position: absolute; + top: 30px; + transform: translateX(-50%); + background-color: rgba(0, 0, 0, 0.8); + color: white; + padding: 4px 8px; + border-radius: 4px; + font-size: 12px; + white-space: nowrap; + pointer-events: none; + z-index: 20; 
+} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-start-line[data-v-0d0fe2ba] { + border-left: 3px solid #4caf50; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-end-line[data-v-0d0fe2ba] { + border-left: 3px solid #f44336; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .time-filter-dimming[data-v-0d0fe2ba] { + position: absolute; + top: 0; + height: 100%; + background-color: rgba(0, 0, 0, 0.3); + pointer-events: none; + z-index: 1; +} +.timeline[data-v-0d0fe2ba] .work-area[data-v-0d0fe2ba] .child[data-v-0d0fe2ba] { + position: absolute; + top: 0; + bottom: 17px; + left: 0; + right: 0; + z-index: 0; +} +.timeline[data-v-0d0fe2ba] .minimap[data-v-0d0fe2ba] { + height: 10px; +} +.timeline[data-v-0d0fe2ba] .minimap[data-v-0d0fe2ba] .fill[data-v-0d0fe2ba] { + position: relative; + height: 100%; + background-color: #80c6e8; +} + +.controls { + position: absolute; +} + +.video-annotator { + position: relative; + left: 0; + right: 0; + top: 0; + bottom: 0; + z-index: 0; + display: flex; + flex-direction: column; +} +.video-annotator .geojs-map { + margin: 2px; +} +.video-annotator .geojs-map.geojs-map:focus { + outline: none; +} +.video-annotator .playback-container { + flex: 1; +} +.video-annotator .loadingSpinnerContainer { + z-index: 20; + margin: 0; + position: absolute; + top: 50%; + left: 50%; + -ms-transform: translate(-50%, -50%); + transform: translate(-50%, -50%); +} +.video-annotator .geojs-map.annotation-input { + cursor: inherit; +} + +.selected-camera { + box-sizing: content-box; +} +.selected-camera .geojs-map { + outline: 3px cyan dashed; +} +.selected-camera .geojs-map.geojs-map:focus { + outline: 3px cyan dashed; +} + +.imageCursor { + z-index: 10; + position: fixed; + backface-visibility: hidden; + top: 0; + left: 0; + pointer-events: none; +} + +.controls { + bottom: 0; +} + +.input-box { + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 4px; + padding: 0 6px; + color: white; +} + 
+.trackNumber { + font-family: monospace; + max-width: 80px; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} +.trackNumber:hover { + cursor: pointer; + font-weight: bolder; + text-decoration: underline; +} + +.border-highlight[data-v-0d46f934] { + border-bottom: 1px solid gray; +} + +.type-checkbox[data-v-0d46f934] { + max-width: 80%; + overflow-wrap: anywhere; +} + +.hover-show-parent[data-v-0d46f934] .hover-show-child[data-v-0d46f934] { + display: none; +} +.hover-show-parent[data-v-0d46f934][data-v-0d46f934]:hover .hover-show-child[data-v-0d46f934] { + display: inherit; +} + +.outlined[data-v-0d46f934] { + background-color: gray; + color: #222; + font-weight: 600; + border-radius: 6px; + padding: 0 5px; + font-size: 12px; +} + +.input-box { + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 4px; + padding: 0 6px; + color: white; +} + +.trackNumber { + font-family: monospace; + max-width: 80px; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} +.trackNumber:hover { + cursor: pointer; + font-weight: bolder; + text-decoration: underline; +} + +.freeform-input[data-v-d679c59c] { + width: 150px; +} + +.groups[data-v-c26ed586] { + overflow-y: auto; + overflow-x: hidden; +} + +.input-box { + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 4px; + padding: 0 6px; + color: white; +} + +.trackNumber { + font-family: monospace; + max-width: 80px; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} +.trackNumber:hover { + cursor: pointer; + font-weight: bolder; + text-decoration: underline; +} + +.track-item[data-v-7a688bfe] { + border-radius: inherit; +} +.track-item[data-v-7a688bfe] .item-row[data-v-7a688bfe] { + width: 100%; +} +.track-item[data-v-7a688bfe] .type-color-box[data-v-7a688bfe] { + margin: 7px; + margin-top: 4px; + min-width: 15px; + max-width: 15px; + min-height: 15px; + max-height: 15px; +} + +.strcoller { + height: 100%; +} + +.trackHeader { + height: auto; +} + 
+.tracks { + overflow-y: auto; + overflow-x: hidden; +} +.tracks .v-input--checkbox label { + white-space: pre-wrap; +} + +.nowrap[data-v-a4da19c6] { + white-space: nowrap; + overflow: hidden; + max-width: var(--content-width); + text-overflow: ellipsis; +} + +.hover-show-parent[data-v-a4da19c6] .hover-show-child[data-v-a4da19c6] { + display: none; +} +.hover-show-parent[data-v-a4da19c6][data-v-a4da19c6]:hover .hover-show-child[data-v-a4da19c6] { + display: inherit; +} + +.outlined[data-v-a4da19c6] { + background-color: gray; + color: #222; + font-weight: 600; + border-radius: 6px; + padding: 0 5px; + font-size: 12px; +} + +.input-box { + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 4px; + padding: 0 6px; + color: white; +} + +.trackNumber { + font-family: monospace; + max-width: 80px; + overflow: hidden; + white-space: nowrap; + text-overflow: ellipsis; +} +.trackNumber:hover { + cursor: pointer; + font-weight: bolder; + text-decoration: underline; +} + +.freeform-input[data-v-07a75698] { + width: 135px; +} + +.select-input[data-v-07a75698] { + width: 120px; + background-color: #1e1e1e; + appearance: menulist; +} \ No newline at end of file diff --git a/client/dive-common/apispec.ts b/client/dive-common/apispec.ts index d8888c6b1..036ca7eb6 100644 --- a/client/dive-common/apispec.ts +++ b/client/dive-common/apispec.ts @@ -238,6 +238,122 @@ function useApi() { return use>(ApiSymbol); } +/** + * Interactive Segmentation Types + */ +export interface SegmentationPredictRequest { + /** Path to the image file */ + imagePath: string; + /** Point coordinates as [x, y] pairs */ + points: [number, number][]; + /** Point labels: 1 for foreground, 0 for background */ + pointLabels: number[]; + /** Optional low-res mask from previous prediction for refinement */ + maskInput?: number[][]; + /** Whether to return multiple mask options */ + multimaskOutput?: boolean; +} + +export interface SegmentationPredictResponse { + /** Whether the prediction succeeded */ + 
success: boolean; + /** Error message if failed */ + error?: string; + /** Polygon coordinates as [x, y] pairs */ + polygon?: [number, number][]; + /** Bounding box [x_min, y_min, x_max, y_max] */ + bounds?: [number, number, number, number]; + /** Quality score from segmentation model */ + score?: number; + /** Low-res mask for subsequent refinement */ + lowResMask?: number[][]; + /** Mask dimensions [height, width] */ + maskShape?: [number, number]; + /** RLE-encoded full-resolution mask for display: [[value, count], ...] */ + rleMask?: [number, number][]; +} + +export interface SegmentationStatusResponse { + /** Whether segmentation is available */ + available: boolean; + /** Whether the model is currently loaded */ + loaded?: boolean; + /** Whether the service is ready for predictions */ + ready?: boolean; +} + +/** + * Text Query Types for open-vocabulary detection/segmentation + */ + +/** A single detection returned from a text query */ +export interface TextQueryDetection { + /** Bounding box [x1, y1, x2, y2] */ + box: [number, number, number, number]; + /** Polygon coordinates as [x, y] pairs */ + polygon?: [number, number][]; + /** Confidence score */ + score: number; + /** Label/class name (often the query text) */ + label: string; + /** Low-res mask for refinement (optional) */ + lowResMask?: number[][]; +} + +export interface TextQueryRequest { + /** Path to the image file */ + imagePath: string; + /** Text query describing what to find (e.g., "fish", "person swimming") */ + text: string; + /** Confidence threshold for detections (default: 0.3) */ + boxThreshold?: number; + /** Maximum number of detections to return (default: 10) */ + maxDetections?: number; + /** Optional boxes to refine [x1, y1, x2, y2][] */ + boxes?: [number, number, number, number][]; + /** Optional keypoints for refinement [x, y][] */ + points?: [number, number][]; + /** Labels for points: 1 for foreground, 0 for background */ + pointLabels?: number[]; + /** Optional masks to refine 
*/ + masks?: number[][][]; +} + +export interface TextQueryResponse { + /** Whether the query succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** List of detections found */ + detections?: TextQueryDetection[]; + /** The original query text */ + query?: string; + /** Whether fallback method was used (no native text support) */ + fallback?: boolean; +} + +export interface RefineDetectionsRequest { + /** Path to the image file */ + imagePath: string; + /** Detections to refine */ + detections: TextQueryDetection[]; + /** Optional additional keypoints for refinement [x, y][] */ + points?: [number, number][]; + /** Labels for additional points: 1 for foreground, 0 for background */ + pointLabels?: number[]; + /** Whether to include refined masks in response */ + refineMasks?: boolean; +} + +export interface RefineDetectionsResponse { + /** Whether the refinement succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Refined detections */ + detections?: TextQueryDetection[]; +} + export { provideApi, useApi, diff --git a/client/dive-common/components/DeleteControls.vue b/client/dive-common/components/DeleteControls.vue index 50ecdd08a..3d10226e5 100644 --- a/client/dive-common/components/DeleteControls.vue +++ b/client/dive-common/components/DeleteControls.vue @@ -24,8 +24,20 @@ export default Vue.extend({ if (this.editingMode === 'rectangle') { return true; // deleting rectangle is unsupported } + if (this.editingMode === 'Point') { + return true; // Point mode uses reset instead of delete + } return false; }, + isPolygonMode(): boolean { + return this.editingMode === 'Polygon'; + }, + editModeIcon(): string { + if (this.editingMode === 'Polygon') return 'mdi-vector-polygon'; + if (this.editingMode === 'LineString') return 'mdi-vector-line'; + if (this.editingMode === 'rectangle') return 'mdi-vector-square'; + return 'mdi-shape'; + }, }, methods: { @@ -39,33 +51,104 @@ export default Vue.extend({ 
this.$emit('delete-annotation'); } }, + addHole() { + this.$emit('add-hole'); + }, + addPolygon() { + this.$emit('add-polygon'); + }, }, }); diff --git a/client/dive-common/components/EditorMenu.vue b/client/dive-common/components/EditorMenu.vue index a7bea7bb8..5bf223e42 100644 --- a/client/dive-common/components/EditorMenu.vue +++ b/client/dive-common/components/EditorMenu.vue @@ -11,6 +11,7 @@ import { flatten } from 'lodash'; import { Mousetrap } from 'vue-media-annotator/types'; import { EditAnnotationTypes, VisibleAnnotationTypes } from 'vue-media-annotator/layers'; import Recipe from 'vue-media-annotator/recipe'; +import SegmentationPointClick from 'dive-common/recipes/segmentationpointclick'; import AnnotationVisibilityMenu from './AnnotationVisibilityMenu.vue'; @@ -19,6 +20,7 @@ interface ButtonData { icon: string; type?: VisibleAnnotationTypes; active: boolean; + loading?: boolean; mousetrap?: Mousetrap[]; description: string; click: () => void; @@ -67,7 +69,14 @@ export default defineComponent({ default: true, }, }, - emits: ['set-annotation-state', 'update:tail-settings', 'update:show-user-created-icon'], + emits: [ + 'set-annotation-state', + 'update:tail-settings', + 'update:show-user-created-icon', + 'text-query-init', + 'text-query', + 'text-query-all-frames', + ], setup(props, { emit }) { const toolTimeTimeout = ref(null); const STORAGE_KEY = 'editorMenu.editButtonsExpanded'; @@ -85,6 +94,59 @@ export default defineComponent({ localStorage.setItem(STORAGE_KEY, String(value)); }); + // Text query state + const textQueryDialogOpen = ref(false); + const textQueryInput = ref(''); + const textQueryLoading = ref(false); + const textQueryThreshold = ref(0.3); + const textQueryInitializing = ref(false); + const textQueryServiceError = ref(''); + const textQueryAllFrames = ref(false); + + const openTextQueryDialog = () => { + textQueryDialogOpen.value = true; + textQueryInput.value = ''; + textQueryServiceError.value = ''; + textQueryAllFrames.value = 
false; + textQueryInitializing.value = true; + emit('text-query-init'); + }; + + const closeTextQueryDialog = () => { + textQueryDialogOpen.value = false; + textQueryInput.value = ''; + textQueryServiceError.value = ''; + textQueryInitializing.value = false; + textQueryAllFrames.value = false; + }; + + const onTextQueryServiceReady = (success: boolean, error?: string) => { + textQueryInitializing.value = false; + if (!success) { + textQueryServiceError.value = error || 'Text query service is not available'; + } + }; + + const submitTextQuery = () => { + if (!textQueryInput.value.trim()) { + return; + } + textQueryLoading.value = true; + if (textQueryAllFrames.value) { + emit('text-query-all-frames', { + text: textQueryInput.value.trim(), + boxThreshold: textQueryThreshold.value, + }); + } else { + emit('text-query', { + text: textQueryInput.value.trim(), + boxThreshold: textQueryThreshold.value, + }); + } + closeTextQueryDialog(); + textQueryLoading.value = false; + }; + const modeToolTips = { Creating: { rectangle: 'Drag to draw rectangle. Press ESC to exit.', @@ -121,6 +183,7 @@ export default defineComponent({ id: r.name, icon: r.icon.value || 'mdi-pencil', active: props.editingTrack && r.active.value, + loading: r.loading?.value ?? 
false, description: r.name, click: () => r.activate(), mousetrap: [ @@ -134,7 +197,13 @@ export default defineComponent({ ]; }); - const mousetrap = computed((): Mousetrap[] => flatten(editButtons.value.map((b) => b.mousetrap || []))); + const mousetrap = computed((): Mousetrap[] => [ + ...flatten(editButtons.value.map((b) => b.mousetrap || [])), + { + bind: 't', + handler: () => openTextQueryDialog(), + }, + ]); const activeEditButton = computed(() => editButtons.value.find((b) => b.active) || editButtons.value[0]); @@ -161,6 +230,13 @@ export default defineComponent({ return { text: 'Not editing', icon: 'mdi-pencil-off-outline', color: '' }; }); + const activeSegmentationRecipe = computed((): SegmentationPointClick | null => { + const segRecipe = props.recipes.find( + (r) => r instanceof SegmentationPointClick && r.active.value, + ) as SegmentationPointClick | undefined; + return segRecipe || null; + }); + const editingTooltip = computed(() => { if (props.editingDetails === 'disabled' || !props.editingMode || typeof props.editingMode !== 'string') { return ''; @@ -194,6 +270,19 @@ export default defineComponent({ toggleEditButtonsExpanded, activeEditButton, editButtonsMenuKey, + activeSegmentationRecipe, + // Text query + textQueryDialogOpen, + textQueryInput, + textQueryLoading, + textQueryThreshold, + textQueryInitializing, + textQueryServiceError, + textQueryAllFrames, + openTextQueryDialog, + closeTextQueryDialog, + onTextQueryServiceReady, + submitTextQuery, }; }, }); @@ -244,7 +333,7 @@ export default defineComponent({ + + +
T:
+ mdi-text-search +
+ + - + + @@ -335,6 +463,103 @@ export default defineComponent({ @update:show-user-created-icon="$emit('update:show-user-created-icon', $event)" /> + + + + + + + mdi-text-search + + Text Query + + + +
+ +

+ Loading text query model... +

+
+ +
+ + mdi-alert-circle + +

+ {{ textQueryServiceError }} +

+
+ + +
+ + + + {{ textQueryServiceError ? 'Close' : 'Cancel' }} + + + Search + + +
+
diff --git a/client/dive-common/components/Sidebar.vue b/client/dive-common/components/Sidebar.vue index d7b277dc9..7642c1fdd 100644 --- a/client/dive-common/components/Sidebar.vue +++ b/client/dive-common/components/Sidebar.vue @@ -18,6 +18,7 @@ import { } from 'vue-media-annotator/provides'; import { clientSettings } from 'dive-common/store/settings'; +import ConfidenceFilter from 'dive-common/components/ConfidenceFilter.vue'; import TrackDetailsPanel from 'dive-common/components/TrackDetailsPanel.vue'; import TrackSettingsPanel from 'dive-common/components/TrackSettingsPanel.vue'; import TypeSettingsPanel from 'dive-common/components/TypeSettingsPanel.vue'; @@ -27,6 +28,7 @@ import { usePrompt } from 'dive-common/vue-utilities/prompt-service'; export default defineComponent({ components: { + ConfidenceFilter, StackedVirtualSidebarContainer, TrackDetailsPanel, TrackSettingsPanel, @@ -43,6 +45,14 @@ export default defineComponent({ type: Boolean, default: true, }, + horizontal: { + type: Boolean, + default: false, + }, + isStereoDataset: { + type: Boolean, + default: false, + }, }, setup() { @@ -63,7 +73,9 @@ export default defineComponent({ const styleManager = useTrackStyleManager(); const data = reactive({ - currentTab: 'tracks' as 'tracks' | 'attributes', + currentTab: 'tracks' as 'tracks' | 'attributes' | 'types', + // For horizontal mode, cycle through 3 tabs + horizontalTab: 'tracks' as 'tracks' | 'attributes' | 'types', }); function swapTabs() { @@ -74,6 +86,28 @@ export default defineComponent({ } } + function cycleHorizontalTabs() { + if (data.horizontalTab === 'tracks') { + data.horizontalTab = 'attributes'; + } else if (data.horizontalTab === 'attributes') { + data.horizontalTab = 'types'; + } else { + data.horizontalTab = 'tracks'; + } + } + + const horizontalTabIcon = computed(() => { + if (data.horizontalTab === 'tracks') return 'mdi-format-list-bulleted'; + if (data.horizontalTab === 'attributes') return 'mdi-card-text'; + return 
'mdi-filter-variant'; + }); + + const horizontalTabTooltip = computed(() => { + if (data.horizontalTab === 'tracks') return 'Detection List (click to cycle)'; + if (data.horizontalTab === 'attributes') return 'Detection Details (click to cycle)'; + return 'Type Filters (click to cycle)'; + }); + function doToggleMerge() { if (toggleMerge().length) { data.currentTab = 'attributes'; @@ -121,17 +155,23 @@ export default defineComponent({ readOnlyMode, styleManager, disableAnnotationFilters: trackFilterControls.disableAnnotationFilters, + confidenceFilters: trackFilterControls.confidenceFilters, visible, + horizontalTabIcon, + horizontalTabTooltip, /* methods */ doToggleMerge, swapTabs, + cycleHorizontalTabs, }; }, }); + + +
+ + + {{ horizontalTabTooltip }} + + +
+ +
+ +
+ +
+ + + +
+
+ +
+ +
+ +
+ + + +
+
diff --git a/client/dive-common/components/TrackSettingsPanel.vue b/client/dive-common/components/TrackSettingsPanel.vue index 71b1ec1b2..ba8672680 100644 --- a/client/dive-common/components/TrackSettingsPanel.vue +++ b/client/dive-common/components/TrackSettingsPanel.vue @@ -16,6 +16,10 @@ export default defineComponent({ type: Array as PropType>, required: true, }, + isStereoDataset: { + type: Boolean, + default: false, + }, }, setup(props) { @@ -33,6 +37,7 @@ export default defineComponent({ filterTracksByFrame: 'Filter the track list by those with detections in the current frame', autoZoom: 'Automatically zoom to the track when selected', showMultiCamToolbar: 'Show multi-camera tools in the top toolbar when a track is selected', + stereoInteractiveMode: 'When enabled, annotations created on one camera are automatically warped to the other camera using stereo disparity', }); const modes = ref(['Track', 'Detection']); // Add unknown as the default type to the typeList @@ -362,6 +367,47 @@ export default defineComponent({ + diff --git a/client/dive-common/recipes/headtail.ts b/client/dive-common/recipes/headtail.ts index 3e26686d2..6b48b944a 100644 --- a/client/dive-common/recipes/headtail.ts +++ b/client/dive-common/recipes/headtail.ts @@ -35,6 +35,9 @@ export default class HeadTail implements Recipe { private startWithHead: boolean; + /* Whether the track had bounds before line creation started */ + private hadBoundsOnCreate: boolean; + bus: Vue; toggleable: Ref; @@ -44,6 +47,7 @@ export default class HeadTail implements Recipe { constructor() { this.bus = new Vue(); this.startWithHead = true; + this.hadBoundsOnCreate = false; this.active = ref(false); this.name = 'HeadTail'; this.toggleable = ref(true); @@ -102,6 +106,37 @@ export default class HeadTail implements Recipe { return (results.filter((item) => item).length === coords.length); } + /** + * Compute a tight axis-aligned bounding box around coords, expanded by fraction + * (e.g. 
0.10 = 10% larger in each dimension). + */ + private static tightBoundsExpanded( + coords: GeoJSON.Position[], + fraction: number, + ): GeoJSON.Polygon[] { + const xs = coords.map((c) => c[0]); + const ys = coords.map((c) => c[1]); + const minX = Math.min(...xs); + const maxX = Math.max(...xs); + const minY = Math.min(...ys); + const maxY = Math.max(...ys); + const width = maxX - minX; + const height = maxY - minY; + // Use the other dimension as fallback for degenerate (zero-width/height) cases + const padX = width * fraction || height * fraction; + const padY = height * fraction || width * fraction; + return [{ + type: 'Polygon', + coordinates: [[ + [minX - padX, minY - padY], + [minX - padX, maxY + padY], + [maxX + padX, maxY + padY], + [maxX + padX, minY - padY], + [minX - padX, minY - padY], + ]], + }]; + } + private static makeGeom(ls: GeoJSON.LineString, startWithHead: boolean) { const firstFeature: GeoJSON.Feature = { type: 'Feature', @@ -187,14 +222,19 @@ export default class HeadTail implements Recipe { } as GeoJSON.LineString; } if (geom.coordinates.length === 2) { - let union = HeadTail.findBounds(geom, PaddingVector); + let union: GeoJSON.Polygon[]; if (bounds !== null) { // If both are inside of the bbox don't adjust the union if (HeadTail.coordsInBounds(bounds, geom.coordinates)) { union = []; } else if (tail.length > 0) { // If creating new box add padding union = HeadTail.findBounds(geom, PaddingVectorZero); + } else { + union = HeadTail.findBounds(geom, PaddingVector); } + } else { + // No existing box: make box 10% larger than tight box around vertices + union = HeadTail.tightBoundsExpanded(geom.coordinates, 0.10); } // Both head and tail placed, replace them. 
return { @@ -206,7 +246,8 @@ export default class HeadTail implements Recipe { } as UpdateResponse; } if (geom.coordinates.length === 1) { - // Only the head placed so far + // Only the head placed so far — record if the track already had bounds + this.hadBoundsOnCreate = bounds !== null; let union = HeadTail.findBounds(geom, PaddingVector); if (bounds !== null) { if (HeadTail.coordsInBounds(bounds, geom.coordinates)) { @@ -226,6 +267,16 @@ export default class HeadTail implements Recipe { /** * IF recipe isn't active, but the key matches, we are editing */ + if (this.active.value && !this.hadBoundsOnCreate) { + // Creating a new line on a track without a pre-existing box: + // use unionWithoutBounds to replace interim bounds with 20% expanded box + return { + ...EmptyResponse, + data: HeadTail.makeGeom(linestring.geometry, true), + unionWithoutBounds: HeadTail.tightBoundsExpanded(linestring.geometry.coordinates, 0.20), + done: true, + }; + } return { ...EmptyResponse, data: HeadTail.makeGeom(linestring.geometry, true), diff --git a/client/dive-common/recipes/polygonbase.ts b/client/dive-common/recipes/polygonbase.ts index 1caae557a..8fe53cea3 100644 --- a/client/dive-common/recipes/polygonbase.ts +++ b/client/dive-common/recipes/polygonbase.ts @@ -8,6 +8,49 @@ import { EditAnnotationTypes } from 'vue-media-annotator/layers'; const EmptyResponse = { data: {}, union: [], unionWithoutBounds: [] }; +/** + * Check if a point is inside a polygon using ray casting algorithm + * @param point [x, y] coordinates + * @param polygon array of [x, y] coordinates forming the polygon (outer ring only) + * @returns true if point is inside polygon + */ +function isPointInsidePolygon(point: [number, number], polygon: GeoJSON.Position[]): boolean { + const [x, y] = point; + let inside = false; + + for (let i = 0, j = polygon.length - 1; i < polygon.length; j = i, i += 1) { + const xi = polygon[i][0]; + const yi = polygon[i][1]; + const xj = polygon[j][0]; + const yj = polygon[j][1]; 
+ + const intersect = ((yi > y) !== (yj > y)) + && (x < ((xj - xi) * (y - yi)) / (yj - yi) + xi); + + if (intersect) { + inside = !inside; + } + } + + return inside; +} + +/** + * Check if all vertices of polygon P are inside polygon E + * @param innerPolygon polygon to check if it's inside + * @param outerPolygon polygon to check if it contains the inner polygon + * @returns true if all vertices of innerPolygon are inside outerPolygon + */ +function isPolygonInsidePolygon( + innerPolygon: GeoJSON.Position[], + outerPolygon: GeoJSON.Position[], +): boolean { + // Check if all vertices of the inner polygon are inside the outer polygon + return innerPolygon.every( + (vertex) => isPointInsidePolygon([vertex[0], vertex[1]], outerPolygon), + ); +} + export default class PolygonBoundsExpand implements Recipe { active: Ref; @@ -19,12 +62,41 @@ export default class PolygonBoundsExpand implements Recipe { bus: Vue; + // Mode for adding polygons: 'normal', 'hole', or 'newPolygon' + addingMode: Ref<'normal' | 'hole' | 'newPolygon'>; + constructor() { this.bus = new Vue(); this.active = ref(false); this.name = 'PolygonBase'; this.toggleable = ref(true); this.icon = ref('mdi-vector-polygon'); + this.addingMode = ref('normal'); + } + + setAddingHole() { + this.addingMode.value = 'hole'; + // Emit activate event with special key to trigger creation mode + // The special key ensures no geometry matches, forcing creation mode + this.bus.$emit('activate', { + editing: 'Polygon' as EditAnnotationTypes, + key: '__adding_hole__', + recipeName: this.name, + }); + } + + setAddingPolygon(newKey: string) { + this.addingMode.value = 'newPolygon'; + // Emit activate event with new key to trigger creation mode + this.bus.$emit('activate', { + editing: 'Polygon' as EditAnnotationTypes, + key: newKey, + recipeName: this.name, + }); + } + + resetAddingMode() { + this.addingMode.value = 'normal'; } update( @@ -37,12 +109,89 @@ export default class PolygonBoundsExpand implements Recipe { if 
(data.length === 1 && mode === 'editing' && this.active.value) { const poly = data[0].geometry; if (poly.type === 'Polygon') { + const newPolyCoords = poly.coordinates[0] as GeoJSON.Position[]; + const currentMode = this.addingMode.value; + + // Reset adding mode after processing + this.resetAddingMode(); + + if (currentMode === 'hole' || key === '__adding_hole__') { + // Adding a hole - find the first polygon and add hole to it + const existingPolygons = track.getPolygonFeatures(frameNum); + if (existingPolygons.length > 0) { + // Add as hole to the first (default) polygon + const targetPoly = existingPolygons[0]; + // Create updated polygon geometry with the hole added + const updatedCoordinates = [ + ...targetPoly.geometry.coordinates, + newPolyCoords, + ]; + const updatedPolygon: GeoJSON.Polygon = { + type: 'Polygon', + coordinates: updatedCoordinates, + }; + const updatedFeature: GeoJSON.Feature = { + type: 'Feature', + properties: { key: targetPoly.key }, + geometry: updatedPolygon, + }; + // Return data like add polygon mode so right-click behavior is consistent + return { + data: { + [targetPoly.key]: [updatedFeature], + }, + union: [], + done: true, + unionWithoutBounds: [], + newSelectedKey: targetPoly.key, // Set to target polygon's key for proper mode transition + }; + } + // No existing polygon, treat as normal (create first polygon) + return { + data: { + '': data, + }, + union: [], + done: true, + unionWithoutBounds: [poly], + newSelectedKey: '', + }; + } + + if (currentMode === 'newPolygon') { + // Adding a new separate polygon - key should already be set to new value + const useKey = key || track.getNextPolygonKey(frameNum); + const newFeature: GeoJSON.Feature = { + type: 'Feature', + properties: { key: useKey }, + geometry: poly, + }; + return { + data: { + [useKey]: [newFeature], + }, + union: [poly], // Use union to EXPAND bounds, not replace them + done: true, + unionWithoutBounds: [], + newSelectedKey: '', // Reset to default polygon for 
/**
 * Decide how a freshly drawn polygon relates to the polygons already stored
 * on `frameNum`, and report the geometry key it belongs under.
 *
 * Outcomes:
 * - `key` (or the default '' key) already names a polygon on this frame, or
 *   the frame has no polygons yet: nothing is modified and that key is
 *   returned with `isHole: false`.
 * - the new outer ring lies inside the outer ring of an existing polygon
 *   (per `isPolygonInsidePolygon`): the ring is added to that polygon as a
 *   hole via `track.addHoleToPolygon` and its key is returned with
 *   `isHole: true`.
 * - otherwise: nothing is modified; the next free polygon key is returned
 *   with `isHole: false` so the caller can store the polygon itself.
 *
 * Only the hole case mutates the track. Call this explicitly when automatic
 * hole detection is wanted.
 *
 * @param frameNum frame whose polygons are inspected
 * @param track track that owns the polygons
 * @param poly the newly drawn polygon; only its first (outer) ring is used
 * @param key geometry key the caller intends to use; '' when omitted
 * @returns whether the polygon became a hole, and the key it is associated with
 */
// eslint-disable-next-line class-methods-use-this
addPolygonWithHoleDetection(
  frameNum: number,
  track: Track,
  poly: GeoJSON.Polygon,
  key?: string,
) {
  const requestedKey = key || '';
  const candidateRing = poly.coordinates[0] as GeoJSON.Position[];
  const framePolygons = track.getPolygonFeatures(frameNum);

  // Editing an already-stored polygon, or drawing the very first one:
  // there is nothing to detect, keep the requested key.
  const editsStoredPolygon = framePolygons.some((stored) => stored.key === requestedKey);
  if (editsStoredPolygon || framePolygons.length === 0) {
    return { isHole: false, key: requestedKey };
  }

  // First stored polygon whose outer ring encloses the new ring, if any.
  const enclosing = framePolygons.find((stored) => isPolygonInsidePolygon(
    candidateRing,
    stored.geometry.coordinates[0] as GeoJSON.Position[],
  ));

  if (!enclosing) {
    // Disjoint from every stored polygon: hand back a fresh key.
    return { isHole: false, key: track.getNextPolygonKey(frameNum) };
  }

  track.addHoleToPolygon(frameNum, enclosing.key, candidateRing);
  return { isHole: true, key: enclosing.key };
}
/** Geometry key under which this recipe stores and removes its polygon. */
export const SegmentationPolygonKey = 'SegmentationPolygon';

/** Response returned when the recipe has nothing to contribute to an update. */
const EmptyResponse: UpdateResponse = {
  data: {},
  union: [],
  unionWithoutBounds: [],
};

/** Platform-specific hooks supplied through `SegmentationPointClick.initialize`. */
export interface SegmentationRecipeOptions {
  /**
   * Function to call segmentation predict API (platform-specific)
   * @param request - The prediction request with points and labels
   * @param frameNum - The current frame number (useful for web platform)
   * @returns the prediction response; the recipe reads `success`, `polygon`,
   *   `bounds`, `lowResMask`, `rleMask`, `maskShape`, `score` and `error`
   */
  predictFn: (
    request: SegmentationPredictRequest,
    frameNum: number,
  ) => Promise<SegmentationPredictResponse>;
  /** Function to get image path for current frame (used by desktop platform) */
  getImagePath: (frameNum: number) => string;
  /**
   * Optional function to initialize the segmentation service.
   * Called when the recipe is activated (user clicks Segment button).
   * Should reject (or throw) if initialization fails; the resolved value
   * is ignored.
   */
  initializeServiceFn?: () => Promise<void>;
}

/** Callback data when prediction completes */
export interface SegmentationPredictionResult {
  /** Outer ring of the predicted polygon as [x, y] pairs */
  polygon: [number, number][];
  /** [minX, minY, maxX, maxY] of the prediction, or null when unknown */
  bounds: [number, number, number, number] | null;
  /** Frame the prediction belongs to */
  frameNum: number;
  /** RLE-encoded full-resolution mask for display */
  rleMask?: [number, number][];
  /** Mask dimensions [height, width] */
  maskShape?: [number, number];
  /** Control points used for this prediction (for stereo warping) */
  controlPoints?: {
    points: [number, number][];
    labels: number[];
  };
}

/** Data stored per frame for multi-frame segmentation */
interface FrameSegmentationData {
  /** Prompt points clicked on this frame */
  points: [number, number][];
  /** One label per point (1=foreground, 0=background) */
  labels: number[];
  /** Latest predicted polygon, or null when no prediction has succeeded yet */
  polygon: [number, number][] | null;
  /** [minX, minY, maxX, maxY] of the latest prediction */
  bounds: [number, number, number, number] | null;
  /** Low-res mask of the latest prediction, fed back as `maskInput` for refinement */
  lowResMask: number[][] | null;
  /** RLE-encoded mask of the latest prediction (for display) */
  rleMask: [number, number][] | null;
  /** Mask dimensions of the latest prediction */
  maskShape: [number, number] | null;
}

/** Result containing all frames for multi-frame confirmation */
export interface MultiFrameSegmentationResult {
  /** Map of frame number to segmentation result */
  frames: Map<number, SegmentationPredictionResult>;
}
/**
 * Segmentation Point-Click Recipe
 *
 * Collects foreground/background prompt points per frame, sends them to the
 * platform-specific predictor and exposes the returned polygon/mask as a
 * pending prediction until the user confirms or resets it.
 *
 * Events emitted on `bus`:
 * - 'activate'                   request Point editing mode for this recipe
 * - 'points-updated'             the prompt points of one frame changed
 * - 'prediction-ready'           a polygon/mask preview exists for a frame
 * - 'prediction-error'           human-readable failure message
 * - 'prediction-reset'           previews of a frame should be discarded
 * - 'prediction-confirmed'       one confirmed frame (single-frame listeners)
 * - 'prediction-confirmed-multi' all confirmed frames at once
 */
export default class SegmentationPointClick implements Recipe {
  active: Ref<boolean>;

  name: string;

  bus: Vue;

  toggleable: Ref<boolean>;

  icon: Ref<string>;

  /** Platform-specific segmentation predict function */
  private predictFn: ((
    request: SegmentationPredictRequest,
    frameNum: number,
  ) => Promise<SegmentationPredictResponse>) | null = null;

  /** Function to get image path for current frame */
  private getImagePath: ((frameNum: number) => string) | null = null;

  /** Function to initialize the segmentation service (called on activation) */
  private initializeServiceFn: (() => Promise<void>) | null = null;

  /** Whether the service has been successfully initialized */
  private serviceInitialized: boolean = false;

  /** Whether activation is pending (waiting for async init to complete) */
  private pendingActivation: boolean = false;

  /** Whether an initializeServiceFn call is still in flight */
  private initInFlight: boolean = false;

  /** Accumulated points for current frame's segmentation */
  private points: [number, number][] = [];

  /** Labels for accumulated points (1=foreground, 0=background) */
  private pointLabels: number[] = [];

  /** Low-res mask from last prediction (for refinement) */
  private lastLowResMask: number[][] | null = null;

  /** Pending polygon from async prediction */
  private pendingPolygon: [number, number][] | null = null;

  /** Pending bounds from async prediction */
  private pendingBounds: [number, number, number, number] | null = null;

  /** Pending RLE mask from async prediction (for display) */
  private pendingRleMask: [number, number][] | null = null;

  /** Pending mask shape from async prediction */
  private pendingMaskShape: [number, number] | null = null;

  /** Whether a prediction is currently in progress */
  private isPredicting: boolean = false;

  /** Current frame number */
  private currentFrame: number = 0;

  /** Per-frame segmentation data for multi-frame support */
  private frameData: Map<number, FrameSegmentationData> = new Map();

  /** Monotonic id handed to each prediction request */
  private requestCounter: number = 0;

  /**
   * Id of the newest prediction request issued per frame. A response is only
   * applied when its id is still the newest for its frame, so late responses
   * can neither overwrite newer ones nor revive state cleared by a reset.
   */
  private latestRequestByFrame: Map<number, number> = new Map();

  /** Whether the recipe is currently loading (initializing the service) */
  loading: Ref<boolean>;

  constructor() {
    this.bus = new Vue();
    this.active = ref(false);
    this.name = 'Segment';
    this.toggleable = ref(true);
    this.icon = ref('mdi-auto-fix');
    this.loading = ref(false);
  }

  /**
   * Initialize the recipe with platform-specific options.
   * Must be called before using the recipe.
   */
  initialize(options: SegmentationRecipeOptions): void {
    this.predictFn = options.predictFn;
    this.getImagePath = options.getImagePath;
    this.initializeServiceFn = options.initializeServiceFn || null;
    // Reset service initialization state when re-initializing
    this.serviceInitialized = false;
  }

  /**
   * Toolbar icon for a given number of prompt points: the default wand for
   * zero or one point, otherwise a numbered circle capped at 9.
   */
  private static iconForPointCount(count: number): string {
    return count > 1
      ? `mdi-numeric-${Math.min(count, 9)}-circle`
      : 'mdi-auto-fix';
  }

  /** True when `polygon` has enough vertices (more than two) to be committed. */
  private static isCommittable(polygon: [number, number][] | null): boolean {
    return polygon !== null && polygon.length > 2;
  }

  /**
   * Remove `target` (matched by reference) and its label from parallel arrays.
   * @returns false when the point is no longer present
   */
  private static removePoint(
    points: [number, number][],
    labels: number[],
    target: [number, number],
  ): boolean {
    const index = points.indexOf(target);
    if (index === -1) {
      return false;
    }
    points.splice(index, 1);
    labels.splice(index, 1);
    return true;
  }

  /** Clear the live (current-frame) points and pending prediction. */
  private clearLiveState(): void {
    this.points = [];
    this.pointLabels = [];
    this.lastLowResMask = null;
    this.pendingPolygon = null;
    this.pendingBounds = null;
    this.pendingRleMask = null;
    this.pendingMaskShape = null;
  }

  /** Emit the live prompt points so listeners can redraw them for `frameNum`. */
  private emitLivePoints(frameNum: number): void {
    this.bus.$emit('points-updated', {
      points: [...this.points],
      labels: [...this.pointLabels],
      frameNum,
    });
  }

  /**
   * Reset the recipe state (clear accumulated points for all frames).
   * Also invalidates every in-flight prediction and emits an empty
   * 'points-updated' for the current frame.
   */
  private reset(): void {
    this.clearLiveState();
    this.isPredicting = false;
    this.frameData.clear();
    this.latestRequestByFrame.clear();
    // Clear visual feedback for points
    this.emitLivePoints(this.currentFrame);
  }

  /**
   * Reset only the current frame's points (used when clearing current frame)
   */
  private resetCurrentFrame(): void {
    this.clearLiveState();
    this.frameData.delete(this.currentFrame);
    this.latestRequestByFrame.delete(this.currentFrame);
    // Clear visual feedback for points
    this.emitLivePoints(this.currentFrame);
  }

  /**
   * Save current frame's data to frameData map.
   * When the live state is empty, any stale entry for the frame is removed
   * so it cannot be counted or confirmed later.
   */
  private saveCurrentFrameData(): void {
    const hasLiveData = this.points.length > 0
      || this.pendingPolygon !== null
      || this.pendingRleMask !== null;
    if (!hasLiveData) {
      this.frameData.delete(this.currentFrame);
      return;
    }
    // Shallow copies: the coordinate tuples keep their identity, which is
    // what lets a late response find the point that triggered it.
    this.frameData.set(this.currentFrame, {
      points: [...this.points],
      labels: [...this.pointLabels],
      polygon: this.pendingPolygon ? [...this.pendingPolygon] : null,
      bounds: this.pendingBounds
        ? [...this.pendingBounds] as [number, number, number, number]
        : null,
      lowResMask: this.lastLowResMask,
      rleMask: this.pendingRleMask ? [...this.pendingRleMask] : null,
      maskShape: this.pendingMaskShape
        ? [...this.pendingMaskShape] as [number, number]
        : null,
    });
  }

  /**
   * Load frame data from frameData map into current state
   */
  private loadFrameData(frameNum: number): void {
    const saved = this.frameData.get(frameNum);
    if (!saved) {
      this.clearLiveState();
      return;
    }
    this.points = [...saved.points];
    this.pointLabels = [...saved.labels];
    this.pendingPolygon = saved.polygon ? [...saved.polygon] : null;
    this.pendingBounds = saved.bounds
      ? [...saved.bounds] as [number, number, number, number]
      : null;
    this.lastLowResMask = saved.lowResMask;
    this.pendingRleMask = saved.rleMask ? [...saved.rleMask] : null;
    this.pendingMaskShape = saved.maskShape
      ? [...saved.maskShape] as [number, number]
      : null;
  }

  /**
   * Make `frameNum` the live frame: persist the live state under the old
   * frame, load whatever was saved for the new one and refresh the icon.
   * Emits nothing.
   */
  private switchToFrame(frameNum: number): void {
    this.saveCurrentFrameData();
    this.currentFrame = frameNum;
    this.loadFrameData(frameNum);
    this.icon.value = SegmentationPointClick.iconForPointCount(this.points.length);
  }

  /**
   * Handle frame change - save current frame's data and load new frame's data
   * Emits event to clear visual dots when moving to a different frame
   */
  handleFrameChange(newFrame: number): void {
    if (!this.active.value || newFrame === this.currentFrame) {
      return;
    }

    this.switchToFrame(newFrame);

    // Update visual feedback for the new frame
    // If new frame has no points, this clears the display
    this.emitLivePoints(newFrame);

    // If new frame has a pending prediction (polygon or mask), emit it
    if (this.pendingPolygon || this.pendingRleMask) {
      this.bus.$emit('prediction-ready', {
        polygon: this.pendingPolygon || [],
        bounds: this.pendingBounds,
        frameNum: newFrame,
        rleMask: this.pendingRleMask || undefined,
        maskShape: this.pendingMaskShape || undefined,
      } as SegmentationPredictionResult);
    }
  }

  /**
   * Store a successful prediction for `frameNum`: in the live fields when it
   * is the current frame, otherwise in that frame's saved entry.
   * @returns false when the frame has no state left to attach the result to
   */
  private storePrediction(
    frameNum: number,
    result: Pick<FrameSegmentationData, 'polygon' | 'bounds' | 'lowResMask' | 'rleMask' | 'maskShape'>,
  ): boolean {
    if (frameNum === this.currentFrame) {
      this.pendingPolygon = result.polygon;
      this.pendingBounds = result.bounds;
      this.lastLowResMask = result.lowResMask;
      this.pendingRleMask = result.rleMask;
      this.pendingMaskShape = result.maskShape;
      return true;
    }
    const saved = this.frameData.get(frameNum);
    if (!saved) {
      return false;
    }
    Object.assign(saved, result);
    return true;
  }

  /**
   * Make segmentation prediction with current points
   *
   * The response is applied to the frame it was requested for, even if the
   * user has moved to another frame in the meantime, and is dropped when a
   * newer request for that frame exists or the state was reset.
   *
   * @param frameNum - The frame number to predict on
   * @param isFirstPoint - Whether this is the first point (affects error handling)
   */
  private async makePrediction(frameNum: number, isFirstPoint: boolean = false): Promise<void> {
    if (!this.predictFn || !this.getImagePath) {
      return;
    }

    if (this.points.length === 0) {
      return;
    }

    // The newest point is the one this request is "about"; remember it by
    // reference so a failure removes exactly this point.
    const triggeringPoint = this.points[this.points.length - 1];
    this.requestCounter += 1;
    const requestId = this.requestCounter;
    this.latestRequestByFrame.set(frameNum, requestId);
    this.isPredicting = true;

    try {
      const imagePath = this.getImagePath(frameNum);

      if (!imagePath) {
        throw new Error(`No image path available for frame ${frameNum}`);
      }

      const request: SegmentationPredictRequest = {
        imagePath,
        // Snapshots: the live arrays keep changing while the request is pending
        points: [...this.points],
        pointLabels: [...this.pointLabels],
        maskInput: this.lastLowResMask ?? undefined,
        multimaskOutput: this.points.length === 1, // Use multimask for single point
      };

      const response = await this.predictFn(request, frameNum);
      const { polygon } = response;

      if (response.success && polygon && polygon.length > 0) {
        const isNewest = this.latestRequestByFrame.get(frameNum) === requestId;
        const stored = isNewest && this.storePrediction(frameNum, {
          polygon,
          bounds: response.bounds ?? null,
          lowResMask: response.lowResMask ?? null,
          rleMask: response.rleMask ?? null,
          maskShape: response.maskShape ?? null,
        });
        if (stored) {
          // Emit event to notify that prediction is ready
          // Include frameNum so listeners can update the correct frame
          // Includes mask data for display during editing
          this.bus.$emit('prediction-ready', {
            polygon: response.polygon,
            bounds: response.bounds,
            score: response.score,
            frameNum,
            rleMask: response.rleMask,
            maskShape: response.maskShape,
          } as SegmentationPredictionResult & { score?: number });
        }
      } else {
        // Prediction returned an error - handle point rejection
        this.handlePredictionError(
          response.error || 'Prediction failed',
          isFirstPoint,
          frameNum,
          triggeringPoint,
        );
      }
    } catch (error) {
      // Exception during prediction - handle point rejection
      const errorMessage = error instanceof Error ? error.message : 'Prediction failed';
      this.handlePredictionError(errorMessage, isFirstPoint, frameNum, triggeringPoint);
    } finally {
      // An older request finishing must not hide a newer one still running
      if (requestId === this.requestCounter) {
        this.isPredicting = false;
      }
    }
  }

  /**
   * Handle prediction errors - remove rejected point and stay in edit mode.
   * The user can manually reset or cancel if they want to start over.
   *
   * The point is removed from the frame it was clicked on. If it is already
   * gone (state was reset or the recipe deactivated) the error is dropped.
   */
  private handlePredictionError(
    originalError: string,
    isFirstPoint: boolean,
    frameNum: number,
    rejectedPoint: [number, number],
  ): void {
    if (frameNum === this.currentFrame) {
      const removed = SegmentationPointClick.removePoint(
        this.points,
        this.pointLabels,
        rejectedPoint,
      );
      if (!removed) {
        return;
      }
      // Update icon to reflect new point count
      this.icon.value = SegmentationPointClick.iconForPointCount(this.points.length);
      // Emit updated points to remove the rejected point from visual display
      this.emitLivePoints(frameNum);
    } else {
      const saved = this.frameData.get(frameNum);
      if (!saved || !SegmentationPointClick.removePoint(saved.points, saved.labels, rejectedPoint)) {
        return;
      }
      // Drop the entry entirely once nothing is left on that frame
      if (saved.points.length === 0 && !saved.polygon && !saved.rleMask) {
        this.frameData.delete(frameNum);
      }
    }

    // Show error message - different message for first point vs subsequent
    if (isFirstPoint) {
      this.bus.$emit('prediction-error', originalError);
    } else {
      this.bus.$emit('prediction-error', 'Latest point rejected by segmentation method');
    }
    // Stay in edit mode - let the user decide to reset or try again
  }

  /**
   * Recipe update handler - called when user draws/clicks
   *
   * Point features add a prompt point and start an asynchronous prediction;
   * the returned response carries no geometry and `done: false`. Non-point
   * data in 'editing' mode commits the pending polygon (if any) under
   * `SegmentationPolygonKey` and deactivates the recipe.
   */
  update(
    mode: 'in-progress' | 'editing',
    frameNum: number,
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    track: Track,
    data: GeoJSON.Feature[],
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    key?: string,
  ): Readonly<UpdateResponse> {
    // Only process if this recipe is active
    if (!this.active.value) {
      return EmptyResponse;
    }

    // Keep per-frame state separate even if handleFrameChange was not called
    // before the first click on a new frame.
    if (frameNum !== this.currentFrame) {
      this.switchToFrame(frameNum);
    }

    // Look for point features in the data
    const pointFeatures = data.filter(
      (d) => d.geometry.type === 'Point',
    ) as GeoJSON.Feature<GeoJSON.Point>[];

    // Handle Point clicks - Point mode emits 'editing' directly (not 'in-progress')
    // because points complete immediately. We need to handle both modes.
    if (pointFeatures.length > 0) {
      const point = pointFeatures[0];
      const coords = point.geometry.coordinates as [number, number];

      // Check if this is the same point we already processed (avoid duplicates)
      const lastPoint = this.points[this.points.length - 1];
      const isDuplicate = lastPoint
        && lastPoint[0] === coords[0]
        && lastPoint[1] === coords[1];

      if (!isDuplicate) {
        // Track if this is the first point (for error handling)
        const isFirstPoint = this.points.length === 0;

        // Determine if this is a foreground or background point
        // Check for shift key or middle-click via properties (if available)
        const isBackground = point.properties?.background === true;
        const label = isBackground ? 0 : 1;

        // Add point to accumulator
        this.points.push(coords);
        this.pointLabels.push(label);

        // Update icon to show point count
        this.icon.value = SegmentationPointClick.iconForPointCount(this.points.length);

        // Emit point update for visual feedback (green=foreground, red=background)
        this.emitLivePoints(frameNum);

        // Trigger segmentation prediction asynchronously
        // The prediction result will be handled by the event listener in Viewer
        // Pass isFirstPoint so error handling knows which message to show
        this.makePrediction(frameNum, isFirstPoint);
      }

      // For Point mode, we DON'T return polygon data here.
      // The polygon will be set directly on the track when prediction completes
      // via the 'prediction-ready' event handler in Viewer.vue.
      // Return done: false to keep the track in edit mode.
      return {
        data: {},
        union: [],
        unionWithoutBounds: [],
        done: false,
      };
    }

    // If we're in editing mode with non-point data and have a pending polygon, commit it
    if (mode === 'editing' && this.pendingPolygon && this.pendingPolygon.length > 2) {
      const polygon: GeoJSON.Feature<GeoJSON.Polygon> = {
        type: 'Feature',
        geometry: {
          type: 'Polygon',
          coordinates: [this.pendingPolygon],
        },
        properties: {},
      };

      const unionPolygon = this.pendingBounds
        ? SegmentationPointClick.boundsToPolygon(this.pendingBounds)
        : null;

      // Clear state after committing (deactivate resets all frames)
      this.deactivate();

      return {
        data: {
          [SegmentationPolygonKey]: [polygon],
        },
        union: unionPolygon ? [unionPolygon] : [],
        unionWithoutBounds: [],
        newSelectedKey: SegmentationPolygonKey,
        done: true,
      };
    }

    return EmptyResponse;
  }

  /**
   * Convert bounds to a GeoJSON Polygon for union calculation
   */
  private static boundsToPolygon(bounds: [number, number, number, number]): GeoJSON.Polygon {
    const [minX, minY, maxX, maxY] = bounds;
    return {
      type: 'Polygon',
      coordinates: [[
        [minX, minY],
        [maxX, minY],
        [maxX, maxY],
        [minX, maxY],
        [minX, minY],
      ]],
    };
  }

  /**
   * Handle deletion of segmentation geometry.
   * Only acts on the recipe's own polygon key; also clears all prompt state.
   */
  delete(frame: number, track: Track, key: string, type: EditAnnotationTypes): void {
    if (key === SegmentationPolygonKey && type === 'Polygon') {
      track.removeFeatureGeometry(frame, { type: 'Polygon', key: SegmentationPolygonKey });
      this.reset();
    }
  }

  /**
   * Handle point deletion (not applicable for segmentation, but required by interface)
   */
  deletePoint(
    frame: number,
    track: Track,
    idx: number,
    key: string,
    type: EditAnnotationTypes,
  ): void {
    // Segmentation doesn't support individual point deletion within a polygon
    // If needed, delete the whole polygon
    if (key === SegmentationPolygonKey && type === 'Polygon') {
      this.delete(frame, track, key, type);
    }
  }

  /**
   * Activate the segmentation recipe.
   * If an initializeServiceFn was provided, it will be called first to ensure
   * the segmentation service is ready. If initialization fails, the recipe will not
   * activate and an error event will be emitted.
   *
   * Calling activate() again while initialization is still running does not
   * start a second initialization; it only re-arms the pending activation.
   */
  activate(): void {
    if (!this.initializeServiceFn || this.serviceInitialized) {
      // No initialization function or already initialized - activate immediately
      this.completeActivation();
      return;
    }

    // Show loading state
    this.loading.value = true;
    this.icon.value = 'mdi-loading';
    // Track that we're waiting for initialization
    this.pendingActivation = true;

    if (this.initInFlight) {
      // The handlers of the running initialization honour pendingActivation
      return;
    }

    const initialization = this.initializeServiceFn();
    this.initInFlight = true;

    initialization
      .then(() => {
        this.initInFlight = false;
        this.serviceInitialized = true;
        this.loading.value = false;
        // Only complete activation if we weren't deactivated during async wait
        // (e.g., user switched to polygon mode while waiting)
        if (this.pendingActivation) {
          this.pendingActivation = false;
          this.completeActivation();
        }
      })
      .catch((error) => {
        this.initInFlight = false;
        this.pendingActivation = false;
        const errorMessage = error instanceof Error ? error.message : 'Unable to load segmentation module';
        this.bus.$emit('prediction-error', errorMessage);
        this.loading.value = false;
        this.icon.value = 'mdi-auto-fix';
        // Don't activate - stay in previous mode
      });
  }

  /**
   * Complete the activation after service is ready
   */
  private completeActivation(): void {
    this.active.value = true;
    this.reset();
    this.icon.value = 'mdi-auto-fix';

    // Emit activation event to trigger Point editing mode
    this.bus.$emit('activate', {
      editing: 'Point' as EditAnnotationTypes,
      key: SegmentationPolygonKey,
      recipeName: this.name,
    });
  }

  /**
   * Deactivate the segmentation recipe.
   * Clears all per-frame state; reset() emits the empty 'points-updated'
   * that clears the visual points layer.
   */
  deactivate(): void {
    this.active.value = false;
    // Cancel any pending activation from async init
    this.pendingActivation = false;
    this.loading.value = false;
    this.reset();
    this.icon.value = 'mdi-auto-fix';
  }

  /**
   * Check if there's a pending prediction that can be confirmed (current frame or any saved frame)
   */
  hasPendingPrediction(): boolean {
    // Check current frame
    if (SegmentationPointClick.isCommittable(this.pendingPolygon)) {
      return true;
    }
    // Check saved frames
    return Array.from(this.frameData.values()).some(
      (data) => SegmentationPointClick.isCommittable(data.polygon),
    );
  }

  /**
   * Check if there are any points accumulated (current frame or any saved frame)
   */
  hasPoints(): boolean {
    // Check current frame
    if (this.points.length > 0) {
      return true;
    }
    // Check saved frames
    return Array.from(this.frameData.values()).some(
      (data) => data.points.length > 0,
    );
  }

  /**
   * Get the number of frames with pending predictions.
   * Read-only: the current frame is counted from the live state, every
   * other frame from its saved entry.
   */
  getFrameCount(): number {
    let count = SegmentationPointClick.isCommittable(this.pendingPolygon) ? 1 : 0;
    this.frameData.forEach((data, frameNum) => {
      if (frameNum !== this.currentFrame && SegmentationPointClick.isCommittable(data.polygon)) {
        count += 1;
      }
    });
    return count;
  }

  /**
   * Public method to reset (clear) all accumulated points and pending prediction.
   * Called from UI Reset button. Clears all frames.
   */
  resetPoints(): void {
    // Emit reset event once per frame with data (plus the current frame)
    const framesToReset = new Set<number>([
      this.currentFrame,
      ...Array.from(this.frameData.keys()),
    ]);
    framesToReset.forEach((frameNum) => {
      this.bus.$emit('prediction-reset', { frameNum });
    });
    this.reset();
    this.icon.value = 'mdi-auto-fix';
  }

  /**
   * Public method to confirm the current prediction and emit it for track update.
   * Called from UI Confirm button. Confirms all frames with valid polygons.
   * Does nothing (and stays active) when no frame has a valid polygon.
   */
  confirmPrediction(): void {
    // Save current frame data to frameData map
    this.saveCurrentFrameData();

    // Collect all frames with valid polygons
    const confirmedFrames: Map<number, SegmentationPredictionResult> = new Map();

    this.frameData.forEach((data, frameNum) => {
      if (data.polygon && data.polygon.length > 2) {
        confirmedFrames.set(frameNum, {
          polygon: data.polygon,
          bounds: data.bounds,
          frameNum,
          controlPoints: data.points.length > 0 ? {
            points: data.points,
            labels: data.labels,
          } : undefined,
        });
      }
    });

    if (confirmedFrames.size === 0) {
      return;
    }

    // Emit multi-frame confirmed event
    this.bus.$emit('prediction-confirmed-multi', {
      frames: confirmedFrames,
    } as MultiFrameSegmentationResult);

    // Also emit single-frame events for backward compatibility
    // (in case only single-frame handler is registered)
    confirmedFrames.forEach((result) => {
      this.bus.$emit('prediction-confirmed', result);
    });

    // Reset state and deactivate
    this.deactivate();
  }

  /**
   * Implements the Recipe interface's confirm method.
   * Called when right-click is used to lock the annotation.
   */
  confirm(): void {
    if (!this.active.value) {
      return;
    }
    if (this.hasPendingPrediction()) {
      this.confirmPrediction();
    } else {
      // No prediction to confirm, still deactivate the recipe
      this.deactivate();
    }
  }

  /**
   * Keyboard shortcuts for segmentation recipe:
   * 's' activates, 'escape' clears all points, 'enter' confirms.
   */
  mousetrap(): Mousetrap[] {
    return [
      {
        bind: 's',
        handler: () => {
          if (!this.active.value) {
            this.activate();
          }
        },
      },
      {
        bind: 'escape',
        handler: () => {
          if (this.active.value) {
            this.resetPoints();
          }
        },
      },
      {
        bind: 'enter',
        handler: () => {
          if (this.active.value && this.hasPendingPrediction()) {
            this.confirmPrediction();
          }
        },
      },
    ];
  }
}
/** Fields present on every stereo annotation-complete notification. */
interface StereoAnnotationTarget {
  /** Camera the annotation was made on */
  camera: string;
  /** Track the annotation belongs to */
  trackId: number;
  /** Frame the annotation was made on */
  frameNum: number;
}

/**
 * Payload passed to `onStereoAnnotationComplete`, discriminated by `type`:
 * - 'line': a two-point line
 * - 'box': a bounding box as four numbers
 * - 'polygon': a polygon ring and the geometry key it is stored under
 * - 'segmentation': prompt points and their labels
 */
export type StereoAnnotationCompleteParams =
  | (StereoAnnotationTarget & {
    type: 'line';
    line: [[number, number], [number, number]];
  })
  | (StereoAnnotationTarget & {
    type: 'box';
    bounds: [number, number, number, number];
  })
  | (StereoAnnotationTarget & {
    type: 'polygon';
    polygon: [number, number][];
    key: string;
  })
  | (StereoAnnotationTarget & {
    type: 'segmentation';
    points: [number, number][];
    labels: number[];
  });
States and state transitions can be modified @@ -65,6 +79,7 @@ export default function useModeManager({ aggregateController, readonlyState, recipes, + onStereoAnnotationComplete, }: { cameraStore: CameraStore; trackFilterControls: TrackFilterControls; @@ -72,6 +87,7 @@ export default function useModeManager({ aggregateController: Ref; readonlyState: Readonly>; recipes: Recipe[]; + onStereoAnnotationComplete?: (params: StereoAnnotationCompleteParams) => void; }) { let creating = false; const { prompt } = usePrompt(); @@ -126,6 +142,15 @@ export default function useModeManager({ const selectNextGroup = (delta = 1) => selectNext(_filteredGroups.value, editingGroupId.value, delta); function selectTrack(trackId: AnnotationId | null, edit = false) { + // Reset segmentation recipe state when switching to a different track + // so stale points/mask from the previous detection don't interfere + if (trackId !== selectedTrackId.value) { + recipes.forEach((r) => { + if (r instanceof SegmentationPointClick && r.active.value) { + r.resetPoints(); + } + }); + } selectedTrackId.value = trackId; if (edit && readonlyState.value) { prompt({ title: 'Read Only Mode', text: 'This Dataset is in Read Only mode, no edits can be made.' }); @@ -157,7 +182,12 @@ export default function useModeManager({ } if (annotationModes.editing === 'rectangle') { return 'Editing'; } - return (feature.geometry?.features.filter((item) => item.geometry.type === annotationModes.editing).length ? 'Editing' : 'Creating'); + // Check if there's a geometry matching both the type AND the selectedKey + const matchingGeometry = feature.geometry?.features.filter( + (item) => item.geometry.type === annotationModes.editing + && item.properties?.key === selectedKey.value, + ); + return (matchingGeometry?.length ? 
/**
 * Write a keyframe feature onto the selected track at `frameNum`.
 *
 * Does nothing when no track is selected or the selected track does not
 * exist on the selected camera. The feature's `interpolate` flag is derived
 * from the track's own `canInterpolate(frameNum)` answer passed through
 * `_shouldInterpolate`.
 *
 * @param frameNum frame to write
 * @param bounds bounding box stored on the feature
 * @param geometry GeoJSON features stored alongside the bounds
 * @param runAfterLogic when true (default), run `newTrackSettingsAfterLogic`
 *   on the track after the feature is set
 */
function handleSetTrackFeature(
  frameNum: number,
  bounds: RectBounds,
  geometry: GeoJSON.Feature[],
  runAfterLogic: boolean = true,
) {
  const activeTrackId = selectedTrackId.value;
  if (activeTrackId === null) {
    return;
  }

  const targetTrack = cameraStore.getPossibleTrack(activeTrackId, selectedCamera.value);
  if (!targetTrack) {
    return;
  }

  const interpolation = targetTrack.canInterpolate(frameNum);
  const keyframeFeature = {
    frame: frameNum,
    flick: 0,
    bounds,
    keyframe: true,
    interpolate: _shouldInterpolate(interpolation.interpolate),
  };
  targetTrack.setFeature(keyframeFeature, geometry);

  if (runAfterLogic) {
    newTrackSettingsAfterLogic(targetTrack);
  }
}
- if (update.newSelectedKey) { + // Use !== undefined to allow setting key to empty string + if (update.newSelectedKey !== undefined) { selectedKey.value = update.newSelectedKey; } if (update.newType) { @@ -562,6 +636,44 @@ export default function useModeManager({ // Or none of the recieps reported that they were unfinished. if (eventType === 'editing' || update.done.every((v) => v !== false)) { newTrackSettingsAfterLogic(track); + + // Stereo: emit line or polygon annotation complete + if (onStereoAnnotationComplete && clientSettings.stereoSettings.interactiveModeEnabled + && selectedTrackId.value !== null) { + // Check for LineString with exactly 2 points (line annotation) + if (data.geometry.type === 'LineString' + && data.geometry.coordinates.length === 2) { + const coords = data.geometry.coordinates as [number, number][]; + onStereoAnnotationComplete({ + type: 'line', + camera: selectedCamera.value, + trackId: selectedTrackId.value as number, + frameNum, + line: [coords[0], coords[1]], + }); + } + // Check for completed Polygon (done=true from recipes) + if (update.done.some((v) => v === true)) { + // Look for polygon geometry in the update record + Object.entries(update.geoJsonFeatureRecord).forEach(([geoKey, features]) => { + features.forEach((feat) => { + if (feat.geometry.type === 'Polygon' + && feat.geometry.coordinates[0] + && feat.geometry.coordinates[0].length >= 3) { + const polyCoords = feat.geometry.coordinates[0] as [number, number][]; + onStereoAnnotationComplete({ + type: 'polygon', + camera: selectedCamera.value, + trackId: selectedTrackId.value as number, + frameNum, + polygon: polyCoords, + key: geoKey, + }); + } + }); + }); + } + } } } } else { @@ -600,11 +712,32 @@ export default function useModeManager({ const track = cameraStore.getPossibleTrack(selectedTrackId.value, selectedCamera.value); if (track) { const { frame } = aggregateController.value; + const frameNum = frame.value; recipes.forEach((r) => { if (r.active.value) { - 
r.delete(frame.value, track, selectedKey.value, annotationModes.editing); + r.delete(frameNum, track, selectedKey.value, annotationModes.editing); } }); + + // After deleting a polygon, recalculate bounds from remaining polygons + if (annotationModes.editing === 'Polygon') { + const remainingPolygons = track.getPolygonFeatures(frameNum); + if (remainingPolygons.length > 0) { + // Recalculate bounds from remaining polygons + const polygonGeometries = remainingPolygons.map((p) => p.geometry); + const newBounds = updateBounds(undefined, [], polygonGeometries); + + // Get current feature and update with new bounds + const [currentFeature] = track.getFeature(frameNum); + if (currentFeature && newBounds) { + track.setFeature({ + ...currentFeature, + bounds: newBounds, + }); + } + } + } + _nudgeEditingCanary(); } } @@ -692,6 +825,20 @@ export default function useModeManager({ if (track) { seekNearest(track); const editing = trackId === selectedTrackId.value ? (!editingTrack.value) : true; + // When in LineString mode, set the selected key so EditAnnotationLayer + // can find the geometry (lines are stored with a recipe key like 'HeadTails'). + if (editing) { + const { frame } = aggregateController.value; + const [feature] = track.getFeature(frame.value); + if (feature?.geometry?.features?.length) { + if (annotationModes.editing === 'LineString') { + const lineFeature = feature.geometry.features.find( + (f) => f.geometry.type === 'LineString', + ); + selectedKey.value = lineFeature?.properties?.key || ''; + } + } + } handleSelectTrack(trackId, editing); } else if (cameraStore.getAnyTrack(trackId) !== undefined) { //track exists in other cameras we create in the current map using override @@ -738,6 +885,28 @@ export default function useModeManager({ } } + /** + * Confirm the current annotation for any active recipe that supports it. + * Called when right-click is used in Point mode to lock the annotation. 
+ */ + function handleConfirmRecipe() { + const activeSegRecipes: SegmentationPointClick[] = []; + recipes.forEach((r) => { + if (r.active.value && r.confirm) { + if (r instanceof SegmentationPointClick) { + activeSegRecipes.push(r); + } + r.confirm(); + } + }); + // Clear saved state - the confirmed polygons are now permanent + preSegmentationFeatures.clear(); + // Exit editing mode and deselect to unhighlight the track + selectTrack(null, false); + // Re-activate segmentation recipe so it's ready for the next detection + activeSegRecipes.forEach((r) => r.activate()); + } + /** * Merge: Enabled whenever there are candidates in the merge list */ @@ -832,11 +1001,307 @@ export default function useModeManager({ handleGroupEdit(previousOrNext); } + /** + * Save original track feature state before segmentation prediction modifies it. + * Used by handleSegmentationReset to restore the detection to its pre-segmentation state. + */ + const preSegmentationFeatures = new Map(); + + /** + * Segmentation prompt points for visualization (green=foreground, red=background) + */ + const segmentationPoints: Ref<{ points: [number, number][]; labels: number[]; frameNum: number }> = ref({ + points: [], + labels: [], + frameNum: -1, + }); + + /** + * Handle segmentation points update - update visual display of prompt points + */ + function handleSegmentationPointsUpdated(data: { points: [number, number][]; labels: number[]; frameNum: number }) { + segmentationPoints.value = { + points: [...data.points], + labels: [...data.labels], + frameNum: data.frameNum, + }; + } + + /** + * Handle segmentation prediction ready - update visual display with pending polygon/mask. + * This is called when the segmentation model returns a prediction. + * During editing, we show the polygon preview but don't commit it yet. 
+ */ + function handleSegmentationPredictionReady(result: SegmentationPredictionResult) { + if (selectedTrackId.value === null) { + return; + } + + const { frame } = aggregateController.value; + const track = cameraStore.getPossibleTrack(selectedTrackId.value, selectedCamera.value); + if (!track) { + return; + } + + // Create polygon geometry from prediction result + if (result.polygon && result.polygon.length >= 3) { + const bounds = result.bounds || [ + Math.min(...result.polygon.map((p) => p[0])), + Math.min(...result.polygon.map((p) => p[1])), + Math.max(...result.polygon.map((p) => p[0])), + Math.max(...result.polygon.map((p) => p[1])), + ] as [number, number, number, number]; + + // Close polygon if not already closed + const closedPolygon = [...result.polygon]; + const first = closedPolygon[0]; + const last = closedPolygon[closedPolygon.length - 1]; + if (first[0] !== last[0] || first[1] !== last[1]) { + closedPolygon.push([...first] as [number, number]); + } + + const polygonGeometry: GeoJSON.Feature[] = [{ + type: 'Feature', + geometry: { + type: 'Polygon', + coordinates: [closedPolygon], + }, + properties: { key: '' }, + }]; + + // Update the track's feature with the preview polygon + // Use frame number from the result if provided, otherwise current frame + const targetFrame = result.frameNum ?? frame.value; + const { interpolate } = track.canInterpolate(targetFrame); + + // Save original feature state before first prediction modifies the track + if (!preSegmentationFeatures.has(targetFrame)) { + const [existingFeature] = track.getFeature(targetFrame); + if (existingFeature) { + preSegmentationFeatures.set(targetFrame, { + hadFeature: true, + bounds: existingFeature.bounds + ? [...existingFeature.bounds] as RectBounds : undefined, + interpolate: existingFeature.interpolate, + geometryFeatures: existingFeature.geometry?.features + ? 
JSON.parse(JSON.stringify(existingFeature.geometry.features)) + : undefined, + }); + } else { + preSegmentationFeatures.set(targetFrame, { hadFeature: false }); + } + } + + track.setFeature({ + frame: targetFrame, + flick: 0, + bounds, + keyframe: true, + interpolate, + }, polygonGeometry); + + _nudgeEditingCanary(); + } + } + + /** + * Handle segmentation prediction confirmed - commit the polygon to the track. + * This is called when the user confirms the segmentation (right-click or Enter). + */ + function handleSegmentationPredictionConfirmed(result: SegmentationPredictionResult) { + handleSegmentationPredictionReady(result); + } + + /** + * Handle multi-frame segmentation confirmation. + * Commits polygons for all frames that have valid predictions. + */ + function handleSegmentationConfirmedMulti(result: MultiFrameSegmentationResult) { + if (selectedTrackId.value === null) { + return; + } + + const track = cameraStore.getPossibleTrack(selectedTrackId.value, selectedCamera.value); + if (!track) { + return; + } + + // Apply each frame's prediction to the track + result.frames.forEach((frameResult, frameNum) => { + if (frameResult.polygon && frameResult.polygon.length >= 3) { + const bounds = frameResult.bounds || [ + Math.min(...frameResult.polygon.map((p) => p[0])), + Math.min(...frameResult.polygon.map((p) => p[1])), + Math.max(...frameResult.polygon.map((p) => p[0])), + Math.max(...frameResult.polygon.map((p) => p[1])), + ] as [number, number, number, number]; + + // Close polygon if not already closed + const closedPolygon = [...frameResult.polygon]; + const first = closedPolygon[0]; + const last = closedPolygon[closedPolygon.length - 1]; + if (first[0] !== last[0] || first[1] !== last[1]) { + closedPolygon.push([...first] as [number, number]); + } + + const polygonGeometry: GeoJSON.Feature[] = [{ + type: 'Feature', + geometry: { + type: 'Polygon', + coordinates: [closedPolygon], + }, + properties: { key: '' }, + }]; + + const { interpolate } = 
track.canInterpolate(frameNum); + + track.setFeature({ + frame: frameNum, + flick: 0, + bounds, + keyframe: true, + interpolate, + }, polygonGeometry); + + // Stereo: emit segmentation annotation complete for each frame + if (onStereoAnnotationComplete && clientSettings.stereoSettings.interactiveModeEnabled + && selectedTrackId.value !== null && frameResult.controlPoints) { + onStereoAnnotationComplete({ + type: 'segmentation', + camera: selectedCamera.value, + trackId: selectedTrackId.value as number, + frameNum, + points: frameResult.controlPoints.points, + labels: frameResult.controlPoints.labels, + }); + } + } + }); + + _nudgeEditingCanary(); + } + + /** + * Handle segmentation prediction error - show error dialog to user + */ + function handleSegmentationPredictionError(errorMessage: string) { + prompt({ + title: 'Segmentation Error', + text: [errorMessage], + }); + } + + /** + * Handle segmentation reset - restore detection to its pre-segmentation state. + * Called when the user presses the Reset button, which triggers resetPoints() + * on the recipe, which emits 'prediction-reset' for each frame. + */ + function handleSegmentationReset(data: { frameNum: number }) { + if (selectedTrackId.value === null) return; + const track = cameraStore.getPossibleTrack(selectedTrackId.value, selectedCamera.value); + if (!track) return; + + const saved = preSegmentationFeatures.get(data.frameNum); + if (!saved) return; + + if (!saved.hadFeature) { + track.deleteFeature(data.frameNum); + } else { + // Remove the segmentation-derived polygon + track.removeFeatureGeometry(data.frameNum, { key: '', type: 'Polygon' }); + // Find original default polygon if there was one + const origDefaultPolygon = saved.geometryFeatures?.find( + (f) => f.geometry.type === 'Polygon' && (f.properties?.key ?? 
'') === '', + ); + // Restore original bounds and geometry + track.setFeature({ + frame: data.frameNum, + flick: 0, + bounds: saved.bounds, + keyframe: true, + interpolate: saved.interpolate ?? false, + }, origDefaultPolygon + ? [origDefaultPolygon as GeoJSON.Feature] + : []); + } + + preSegmentationFeatures.delete(data.frameNum); + _nudgeEditingCanary(); + } + + /** + * Set up polygon recipe for adding a hole to an existing polygon. + * The recipe emits an activate event that triggers creation mode. + */ + function handleAddHole() { + if (selectedTrackId.value === null) return; + + const polygonRecipe = recipes.find((r) => r.name === 'PolygonBase'); + if (polygonRecipe && 'setAddingHole' in polygonRecipe) { + (polygonRecipe as { setAddingHole: () => void }).setAddingHole(); + } + } + + /** + * Set up polygon recipe for adding a new separate polygon. + */ + function handleAddPolygon() { + if (selectedTrackId.value === null) return; + + const polygonRecipe = recipes.find((r) => r.name === 'PolygonBase'); + if (polygonRecipe && 'setAddingPolygon' in polygonRecipe) { + const track = cameraStore.getPossibleTrack(selectedTrackId.value, selectedCamera.value); + if (track) { + const { frame } = aggregateController.value; + const newKey = track.getNextPolygonKey(frame.value); + (polygonRecipe as { setAddingPolygon: (key: string) => void }).setAddingPolygon(newKey); + } + } + } + + /** + * Cancel any in-progress creation mode (hole or polygon addition). 
+ */ + function handleCancelCreation() { + const polygonRecipe = recipes.find((r) => r.name === 'PolygonBase'); + if (polygonRecipe && 'resetAddingMode' in polygonRecipe) { + (polygonRecipe as { resetAddingMode: () => void }).resetAddingMode(); + } + } + /* Subscribe to recipe activation events */ recipes.forEach((r) => r.bus.$on('activate', handleSetAnnotationState)); + + /* Subscribe to segmentation recipe events */ + recipes.forEach((r) => { + if (r instanceof SegmentationPointClick) { + r.bus.$on('points-updated', handleSegmentationPointsUpdated); + r.bus.$on('prediction-ready', handleSegmentationPredictionReady); + r.bus.$on('prediction-confirmed', handleSegmentationPredictionConfirmed); + r.bus.$on('prediction-confirmed-multi', handleSegmentationConfirmedMulti); + r.bus.$on('prediction-error', handleSegmentationPredictionError); + r.bus.$on('prediction-reset', handleSegmentationReset); + } + }); + /* Unsubscribe before unmount */ onBeforeUnmount(() => { recipes.forEach((r) => r.bus.$off('activate', handleSetAnnotationState)); + recipes.forEach((r) => { + if (r instanceof SegmentationPointClick) { + r.bus.$off('points-updated', handleSegmentationPointsUpdated); + r.bus.$off('prediction-ready', handleSegmentationPredictionReady); + r.bus.$off('prediction-confirmed', handleSegmentationPredictionConfirmed); + r.bus.$off('prediction-confirmed-multi', handleSegmentationConfirmedMulti); + r.bus.$off('prediction-error', handleSegmentationPredictionError); + r.bus.$off('prediction-reset', handleSegmentationReset); + } + }); }); return { @@ -856,8 +1321,10 @@ export default function useModeManager({ selectedKey, selectedCamera, selectNextTrack, + segmentationPoints, handler: { commitMerge: handleCommitMerge, + confirmRecipe: handleConfirmRecipe, groupAdd: handleAddGroup, deleteSelectedTracks: handleDeleteSelectedTracks, groupEdit: handleGroupEdit, @@ -868,6 +1335,7 @@ export default function useModeManager({ trackSeek: handleTrackClick, trackSelect: handleSelectTrack, 
trackSelectNext: handleSelectNext, + setTrackFeature: handleSetTrackFeature, updateRectBounds: handleUpdateRectBounds, updateGeoJSON: handleUpdateGeoJSON, removeTrack: handleRemoveTrack, @@ -880,6 +1348,9 @@ export default function useModeManager({ startLinking: handleStartLinking, stopLinking: handleStopLinking, seekFrame, + addHole: handleAddHole, + addPolygon: handleAddPolygon, + cancelCreation: handleCancelCreation, }, }; } diff --git a/client/platform/desktop/backend/ipcService.ts b/client/platform/desktop/backend/ipcService.ts index ec878fe59..a89b27baf 100644 --- a/client/platform/desktop/backend/ipcService.ts +++ b/client/platform/desktop/backend/ipcService.ts @@ -24,6 +24,19 @@ import * as common from './native/common'; import beginMultiCamImport from './native/multiCamImport'; import settings from './state/settings'; import { listen } from './server'; +import { + getSegmentationServiceManager, + shutdownSegmentationService, + SegmentationPredictRequest, +} from './native/segmentation'; +import { + getStereoServiceManager, + shutdownStereoService, + StereoCalibration, + StereoSetFrameRequest, + StereoTransferLineRequest, + StereoTransferPointsRequest, +} from './native/stereo'; // defaults to linux if win32 doesn't exist const currentPlatform = OS.platform() === 'win32' ? 
win32 : linux; @@ -228,4 +241,164 @@ export default function register() { }; return currentPlatform.train(settings.get(), args, updater); }); + + /** + * Interactive Segmentation Service + */ + + ipcMain.handle('segmentation-initialize', async () => { + const segService = getSegmentationServiceManager(); + await segService.initialize(settings.get()); + return { success: true }; + }); + + ipcMain.handle('segmentation-predict', async (_, args: SegmentationPredictRequest) => { + const segService = getSegmentationServiceManager(); + + // Auto-initialize if not ready + if (!segService.isReady()) { + await segService.initialize(settings.get()); + } + + const response = await segService.predict(args); + return response; + }); + + ipcMain.handle('segmentation-set-image', async (_, imagePath: string) => { + const segService = getSegmentationServiceManager(); + + if (!segService.isReady()) { + await segService.initialize(settings.get()); + } + + await segService.setImage(imagePath); + return { success: true }; + }); + + ipcMain.handle('segmentation-clear-image', async () => { + const segService = getSegmentationServiceManager(); + + if (segService.isReady()) { + await segService.clearImage(); + } + return { success: true }; + }); + + ipcMain.handle('segmentation-shutdown', async () => { + await shutdownSegmentationService(); + return { success: true }; + }); + + ipcMain.handle('segmentation-is-ready', () => { + const segService = getSegmentationServiceManager(); + return { ready: segService.isReady() }; + }); + + ipcMain.handle('segmentation-text-query', async (_, args: { + imagePath: string; + text: string; + boxThreshold?: number; + maxDetections?: number; + boxes?: [number, number, number, number][]; + points?: [number, number][]; + pointLabels?: number[]; + }) => { + const segService = getSegmentationServiceManager(); + + // Auto-initialize if not ready + if (!segService.isReady()) { + await segService.initialize(settings.get()); + } + + const response = await 
segService.textQuery(args); + return response; + }); + + ipcMain.handle('segmentation-refine', async (_, args: { + imagePath: string; + detections: { + box: [number, number, number, number]; + polygon?: [number, number][]; + score: number; + label: string; + }[]; + points?: [number, number][]; + pointLabels?: number[]; + refineMasks?: boolean; + }) => { + const segService = getSegmentationServiceManager(); + + // Auto-initialize if not ready + if (!segService.isReady()) { + await segService.initialize(settings.get()); + } + + const response = await segService.refineDetections(args); + return response; + }); + + /** + * Interactive Stereo Service + */ + + ipcMain.handle('stereo-enable', async (event, args?: { calibration?: StereoCalibration }) => { + const stereoService = getStereoServiceManager(); + + // Forward async disparity events to the renderer + stereoService.on('disparity_ready', (data) => { + event.sender.send('stereo-disparity-ready', data); + }); + stereoService.on('disparity_error', (data) => { + event.sender.send('stereo-disparity-error', data); + }); + + const result = await stereoService.enable(settings.get(), args?.calibration); + return result; + }); + + ipcMain.handle('stereo-disable', async () => { + const stereoService = getStereoServiceManager(); + const result = await stereoService.disable(); + return result; + }); + + ipcMain.handle('stereo-set-frame', async (_, args: StereoSetFrameRequest) => { + const stereoService = getStereoServiceManager(); + const result = await stereoService.setFrame(args); + return result; + }); + + ipcMain.handle('stereo-get-status', async () => { + const stereoService = getStereoServiceManager(); + const result = await stereoService.getStatus(); + return result; + }); + + ipcMain.handle('stereo-transfer-line', async (_, args: StereoTransferLineRequest) => { + const stereoService = getStereoServiceManager(); + const result = await stereoService.transferLine(args); + return result; + }); + + 
ipcMain.handle('stereo-transfer-points', async (_, args: StereoTransferPointsRequest) => { + const stereoService = getStereoServiceManager(); + const result = await stereoService.transferPoints(args); + return result; + }); + + ipcMain.handle('stereo-set-calibration', async (_, args: { calibration: StereoCalibration }) => { + const stereoService = getStereoServiceManager(); + await stereoService.setCalibration(args.calibration); + return { success: true }; + }); + + ipcMain.handle('stereo-shutdown', async () => { + await shutdownStereoService(); + return { success: true }; + }); + + ipcMain.handle('stereo-is-enabled', () => { + const stereoService = getStereoServiceManager(); + return { enabled: stereoService.isEnabled() }; + }); } diff --git a/client/platform/desktop/backend/native/segmentation.ts b/client/platform/desktop/backend/native/segmentation.ts new file mode 100644 index 000000000..f171e7134 --- /dev/null +++ b/client/platform/desktop/backend/native/segmentation.ts @@ -0,0 +1,533 @@ +/** + * Interactive Segmentation Service Manager for Desktop + * + * Manages a persistent Python subprocess that keeps the segmentation model loaded in memory + * for fast interactive segmentation from point clicks. 
+ */ + +import { spawn, ChildProcess } from 'child_process'; +import npath from 'path'; +import readline from 'readline'; +import { EventEmitter } from 'events'; +import { Settings } from 'platform/desktop/constants'; +import { observeChild } from './processManager'; + +/** Error message shown to users when segmentation fails to load */ +export const SEGMENTATION_LOAD_ERROR_MESSAGE = 'Unable to load segmentation module'; + +/** Request to the segmentation service */ +export interface SegmentationInternalPredictRequest { + /** Unique request ID for correlation */ + id: string; + /** Path to the image file */ + imagePath: string; + /** Point coordinates as [x, y] pairs */ + points: [number, number][]; + /** Point labels: 1 for foreground, 0 for background */ + pointLabels: number[]; + /** Optional low-res mask from previous prediction for refinement */ + maskInput?: number[][]; + /** Whether to return multiple mask options */ + multimaskOutput?: boolean; +} + +/** Response from the segmentation service */ +export interface SegmentationInternalPredictResponse { + /** Request ID for correlation */ + id: string; + /** Whether the prediction succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Polygon coordinates as [x, y] pairs */ + polygon?: [number, number][]; + /** Bounding box [x_min, y_min, x_max, y_max] */ + bounds?: [number, number, number, number]; + /** Quality score from segmentation model */ + score?: number; + /** Low-res mask for subsequent refinement */ + lowResMask?: number[][]; + /** Mask dimensions [height, width] */ + maskShape?: [number, number]; +} + +interface PendingRequest { + resolve: (response: SegmentationInternalPredictResponse) => void; + reject: (error: Error) => void; + timeout: NodeJS.Timeout; +} + +/** + * Segmentation Service Manager + * + * Manages a persistent Python subprocess for interactive segmentation inference. + * The service is started on-demand and kept alive for the session. 
+ */ +export class SegmentationServiceManager extends EventEmitter { + private process: ChildProcess | null = null; + + private readline: readline.Interface | null = null; + + private pendingRequests: Map = new Map(); + + private isInitializing = false; + + private initPromise: Promise | null = null; + + private settings: Settings | null = null; + + private requestCounter = 0; + + private readonly requestTimeoutMs = 30000; // 30 second timeout + + /** + * Initialize the segmentation service with the given settings. + * This spawns the Python process and loads the segmentation model. + * The model stays loaded for the entire session to avoid reload delays. + */ + async initialize(settings: Settings): Promise { + // If already initialized and running, return immediately + // This keeps the model loaded between activations + if (this.isReady()) { + console.log('[Segmentation] Service already running, skipping initialization'); + return undefined; + } + + // If currently initializing, wait for it + if (this.isInitializing && this.initPromise) { + await this.initPromise; + return undefined; + } + + this.isInitializing = true; + this.settings = settings; + + this.initPromise = this._doInitialize(settings); + try { + await this.initPromise; + } finally { + this.isInitializing = false; + } + return undefined; + } + + private async _doInitialize(settings: Settings): Promise { + // Clean up any existing process + await this.shutdown(); + + return new Promise((resolve, reject) => { + const viameSetup = npath.join(settings.viamePath, 'setup_viame.sh'); + + const configPath = npath.join(settings.viamePath, 'configs', 'pipelines', 'interactive_segmenter_default.conf'); + + // Build the command to run the interactive segmentation service + const command = [ + `. 
"${viameSetup}"`, + '&&', + 'python -m viame.core.interactive_segmentation', + `--config "${configPath}"`, + ].join(' '); + + console.log('[Segmentation] Starting interactive segmentation service...'); + console.log(`[Segmentation] Command: ${command}`); + + this.process = observeChild(spawn(command, { + shell: '/bin/bash', + stdio: ['pipe', 'pipe', 'pipe'], + })); + + // Set up readline for stdout (JSON responses) + if (this.process.stdout) { + this.readline = readline.createInterface({ + input: this.process.stdout, + crlfDelay: Infinity, + }); + + this.readline.on('line', (line) => { + this.handleResponse(line); + }); + } + + // Log stderr (diagnostic messages) + if (this.process.stderr) { + this.process.stderr.on('data', (data: Buffer) => { + const message = data.toString().trim(); + if (message) { + console.log(`[Segmentation] ${message}`); + // Detect successful initialization + if (message.includes('model initialized successfully')) { + resolve(); + } + } + }); + } + + // Handle process exit + this.process.on('exit', (code, signal) => { + console.log(`[Segmentation] Process exited with code ${code}, signal ${signal}`); + this.cleanup(); + if (this.isInitializing) { + reject(new Error(SEGMENTATION_LOAD_ERROR_MESSAGE)); + } + }); + + this.process.on('error', (err) => { + console.error('[Segmentation] Process error:', err); + this.cleanup(); + if (this.isInitializing) { + reject(new Error(SEGMENTATION_LOAD_ERROR_MESSAGE)); + } + }); + + // Timeout for initialization (60 seconds for model loading) + setTimeout(() => { + if (this.isInitializing) { + reject(new Error(SEGMENTATION_LOAD_ERROR_MESSAGE)); + } + }, 60000); + }); + } + + /** + * Check if the service is ready for requests + */ + isReady(): boolean { + return this.process !== null && this.process.exitCode === null; + } + + /** + * Generate a unique request ID + */ + private generateRequestId(): string { + this.requestCounter += 1; + return `req_${Date.now()}_${this.requestCounter}`; + } + + /** + * Handle 
a response line from the segmentation service + */ + private handleResponse(line: string): void { + try { + const response = JSON.parse(line) as SegmentationInternalPredictResponse; + const pending = this.pendingRequests.get(response.id); + + if (pending) { + clearTimeout(pending.timeout); + this.pendingRequests.delete(response.id); + pending.resolve(response); + } else { + console.warn(`[Segmentation] Received response for unknown request: ${response.id}`); + } + } catch (err) { + console.error('[Segmentation] Failed to parse response:', line, err); + } + } + + /** + * Send a predict request to the segmentation service + */ + async predict(request: Omit): Promise { + if (!this.isReady()) { + throw new Error('Segmentation service is not ready. Call initialize() first.'); + } + + if (!this.process?.stdin) { + throw new Error('Segmentation service stdin is not available'); + } + + if (!request.imagePath) { + throw new Error('imagePath is required for segmentation prediction'); + } + + const id = this.generateRequestId(); + const fullRequest = { + id, + command: 'predict', + image_path: request.imagePath, + points: request.points, + point_labels: request.pointLabels, + mask_input: request.maskInput, + multimask_output: request.multimaskOutput ?? 
false, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error(`Segmentation predict request timed out after ${this.requestTimeoutMs}ms`)); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { resolve, reject, timeout }); + + // Send the request as JSON line + const requestLine = `${JSON.stringify(fullRequest)}\n`; + this.process!.stdin!.write(requestLine, (err) => { + if (err) { + clearTimeout(timeout); + this.pendingRequests.delete(id); + reject(err); + } + }); + }); + } + + /** + * Pre-load an image for multiple predictions (optional optimization) + */ + async setImage(imagePath: string): Promise { + if (!this.isReady()) { + throw new Error('Segmentation service is not ready'); + } + + const id = this.generateRequestId(); + const request = { + id, + command: 'set_image', + image_path: imagePath, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error('set_image request timed out')); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { + resolve: () => resolve(), + reject, + timeout, + }); + + const requestLine = `${JSON.stringify(request)}\n`; + this.process!.stdin!.write(requestLine); + }); + } + + /** + * Clear the cached image + */ + async clearImage(): Promise { + if (!this.isReady()) { + return undefined; // Nothing to clear + } + + const id = this.generateRequestId(); + const request = { + id, + command: 'clear_image', + }; + + await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error('clear_image request timed out')); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { + resolve: () => resolve(), + reject, + timeout, + }); + + const requestLine = `${JSON.stringify(request)}\n`; + this.process!.stdin!.write(requestLine); + }); + return undefined; + } + + /** + * Send a text query 
request for open-vocabulary detection/segmentation + */ + async textQuery(request: { + imagePath: string; + text: string; + boxThreshold?: number; + maxDetections?: number; + boxes?: [number, number, number, number][]; + points?: [number, number][]; + pointLabels?: number[]; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + }): Promise { + if (!this.isReady()) { + throw new Error('Segmentation service is not ready. Call initialize() first.'); + } + + if (!this.process?.stdin) { + throw new Error('Segmentation service stdin is not available'); + } + + const id = this.generateRequestId(); + const fullRequest = { + id, + command: 'text_query', + image_path: request.imagePath, + text: request.text, + box_threshold: request.boxThreshold ?? 0.3, + max_detections: request.maxDetections ?? 10, + boxes: request.boxes, + points: request.points, + point_labels: request.pointLabels, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error(`Text query request timed out after ${this.requestTimeoutMs}ms`)); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { resolve, reject, timeout }); + + const requestLine = `${JSON.stringify(fullRequest)}\n`; + this.process!.stdin!.write(requestLine, (err) => { + if (err) { + clearTimeout(timeout); + this.pendingRequests.delete(id); + reject(err); + } + }); + }); + } + + /** + * Refine existing detections with additional prompts + */ + async refineDetections(request: { + imagePath: string; + detections: { + box: [number, number, number, number]; + polygon?: [number, number][]; + score: number; + label: string; + }[]; + points?: [number, number][]; + pointLabels?: number[]; + refineMasks?: boolean; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + }): Promise { + if (!this.isReady()) { + throw new Error('Segmentation service is not ready. 
Call initialize() first.'); + } + + if (!this.process?.stdin) { + throw new Error('Segmentation service stdin is not available'); + } + + const id = this.generateRequestId(); + const fullRequest = { + id, + command: 'refine', + image_path: request.imagePath, + detections: request.detections, + points: request.points, + point_labels: request.pointLabels, + refine_masks: request.refineMasks ?? true, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error(`Refine request timed out after ${this.requestTimeoutMs}ms`)); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { resolve, reject, timeout }); + + const requestLine = `${JSON.stringify(fullRequest)}\n`; + this.process!.stdin!.write(requestLine, (err) => { + if (err) { + clearTimeout(timeout); + this.pendingRequests.delete(id); + reject(err); + } + }); + }); + } + + /** + * Clean up internal state after process exits + */ + private cleanup(): void { + // Reject all pending requests + this.pendingRequests.forEach((pending) => { + clearTimeout(pending.timeout); + pending.reject(new Error('Segmentation service terminated')); + }); + this.pendingRequests.clear(); + + if (this.readline) { + this.readline.close(); + this.readline = null; + } + + this.process = null; + this.emit('shutdown'); + } + + /** + * Gracefully shutdown the segmentation service + */ + async shutdown(): Promise { + if (!this.process) { + return undefined; + } + + console.log('[Segmentation] Shutting down segmentation service...'); + + await new Promise((resolve) => { + // Send shutdown command + const reqId = this.generateRequestId(); + const request = { id: reqId, command: 'shutdown' }; + + if (this.process?.stdin?.writable) { + this.process.stdin.write(`${JSON.stringify(request)}\n`); + } + + // Wait for process to exit or timeout + const timeoutId = setTimeout(() => { + if (this.process) { + console.log('[Segmentation] Force killing segmentation 
service...'); + this.process.kill('SIGTERM'); + } + this.cleanup(); + resolve(); + }, 5000); + + if (this.process) { + this.process.once('exit', () => { + clearTimeout(timeoutId); + this.cleanup(); + resolve(); + }); + } else { + clearTimeout(timeoutId); + resolve(); + } + }); + return undefined; + } +} + +// Singleton instance +let serviceManager: SegmentationServiceManager | null = null; + +/** + * Get the segmentation service manager singleton + */ +export function getSegmentationServiceManager(): SegmentationServiceManager { + if (!serviceManager) { + serviceManager = new SegmentationServiceManager(); + } + return serviceManager; +} + +/** + * Shutdown the segmentation service (call on app close) + */ +export async function shutdownSegmentationService(): Promise { + if (serviceManager) { + await serviceManager.shutdown(); + serviceManager = null; + } +} + +// Export type aliases for generic naming +export type SegmentationPredictRequest = Omit; +export type SegmentationPredictResponse = SegmentationInternalPredictResponse; diff --git a/client/platform/desktop/backend/native/stereo.ts b/client/platform/desktop/backend/native/stereo.ts new file mode 100644 index 000000000..95b0d8bdd --- /dev/null +++ b/client/platform/desktop/backend/native/stereo.ts @@ -0,0 +1,666 @@ +/** + * Interactive Stereo Service Manager for Desktop + * + * Manages a persistent Python subprocess that keeps the interactive stereo model loaded + * for fast interactive stereo annotation. When enabled, the service proactively computes + * disparity maps when the user navigates to new frames, so annotation transfers are instant. 
+ */ + +import { spawn, ChildProcess } from 'child_process'; +import npath from 'path'; +import readline from 'readline'; +import { EventEmitter } from 'events'; +import { Settings } from 'platform/desktop/constants'; +import { observeChild } from './processManager'; + +/** Error message shown to users when stereo service fails to load */ +export const STEREO_LOAD_ERROR_MESSAGE = 'Unable to load stereo service'; + +/** Calibration data for stereo depth computation */ +export interface StereoCalibration { + fx_left: number; + fy_left?: number; + cx_left: number; + cy_left: number; + T: [number, number, number]; +} + +/** Request to set the current stereo frame */ +export interface StereoSetFrameRequest { + leftImagePath: string; + rightImagePath: string; +} + +/** Response from set frame request */ +export interface StereoSetFrameResponse { + id: string; + success: boolean; + error?: string; + disparityReady: boolean; + message?: string; +} + +/** Request to transfer a line from left to right image */ +export interface StereoTransferLineRequest { + line: [[number, number], [number, number]]; +} + +/** Response from transfer line request */ +export interface StereoTransferLineResponse { + id: string; + success: boolean; + error?: string; + transferredLine?: [[number, number], [number, number]]; + originalLine?: [[number, number], [number, number]]; + depthInfo?: { + depthPoint1: number | null; + depthPoint2: number | null; + disparityPoint1: number; + disparityPoint2: number; + }; +} + +/** Request to transfer multiple points */ +export interface StereoTransferPointsRequest { + points: [number, number][]; +} + +/** Response from transfer points request */ +export interface StereoTransferPointsResponse { + id: string; + success: boolean; + error?: string; + transferredPoints?: [number, number][]; + originalPoints?: [number, number][]; + disparityValues?: number[]; +} + +/** Status response from the stereo service */ +export interface StereoStatusResponse { + id: 
string; + success: boolean; + enabled: boolean; + disparityReady: boolean; + computing?: boolean; + currentLeftPath?: string; + currentRightPath?: string; + hasCalibration: boolean; +} + +// Generic response type for internal use +interface StereoResponse { + id: string; + success: boolean; + error?: string; + message?: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + [key: string]: any; +} + +interface PendingRequest { + resolve: (response: StereoResponse) => void; + reject: (error: Error) => void; + timeout: NodeJS.Timeout; +} + +/** + * Stereo Service Manager + * + * Manages a persistent Python subprocess for interactive stereo disparity computation. + * The service is started when enabled and kept alive until disabled. + */ +export class StereoServiceManager extends EventEmitter { + private process: ChildProcess | null = null; + + private readline: readline.Interface | null = null; + + private pendingRequests: Map = new Map(); + + private isInitializing = false; + + private initPromise: Promise | null = null; + + private settings: Settings | null = null; + + private requestCounter = 0; + + private enabled = false; + + private readonly requestTimeoutMs = 60000; // 60 second timeout (disparity can take time) + + /** + * Enable the stereo service with the given settings and calibration. + * This spawns the Python process and loads the interactive stereo model. 
+ */ + async enable(settings: Settings, calibration?: StereoCalibration): Promise<{ success: boolean; error?: string }> { + // If already enabled and running, just update calibration if provided + if (this.enabled && this.isReady()) { + console.log('[Stereo] Service already running'); + if (calibration) { + await this.setCalibration(calibration); + } + return { success: true }; + } + + // If currently initializing, wait for it + if (this.isInitializing && this.initPromise) { + await this.initPromise; + return { success: true }; + } + + this.isInitializing = true; + this.settings = settings; + + try { + this.initPromise = this._doInitialize(settings, calibration); + await this.initPromise; + this.enabled = true; + return { success: true }; + } catch (err) { + const errorMessage = err instanceof Error ? err.message : String(err); + return { success: false, error: errorMessage }; + } finally { + this.isInitializing = false; + } + } + + private async _doInitialize(settings: Settings, calibration?: StereoCalibration): Promise { + // Clean up any existing process + await this.shutdown(); + + return new Promise((resolve, reject) => { + const viameSetup = npath.join(settings.viamePath, 'setup_viame.sh'); + const configPath = npath.join(settings.viamePath, 'configs', 'pipelines', 'interactive_stereo_default.conf'); + + // Build the command to run the interactive stereo service + const command = [ + `. 
"${viameSetup}"`, + '&&', + 'python -m viame.core.interactive_stereo', + `--config "${configPath}"`, + ].join(' '); + + console.log('[Stereo] Starting interactive stereo service...'); + console.log(`[Stereo] Command: ${command}`); + + this.process = observeChild(spawn(command, { + shell: '/bin/bash', + stdio: ['pipe', 'pipe', 'pipe'], + })); + + // Set up readline for stdout (JSON responses) + if (this.process.stdout) { + this.readline = readline.createInterface({ + input: this.process.stdout, + crlfDelay: Infinity, + }); + + this.readline.on('line', (line) => { + this.handleResponse(line); + }); + } + + let initialized = false; + + // Log stderr (diagnostic messages) + if (this.process.stderr) { + this.process.stderr.on('data', (data: Buffer) => { + const message = data.toString().trim(); + if (message) { + console.log(`[Stereo] ${message}`); + // Detect successful startup (service is waiting for requests) + if (message.includes('Service started, waiting for requests')) { + // Now send the enable command with calibration + this.sendEnableCommand(calibration) + .then(() => { + initialized = true; + resolve(); + }) + .catch(reject); + } + } + }); + } + + // Handle process exit + this.process.on('exit', (code, signal) => { + console.log(`[Stereo] Process exited with code ${code}, signal ${signal}`); + this.cleanup(); + if (this.isInitializing && !initialized) { + reject(new Error(STEREO_LOAD_ERROR_MESSAGE)); + } + }); + + this.process.on('error', (err) => { + console.error('[Stereo] Process error:', err); + this.cleanup(); + if (this.isInitializing) { + reject(new Error(STEREO_LOAD_ERROR_MESSAGE)); + } + }); + + // Timeout for initialization (90 seconds for model loading) + setTimeout(() => { + if (this.isInitializing && !initialized) { + reject(new Error(STEREO_LOAD_ERROR_MESSAGE)); + } + }, 90000); + }); + } + + /** + * Send the enable command to the Python service + */ + private async sendEnableCommand(calibration?: StereoCalibration): Promise { + const id = 
this.generateRequestId(); + const request = { + id, + command: 'enable', + calibration, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error('Enable command timed out')); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { + resolve: (response) => { + if (response.success) { + resolve(); + } else { + reject(new Error(response.error || 'Failed to enable stereo service')); + } + }, + reject, + timeout, + }); + + const requestLine = `${JSON.stringify(request)}\n`; + this.process!.stdin!.write(requestLine); + }); + } + + /** + * Disable the stereo service and unload the model + */ + async disable(): Promise<{ success: boolean }> { + if (!this.enabled || !this.isReady()) { + this.enabled = false; + return { success: true }; + } + + try { + const id = this.generateRequestId(); + const request = { id, command: 'disable' }; + + await new Promise((resolve) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + resolve(); + }, 5000); + + this.pendingRequests.set(id, { + resolve: () => resolve(), + reject: () => resolve(), + timeout, + }); + + const requestLine = `${JSON.stringify(request)}\n`; + this.process!.stdin!.write(requestLine); + }); + + await this.shutdown(); + this.enabled = false; + return { success: true }; + } catch { + this.enabled = false; + return { success: true }; + } + } + + /** + * Check if the service is ready for requests + */ + isReady(): boolean { + return this.process !== null && this.process.exitCode === null; + } + + /** + * Check if the service is enabled + */ + isEnabled(): boolean { + return this.enabled && this.isReady(); + } + + /** + * Generate a unique request ID + */ + private generateRequestId(): string { + this.requestCounter += 1; + return `req_${Date.now()}_${this.requestCounter}`; + } + + /** + * Handle a response line from the stereo service + */ + private handleResponse(line: string): void { + try { + const 
response = JSON.parse(line) as StereoResponse; + const pending = this.pendingRequests.get(response.id); + + if (pending) { + clearTimeout(pending.timeout); + this.pendingRequests.delete(response.id); + pending.resolve(response); + } else if (response.type === 'disparity_ready') { + // Async notification that disparity is ready + this.emit('disparity_ready', response); + } else if (response.type === 'disparity_error') { + this.emit('disparity_error', response); + } else { + console.warn(`[Stereo] Received response for unknown request: ${response.id}`); + } + } catch (err) { + console.error('[Stereo] Failed to parse response:', line, err); + } + } + + /** + * Set calibration parameters + */ + async setCalibration(calibration: StereoCalibration): Promise { + if (!this.isReady()) { + throw new Error('Stereo service is not ready'); + } + + const id = this.generateRequestId(); + const request = { + id, + command: 'set_calibration', + calibration, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error('set_calibration request timed out')); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { + resolve: () => resolve(), + reject, + timeout, + }); + + const requestLine = `${JSON.stringify(request)}\n`; + this.process!.stdin!.write(requestLine); + }); + } + + /** + * Set the current frame and start computing disparity proactively + */ + async setFrame(request: StereoSetFrameRequest): Promise { + if (!this.isEnabled()) { + return { + id: '', + success: false, + error: 'Stereo service is not enabled', + disparityReady: false, + }; + } + + const id = this.generateRequestId(); + const fullRequest = { + id, + command: 'set_frame', + left_image_path: request.leftImagePath, + right_image_path: request.rightImagePath, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error('set_frame request timed 
out')); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { + resolve: (response) => resolve({ + id: response.id, + success: response.success, + error: response.error, + disparityReady: response.disparity_ready || false, + message: response.message, + }), + reject, + timeout, + }); + + const requestLine = `${JSON.stringify(fullRequest)}\n`; + this.process!.stdin!.write(requestLine); + }); + } + + /** + * Get the current status of the stereo service + */ + async getStatus(): Promise { + if (!this.isReady()) { + return { + id: '', + success: true, + enabled: false, + disparityReady: false, + hasCalibration: false, + }; + } + + const id = this.generateRequestId(); + const request = { id, command: 'get_status' }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error('get_status request timed out')); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { + resolve: (response) => resolve({ + id: response.id, + success: response.success, + enabled: response.enabled || false, + disparityReady: response.disparity_ready || false, + computing: response.computing, + currentLeftPath: response.current_left_path, + currentRightPath: response.current_right_path, + hasCalibration: response.has_calibration || false, + }), + reject, + timeout, + }); + + const requestLine = `${JSON.stringify(request)}\n`; + this.process!.stdin!.write(requestLine); + }); + } + + /** + * Transfer a line from left image to right image using disparity + */ + async transferLine(request: StereoTransferLineRequest): Promise { + if (!this.isEnabled()) { + return { + id: '', + success: false, + error: 'Stereo service is not enabled', + }; + } + + const id = this.generateRequestId(); + const fullRequest = { + id, + command: 'transfer_line', + line: request.line, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new 
Error('transfer_line request timed out')); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { + resolve: (response) => resolve({ + id: response.id, + success: response.success, + error: response.error, + transferredLine: response.transferred_line, + originalLine: response.original_line, + depthInfo: response.depth_info ? { + depthPoint1: response.depth_info.depth_point1, + depthPoint2: response.depth_info.depth_point2, + disparityPoint1: response.depth_info.disparity_point1, + disparityPoint2: response.depth_info.disparity_point2, + } : undefined, + }), + reject, + timeout, + }); + + const requestLine = `${JSON.stringify(fullRequest)}\n`; + this.process!.stdin!.write(requestLine); + }); + } + + /** + * Transfer multiple points from left image to right image + */ + async transferPoints(request: StereoTransferPointsRequest): Promise { + if (!this.isEnabled()) { + return { + id: '', + success: false, + error: 'Stereo service is not enabled', + }; + } + + const id = this.generateRequestId(); + const fullRequest = { + id, + command: 'transfer_points', + points: request.points, + }; + + return new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + this.pendingRequests.delete(id); + reject(new Error('transfer_points request timed out')); + }, this.requestTimeoutMs); + + this.pendingRequests.set(id, { + resolve: (response) => resolve({ + id: response.id, + success: response.success, + error: response.error, + transferredPoints: response.transferred_points, + originalPoints: response.original_points, + disparityValues: response.disparity_values, + }), + reject, + timeout, + }); + + const requestLine = `${JSON.stringify(fullRequest)}\n`; + this.process!.stdin!.write(requestLine); + }); + } + + /** + * Clean up internal state after process exits + */ + private cleanup(): void { + // Reject all pending requests + this.pendingRequests.forEach((pending) => { + clearTimeout(pending.timeout); + pending.reject(new Error('Stereo service terminated')); 
+ }); + this.pendingRequests.clear(); + + if (this.readline) { + this.readline.close(); + this.readline = null; + } + + this.process = null; + this.enabled = false; + this.emit('shutdown'); + } + + /** + * Gracefully shutdown the stereo service + */ + async shutdown(): Promise { + if (!this.process) { + return undefined; + } + + console.log('[Stereo] Shutting down stereo service...'); + + await new Promise((resolve) => { + // Send shutdown command + const reqId = this.generateRequestId(); + const request = { id: reqId, command: 'shutdown' }; + + if (this.process?.stdin?.writable) { + this.process.stdin.write(`${JSON.stringify(request)}\n`); + } + + // Wait for process to exit or timeout + const timeoutId = setTimeout(() => { + if (this.process) { + console.log('[Stereo] Force killing stereo service...'); + this.process.kill('SIGTERM'); + } + this.cleanup(); + resolve(); + }, 5000); + + if (this.process) { + this.process.once('exit', () => { + clearTimeout(timeoutId); + this.cleanup(); + resolve(); + }); + } else { + clearTimeout(timeoutId); + resolve(); + } + }); + return undefined; + } +} + +// Singleton instance +let serviceManager: StereoServiceManager | null = null; + +/** + * Get the stereo service manager singleton + */ +export function getStereoServiceManager(): StereoServiceManager { + if (!serviceManager) { + serviceManager = new StereoServiceManager(); + } + return serviceManager; +} + +/** + * Shutdown the stereo service (call on app close) + */ +export async function shutdownStereoService(): Promise { + if (serviceManager) { + await serviceManager.shutdown(); + serviceManager = null; + } +} diff --git a/client/platform/desktop/backend/serializers/viame.ts b/client/platform/desktop/backend/serializers/viame.ts index b9e55c2c9..afb9738c1 100644 --- a/client/platform/desktop/backend/serializers/viame.ts +++ b/client/platform/desktop/backend/serializers/viame.ts @@ -26,7 +26,10 @@ const HeadRegex = /^\(kp\) head (-?[0-9]+\.*-?[0-9]*) 
(-?[0-9]+\.*-?[0-9]*)/g; const TailRegex = /^\(kp\) tail (-?[0-9]+\.*-?[0-9]*) (-?[0-9]+\.*-?[0-9]*)/g; const AttrRegex = /^\(atr\) (.*?)\s(.+)/g; const TrackAttrRegex = /^\(trk-atr\) (.*?)\s(.+)/g; -const PolyRegex = /^(\(poly\)) ((?:-?[0-9]+\.*-?[0-9]*\s*)+)/g; +// Polygon format: (poly) coordinates +const PolyRegex = /^\(poly\)\s*((?:-?[0-9]+\.*-?[0-9]*\s*)+)/g; +// Hole format: (hole) coordinates +const HoleRegex = /^\(hole\)\s*((?:-?[0-9]+\.*-?[0-9]*\s*)+)/g; const FpsRegex = /fps:\s*(\d+(\.\d+)?)/ig; const ExecTimeRegEx = /exec_time:\s*(\d+(\.\d+)?)/ig; const AtrToken = '(atr)'; @@ -120,6 +123,18 @@ function _deduceType(value: string): boolean | number | string { return value; } +/** + * Get the next available polygon key for a feature collection. + */ +function _getNextPolygonKey( + geoFeatureCollection: GeoJSON.FeatureCollection, +): string { + const polygonCount = geoFeatureCollection.features.filter( + (f) => f.geometry.type === 'Polygon', + ).length; + return polygonCount > 0 ? String(polygonCount) : ''; +} + /** * Simplified from python variant. Does not handle duplicate type/key pairs * within a single feature. @@ -151,6 +166,28 @@ function _createGeoJsonFeature( return geoFeature; } +/** + * Find an existing polygon feature by key and add a hole to it. 
+ * @param geoFeatureCollection the feature collection to search + * @param coords hole coordinates + * @param key polygon key to find + */ +function _addHoleToPolygon( + geoFeatureCollection: GeoJSON.FeatureCollection, + coords: number[][], + key = '', +) { + const matchingFeature = geoFeatureCollection.features.find( + (feature) => feature.geometry.type === 'Polygon' && feature.properties?.key === key, + ); + if (matchingFeature) { + // Add hole as additional ring to the polygon coordinates + (matchingFeature.geometry.coordinates as number[][][]).push(coords); + return true; + } + return false; +} + function _parseRow(row: string[]) { // Create empty feature collection const geoFeatureCollection: @@ -206,11 +243,13 @@ function _parseRow(row: string[]) { trackAttributes[trackattr[1]] = _deduceType(trackattr[2]); } - /* Polygon */ + /* Polygon - format: (poly) coordinates + * Multiple (poly) entries create separate polygons with auto-generated keys */ const poly = getCaptureGroups(PolyRegex, value); if (poly !== null) { + const coordString = poly[1]; const coords: number[][] = []; - const polyList = poly[2].split(' '); + const polyList = coordString.split(' '); polyList.forEach((coord, j) => { if (j % 2 === 0) { // Filter out ODDs @@ -219,7 +258,32 @@ function _parseRow(row: string[]) { } } }); - geoFeatureCollection.features.push(_createGeoJsonFeature('Polygon', coords)); + // Create new polygon with auto-generated key + const newKey = _getNextPolygonKey(geoFeatureCollection); + geoFeatureCollection.features.push(_createGeoJsonFeature('Polygon', coords, newKey)); + } + + /* Hole - format: (hole) coordinates + * Added to the most recent polygon */ + const hole = getCaptureGroups(HoleRegex, value); + if (hole !== null) { + const coordString = hole[1]; + const coords: number[][] = []; + const polyList = coordString.split(' '); + polyList.forEach((coord, j) => { + if (j % 2 === 0) { + // Filter out ODDs + if (polyList[j + 1]) { + coords.push([parseFloat(coord), 
parseFloat(polyList[j + 1])]); + } + } + }); + // Add as hole to the most recent polygon + const polygons = geoFeatureCollection.features.filter((f) => f.geometry.type === 'Polygon'); + if (polygons.length > 0) { + const lastPolyKey = polygons[polygons.length - 1].properties?.key || ''; + _addHoleToPolygon(geoFeatureCollection, coords, lastPolyKey); + } } }); @@ -603,8 +667,19 @@ async function serialize( if (feature.geometry && feature.geometry.type === 'FeatureCollection') { feature.geometry.features.forEach((geoJSONFeature) => { if (geoJSONFeature.geometry.type === 'Polygon') { - const coordinates = flattenDeep(geoJSONFeature.geometry.coordinates[0]); - row.push(`${PolyToken} ${coordinates.map(Math.round).join(' ')}`); + const allRings = geoJSONFeature.geometry.coordinates as number[][][]; + + // Write outer ring (first ring) + if (allRings.length > 0) { + const outerCoords = flattenDeep(allRings[0]); + row.push(`${PolyToken} ${outerCoords.map(Math.round).join(' ')}`); + + // Write holes (additional rings) + for (let holeIdx = 0; holeIdx < allRings.length - 1; holeIdx += 1) { + const holeCoords = flattenDeep(allRings[holeIdx + 1]); + row.push(`(hole) ${holeCoords.map(Math.round).join(' ')}`); + } + } } else if (geoJSONFeature.geometry.type === 'Point') { if (geoJSONFeature.properties) { const kpname = geoJSONFeature.properties.key; @@ -614,7 +689,6 @@ async function serialize( ); } } - /* TODO support for multiple GeoJSON Objects of the same type */ }); } stringify.write(row); diff --git a/client/platform/desktop/frontend/api.ts b/client/platform/desktop/frontend/api.ts index 49a26a709..9c4ff463a 100644 --- a/client/platform/desktop/frontend/api.ts +++ b/client/platform/desktop/frontend/api.ts @@ -4,6 +4,8 @@ import type { DatasetMetaMutable, DatasetType, MultiCamImportArgs, Pipe, Pipelines, SaveAttributeArgs, SaveAttributeTrackFilterArgs, SaveDetectionsArgs, TrainingConfigs, + SegmentationPredictRequest, SegmentationPredictResponse, SegmentationStatusResponse, 
+ TextQueryRequest, TextQueryResponse, RefineDetectionsRequest, RefineDetectionsResponse, } from 'dive-common/apispec'; import { @@ -242,6 +244,189 @@ async function cancelJob(job: DesktopJob): Promise { return window.diveDesktop.invoke('cancel-job', job); } +/** + * Interactive Segmentation API + */ + +async function segmentationInitialize(): Promise<{ success: boolean }> { + return ipcRenderer.invoke('segmentation-initialize'); +} + +async function segmentationPredict(request: SegmentationPredictRequest): Promise { + return ipcRenderer.invoke('segmentation-predict', request); +} + +async function segmentationSetImage(imagePath: string): Promise<{ success: boolean }> { + return ipcRenderer.invoke('segmentation-set-image', imagePath); +} + +async function segmentationClearImage(): Promise<{ success: boolean }> { + return ipcRenderer.invoke('segmentation-clear-image'); +} + +async function segmentationShutdown(): Promise<{ success: boolean }> { + return ipcRenderer.invoke('segmentation-shutdown'); +} + +async function segmentationIsReady(): Promise { + return ipcRenderer.invoke('segmentation-is-ready'); +} + +/** + * Text Query API + * Allows open-vocabulary detection and segmentation using text prompts + */ + +async function textQuery(request: TextQueryRequest): Promise { + return ipcRenderer.invoke('segmentation-text-query', request); +} + +async function refineDetections(request: RefineDetectionsRequest): Promise { + return ipcRenderer.invoke('segmentation-refine', request); +} + +/** + * Run text query pipeline on all frames + */ +async function runTextQueryPipeline( + datasetId: string, + queryText: string, + threshold?: number, +): Promise { + const pipeline: Pipe = { + name: 'Text Query', + pipe: 'utility_text_query.pipe', + type: 'utility', + }; + + const pipelineParams: Record = { + 'track_refiner:refiner:text_query': queryText, + }; + + if (threshold !== undefined) { + pipelineParams['track_refiner:refiner:detection_threshold'] = threshold.toString(); + } 
+ + const args: RunPipeline = { + type: JobType.RunPipeline, + pipeline, + datasetId, + pipelineParams, + }; + gpuJobQueue.enqueue(args); +} + +/** + * Interactive Stereo API + */ + +interface StereoCalibration { + fx_left: number; + fy_left?: number; + cx_left: number; + cy_left: number; + T: [number, number, number]; +} + +interface StereoSetFrameRequest { + leftImagePath: string; + rightImagePath: string; +} + +interface StereoSetFrameResponse { + id: string; + success: boolean; + error?: string; + disparityReady: boolean; + message?: string; +} + +interface StereoStatusResponse { + id: string; + success: boolean; + enabled: boolean; + disparityReady: boolean; + computing?: boolean; + currentLeftPath?: string; + currentRightPath?: string; + hasCalibration: boolean; +} + +interface StereoTransferLineRequest { + line: [[number, number], [number, number]]; +} + +interface StereoTransferLineResponse { + id: string; + success: boolean; + error?: string; + transferredLine?: [[number, number], [number, number]]; + originalLine?: [[number, number], [number, number]]; + depthInfo?: { + depthPoint1: number | null; + depthPoint2: number | null; + disparityPoint1: number; + disparityPoint2: number; + }; +} + +interface StereoTransferPointsRequest { + points: [number, number][]; +} + +interface StereoTransferPointsResponse { + id: string; + success: boolean; + error?: string; + transferredPoints?: [number, number][]; + originalPoints?: [number, number][]; + disparityValues?: number[]; +} + +async function stereoEnable(calibration?: StereoCalibration): Promise<{ success: boolean; error?: string }> { + return ipcRenderer.invoke('stereo-enable', { calibration }); +} + +async function stereoDisable(): Promise<{ success: boolean }> { + return ipcRenderer.invoke('stereo-disable'); +} + +async function stereoSetFrame(request: StereoSetFrameRequest): Promise { + return ipcRenderer.invoke('stereo-set-frame', request); +} + +async function stereoGetStatus(): Promise { + return 
ipcRenderer.invoke('stereo-get-status'); +} + +async function stereoTransferLine(request: StereoTransferLineRequest): Promise { + return ipcRenderer.invoke('stereo-transfer-line', request); +} + +async function stereoTransferPoints(request: StereoTransferPointsRequest): Promise { + return ipcRenderer.invoke('stereo-transfer-points', request); +} + +async function stereoSetCalibration(calibration: StereoCalibration): Promise<{ success: boolean }> { + return ipcRenderer.invoke('stereo-set-calibration', { calibration }); +} + +async function stereoIsEnabled(): Promise<{ enabled: boolean }> { + return ipcRenderer.invoke('stereo-is-enabled'); +} + +function onStereoDisparityReady(callback: (data: unknown) => void): () => void { + const handler = (_event: unknown, data: unknown) => callback(data); + ipcRenderer.on('stereo-disparity-ready', handler); + return () => ipcRenderer.removeListener('stereo-disparity-ready', handler); +} + +function onStereoDisparityError(callback: (data: unknown) => void): () => void { + const handler = (_event: unknown, data: unknown) => callback(data); + ipcRenderer.on('stereo-disparity-error', handler); + return () => ipcRenderer.removeListener('stereo-disparity-error', handler); +} + /** * REST api for larger-body messages */ @@ -356,4 +541,26 @@ export { cancelJob, getLastCalibration, saveCalibration, + /* Segmentation APIs */ + segmentationInitialize, + segmentationPredict, + segmentationSetImage, + segmentationClearImage, + segmentationShutdown, + segmentationIsReady, + /* Text Query APIs */ + textQuery, + refineDetections, + runTextQueryPipeline, + /* Stereo APIs */ + stereoEnable, + stereoDisable, + stereoSetFrame, + stereoGetStatus, + stereoTransferLine, + stereoTransferPoints, + stereoSetCalibration, + stereoIsEnabled, + onStereoDisparityReady, + onStereoDisparityError, }; diff --git a/client/platform/desktop/frontend/components/ViewerLoader.vue b/client/platform/desktop/frontend/components/ViewerLoader.vue index 94ca70d8c..d75436a91 
100644 --- a/client/platform/desktop/frontend/components/ViewerLoader.vue +++ b/client/platform/desktop/frontend/components/ViewerLoader.vue @@ -1,6 +1,7 @@ + + diff --git a/client/src/components/LayerManager.vue b/client/src/components/LayerManager.vue index 675dcdcd5..4dd235079 100644 --- a/client/src/components/LayerManager.vue +++ b/client/src/components/LayerManager.vue @@ -39,7 +39,9 @@ import { useSelectedCamera, useAttributes, useComparisonSets, + useSegmentationPoints, } from '../provides'; +import SegmentationPointsLayer from '../layers/AnnotationLayers/SegmentationPointsLayer'; /** LayerManager is a component intended to be used as a child of an Annotator. * It provides logic for switching which layers are visible, but more importantly @@ -89,7 +91,8 @@ export default defineComponent({ return trackStyleManager.typeStyling.value; }); - const annotator = injectAggregateController().value.getController(props.camera); + const aggregateController = injectAggregateController(); + const annotator = aggregateController.value.getController(props.camera); const frameNumberRef = annotator.frame; const flickNumberRef = annotator.flick; @@ -154,6 +157,20 @@ export default defineComponent({ type: 'rectangle', }); + // Segmentation points layer for displaying prompt points during point-click segmentation + const segmentationPointsRef = useSegmentationPoints(); + const segmentationPointsLayer = new SegmentationPointsLayer(annotator); + + // Watch for segmentation points updates - only show points for current frame + watch([segmentationPointsRef, frameNumberRef], ([newPoints, currentFrame]) => { + // Only display points if they belong to the current frame + if (newPoints.points.length > 0 && newPoints.frameNum === currentFrame) { + segmentationPointsLayer.updatePoints(newPoints.points, newPoints.labels); + } else { + segmentationPointsLayer.clear(); + } + }, { deep: true }); + const updateAttributes = () => { const newList = attributes.value.filter((item) => 
item.render).sort((a, b) => { if (a.render && b.render) { @@ -388,6 +405,7 @@ export default defineComponent({ typeStylingRef, toRef(props, 'colorBy'), selectedCamera, + selectedKeyRef, ], () => { updateLayers( @@ -435,6 +453,23 @@ export default defineComponent({ ); }); + /** Watch for resize events to redraw layers after view mode changes */ + watch( + () => aggregateController.value.resizeTrigger.value, + () => { + updateLayers( + frameNumberRef.value, + editingModeRef.value, + selectedTrackIdRef.value, + multiSeletListRef.value, + enabledTracksRef.value, + visibleModesRef.value, + selectedKeyRef.value, + props.colorBy, + ); + }, + ); + const Clicked = (trackId: number, editing: boolean, modifiers?: {ctrl: boolean}) => { // If the camera isn't selected yet we ignore the click if (selectedCamera.value !== props.camera) { @@ -443,20 +478,108 @@ export default defineComponent({ //So we only want to pass the click whjen not in creation mode or editing mode for features if (editAnnotationLayer.getMode() !== 'creation') { editAnnotationLayer.disable(); - handler.trackSelect(trackId, editing, modifiers); + // When entering editing mode (right-click), use trackEdit so the + // geometry type is auto-detected (e.g. LineString vs rectangle). 
+ if (editing && trackId !== null) { + handler.trackEdit(trackId); + } else { + handler.trackSelect(trackId, editing, modifiers); + } + } else if (editing && trackId !== null) { + // Right-click on another detection while in creation mode: + // cancel creation and switch to editing the clicked detection + editAnnotationLayer.disable(); + handler.trackEdit(trackId); } }; //Sync of internal geoJS state with the application - editAnnotationLayer.bus.$on('editing-annotation-sync', (editing: boolean) => { - handler.trackSelect(selectedTrackIdRef.value, editing); + editAnnotationLayer.bus.$on('editing-annotation-sync', (editing: boolean, deselect?: boolean) => { + if (deselect) { + handler.trackSelect(null, false); + } else { + handler.trackSelect(selectedTrackIdRef.value, editing); + } + }); + // Handle right-click to confirm/lock annotation in Point mode (segmentation) + editAnnotationLayer.bus.$on('confirm-annotation', () => { + handler.confirmRecipe(); }); rectAnnotationLayer.bus.$on('annotation-clicked', Clicked); rectAnnotationLayer.bus.$on('annotation-right-clicked', Clicked); rectAnnotationLayer.bus.$on('annotation-ctrl-clicked', Clicked); polyAnnotationLayer.bus.$on('annotation-clicked', Clicked); polyAnnotationLayer.bus.$on('annotation-right-clicked', Clicked); + // Handle right-click polygon selection for multi-polygon support + polyAnnotationLayer.bus.$on('polygon-right-clicked', (_trackId: number, polygonKey: string) => { + // If in creation mode, cancel it first so we can select the polygon + if (editAnnotationLayer.getMode() === 'creation') { + handler.cancelCreation(); + } + // Set the polygon key for the right-clicked polygon + handler.selectFeatureHandle(-1, polygonKey); + // Force layer update to load the selected polygon + // This is especially important when already editing the same track + // since annotation-right-clicked won't be emitted in that case + window.setTimeout(() => { + updateLayers( + frameNumberRef.value, + editingModeRef.value, + 
selectedTrackIdRef.value, + multiSeletListRef.value, + enabledTracksRef.value, + visibleModesRef.value, + selectedKeyRef.value, + props.colorBy, + ); + }, 0); + }); polyAnnotationLayer.bus.$on('annotation-ctrl-clicked', Clicked); + lineLayer.bus.$on('annotation-clicked', Clicked); + lineLayer.bus.$on('annotation-right-clicked', Clicked); + // Handle polygon selection for multi-polygon support + polyAnnotationLayer.bus.$on('polygon-clicked', (_trackId: number, polygonKey: string) => { + // If in creation mode, don't interrupt - let the edit layer handle clicks for placing points + // This is important for hole drawing where left-clicks place hole vertices + if (editAnnotationLayer.getMode() === 'creation') { + return; + } + handler.selectFeatureHandle(-1, polygonKey); + // Force layer update to load the newly selected polygon + // Defer via setTimeout(0) (not nextTick) so the selectedKey ref has been updated + window.setTimeout(() => { + updateLayers( + frameNumberRef.value, + editingModeRef.value, + selectedTrackIdRef.value, + multiSeletListRef.value, + enabledTracksRef.value, + visibleModesRef.value, + selectedKeyRef.value, + props.colorBy, + ); + }, 0); + }); + // Handle right-click outside polygons to finalize/cancel creation + polyAnnotationLayer.bus.$on('polygon-right-clicked-outside', () => { + if (editAnnotationLayer.getMode() === 'creation') { + // Cancel creation and go back to editing the default polygon + handler.cancelCreation(); + handler.selectFeatureHandle(-1, ''); + window.setTimeout(() => { + updateLayers( + frameNumberRef.value, + editingModeRef.value, + selectedTrackIdRef.value, + multiSeletListRef.value, + enabledTracksRef.value, + visibleModesRef.value, + selectedKeyRef.value, + props.colorBy, + ); + }, 0); + } + }); editAnnotationLayer.bus.$on('update:geojson', ( mode: 'in-progress' | 'editing', geometryCompleteEvent: boolean, @@ -492,8 +615,59 @@ export default defineComponent({ }); editAnnotationLayer.bus.$on( 'update:selectedIndex', - (index: number, _type:
EditAnnotationTypes, key = '') => handler.selectFeatureHandle(index, key), + (index: number, _type: EditAnnotationTypes, key?: string) => { + // When deselecting (index -1), don't change the key - it may have been + // set by polygon-right-clicked/polygon-clicked for multi-polygon selection + if (index >= 0 && key !== undefined) { + handler.selectFeatureHandle(index, key); + } else { + // Just update the handle index, preserve the current key + handler.selectFeatureHandle(index, selectedKeyRef.value); + } + }, ); + // Handle clicks outside the edit polygon to allow selecting other polygons + editAnnotationLayer.bus.$on('click-outside-edit', (geo: { x: number; y: number }) => { + // Check which polygon was clicked by iterating through formatted data + const point: [number, number] = [geo.x, geo.y]; + const polygonData = polyAnnotationLayer.formattedData; + + // Find the polygon that contains the click point + const clickedPolygon = polygonData.find((data) => { + const coords = data.polygon.coordinates[0] as [number, number][]; + // Ray casting algorithm + let inside = false; + for (let i = 0, j = coords.length - 1; i < coords.length; j = i, i += 1) { + const xi = coords[i][0]; + const yi = coords[i][1]; + const xj = coords[j][0]; + const yj = coords[j][1]; + const intersect = ((yi > point[1]) !== (yj > point[1])) + && (point[0] < ((xj - xi) * (point[1] - yi)) / (yj - yi) + xi); + if (intersect) inside = !inside; + } + return inside; + }); + + if (clickedPolygon) { + const polygonKey = clickedPolygon.polygonKey || ''; + // Select the clicked polygon + handler.selectFeatureHandle(-1, polygonKey); + // Force layer update to load the newly selected polygon + window.setTimeout(() => { + updateLayers( + frameNumberRef.value, + editingModeRef.value, + selectedTrackIdRef.value, + multiSeletListRef.value, + enabledTracksRef.value, + visibleModesRef.value, + selectedKeyRef.value, + props.colorBy, + ); + }, 0); + } + }); const annotationHoverTooltip = ( found: { styleType: 
[string, number]; diff --git a/client/src/layers/AnnotationLayers/LineLayer.ts b/client/src/layers/AnnotationLayers/LineLayer.ts index d4c2516fa..8f814003b 100644 --- a/client/src/layers/AnnotationLayers/LineLayer.ts +++ b/client/src/layers/AnnotationLayers/LineLayer.ts @@ -1,4 +1,5 @@ /* eslint-disable class-methods-use-this */ +import geo, { GeoEvent } from 'geojs'; import { cloneDeep } from 'lodash'; import BaseLayer, { LayerStyle, BaseLayerParams } from '../BaseLayer'; @@ -24,7 +25,23 @@ export default class LineLayer extends BaseLayer { const layer = this.annotator.geoViewerRef.value.createLayer('feature', { features: ['point', 'line'], }); - this.featureLayer = layer.createFeature('line'); + this.featureLayer = layer + .createFeature('line', { selectionAPI: true }) + .geoOn(geo.event.feature.mouseclick, (e: GeoEvent) => { + if (e.mouse.buttonsDown.left) { + if (!e.data.editing || (e.data.editing && !e.data.selected)) { + this.bus.$emit('annotation-clicked', e.data.trackId, false); + } + } else if (e.mouse.buttonsDown.right) { + if (!e.data.editing || (e.data.editing && !e.data.selected)) { + this.bus.$emit('annotation-right-clicked', e.data.trackId, true); + } + } + }); + this.featureLayer.geoOn( + geo.event.feature.mouseclick_order, + this.featureLayer.mouseOverOrderClosestBorder, + ); super.initialize(); } diff --git a/client/src/layers/AnnotationLayers/PolygonLayer.ts b/client/src/layers/AnnotationLayers/PolygonLayer.ts index 302c030b7..79fe0043a 100644 --- a/client/src/layers/AnnotationLayers/PolygonLayer.ts +++ b/client/src/layers/AnnotationLayers/PolygonLayer.ts @@ -10,7 +10,28 @@ interface PolyGeoJSData{ editing: boolean | string; styleType: [string, number] | null; polygon: GeoJSON.Polygon; + polygonKey: string; set?: string; + isHole?: boolean; // True if this is a hole polygon (for styling) +} + +/** + * Darken a hex color by a given factor (0-1, where 0 = black, 1 = original) + */ +function darkenColor(color: string, factor: number): string { + // 
Handle hex colors + if (color.startsWith('#')) { + const hex = color.slice(1); + const r = parseInt(hex.slice(0, 2), 16); + const g = parseInt(hex.slice(2, 4), 16); + const b = parseInt(hex.slice(4, 6), 16); + const newR = Math.round(r * factor); + const newG = Math.round(g * factor); + const newB = Math.round(b * factor); + return `#${newR.toString(16).padStart(2, '0')}${newG.toString(16).padStart(2, '0')}${newB.toString(16).padStart(2, '0')}`; + } + // For non-hex colors, return as-is (could extend to support rgb(), etc.) + return color; } export default class PolygonLayer extends BaseLayer { @@ -35,19 +56,36 @@ export default class PolygonLayer extends BaseLayer { .geoOn(geo.event.feature.mouseclick, (e: GeoEvent) => { /** * Handle clicking on individual annotations, if DrawingOther is true we use the - * Rectangle type if only the polygon is visible we use the polygon bounds + * Rectangle type for track selection. However, polygon key events are always + * emitted so that multi-polygon selection works regardless of drawingOther. 
* */ - if (e.mouse.buttonsDown.left && !this.drawingOther) { - if (!e.data.editing || (e.data.editing && !e.data.selected)) { - if (e.mouse.modifiers.ctrl) { - this.bus.$emit('annotation-ctrl-clicked', e.data.trackId, false, { ctrl: true }); - } else { - this.bus.$emit('annotation-clicked', e.data.trackId, false); + if (e.mouse.buttonsDown.left) { + // Always emit polygon-clicked for multi-polygon support, regardless of drawingOther + const polygonKey = e.data.polygonKey || ''; + if (e.data.selected) { + // Already selected track - user may be selecting a different polygon + this.bus.$emit('polygon-clicked', e.data.trackId, polygonKey); + } + // Track-level events only when not drawingOther (rectangle layer handles those) + if (!this.drawingOther) { + if (!e.data.editing || (e.data.editing && !e.data.selected)) { + if (e.mouse.modifiers.ctrl) { + this.bus.$emit('annotation-ctrl-clicked', e.data.trackId, false, { ctrl: true }); + } else { + this.bus.$emit('polygon-clicked', e.data.trackId, polygonKey); + this.bus.$emit('annotation-clicked', e.data.trackId, false); + } } } - } else if (e.mouse.buttonsDown.right && !this.drawingOther) { - if (!e.data.editing || (e.data.editing && !e.data.selected)) { - this.bus.$emit('annotation-right-clicked', e.data.trackId, true); + } else if (e.mouse.buttonsDown.right) { + // Always emit polygon key for right-click so the correct polygon can be selected + const polygonKey = e.data.polygonKey || ''; + this.bus.$emit('polygon-right-clicked', e.data.trackId, polygonKey); + // Track-level events only when not drawingOther + if (!this.drawingOther) { + if (!e.data.editing || (e.data.editing && !e.data.selected)) { + this.bus.$emit('annotation-right-clicked', e.data.trackId, true); + } } } }); @@ -58,7 +96,12 @@ export default class PolygonLayer extends BaseLayer { this.featureLayer.geoOn(geo.event.mouseclick, (e: GeoEvent) => { // If we aren't clicking on an annotation we can deselect the current track if 
(this.featureLayer.pointSearch(e.geo).found.length === 0 && !this.drawingOther) { - this.bus.$emit('annotation-clicked', null, false); + if (e.mouse.buttonsDown.left) { + this.bus.$emit('annotation-clicked', null, false); + } else if (e.mouse.buttonsDown.right) { + // Right-click outside polygons - emit event to finalize/cancel creation + this.bus.$emit('polygon-right-clicked-outside'); + } } }); super.initialize(); @@ -106,15 +149,41 @@ export default class PolygonLayer extends BaseLayer { frameData.features.geometry.features.forEach((feature) => { if (feature.geometry && feature.geometry.type === 'Polygon') { const polygon = feature.geometry; + const polygonKey = feature.properties?.key || ''; const annotation: PolyGeoJSData = { trackId: frameData.track.id, selected: frameData.selected, editing: frameData.editing, styleType: frameData.styleType, polygon, + polygonKey, set: frameData.set, + isHole: false, }; arr.push(annotation); + + // Also add holes as separate polygon entries for distinct styling + const coords = polygon.coordinates as GeoJSON.Position[][]; + if (coords.length > 1) { + // coords[0] is outer ring, coords[1..n] are holes + for (let i = 1; i < coords.length; i += 1) { + const holePolygon: GeoJSON.Polygon = { + type: 'Polygon', + coordinates: [coords[i]], // Hole as its own polygon + }; + const holeAnnotation: PolyGeoJSData = { + trackId: frameData.track.id, + selected: frameData.selected, + editing: frameData.editing, + styleType: frameData.styleType, + polygon: holePolygon, + polygonKey, // Same key as parent polygon + set: frameData.set, + isHole: true, + }; + arr.push(holeAnnotation); + } + } } }); } @@ -126,7 +195,17 @@ export default class PolygonLayer extends BaseLayer { redraw() { this.featureLayer .data(this.formattedData) - .polygon((d: PolyGeoJSData) => d.polygon.coordinates[0]) + .polygon((d: PolyGeoJSData) => { + // GeoJS expects outer ring as array of points for simple polygons + // For polygons with holes, return object with 
outer/inner properties + if (d.polygon.coordinates.length > 1) { + return { + outer: d.polygon.coordinates[0], + inner: d.polygon.coordinates.slice(1), + }; + } + return d.polygon.coordinates[0]; + }) .draw(); } @@ -142,15 +221,22 @@ export default class PolygonLayer extends BaseLayer { // Style conversion to get array objects to work in geoJS position: (point) => ({ x: point[0], y: point[1] }), strokeColor: (_point, _index, data) => { + let color: string; if (data.selected) { - return this.stateStyling.selected.color; - } - if (data.styleType) { - return this.typeStyling.value.color(data.styleType[0]); + color = this.stateStyling.selected.color; + } else if (data.styleType) { + color = this.typeStyling.value.color(data.styleType[0]); + } else { + color = this.typeStyling.value.color(''); } - return this.typeStyling.value.color(''); + // Darken color for holes + return data.isHole ? darkenColor(color, 0.5) : color; }, fill: (data) => { + // Holes should always be filled to show the darker color + if (data.isHole) { + return true; + } if (data.set) { return this.typeStyling.value.fill(data.set); } @@ -160,12 +246,20 @@ export default class PolygonLayer extends BaseLayer { return this.stateStyling.standard.fill; }, fillColor: (_point, _index, data) => { + let color: string; if (data.styleType) { - return this.typeStyling.value.color(data.styleType[0]); + color = this.typeStyling.value.color(data.styleType[0]); + } else { + color = this.typeStyling.value.color(''); } - return this.typeStyling.value.color(''); + // Darken color for holes + return data.isHole ? 
darkenColor(color, 0.5) : color; }, fillOpacity: (_point, _index, data) => { + // Holes get higher opacity to stand out + if (data.isHole) { + return 0.5; + } if (data.set) { return this.typeStyling.value.opacity(data.set); } diff --git a/client/src/layers/AnnotationLayers/SegmentationPointsLayer.ts b/client/src/layers/AnnotationLayers/SegmentationPointsLayer.ts new file mode 100644 index 000000000..09db4c186 --- /dev/null +++ b/client/src/layers/AnnotationLayers/SegmentationPointsLayer.ts @@ -0,0 +1,76 @@ +import { MediaController } from '../../components/annotators/mediaControllerType'; + +interface SegmentationPointData { + x: number; + y: number; + label: number; // 1=foreground, 0=background +} + +/** + * Layer for displaying segmentation prompt points (green=foreground, red=background) + * This is a simple layer that doesn't follow the BaseLayer pattern since it's + * not tied to track data - it's UI feedback during the segmentation process. + */ +export default class SegmentationPointsLayer { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + private featureLayer: any; + + private annotator: MediaController; + + private points: SegmentationPointData[] = []; + + constructor(annotator: MediaController) { + this.annotator = annotator; + this.initialize(); + } + + private initialize() { + const layer = this.annotator.geoViewerRef.value.createLayer('feature', { + features: ['point'], + }); + this.featureLayer = layer.createFeature('point'); + this.featureLayer.style({ + radius: 8, + strokeWidth: 2, + strokeColor: (data: SegmentationPointData) => (data.label === 1 ? '#00FF00' : '#FF0000'), + fillColor: (data: SegmentationPointData) => (data.label === 1 ? 
'#00FF00' : '#FF0000'), + fillOpacity: 0.6, + strokeOpacity: 1, + }); + this.featureLayer.position((data: SegmentationPointData) => ({ + x: data.x, + y: data.y, + })); + } + + /** + * Update the displayed points + * @param points - Array of [x, y] coordinates + * @param labels - Array of labels (1=foreground, 0=background) + */ + updatePoints(points: [number, number][], labels: number[]) { + this.points = points.map((p, i) => ({ + x: p[0], + y: p[1], + label: labels[i] ?? 1, + })); + this.redraw(); + } + + /** + * Clear all displayed points + */ + clear() { + this.points = []; + this.redraw(); + } + + private redraw() { + this.featureLayer.data(this.points).draw(); + } + + disable() { + this.points = []; + this.featureLayer.data([]).draw(); + } +} diff --git a/client/src/layers/EditAnnotationLayer.ts b/client/src/layers/EditAnnotationLayer.ts index 4c1204a1a..e12de9f37 100644 --- a/client/src/layers/EditAnnotationLayer.ts +++ b/client/src/layers/EditAnnotationLayer.ts @@ -69,6 +69,8 @@ export default class EditAnnotationLayer extends BaseLayer { selectedKey?: string; + selectedPolygonIndex: number; + selectedHandleIndex: number; hoverHandleIndex: number; @@ -80,6 +82,17 @@ export default class EditAnnotationLayer extends BaseLayer { /* in-progress events only emitted for lines and polygons */ shapeInProgress: GeoJSON.LineString | GeoJSON.Polygon | null; + /* Track if the last click was a middle-click or shift-click (background point) in Point mode */ + lastClickWasBackground: boolean; + + /* Track shift key state from native DOM events (more reliable than GeoJS events) */ + lastShiftKeyState: boolean; + + /* Bound event handlers for cleanup */ + private boundTrackShiftKey: ((e: MouseEvent) => void) | null = null; + + private boundHandleContextMenu: ((e: MouseEvent) => void) | null = null; + // eslint-disable-next-line @typescript-eslint/no-explicit-any arrowFeatureLayer: any; @@ -90,18 +103,93 @@ export default class EditAnnotationLayer extends BaseLayer { this.skipNextExternalUpdate
= false; this._mode = 'editing'; this.selectedKey = ''; + this.selectedPolygonIndex = 0; this.type = params.type; this.selectedHandleIndex = -1; this.hoverHandleIndex = -1; this.shapeInProgress = null; this.disableModeSync = false; this.leftButtonCheckTimeout = -1; + this.lastClickWasBackground = false; + this.lastShiftKeyState = false; this.unrotatedGeoJSONCoords = null; + // Bind event handlers once (listeners are added/removed dynamically based on type) + this.boundTrackShiftKey = this.trackShiftKey.bind(this); + this.boundHandleContextMenu = this.handleContextMenu.bind(this); + + // Add listeners if starting in Point mode + if (this.type === 'Point') { + this.addPointModeListeners(); + } + //Only initialize once, prevents recreating Layer each edit this.initialize(); } + /** + * Add event listeners needed for Point mode (segmentation). + */ + private addPointModeListeners() { + if (this.boundTrackShiftKey) { + document.addEventListener('mousedown', this.boundTrackShiftKey, true); + } + if (this.boundHandleContextMenu) { + document.addEventListener('contextmenu', this.boundHandleContextMenu, true); + } + } + + /** + * Remove event listeners used for Point mode. + */ + private removePointModeListeners() { + if (this.boundTrackShiftKey) { + document.removeEventListener('mousedown', this.boundTrackShiftKey, true); + } + if (this.boundHandleContextMenu) { + document.removeEventListener('contextmenu', this.boundHandleContextMenu, true); + } + } + + /** + * Track shift key state from native DOM mousedown events. + * This is more reliable than GeoJS events for detecting shift+click. + */ + trackShiftKey(e: MouseEvent) { + this.lastShiftKeyState = e.shiftKey; + // Also track middle-click (button 1) from native events for background points + if (e.button === 1 && this.type === 'Point' && this.getMode() === 'creation') { + this.lastClickWasBackground = true; + } + } + + /** + * Handle right-click context menu in Point mode. 
+ * In segmentation mode, right-click confirms/locks the annotation. + * Prevents browser context menu. + */ + handleContextMenu(e: MouseEvent) { + if (this.type === 'Point' && this.getMode() === 'creation') { + e.preventDefault(); + e.stopPropagation(); + + // Emit right-click event with screen coordinates so LayerManager can + // check if an annotation is under the cursor and select it. + // On Windows/Electron, GeoJS mouseclick may not fire for right-button, + // so this provides a reliable fallback for annotation selection. + this.bus.$emit('right-click-point-mode', { x: e.clientX, y: e.clientY }); + } + } + + /** + * Clean up event listeners when the layer is destroyed. + */ + destroy() { + this.removePointModeListeners(); + this.boundTrackShiftKey = null; + this.boundHandleContextMenu = null; + } + /** * Initialization of the layer should only be done once for edit layers * Handlers for edit_action and state which will emit data when necessary @@ -130,6 +218,13 @@ export default class EditAnnotationLayer extends BaseLayer { (e: GeoEvent) => this.hoverEditHandle(e), ); this.featureLayer.geoOn(geo.event.mouseclick, (e: GeoEvent) => { + // Right-click in creation mode (non-Point): cancel and fully deselect. + // Point mode has its own right-click handler (handleContextMenu). 
+ if (e.buttonsDown.right && this.getMode() === 'creation' && this.type !== 'Point') { + this.shapeInProgress = null; + this.bus.$emit('editing-annotation-sync', false, true); + return; + } //Used to sync clicks that kick out of editing mode with application //This prevents that pseudo Edit state when left clicking on a object in edit mode if (!this.disableModeSync && (e.buttonsDown.left) @@ -137,6 +232,35 @@ export default class EditAnnotationLayer extends BaseLayer { this.bus.$emit('editing-annotation-sync', false); } else if (e.buttonsDown.left) { const newIndex = this.hoverHandleIndex; + // If not hovering over an edit handle and not on the edit polygon, + // emit event so other layers can handle the click (e.g., selecting different polygon) + if (newIndex < 0 && this.type === 'Polygon') { + const annotations = this.featureLayer.annotations(); + if (annotations.length > 0) { + const annotation = annotations[0]; + const geojson = annotation.geojson(); + if (geojson && geojson.geometry && geojson.geometry.type === 'Polygon') { + const coords = geojson.geometry.coordinates[0] as [number, number][]; + const point: [number, number] = [e.geo.x, e.geo.y]; + // Ray casting algorithm to check if point is inside polygon + let inside = false; + for (let i = 0, j = coords.length - 1; i < coords.length; j = i, i += 1) { + const xi = coords[i][0]; + const yi = coords[i][1]; + const xj = coords[j][0]; + const yj = coords[j][1]; + const intersect = ((yi > point[1]) !== (yj > point[1])) + && (point[0] < ((xj - xi) * (point[1] - yi)) / (yj - yi) + xi); + if (intersect) inside = !inside; + } + if (!inside) { + // Click is outside the current edit polygon - emit passthrough event + this.bus.$emit('click-outside-edit', e.geo); + return; + } + } + } + } // Click features like a toggle: unselect if it's clicked twice. 
if (newIndex === this.selectedHandleIndex) { this.selectedHandleIndex = -1; @@ -184,10 +308,56 @@ export default class EditAnnotationLayer extends BaseLayer { * shape that GeoJS is keeps internally. Emit the shape as update:in-progress-geojson */ setShapeInProgress(e: GeoEvent) { - // Allow middle click movement when placing points - if (e.mouse.buttons.middle && !e.propogated) { + // Allow middle click movement when placing points (except in Point mode where it creates background points) + if (e.mouse.buttons.middle && !e.propogated && this.type !== 'Point') { + return; + } + // Right-click should never add vertices - cancel/confirm is handled by + // mouseclick (line/polygon cancel) and contextmenu (Point confirm) + if (e.mouse.buttons.right) { return; } + + // Track if this is a background point (shift+click or middle-click) for Point mode + // Check both GeoJS event modifiers and our native DOM event tracking for reliability + // Preserve the value if it was already set to true by trackShiftKey (native event) + if (this.type === 'Point' && this.getMode() === 'creation') { + this.lastClickWasBackground = this.lastClickWasBackground + || e.mouse.buttons.middle + || e.mouse.modifiers.shift + || this.lastShiftKeyState; + } + + // Handle middle-click in Point mode - GeoJS doesn't create points on middle-click, + // so we need to manually create the point and emit the event + if (this.type === 'Point' && this.getMode() === 'creation' && e.mouse.buttons.middle) { + const pointGeojson: GeoJSON.Feature = { + type: 'Feature', + geometry: { + type: 'Point', + coordinates: [Math.round(e.mouse.geo.x), Math.round(e.mouse.geo.y)], + }, + properties: { + background: true, + }, + }; + + // Emit the point creation event directly + this.bus.$emit( + 'update:geojson', + 'editing', + true, // geometryCompleteEvent - point is complete + pointGeojson, + this.type, + this.selectedKey, + this.skipNextFunc(), + ); + + // Reset background flag for next point + this.lastClickWasBackground 
= false; + return; + } + if (this.getMode() === 'creation' && ['LineString', 'Polygon'].includes(this.type)) { if (this.shapeInProgress === null) { // Initialize a new in-progress shape @@ -205,6 +375,25 @@ export default class EditAnnotationLayer extends BaseLayer { } else { const coords = this.shapeInProgress?.coordinates as GeoJSON.Position[]; coords.push(newPoint); + // Auto-complete LineString after 2 points (simple line with 2 endpoints) + if (coords.length >= 2) { + const feature: GeoJSON.Feature = { + type: 'Feature', + geometry: this.shapeInProgress!, + properties: {}, + }; + this.shapeInProgress = null; + this.disableModeSync = true; + this.bus.$emit( + 'update:geojson', + 'editing', + true, // geometryCompleteEvent - line is complete + feature, + this.type, + this.selectedKey, + ); + return; + } } this.bus.$emit( 'update:geojson', @@ -281,9 +470,19 @@ export default class EditAnnotationLayer extends BaseLayer { /** * Set the current Editing type for switching between editing polygons or rects. - * */ + * Also manages event listeners that are only needed for Point mode (segmentation). + */ setType(type: EditAnnotationTypes) { + const wasPoint = this.type === 'Point'; + const isPoint = type === 'Point'; this.type = type; + + // Add or remove Point mode listeners based on type change + if (!wasPoint && isPoint) { + this.addPointModeListeners(); + } else if (wasPoint && !isPoint) { + this.removePointModeListeners(); + } } setKey(key: string) { @@ -335,6 +534,82 @@ export default class EditAnnotationLayer extends BaseLayer { } } + /** + * Attempt to finalize any in-progress annotation before switching tracks. + * Handles: + * - Polygon with 3+ vertices tracked in shapeInProgress + * - GeoJS-managed annotations (rectangles) in creation/done state + * - Discards invalid partial shapes (polygon < 3 vertices, line with 1 point) + * Returns true if a shape was finalized. 
+ */ + finalizeInProgress(): boolean { + // Handle shapeInProgress (polygon/line tracked manually) + if (this.shapeInProgress && this.getMode() === 'creation') { + if (this.shapeInProgress.type === 'Polygon') { + const coords = this.shapeInProgress.coordinates as GeoJSON.Position[][]; + if (coords[0] && coords[0].length >= 3) { + const ring = coords[0]; + const first = ring[0]; + const last = ring[ring.length - 1]; + if (first[0] !== last[0] || first[1] !== last[1]) { + ring.push([...first]); + } + const feature: GeoJSON.Feature = { + type: 'Feature', + geometry: this.shapeInProgress, + properties: {}, + }; + this.disableModeSync = true; + this.bus.$emit( + 'update:geojson', + 'editing', + true, + feature, + this.type, + this.selectedKey, + this.skipNextFunc(), + ); + this.shapeInProgress = null; + return true; + } + } + // Discard invalid partial shapes (polygon < 3 vertices, line with 1 point) + this.shapeInProgress = null; + return false; + } + + // Handle GeoJS-managed annotations (rectangles, completed shapes) + // Skip Point mode — segmentation manages its own polygon via the recipe, + // not through the edit layer's GeoJS annotation. 
+ if (this.featureLayer && this.type !== 'Point') { + const annotations = this.featureLayer.annotations(); + if (annotations.length > 0) { + const annotation = annotations[0]; + const geoJSONData = annotation.geojson(); + if (geoJSONData && geoJSONData.geometry) { + if (this.type === 'rectangle') { + geoJSONData.geometry.coordinates[0] = reOrdergeoJSON( + geoJSONData.geometry.coordinates[0] as GeoJSON.Position[], + ); + } + this.disableModeSync = true; + this.bus.$emit( + 'update:geojson', + 'editing', + true, + geoJSONData, + this.type, + this.selectedKey, + this.skipNextFunc(), + ); + return true; + } + } + } + + return false; + } + /** * Removes the current annotation and resets the mode when completed editing */ @@ -359,25 +634,52 @@ export default class EditAnnotationLayer extends BaseLayer { /** * retrieves geoJSON data based on the key and type - * @param frameData + * @param track + * @param polygonIndex optional index to get a specific polygon when multiple exist */ - getGeoJSONData(track: FrameDataTrack) { - let geoJSONData; + getGeoJSONData( + track: FrameDataTrack, + polygonIndex?: number, + ): GeoJSON.Point | GeoJSON.Polygon | GeoJSON.LineString | undefined { + let geoJSONData: GeoJSON.Point | GeoJSON.Polygon | GeoJSON.LineString | undefined; if (track && track.features && track.features.geometry) { + const matchingFeatures: (GeoJSON.Point | GeoJSON.Polygon | GeoJSON.LineString)[] = []; track.features.geometry.features.forEach((feature) => { if (feature.geometry && feature.geometry.type.toLowerCase() === this.type.toLowerCase()) { - if (feature.properties && feature.properties.key !== 'undefined') { - if (feature.properties.key === this.selectedKey) { - geoJSONData = feature.geometry; - } + // Get the feature key, defaulting to '' for undefined/null keys + const featureKey = feature.properties?.key ?? 
''; + if (featureKey === this.selectedKey) { + matchingFeatures.push( + feature.geometry as GeoJSON.Point | GeoJSON.Polygon | GeoJSON.LineString, + ); } } }); + // If polygonIndex is specified and valid, use it; otherwise use first match + if (polygonIndex !== undefined && polygonIndex >= 0 && polygonIndex < matchingFeatures.length) { + geoJSONData = matchingFeatures[polygonIndex]; + } else if (matchingFeatures.length > 0) { + [geoJSONData] = matchingFeatures; + } } return geoJSONData; } + /** + * Set which polygon index to edit when multiple polygons exist + */ + setPolygonIndex(index: number) { + this.selectedPolygonIndex = index; + } + + /** + * Get the currently selected polygon index + */ + getPolygonIndex() { + return this.selectedPolygonIndex; + } + /** overrides default function to disable and clear anotations before drawing again */ async changeData(frameData: FrameDataTrack[]) { if (this.skipNextExternalUpdate === false) { @@ -477,6 +779,19 @@ export default class EditAnnotationLayer extends BaseLayer { // Only calls this once on completion of an annotation if (e.annotation.state() === 'done' && this.getMode() === 'creation') { const geoJSONData = [e.annotation.geojson()]; + if (this.type === 'rectangle') { + geoJSONData[0].geometry.coordinates[0] = reOrdergeoJSON( + geoJSONData[0].geometry.coordinates[0] as GeoJSON.Position[], + ); + } + // For Point mode, add background property if it was a middle-click or shift-click + if (this.type === 'Point' && this.lastClickWasBackground) { + geoJSONData[0].properties = { + ...geoJSONData[0].properties, + background: true, + }; + this.lastClickWasBackground = false; // Reset for next point + } this.unrotatedGeoJSONCoords = geoJSONData[0].geometry.coordinates[0] as GeoJSON.Position[]; this.formattedData = geoJSONData; diff --git a/client/src/provides.ts b/client/src/provides.ts index 078b8ae5f..2a91a087d 100644 --- a/client/src/provides.ts +++ b/client/src/provides.ts @@ -8,6 +8,7 @@ import type { EditAnnotationTypes
} from './layers/EditAnnotationLayer'; import type { AnnotationId, StringKeyObject } from './BaseAnnotation'; import type { VisibleAnnotationTypes } from './layers'; import type { RectBounds } from './utils'; +import type { TrackSupportedFeature } from './track'; import type { Attribute, AttributeFilter, @@ -54,6 +55,9 @@ type EditingModeType = Readonly>; const MultiSelectSymbol = Symbol('multiSelect'); type MultiSelectType = Readonly>; +const SegmentationPointsSymbol = Symbol('segmentationPoints'); +type SegmentationPointsType = Readonly>; + const PendingSaveCountSymbol = Symbol('pendingSaveCount'); type pendingSaveCountType = Readonly>; @@ -119,6 +123,8 @@ export interface Handler { seekFrame(frame: number): void; /* Toggle editing mode for track */ trackEdit(AnnotationId: AnnotationId): void; + /* Confirm/lock the current annotation for active recipes */ + confirmRecipe(): void; /* toggle selection mode for track */ trackSelect(AnnotationId: AnnotationId | null, edit: boolean, modifiers?: { ctrl: boolean }): void; /* select next track in the list */ @@ -134,6 +140,13 @@ export interface Handler { bounds: RectBounds, rotation?: number, ): void; + /* Set a feature on the selected track with proper interpolation handling */ + setTrackFeature( + frameNum: number, + bounds: RectBounds, + geometry: GeoJSON.Feature[], + runAfterLogic?: boolean, + ): void; /* update geojson for track */ updateGeoJSON( eventType: 'in-progress' | 'editing', @@ -185,6 +198,12 @@ export interface Handler { startLinking(camera: string): void; stopLinking(): void; setChange(set: string): void; + /* Add a hole to the current polygon */ + addHole(): void; + /* Add a new separate polygon */ + addPolygon(): void; + /* Cancel any in-progress creation mode (hole or polygon addition) */ + cancelCreation(): void; } const HandlerSymbol = Symbol('handler'); @@ -200,11 +219,13 @@ function dummyHandler(handle: (name: string, args: unknown[]) => void): Handler trackSeek(...args) { handle('trackSeek', 
args); }, seekFrame(...args) { handle('seekFrame', args); }, trackEdit(...args) { handle('trackEdit', args); }, + confirmRecipe(...args) { handle('confirmRecipe', args); }, trackSelect(...args) { handle('trackSelect', args); }, trackSelectNext(...args) { handle('trackSelectNext', args); }, trackSplit(...args) { handle('trackSplit', args); }, trackAdd(...args) { handle('trackAdd', args); return 0; }, updateRectBounds(...args) { handle('updateRectBounds', args); }, + setTrackFeature(...args) { handle('setTrackFeature', args); }, updateGeoJSON(...args) { handle('updateGeoJSON', args); }, removeTrack(...args) { handle('removeTrack', args); }, removeGroup(...args) { handle('removeGroup', args); }, @@ -227,6 +248,9 @@ function dummyHandler(handle: (name: string, args: unknown[]) => void): Handler startLinking(...args) { handle('startLinking', args); }, stopLinking(...args) { handle('stopLinking', args); }, setChange(...args) { handle('setChange', args); }, + addHole(...args) { handle('addHole', args); }, + addPolygon(...args) { handle('addPolygon', args); }, + cancelCreation(...args) { handle('cancelCreation', args); }, }; } @@ -252,6 +276,7 @@ export interface State { annotationSet: AnnotationSetType; annotationSets: AnnotationSetsType; comparisonSets: ComparisonSetsType; + segmentationPoints: SegmentationPointsType; selectedCamera: SelectedCameraType; selectedKey: SelectedKeyType; selectedTrackId: SelectedTrackIdType; @@ -318,6 +343,7 @@ function dummyState(): State { comparisonSets: ref([]), groupFilters: groupFilterControls, groupStyleManager: new StyleManager({ markChangesPending }), + segmentationPoints: ref({ points: [], labels: [], frameNum: -1 }), selectedCamera: ref('singleCam'), selectedKey: ref(''), selectedTrackId: ref(null), @@ -367,6 +393,7 @@ function provideAnnotator(state: State, handler: Handler, attributesFilters: Att provide(AnnotationSetSymbol, state.annotationSet); provide(AnnotationSetsSymbol, state.annotationSets); provide(ComparisonSetsSymbol, 
state.comparisonSets); + provide(SegmentationPointsSymbol, state.segmentationPoints); provide(TrackFilterControlsSymbol, state.trackFilters); provide(TrackStyleManagerSymbol, state.trackStyleManager); provide(SelectedCameraSymbol, state.selectedCamera); @@ -499,6 +526,10 @@ function useImageEnhancements() { return use(ImageEnhancementsSymbol); } +function useSegmentationPoints() { + return use(SegmentationPointsSymbol); +} + export { dummyHandler, dummyState, @@ -531,4 +562,5 @@ export { useReadOnlyMode, useImageEnhancements, useAttributesFilters, + useSegmentationPoints, }; diff --git a/client/src/recipe.ts b/client/src/recipe.ts index efd5635b7..bd259fa08 100644 --- a/client/src/recipe.ts +++ b/client/src/recipe.ts @@ -28,6 +28,8 @@ interface Recipe { icon: Ref; active: Ref; toggleable: Ref; + /** Whether the recipe is currently loading (e.g., initializing models) */ + loading?: Ref; bus: Vue; update: ( mode: 'in-progress' | 'editing', @@ -52,6 +54,8 @@ interface Recipe { activate: () => unknown; mousetrap: () => Mousetrap[]; deactivate: () => void; + /** Optional method to confirm/lock the current annotation (e.g., for segmentation) */ + confirm?: () => void; } export default Recipe; diff --git a/client/src/track.ts b/client/src/track.ts index c23750978..e59b2d506 100644 --- a/client/src/track.ts +++ b/client/src/track.ts @@ -310,7 +310,10 @@ export default class Track extends BaseAnnotation { geometry.forEach((geo) => { const i = fg.features .findIndex((item) => { - const keyMatch = !geo.properties?.key || item.properties?.key === geo.properties?.key; + // Compare keys directly, treating undefined/null as empty string + const geoKey = geo.properties?.key ?? ''; + const itemKey = item.properties?.key ?? 
''; + const keyMatch = geoKey === itemKey; const typeMatch = item.geometry.type === geo.geometry.type; return keyMatch && typeMatch; }); @@ -348,7 +351,9 @@ export default class Track extends BaseAnnotation { return []; } return feature.geometry.features.filter((item) => { - const matchesKey = !key || item.properties?.key === key; + // Check key match: undefined means match all, otherwise compare (treating undefined/null as '') + const matchesKey = key === undefined + || (item.properties?.key ?? '') === key; const matchesType = !type || item.geometry.type === type; return matchesKey && matchesType; }); @@ -361,7 +366,9 @@ export default class Track extends BaseAnnotation { return false; } const index = feature.geometry.features.findIndex((item) => { - const matchesKey = !key || item.properties?.key === key; + // Check key match: undefined means match all, otherwise compare (treating undefined/null as '') + const matchesKey = key === undefined + || (item.properties?.key ?? '') === key; const matchesType = !type || item.geometry.type === type; return matchesKey && matchesType; }); @@ -373,6 +380,116 @@ export default class Track extends BaseAnnotation { return false; } + /** + * Get all polygon features for a frame + * @returns Array of polygon GeoJSON features with their keys + */ + getPolygonFeatures(frame: number): Array<{ + key: string; + geometry: GeoJSON.Polygon; + hasHoles: boolean; + holeCount: number; + }> { + const feature = this.features[frame]; + if (!feature?.geometry) { + return []; + } + const polygons: Array<{ + key: string; + geometry: GeoJSON.Polygon; + hasHoles: boolean; + holeCount: number; + }> = []; + feature.geometry.features.forEach((item) => { + if (item.geometry.type === 'Polygon') { + const coords = item.geometry.coordinates as GeoJSON.Position[][]; + polygons.push({ + key: item.properties?.key || '', + geometry: item.geometry, + hasHoles: coords.length > 1, + holeCount: Math.max(0, coords.length - 1), + }); + } + }); + return polygons; + } 
+ + /** + * Add a hole to an existing polygon + * @param frame frame number + * @param key polygon key to add hole to + * @param holeCoords coordinates of the hole (array of [x,y] positions) + * @returns true if hole was added successfully + */ + addHoleToPolygon(frame: number, key: string, holeCoords: GeoJSON.Position[]): boolean { + const feature = this.features[frame]; + if (!feature?.geometry) { + return false; + } + const polygonFeature = feature.geometry.features.find( + (item) => item.geometry.type === 'Polygon' && item.properties?.key === key, + ); + if (polygonFeature && polygonFeature.geometry.type === 'Polygon') { + (polygonFeature.geometry.coordinates as GeoJSON.Position[][]).push(holeCoords); + this.notify('feature', feature); + return true; + } + return false; + } + + /** + * Remove a hole from a polygon + * @param frame frame number + * @param key polygon key + * @param holeIndex index of the hole to remove (0 = first hole, which is coordinates[1]) + * @returns true if hole was removed successfully + */ + removeHoleFromPolygon(frame: number, key: string, holeIndex: number): boolean { + const feature = this.features[frame]; + if (!feature?.geometry) { + return false; + } + const polygonFeature = feature.geometry.features.find( + (item) => item.geometry.type === 'Polygon' && item.properties?.key === key, + ); + if (polygonFeature && polygonFeature.geometry.type === 'Polygon') { + const coords = polygonFeature.geometry.coordinates as GeoJSON.Position[][]; + // holeIndex 0 corresponds to coords[1], holeIndex 1 to coords[2], etc. + const actualIndex = holeIndex + 1; + if (actualIndex > 0 && actualIndex < coords.length) { + coords.splice(actualIndex, 1); + this.notify('feature', feature); + return true; + } + } + return false; + } + + /** + * Get the next available polygon key for this frame + * @param frame frame number + * @returns next available key (e.g., "1", "2", etc.) 
+ */ + getNextPolygonKey(frame: number): string { + const polygons = this.getPolygonFeatures(frame); + if (polygons.length === 0) { + return ''; + } + // Find the highest numeric key and increment + let maxKey = 0; + polygons.forEach((p) => { + if (p.key === '') { + maxKey = Math.max(maxKey, 0); + } else { + const numKey = parseInt(p.key, 10); + if (!Number.isNaN(numKey)) { + maxKey = Math.max(maxKey, numKey); + } + } + }); + return String(maxKey + 1); + } + setFeatureAttribute(frame: number, name: string, value: unknown, user: null | string = null) { if (this.features[frame]) { if (user !== null) { diff --git a/server/dive_utils/models.py b/server/dive_utils/models.py index 9c18c0e27..6ecf8657e 100644 --- a/server/dive_utils/models.py +++ b/server/dive_utils/models.py @@ -29,7 +29,8 @@ class GeoJSONGeometry(BaseModel): class GeoJSONFeature(BaseModel): type: str geometry: GeoJSONGeometry - properties: Dict[str, Union[bool, float, str]] + # str first in the Union to keep numeric strings like "1" from coercing to bool/float + properties: Dict[str, Union[str, float, bool]] class GeoJSONFeatureCollection(BaseModel): diff --git a/server/dive_utils/serializers/viame.py b/server/dive_utils/serializers/viame.py index f57930bce..72fcfecf1 100644 --- a/server/dive_utils/serializers/viame.py +++ b/server/dive_utils/serializers/viame.py @@ -80,11 +80,28 @@ def _deduceType(value: Any) -> Union[bool, float, str, None]: return value -def create_geoJSONFeature(features: Dict[str, Any], type: str, coords: List[Any], key=''): +def get_next_polygon_key(features: Dict[str, Any]) -> str: + """Get the next available polygon key for a feature.""" + if "geometry" not in features or not features["geometry"]["features"]: + return '' + # Count existing polygons to determine the next key + polygon_count = sum( + 1 for f in features["geometry"]["features"] + if f["geometry"]["type"] == "Polygon" + ) + return str(polygon_count) if polygon_count > 0 else '' + + +def 
create_geoJSONFeature(features: Dict[str, Any], type: str, coords: List[Any], key='', auto_key=False): feature = {} if "geometry" not in features: features["geometry"] = {"type": "FeatureCollection", "features": []} - else: # check for existing type/key pairs + + # For polygons with auto_key, always create a new feature with a unique key + if type == 'Polygon' and auto_key: + key = get_next_polygon_key(features) + elif not auto_key: + # Check for existing type/key pairs (for non-polygon or explicit key) if features["geometry"]["features"]: for subfeature in features["geometry"]["features"]: if ( @@ -93,18 +110,34 @@ def create_geoJSONFeature(features: Dict[str, Any], type: str, coords: List[Any] ): feature = subfeature break + if "geometry" not in feature: feature = { "type": "Feature", "properties": {"key": key}, "geometry": {"type": type}, } + features['geometry']['features'].append(feature) if type == 'Polygon': feature["geometry"]['coordinates'] = [coords] elif type in ["LineString", "Point"]: feature['geometry']['coordinates'] = coords - features['geometry']['features'].append(feature) + return key # Return the key used (useful for auto-generated keys) + + +def add_hole_to_polygon(features: Dict[str, Any], coords: List[Any], key=''): + """Add a hole to an existing polygon feature with the given key.""" + if "geometry" not in features or not features["geometry"]["features"]: + return + for subfeature in features["geometry"]["features"]: + if ( + subfeature["geometry"]["type"] == 'Polygon' + and subfeature["properties"]["key"] == key + ): + # Add hole as additional ring to the polygon coordinates + subfeature["geometry"]["coordinates"].append(coords) + break def _parse_row(row: List[str]) -> Tuple[Dict, Dict, Dict, List]: @@ -148,12 +181,32 @@ def _parse_row(row: List[str]) -> Tuple[Dict, Dict, Dict, List]: if trk_regex: track_attributes[trk_regex[1]] = _deduceType(trk_regex[2]) - # (poly) x1 y1 x2 y2 ... 
- poly_regex = re.match(r"^(\(poly\)) ((?:-?[0-9]+\.*-?[0-9]*\s*)+)", row[j]) + # (poly) x1 y1 x2 y2 ... - polygon (multiple allowed, auto-keyed internally) + # (hole) x1 y1 x2 y2 ... - hole in the most recent polygon + poly_regex = re.match( + r"^\(poly\)\s*((?:-?[0-9]+\.*-?[0-9]*\s*)+)", + row[j] + ) if poly_regex: - temp = [float(x) for x in poly_regex[2].split()] - coords = list(zip(temp[::2], temp[1::2])) - create_geoJSONFeature(features, 'Polygon', coords) + temp = [float(x) for x in poly_regex.group(1).split()] + coords = [[temp[i], temp[i + 1]] for i in range(0, len(temp), 2)] + # Create new polygon with auto-generated key + create_geoJSONFeature(features, 'Polygon', coords, auto_key=True) + + # (hole) x1 y1 x2 y2 ... - hole in the most recent polygon + hole_regex = re.match( + r"^\(hole\)\s*((?:-?[0-9]+\.*-?[0-9]*\s*)+)", + row[j] + ) + if hole_regex: + temp = [float(x) for x in hole_regex.group(1).split()] + coords = [[temp[i], temp[i + 1]] for i in range(0, len(temp), 2)] + # Add hole to the most recent polygon (last one added) + if "geometry" in features and features["geometry"]["features"]: + polygons = [f for f in features["geometry"]["features"] if f["geometry"]["type"] == "Polygon"] + if polygons: + last_poly_key = polygons[-1]["properties"]["key"] + add_hole_to_polygon(features, coords, last_poly_key) if len(head_tail) == 2: create_geoJSONFeature(features, 'LineString', head_tail, 'HeadTails') @@ -558,25 +611,35 @@ def export_tracks_as_csv( if feature.geometry and "FeatureCollection" == feature.geometry.type: for geoJSONFeature in feature.geometry.features: if 'Polygon' == geoJSONFeature.geometry.type: - # Coordinates need to be flattened out from their list of tuples - coordinates = [ - item - for sublist in geoJSONFeature.geometry.coordinates[ - 0 - ] # type: ignore - for item in sublist # type: ignore - ] - columns.append( - f"(poly) {' '.join(map(lambda x: str(round(x)), coordinates))}" - ) + all_rings = geoJSONFeature.geometry.coordinates # 
type: ignore + + # Write outer ring (first ring) + if len(all_rings) > 0: + outer_coords = [ + item + for sublist in all_rings[0] + for item in sublist # type: ignore + ] + columns.append( + f"(poly) {' '.join(map(lambda x: str(round(x)), outer_coords))}" + ) + + # Write holes (additional rings) + for hole_ring in all_rings[1:]: + hole_coords = [ + item + for sublist in hole_ring + for item in sublist # type: ignore + ] + columns.append( + f"(hole) {' '.join(map(lambda x: str(round(x)), hole_coords))}" + ) if 'Point' == geoJSONFeature.geometry.type: coordinates = geoJSONFeature.geometry.coordinates # type: ignore columns.append( f"(kp) {geoJSONFeature.properties['key']} " f"{round(coordinates[0])} {round(coordinates[1])}" ) - # TODO: support for multiple GeoJSON Objects of the same type - # once the CSV supports it writer.writerow(columns) yield csvFile.getvalue() diff --git a/server/tests/test_serialize_viame_csv.py b/server/tests/test_serialize_viame_csv.py index 107e5c4b7..cd99a78a0 100644 --- a/server/tests/test_serialize_viame_csv.py +++ b/server/tests/test_serialize_viame_csv.py @@ -192,6 +192,129 @@ ], [], ), + # Testing multi-polygon with different keys + ( + { + "0": { + "id": 0, + "attributes": {}, + "confidencePairs": [["fish", 1.0]], + "features": [ + { + "frame": 0, + "bounds": [100, 100, 500, 500], + "geometry": { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {"key": ""}, + "geometry": { + "type": "Polygon", + "coordinates": [[[100, 100], [200, 100], [200, 200], [100, 200]]], + }, + }, + { + "type": "Feature", + "properties": {"key": "1"}, + "geometry": { + "type": "Polygon", + "coordinates": [[[300, 300], [400, 300], [400, 400], [300, 400]]], + }, + }, + ], + }, + }, + ], + "begin": 0, + "end": 0, + }, + }, + [ + "0,1.png,0,100,100,500,500,1.0,-1,fish,1.0,(poly) 100 100 200 100 200 200 100 200,(poly) 300 300 400 300 400 400 300 400", + "", + ], + [], + ), + # Testing polygon with hole + ( + { + "0": { + "id": 
0, + "attributes": {}, + "confidencePairs": [["object", 1.0]], + "features": [ + { + "frame": 0, + "bounds": [100, 100, 500, 500], + "geometry": { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {"key": ""}, + "geometry": { + "type": "Polygon", + "coordinates": [ + [[100, 100], [500, 100], [500, 500], [100, 500]], + [[200, 200], [400, 200], [400, 400], [200, 400]], + ], + }, + }, + ], + }, + }, + ], + "begin": 0, + "end": 0, + }, + }, + [ + "0,1.png,0,100,100,500,500,1.0,-1,object,1.0,(poly) 100 100 500 100 500 500 100 500,(hole) 200 200 400 200 400 400 200 400", + "", + ], + [], + ), + # Testing keyed polygon with hole + ( + { + "0": { + "id": 0, + "attributes": {}, + "confidencePairs": [["region", 1.0]], + "features": [ + { + "frame": 0, + "bounds": [0, 0, 1000, 1000], + "geometry": { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {"key": "2"}, + "geometry": { + "type": "Polygon", + "coordinates": [ + [[0, 0], [1000, 0], [1000, 1000], [0, 1000]], + [[100, 100], [200, 100], [200, 200], [100, 200]], + [[300, 300], [400, 300], [400, 400], [300, 400]], + ], + }, + }, + ], + }, + }, + ], + "begin": 0, + "end": 0, + }, + }, + [ + "0,1.png,0,0,0,1000,1000,1.0,-1,region,1.0,(poly) 0 0 1000 0 1000 1000 0 1000,(hole) 100 100 200 100 200 200 100 200,(hole) 300 300 400 300 400 400 300 400", + "", + ], + [], + ), # Testing type filter ( { diff --git a/testutils/viame.spec.json b/testutils/viame.spec.json index 040e88d2b..81eca2f3d 100644 --- a/testutils/viame.spec.json +++ b/testutils/viame.spec.json @@ -673,5 +673,202 @@ } }, {} + ], + [ + [ + "0,1.png,0,100,100,500,500,1.0,-1,fish,1.0,(poly) 100 100 200 100 200 200 100 200,(poly) 300 300 400 300 400 400 300 400", + "" + ], + { + "0": { + "id": 0, + "attributes": {}, + "confidencePairs": [ + [ + "fish", + 1.0 + ] + ], + "features": [ + { + "frame": 0, + "bounds": [ + 100, + 100, + 500, + 500 + ], + "geometry": { + "type": "FeatureCollection", + 
"features": [ + { + "type": "Feature", + "properties": { + "key": "" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [100.0, 100.0], + [200.0, 100.0], + [200.0, 200.0], + [100.0, 200.0] + ] + ] + } + }, + { + "type": "Feature", + "properties": { + "key": "1" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [300.0, 300.0], + [400.0, 300.0], + [400.0, 400.0], + [300.0, 400.0] + ] + ] + } + } + ] + } + } + ], + "begin": 0, + "end": 0 + } + }, + {} + ], + [ + [ + "0,1.png,0,100,100,500,500,1.0,-1,object,1.0,(poly) 100 100 500 100 500 500 100 500,(hole) 200 200 400 200 400 400 200 400", + "" + ], + { + "0": { + "id": 0, + "attributes": {}, + "confidencePairs": [ + [ + "object", + 1.0 + ] + ], + "features": [ + { + "frame": 0, + "bounds": [ + 100, + 100, + 500, + 500 + ], + "geometry": { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { + "key": "" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [100.0, 100.0], + [500.0, 100.0], + [500.0, 500.0], + [100.0, 500.0] + ], + [ + [200.0, 200.0], + [400.0, 200.0], + [400.0, 400.0], + [200.0, 400.0] + ] + ] + } + } + ] + } + } + ], + "begin": 0, + "end": 0 + } + }, + {} + ], + [ + [ + "0,1.png,0,0,0,1000,1000,1.0,-1,region,1.0,(poly) 0 0 1000 0 1000 1000 0 1000,(hole) 100 100 200 100 200 200 100 200,(hole) 300 300 400 300 400 400 300 400", + "" + ], + { + "0": { + "id": 0, + "attributes": {}, + "confidencePairs": [ + [ + "region", + 1.0 + ] + ], + "features": [ + { + "frame": 0, + "bounds": [ + 0, + 0, + 1000, + 1000 + ], + "geometry": { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { + "key": "" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [0.0, 0.0], + [1000.0, 0.0], + [1000.0, 1000.0], + [0.0, 1000.0] + ], + [ + [100.0, 100.0], + [200.0, 100.0], + [200.0, 200.0], + [100.0, 200.0] + ], + [ + [300.0, 300.0], + [400.0, 300.0], + [400.0, 400.0], + [300.0, 400.0] + ] + ] + 
} + } + ] + } + } + ], + "begin": 0, + "end": 0 + } + }, + {} ] ]