Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 221 additions & 0 deletions SPEC.md

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions Sources/PreviewsCore/Recording/ActionLog.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import Foundation

/// One recorded tool call in a recording session's action timeline.
///
/// Each entry stores when the call happened (relative to the start of the
/// recording), which tool was invoked with which parameters, and whether the
/// call triggered a recompile (which shows up as a cut in the video).
public struct ActionLogEntry: Codable, Equatable, Sendable {
    /// Offset from recording start, in milliseconds (monotonic clock).
    public let tMs: Int

    /// Name of the MCP tool that was invoked.
    public let tool: String

    /// Tool-call parameters, with every value carried as a string.
    public let params: [String: String]

    /// `true` when this tool call caused the preview to recompile.
    public let causedRecompile: Bool

    /// Creates an entry from its four stored values.
    ///
    /// - Parameters:
    ///   - tMs: Milliseconds elapsed since recording start.
    ///   - tool: MCP tool name.
    ///   - params: String-typed parameters of the call.
    ///   - causedRecompile: Whether the call caused a preview recompile.
    public init(
        tMs: Int,
        tool: String,
        params: [String: String],
        causedRecompile: Bool
    ) {
        self.tMs = tMs
        self.tool = tool
        self.params = params
        self.causedRecompile = causedRecompile
    }
}

/// Actor-isolated store of the tool calls made during a recording session.
///
/// The dispatcher middleware appends entries while a recording is active;
/// the accumulated timeline is read back when the recording stops.
public actor ActionLog {

    /// Recorded entries, oldest first.
    private var recorded: [ActionLogEntry] = []

    /// Creates an empty log.
    public init() {}

    /// Records one tool call at the end of the timeline.
    ///
    /// - Parameters:
    ///   - tMs: Milliseconds elapsed since recording start.
    ///   - tool: MCP tool name.
    ///   - params: String-typed parameters of the call.
    ///   - causedRecompile: Whether the call caused a preview recompile.
    public func append(
        tMs: Int, tool: String, params: [String: String], causedRecompile: Bool
    ) {
        let entry = ActionLogEntry(
            tMs: tMs,
            tool: tool,
            params: params,
            causedRecompile: causedRecompile
        )
        recorded.append(entry)
    }

    /// Returns a snapshot of every recorded entry, in the order they were appended.
    public func entries() -> [ActionLogEntry] {
        return recorded
    }
}
109 changes: 109 additions & 0 deletions Sources/PreviewsCore/Recording/FrameDiff.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import CoreGraphics
import Foundation

/// Computes the Structural Similarity Index (SSIM) between two images.
///
/// Both images are downsampled to 128×128 grayscale before comparison. The
/// result is the mean of the per-block SSIM over non-overlapping 8×8 blocks,
/// a block-based simplification of the sliding-window formulation described
/// in the reference below.
///
/// Reference: Z. Wang, A.C. Bovik, H.R. Sheikh, E.P. Simoncelli,
/// "Image Quality Assessment: From Error Visibility to Structural Similarity,"
/// IEEE Transactions on Image Processing, 2004.
public enum FrameDiff {

    /// Compare two images and return their mean SSIM.
    ///
    /// - Parameters:
    ///   - a: First image.
    ///   - b: Second image.
    /// - Returns: 1.0 when the downsampled images are identical; lower values
    ///   mean less similar. The formula can yield values below 0.0 when blocks
    ///   are negatively correlated, so the result is not clamped to 0...1.
    /// - Note: If a bitmap context cannot be created, both images are treated
    ///   as all-black, which makes them compare as identical (1.0).
    public static func ssim(_ a: CGImage, _ b: CGImage) -> Double {
        let size = 128
        let pixelsA = downsampleToGrayscale(a, size: size)
        let pixelsB = downsampleToGrayscale(b, size: size)
        return computeSSIM(pixelsA, pixelsB, width: size, height: size)
    }

    // MARK: - Internal

    /// Downsample a CGImage to `size×size` 8-bit grayscale.
    ///
    /// - Parameters:
    ///   - image: Source image; it is scaled to fill the whole square.
    ///   - size: Edge length of the output in pixels.
    /// - Returns: `size * size` row-major pixel values as `Double` in 0...255.
    ///   All zeros if the bitmap context could not be created.
    static func downsampleToGrayscale(_ image: CGImage, size: Int) -> [Double] {
        var grayscale = [UInt8](repeating: 0, count: size * size)

        // The context keeps the data pointer and writes through it when
        // drawing, so the pointer must stay valid for the context's whole
        // lifetime. `&grayscale` would only be valid for the initializer
        // call; scoping everything inside `withUnsafeMutableBytes` keeps the
        // buffer pinned until the draw has finished.
        grayscale.withUnsafeMutableBytes { buffer in
            guard
                let baseAddress = buffer.baseAddress,
                let context = CGContext(
                    data: baseAddress,
                    width: size,
                    height: size,
                    bitsPerComponent: 8,
                    bytesPerRow: size,
                    space: CGColorSpaceCreateDeviceGray(),
                    bitmapInfo: CGImageAlphaInfo.none.rawValue
                )
            else {
                // Leave the buffer zero-filled.
                return
            }
            context.interpolationQuality = .medium
            context.draw(image, in: CGRect(x: 0, y: 0, width: size, height: size))
        }

        return grayscale.map { Double($0) }
    }

    /// Compute mean SSIM over 8×8 non-overlapping windows.
    ///
    /// SSIM(x,y) = (2·μx·μy + C1)(2·σxy + C2) / ((μx² + μy² + C1)(σx² + σy² + C2))
    /// where C1 = (K1·L)², C2 = (K2·L)², L = 255, K1 = 0.01, K2 = 0.03
    ///
    /// - Parameters:
    ///   - a: Row-major pixel values of the first image (`width * height` entries).
    ///   - b: Row-major pixel values of the second image, same layout as `a`.
    ///   - width: Image width in pixels.
    ///   - height: Image height in pixels.
    /// - Returns: The average per-window SSIM, or 0 if the image is smaller
    ///   than a single window. Partial windows at the right and bottom edges
    ///   are ignored.
    private static func computeSSIM(
        _ a: [Double], _ b: [Double],
        width: Int, height: Int
    ) -> Double {
        let windowSize = 8
        let l: Double = 255.0
        let k1: Double = 0.01
        let k2: Double = 0.03
        let c1 = (k1 * l) * (k1 * l)
        let c2 = (k2 * l) * (k2 * l)
        let n = Double(windowSize * windowSize)

        var ssimSum: Double = 0
        var windowCount = 0

        let stepsX = width / windowSize
        let stepsY = height / windowSize

        for wy in 0..<stepsY {
            for wx in 0..<stepsX {
                let startX = wx * windowSize
                let startY = wy * windowSize

                // Raw moments of the window; means and (co)variances are
                // derived from them below.
                var sumA: Double = 0
                var sumB: Double = 0
                var sumA2: Double = 0
                var sumB2: Double = 0
                var sumAB: Double = 0

                for dy in 0..<windowSize {
                    for dx in 0..<windowSize {
                        let idx = (startY + dy) * width + (startX + dx)
                        let pa = a[idx]
                        let pb = b[idx]
                        sumA += pa
                        sumB += pb
                        sumA2 += pa * pa
                        sumB2 += pb * pb
                        sumAB += pa * pb
                    }
                }

                let muA = sumA / n
                let muB = sumB / n
                let sigmaA2 = sumA2 / n - muA * muA
                let sigmaB2 = sumB2 / n - muB * muB
                let sigmaAB = sumAB / n - muA * muB

                let numerator = (2.0 * muA * muB + c1) * (2.0 * sigmaAB + c2)
                let denominator = (muA * muA + muB * muB + c1) * (sigmaA2 + sigmaB2 + c2)

                ssimSum += numerator / denominator
                windowCount += 1
            }
        }

        guard windowCount > 0 else { return 0 }
        return ssimSum / Double(windowCount)
    }
}
159 changes: 159 additions & 0 deletions Sources/PreviewsCore/Recording/KeyframeSelector.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import Foundation

/// Selects keyframes from a sequence of per-frame SSIM diffs.
///
/// Selection first locates a motion window (from the first diff above the
/// motion threshold until the diffs settle), then picks the highest-diff
/// frames inside that window, keeping a minimum gap between picks and always
/// including both ends of the window.
public enum KeyframeSelector {

    /// Result of keyframe selection.
    public struct KeyframeSelection: Sendable {
        /// Index of the first frame where motion was detected, or nil if no motion.
        public let motionStartFrame: Int?
        /// Index of the last frame of the motion window, or nil if no motion.
        public let motionEndFrame: Int?
        /// Sorted indices of selected keyframes within the diff array.
        public let selectedIndices: [Int]
    }

    /// Select keyframes from a per-frame diff array.
    ///
    /// - Parameters:
    ///   - diffs: Per-frame SSIM difference from the previous frame (1.0 - SSIM).
    ///     Index 0 is the diff between frame 0 and frame 1.
    ///   - frameCount: Target number of keyframes to return. Both ends of the
    ///     motion window are always included, so the result can hold two
    ///     indices even when `frameCount` is smaller than 2.
    ///   - minGapMs: Minimum gap between selected frames in milliseconds.
    ///     Converted to whole frames (rounded up, at least 1).
    ///   - fps: Capture frame rate (used to convert minGapMs to frame count).
    ///   - motionThreshold: Diff value above which a frame is considered "in motion".
    ///   - stillThreshold: Diff value below which a frame is considered "settled".
    /// - Returns: A `KeyframeSelection` with the motion window and selected
    ///   indices. All fields are nil/empty when `diffs` is empty or no diff
    ///   exceeds `motionThreshold`.
    public static func select(
        diffs: [Double],
        frameCount: Int,
        minGapMs: Int,
        fps: Int,
        motionThreshold: Double,
        stillThreshold: Double
    ) -> KeyframeSelection {
        // 1. Find the motion window. An empty `diffs` has no first index
        //    above the threshold, so it takes the same "no motion" exit.
        guard let motionStart = findMotionStart(diffs: diffs, threshold: motionThreshold) else {
            return KeyframeSelection(
                motionStartFrame: nil, motionEndFrame: nil, selectedIndices: [])
        }

        let motionEnd = findMotionEnd(
            diffs: diffs, startFrame: motionStart,
            stillThreshold: stillThreshold, settleFrames: settleFrameCount(fps: fps)
        )

        // 2. Select keyframes within the motion window.
        let minGapFrames = max(1, Int(ceil(Double(minGapMs) / 1000.0 * Double(fps))))

        let selected = selectKeyframes(
            diffs: diffs,
            start: motionStart,
            end: motionEnd,
            frameCount: frameCount,
            minGapFrames: minGapFrames
        )

        return KeyframeSelection(
            motionStartFrame: motionStart,
            motionEndFrame: motionEnd,
            selectedIndices: selected
        )
    }

    // MARK: - Motion detection

    /// Find the first frame where diff exceeds the motion threshold.
    private static func findMotionStart(diffs: [Double], threshold: Double) -> Int? {
        diffs.firstIndex { $0 > threshold }
    }

    /// Find the end of the motion window.
    ///
    /// Scans the frames after `startFrame` for the first run of `settleFrames`
    /// consecutive diffs below `stillThreshold` and returns the index of the
    /// first frame of that run. Falls back to the last diff index if motion
    /// never settles.
    private static func findMotionEnd(
        diffs: [Double], startFrame: Int,
        stillThreshold: Double, settleFrames: Int
    ) -> Int {
        var consecutiveStill = 0
        for i in (startFrame + 1)..<diffs.count {
            if diffs[i] < stillThreshold {
                consecutiveStill += 1
                if consecutiveStill >= settleFrames {
                    // Step back to the first frame of the still run.
                    return i - settleFrames + 1
                }
            } else {
                consecutiveStill = 0
            }
        }
        return diffs.count - 1
    }

    /// Number of consecutive "still" frames needed to declare settle (~100ms).
    private static func settleFrameCount(fps: Int) -> Int {
        max(1, Int(ceil(Double(fps) * 0.1)))
    }

    // MARK: - Keyframe selection

    /// Select keyframes from the motion window `start...end`.
    ///
    /// Strategy:
    ///   1. Force-include the first and last frames of the window. They are
    ///      kept regardless of `frameCount` and of the gap between them.
    ///   2. Rank every frame in the window by descending diff; equal diffs
    ///      are ordered by ascending index so the result is deterministic.
    ///   3. Greedily add ranked frames until `frameCount` frames are
    ///      selected, skipping frames whose diff is not positive and frames
    ///      closer than `minGapFrames` to an already-selected frame.
    ///
    /// - Returns: The selected indices in ascending order.
    private static func selectKeyframes(
        diffs: [Double],
        start: Int,
        end: Int,
        frameCount: Int,
        minGapFrames: Int
    ) -> [Int] {
        guard start <= end else { return [start] }

        // Forced endpoints.
        var selected: Set<Int> = [start, end]

        let rankedByDiff = (start...end).sorted { lhs, rhs in
            if diffs[lhs] == diffs[rhs] { return lhs < rhs }
            return diffs[lhs] > diffs[rhs]
        }

        for idx in rankedByDiff {
            if selected.count >= frameCount { break }
            // A frame identical to its predecessor adds no information.
            if diffs[idx] <= 0 { continue }
            if selected.contains(idx) { continue }
            if respectsGap(idx, against: selected, minGap: minGapFrames) {
                selected.insert(idx)
            }
        }

        return selected.sorted()
    }

    /// Check if `candidate` is at least `minGap` frames away from all frames in `selected`.
    private static func respectsGap(
        _ candidate: Int, against selected: Set<Int>, minGap: Int
    ) -> Bool {
        selected.allSatisfy { abs(candidate - $0) >= minGap }
    }
}
Loading
Loading