diff --git a/js/hang/src/catalog/audio.ts b/js/hang/src/catalog/audio.ts index e96f6d161..e57710bf2 100644 --- a/js/hang/src/catalog/audio.ts +++ b/js/hang/src/catalog/audio.ts @@ -1,4 +1,5 @@ import { z } from "zod"; +import { ContainerSchema, DEFAULT_CONTAINER } from "./container"; import { u53Schema } from "./integers"; // Backwards compatibility: old track schema @@ -13,6 +14,10 @@ export const AudioConfigSchema = z.object({ // See: https://w3c.github.io/webcodecs/codec_registry.html codec: z.string(), + // Container format for timestamp encoding + // Defaults to "legacy" when not specified in catalog (backward compatibility) + container: ContainerSchema.default(DEFAULT_CONTAINER), + // The description is used for some codecs. // If provided, we can initialize the decoder based on the catalog alone. // Otherwise, the initialization information is in-band. diff --git a/js/hang/src/catalog/container.ts b/js/hang/src/catalog/container.ts new file mode 100644 index 000000000..05b0e81db --- /dev/null +++ b/js/hang/src/catalog/container.ts @@ -0,0 +1,18 @@ +import { z } from "zod"; + +/** + * Container format for frame timestamp encoding. + * + * - "legacy": Uses QUIC VarInt encoding (1-8 bytes, variable length) + * - "raw": Uses fixed u64 encoding (8 bytes, big-endian) + * - "fmp4": Fragmented MP4 container (future) + */ +export const ContainerSchema = z.enum(["legacy", "raw", "fmp4"]); + +export type Container = z.infer<typeof ContainerSchema>; + +/** + * Default container format when not specified. + * Set to legacy for backward compatibility. 
+ */ +export const DEFAULT_CONTAINER: Container = "legacy"; diff --git a/js/hang/src/catalog/index.ts b/js/hang/src/catalog/index.ts index a7f78e111..cfe27be11 100644 --- a/js/hang/src/catalog/index.ts +++ b/js/hang/src/catalog/index.ts @@ -1,6 +1,8 @@ export * from "./audio"; export * from "./capabilities"; export * from "./chat"; +export * from "./container"; +export { DEFAULT_CONTAINER } from "./container"; export * from "./integers"; export * from "./location"; export * from "./preview"; diff --git a/js/hang/src/catalog/video.ts b/js/hang/src/catalog/video.ts index 78b353964..b8b77883f 100644 --- a/js/hang/src/catalog/video.ts +++ b/js/hang/src/catalog/video.ts @@ -1,5 +1,5 @@ import { z } from "zod"; - +import { ContainerSchema, DEFAULT_CONTAINER } from "./container"; import { u53Schema } from "./integers"; // Backwards compatibility: old track schema @@ -13,6 +13,10 @@ export const VideoConfigSchema = z.object({ // See: https://w3c.github.io/webcodecs/codec_registry.html codec: z.string(), + // Container format for timestamp encoding + // Defaults to "legacy" when not specified in catalog (backward compatibility) + container: ContainerSchema.default(DEFAULT_CONTAINER), + // The description is used for some codecs. // If provided, we can initialize the decoder based on the catalog alone. // Otherwise, the initialization information is (repeated) before each key-frame. diff --git a/js/hang/src/container/codec.ts b/js/hang/src/container/codec.ts new file mode 100644 index 000000000..57a1119cc --- /dev/null +++ b/js/hang/src/container/codec.ts @@ -0,0 +1,155 @@ +import type * as Catalog from "../catalog"; +import { DEFAULT_CONTAINER } from "../catalog"; +import type * as Time from "../time"; + +/** + * Encodes a timestamp according to the specified container format. 
+ * + * @param timestamp - The timestamp in microseconds + * @param container - The container format to use + * @returns The encoded timestamp as a Uint8Array + */ +export function encodeTimestamp(timestamp: Time.Micro, container: Catalog.Container = DEFAULT_CONTAINER): Uint8Array { + switch (container) { + case "legacy": + return encodeVarInt(timestamp); + case "raw": + return encodeU64(timestamp); + case "fmp4": + throw new Error("fmp4 container not yet implemented"); + } +} + +/** + * Decodes a timestamp from a buffer according to the specified container format. + * + * @param buffer - The buffer containing the encoded timestamp + * @param container - The container format to use + * @returns [timestamp in microseconds, remaining buffer after timestamp] + */ +export function decodeTimestamp( + buffer: Uint8Array, + container: Catalog.Container = DEFAULT_CONTAINER, +): [Time.Micro, Uint8Array] { + switch (container) { + case "legacy": { + const [value, remaining] = decodeVarInt(buffer); + return [value as Time.Micro, remaining]; + } + case "raw": { + const [value, remaining] = decodeU64(buffer); + return [value as Time.Micro, remaining]; + } + case "fmp4": + throw new Error("fmp4 container not yet implemented"); + } +} + +/** + * Gets the size in bytes of an encoded timestamp for the given container format. + * For variable-length formats, returns the maximum size. 
+ * + * @param container - The container format + * @returns Size in bytes + */ +export function getTimestampSize(container: Catalog.Container = DEFAULT_CONTAINER): number { + switch (container) { + case "legacy": + return 8; // VarInt maximum size + case "raw": + return 8; // u64 fixed size + case "fmp4": + throw new Error("fmp4 container not yet implemented"); + } +} + +// ============================================================================ +// LEGACY VARINT IMPLEMENTATION +// ============================================================================ + +const MAX_U6 = 2 ** 6 - 1; +const MAX_U14 = 2 ** 14 - 1; +const MAX_U30 = 2 ** 30 - 1; +const MAX_U53 = Number.MAX_SAFE_INTEGER; + +function decodeVarInt(buf: Uint8Array): [number, Uint8Array] { + const size = 1 << ((buf[0] & 0xc0) >> 6); + + const view = new DataView(buf.buffer, buf.byteOffset, size); + const remain = new Uint8Array(buf.buffer, buf.byteOffset + size, buf.byteLength - size); + let v: number; + + if (size === 1) { + v = buf[0] & 0x3f; + } else if (size === 2) { + v = view.getUint16(0) & 0x3fff; + } else if (size === 4) { + v = view.getUint32(0) & 0x3fffffff; + } else if (size === 8) { + // NOTE: Precision loss above 2^52 + v = Number(view.getBigUint64(0) & 0x3fffffffffffffffn); + } else { + throw new Error("impossible"); + } + + return [v, remain]; +} + +function encodeVarInt(v: number): Uint8Array { + const dst = new Uint8Array(8); + + if (v <= MAX_U6) { + dst[0] = v; + return new Uint8Array(dst.buffer, dst.byteOffset, 1); + } + + if (v <= MAX_U14) { + const view = new DataView(dst.buffer, dst.byteOffset, 2); + view.setUint16(0, v | 0x4000); + return new Uint8Array(view.buffer, view.byteOffset, view.byteLength); + } + + if (v <= MAX_U30) { + const view = new DataView(dst.buffer, dst.byteOffset, 4); + view.setUint32(0, v | 0x80000000); + return new Uint8Array(view.buffer, view.byteOffset, view.byteLength); + } + + if (v <= MAX_U53) { + const view = new DataView(dst.buffer, dst.byteOffset, 
8); + view.setBigUint64(0, BigInt(v) | 0xc000000000000000n); + return new Uint8Array(view.buffer, view.byteOffset, view.byteLength); + } + + throw new Error(`overflow, value larger than 53-bits: ${v}`); +} + +// ============================================================================ +// RAW U64 IMPLEMENTATION +// ============================================================================ + +/** + * Decodes a fixed 8-byte big-endian unsigned 64-bit integer. + */ +function decodeU64(buf: Uint8Array): [number, Uint8Array] { + if (buf.byteLength < 8) { + throw new Error("Buffer too short for u64 decode"); + } + + const view = new DataView(buf.buffer, buf.byteOffset, 8); + const value = Number(view.getBigUint64(0)); + const remain = new Uint8Array(buf.buffer, buf.byteOffset + 8, buf.byteLength - 8); + + return [value, remain]; +} + +/** + * Encodes a number as a fixed 8-byte big-endian unsigned 64-bit integer. + * Much simpler than VarInt! + */ +function encodeU64(v: number): Uint8Array { + const dst = new Uint8Array(8); + const view = new DataView(dst.buffer, dst.byteOffset, 8); + view.setBigUint64(0, BigInt(v)); + return dst; +} diff --git a/js/hang/src/container/index.ts b/js/hang/src/container/index.ts new file mode 100644 index 000000000..2e87cf323 --- /dev/null +++ b/js/hang/src/container/index.ts @@ -0,0 +1 @@ +export * from "./codec"; diff --git a/js/hang/src/frame.ts b/js/hang/src/frame.ts index 23ba18c14..a8209fb37 100644 --- a/js/hang/src/frame.ts +++ b/js/hang/src/frame.ts @@ -1,5 +1,7 @@ import type * as Moq from "@moq/lite"; import { Effect, Signal } from "@moq/signals"; +import type * as Catalog from "./catalog"; +import * as Container from "./container"; import * as Time from "./time"; export interface Source { @@ -14,33 +16,42 @@ export interface Frame { group: number; } -export function encode(source: Uint8Array | Source, timestamp: Time.Micro): Uint8Array { - // TODO switch over to u64 for simplicity. 
The varint uses 8 bytes anyway after 18 minutes lul. - // TODO Don't encode into one buffer. Write the header/payload separately to avoid reallocating. - const data = new Uint8Array(8 + (source instanceof Uint8Array ? source.byteLength : source.byteLength)); +export function encode(source: Uint8Array | Source, timestamp: Time.Micro, container?: Catalog.Container): Uint8Array { + // Encode timestamp using the specified container format + const timestampBytes = Container.encodeTimestamp(timestamp, container); - const size = setVint53(data, timestamp).byteLength; + // Allocate buffer for timestamp + payload + const payloadSize = source instanceof Uint8Array ? source.byteLength : source.byteLength; + const data = new Uint8Array(timestampBytes.byteLength + payloadSize); + + // Write timestamp header + data.set(timestampBytes, 0); + + // Write payload if (source instanceof Uint8Array) { - data.set(source, size); + data.set(source, timestampBytes.byteLength); } else { - source.copyTo(data.subarray(size)); + source.copyTo(data.subarray(timestampBytes.byteLength)); } - return data.subarray(0, (source instanceof Uint8Array ? source.byteLength : source.byteLength) + size); + + return data; } // NOTE: A keyframe is always the first frame in a group, so it's not encoded on the wire. 
-export function decode(buffer: Uint8Array): { data: Uint8Array; timestamp: Time.Micro } { - const [us, data] = getVint53(buffer); - const timestamp = us as Time.Micro; - return { timestamp, data }; +export function decode(buffer: Uint8Array, container?: Catalog.Container): { data: Uint8Array; timestamp: Time.Micro } { + // Decode timestamp using the specified container format + const [timestamp, data] = Container.decodeTimestamp(buffer, container); + return { timestamp: timestamp as Time.Micro, data }; } export class Producer { #track: Moq.Track; #group?: Moq.Group; + #container?: Catalog.Container; - constructor(track: Moq.Track) { + constructor(track: Moq.Track, container?: Catalog.Container) { this.#track = track; + this.#container = container; } encode(data: Uint8Array | Source, timestamp: Time.Micro, keyframe: boolean) { @@ -51,7 +62,7 @@ export class Producer { throw new Error("must start with a keyframe"); } - this.#group?.writeFrame(encode(data, timestamp)); + this.#group?.writeFrame(encode(data, timestamp, this.#container)); } close() { @@ -63,6 +74,7 @@ export class Producer { export interface ConsumerProps { // Target latency in milliseconds (default: 0) latency?: Signal | Time.Milli; + container?: Catalog.Container; } interface Group { @@ -74,6 +86,7 @@ interface Group { export class Consumer { #track: Moq.Track; #latency: Signal; + #container?: Catalog.Container; #groups: Group[] = []; #active?: number; // the active group sequence number @@ -85,6 +98,7 @@ export class Consumer { constructor(track: Moq.Track, props?: ConsumerProps) { this.#track = track; this.#latency = Signal.from(props?.latency ?? 
Time.Milli.zero); + this.#container = props?.container; this.#signals.spawn(this.#run.bind(this)); this.#signals.cleanup(() => { @@ -138,7 +152,7 @@ export class Consumer { const next = await group.consumer.readFrame(); if (!next) break; - const { data, timestamp } = decode(next); + const { data, timestamp } = decode(next, this.#container); const frame = { data, timestamp, @@ -268,60 +282,3 @@ export class Consumer { this.#groups.length = 0; } } - -const MAX_U6 = 2 ** 6 - 1; -const MAX_U14 = 2 ** 14 - 1; -const MAX_U30 = 2 ** 30 - 1; -const MAX_U53 = Number.MAX_SAFE_INTEGER; -//const MAX_U62: bigint = 2n ** 62n - 1n; - -// QUIC VarInt -function getVint53(buf: Uint8Array): [number, Uint8Array] { - const size = 1 << ((buf[0] & 0xc0) >> 6); - - const view = new DataView(buf.buffer, buf.byteOffset, size); - const remain = new Uint8Array(buf.buffer, buf.byteOffset + size, buf.byteLength - size); - let v: number; - - if (size === 1) { - v = buf[0] & 0x3f; - } else if (size === 2) { - v = view.getUint16(0) & 0x3fff; - } else if (size === 4) { - v = view.getUint32(0) & 0x3fffffff; - } else if (size === 8) { - // NOTE: Precision loss above 2^52 - v = Number(view.getBigUint64(0) & 0x3fffffffffffffffn); - } else { - throw new Error("impossible"); - } - - return [v, remain]; -} - -function setVint53(dst: Uint8Array, v: number): Uint8Array { - if (v <= MAX_U6) { - dst[0] = v; - return new Uint8Array(dst.buffer, dst.byteOffset, 1); - } - - if (v <= MAX_U14) { - const view = new DataView(dst.buffer, dst.byteOffset, 2); - view.setUint16(0, v | 0x4000); - return new Uint8Array(view.buffer, view.byteOffset, view.byteLength); - } - - if (v <= MAX_U30) { - const view = new DataView(dst.buffer, dst.byteOffset, 4); - view.setUint32(0, v | 0x80000000); - return new Uint8Array(view.buffer, view.byteOffset, view.byteLength); - } - - if (v <= MAX_U53) { - const view = new DataView(dst.buffer, dst.byteOffset, 8); - view.setBigUint64(0, BigInt(v) | 0xc000000000000000n); - return new 
Uint8Array(view.buffer, view.byteOffset, view.byteLength); - } - - throw new Error(`overflow, value larger than 53-bits: ${v}`); -} diff --git a/js/hang/src/publish/audio/encoder.ts b/js/hang/src/publish/audio/encoder.ts index 8049038db..ec84a754e 100644 --- a/js/hang/src/publish/audio/encoder.ts +++ b/js/hang/src/publish/audio/encoder.ts @@ -1,6 +1,7 @@ import type * as Moq from "@moq/lite"; import { Effect, type Getter, Signal } from "@moq/signals"; import type * as Catalog from "../../catalog"; +import { DEFAULT_CONTAINER } from "../../catalog"; import { u53 } from "../../catalog/integers"; import * as Frame from "../../frame"; import * as Time from "../../time"; @@ -26,6 +27,8 @@ export type EncoderProps = { // The size of each group. Larger groups mean fewer drops but the viewer can fall further behind. // NOTE: Each frame is always flushed to the network immediately. maxLatency?: Time.Milli; + + container?: Catalog.Container; }; export class Encoder { @@ -37,6 +40,7 @@ export class Encoder { muted: Signal; volume: Signal; maxLatency: Time.Milli; + #container: Catalog.Container; source: Signal; @@ -61,6 +65,7 @@ export class Encoder { this.muted = Signal.from(props?.muted ?? false); this.volume = Signal.from(props?.volume ?? 1); this.maxLatency = props?.maxLatency ?? (100 as Time.Milli); // Default is a group every 100ms + this.#container = props?.container ?? DEFAULT_CONTAINER; this.#signals.effect(this.#runSource.bind(this)); this.#signals.effect(this.#runConfig.bind(this)); @@ -127,6 +132,7 @@ export class Encoder { sampleRate: u53(worklet.context.sampleRate), numberOfChannels: u53(worklet.channelCount), bitrate: u53(worklet.channelCount * 32_000), + container: this.#container, }; effect.set(this.#config, config); @@ -170,6 +176,8 @@ export class Encoder { // We're using an async polyfill temporarily for Safari support. 
await libav.polyfill(); + console.log(`[Audio Publisher] Using container format: ${this.#container}`); + const encoder = new AudioEncoder({ output: (frame) => { if (frame.type !== "key") { @@ -184,7 +192,7 @@ export class Encoder { groupTimestamp = frame.timestamp as Time.Micro; } - const buffer = Frame.encode(frame, frame.timestamp as Time.Micro); + const buffer = Frame.encode(frame, frame.timestamp as Time.Micro, this.#container); group.writeFrame(buffer); }, error: (err) => { diff --git a/js/hang/src/publish/element.ts b/js/hang/src/publish/element.ts index cc209cd90..6ea066ca8 100644 --- a/js/hang/src/publish/element.ts +++ b/js/hang/src/publish/element.ts @@ -76,10 +76,12 @@ export default class HangPublish extends HTMLElement { audio: { enabled: this.#audioEnabled, + container: "raw", }, video: { hd: { enabled: this.#videoEnabled, + container: "raw", }, }, }); diff --git a/js/hang/src/publish/video/encoder.ts b/js/hang/src/publish/video/encoder.ts index 941029121..309657de5 100644 --- a/js/hang/src/publish/video/encoder.ts +++ b/js/hang/src/publish/video/encoder.ts @@ -1,7 +1,7 @@ import type * as Moq from "@moq/lite"; import { Effect, type Getter, Signal } from "@moq/signals"; import type * as Catalog from "../../catalog"; -import { u53 } from "../../catalog"; +import { DEFAULT_CONTAINER, u53 } from "../../catalog"; import * as Frame from "../../frame"; import * as Time from "../../time"; import { isFirefox } from "../../util/hacks"; @@ -10,6 +10,7 @@ import type { Source } from "./types"; export interface EncoderProps { enabled?: boolean | Signal; config?: EncoderConfig | Signal; + container?: Catalog.Container; } // TODO support signals? @@ -39,6 +40,7 @@ export class Encoder { enabled: Signal; source: Signal; frame: Getter; + #container: Catalog.Container; #catalog = new Signal(undefined); readonly catalog: Getter = this.#catalog; @@ -62,6 +64,7 @@ export class Encoder { this.source = source; this.enabled = Signal.from(props?.enabled ?? 
false); this.config = Signal.from(props?.config); + this.#container = props?.container ?? DEFAULT_CONTAINER; this.#signals.effect(this.#runCatalog.bind(this)); this.#signals.effect(this.#runConfig.bind(this)); @@ -75,6 +78,8 @@ export class Encoder { effect.set(this.active, true, false); effect.spawn(async () => { + console.log(`[Video Publisher] Using container format: ${this.#container}`); + let group: Moq.Group | undefined; effect.cleanup(() => group?.close()); @@ -90,7 +95,7 @@ export class Encoder { throw new Error("no keyframe"); } - const buffer = Frame.encode(frame, frame.timestamp as Time.Micro); + const buffer = Frame.encode(frame, frame.timestamp as Time.Micro, this.#container); group?.writeFrame(buffer); }, error: (err: Error) => { @@ -143,6 +148,7 @@ export class Encoder { codedWidth: u53(config.width), codedHeight: u53(config.height), optimizeForLatency: true, + container: this.#container, }; effect.set(this.#catalog, catalog); diff --git a/js/hang/src/watch/audio/source.ts b/js/hang/src/watch/audio/source.ts index 0b18131a1..ca542b780 100644 --- a/js/hang/src/watch/audio/source.ts +++ b/js/hang/src/watch/audio/source.ts @@ -167,8 +167,11 @@ export class Source { effect.cleanup(() => sub.close()); // Create consumer with slightly less latency than the render worklet to avoid underflowing. 
+ // Container defaults to "legacy" via Zod schema for backward compatibility + console.log(`[Audio Subscriber] Using container format: ${config.container}`); const consumer = new Frame.Consumer(sub, { latency: Math.max(this.latency.peek() - JITTER_UNDERHEAD, 0) as Time.Milli, + container: config.container, }); effect.cleanup(() => consumer.close()); diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index 43833cac5..a7fd2c923 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -196,8 +196,11 @@ export class Source { effect.cleanup(() => sub.close()); // Create consumer that reorders groups/frames up to the provided latency. + // Container defaults to "legacy" via Zod schema for backward compatibility + console.log(`[Video Subscriber] Using container format: ${config.container}`); const consumer = new Frame.Consumer(sub, { latency: this.latency, + container: config.container, }); effect.cleanup(() => consumer.close());