diff --git a/encoding/_common64.ts b/encoding/_common64.ts
index e0a13c3b7bf2..1982a8e8d91f 100644
--- a/encoding/_common64.ts
+++ b/encoding/_common64.ts
@@ -3,11 +3,12 @@
 import type { Uint8Array_ } from "./_types.ts";
 export type { Uint8Array_ };
 
+const encoder = new TextEncoder();
 export const padding = "=".charCodeAt(0);
 export const alphabet: Record<Base64Alphabet, Uint8Array> = {
-  base64: new TextEncoder()
+  base64: encoder
     .encode("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"),
-  base64url: new TextEncoder()
+  base64url: encoder
     .encode("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"),
 };
 export const rAlphabet: Record<Base64Alphabet, Uint8Array> = {
@@ -19,6 +20,10 @@ alphabet.base64
 alphabet.base64url
   .forEach((byte, i) => rAlphabet.base64url[byte] = i);
 
+// WHITE_SPACE[byte] is 1 when byte is a tab, LF, FF, CR or space, else 0.
+const WHITE_SPACE = new Uint8Array(256);
+for (const byte of encoder.encode("\t\n\f\r ")) WHITE_SPACE[byte] = 1;
+
 /**
  * Options for encoding and decoding base64 strings.
  */
@@ -87,6 +92,58 @@ export function encode(
   return o;
 }
 
+/**
+ * Removes ASCII white space (tab, line feed, form feed, carriage return and
+ * space) from `buffer` in place by shifting the remaining bytes left.
+ *
+ * @param buffer The bytes to compact; its contents are overwritten.
+ * @returns A view of the start of `buffer` that holds only the kept bytes.
+ */
+export function removeWhiteSpace(buffer: Uint8Array_) {
+  const length = buffer.length;
+
+  const indices: number[] = [];
+
+  for (let i = 0; i < length; ++i) {
+    if (WHITE_SPACE[buffer[i]!]) indices.push(i);
+  }
+
+  // Move the bytes that follow the i-th white space byte left by i + 1, the
+  // number of white space bytes up to and including it.
+  for (let i = 0; i < indices.length; ++i) {
+    const index = indices[i]!;
+    const start = index + 1;
+    const end = indices[i + 1] ?? length;
+
+    buffer.set(buffer.subarray(start, end), index - i);
+  }
+
+  return buffer.subarray(0, length - indices.length);
+}
+
+/**
+ * Thrown by `decodeChunk` when `retryWs` is set and the failure may be caused
+ * by white space in the input.
+ */
+class RetriableError extends Error {}
+
+/**
+ * Decodes the base64 text in `buffer` in place, ignoring ASCII white space.
+ *
+ * White space is removed before decoding starts: `decodeChunk` writes decoded
+ * bytes into `buffer` while it reads, so the input may no longer be intact
+ * once decoding has begun.
+ *
+ * @param buffer The encoded bytes; overwritten with the decoded bytes.
+ * @param i The index at which reading starts.
+ * @param o The index at which writing starts.
+ * @param alphabet Maps a character code to its 6-bit value; a missing entry
+ * or 64 marks an invalid character.
+ * @param padding The character code of the padding character.
+ * @returns The index just past the last decoded byte.
+ * @throws {TypeError} If the input contains an invalid character.
+ * @throws {RangeError} If `buffer.length - o` leaves a remainder of 1 when divided by 4.
+ */
 export function decode(
   buffer: Uint8Array_,
   i: number,
@@ -94,10 +151,62 @@ export function decode(
   alphabet: Uint8Array,
   padding: number,
 ): number {
+  for (let x = i; x < buffer.length; ++x) {
+    if (!WHITE_SPACE[buffer[x]!]) continue;
+    // Compact from the first white space byte on, so everything before it
+    // (including any bytes before `i`) keeps its position.
+    const rest = removeWhiteSpace(buffer.subarray(x));
+    buffer = buffer.subarray(0, x + rest.length);
+    break;
+  }
+  return decodeChunk(buffer, i, o, alphabet, padding, false);
+}
+
+/**
+ * Decodes the base64 text in `buffer` in place without removing white space.
+ *
+ * @param buffer The encoded bytes; overwritten with the decoded bytes.
+ * @param i The index at which reading starts.
+ * @param o The index at which writing starts.
+ * @param alphabet Maps a character code to its 6-bit value; a missing entry
+ * or 64 marks an invalid character.
+ * @param padding The character code of the padding character.
+ * @param retryWs When `true`, a failure that white space could have caused
+ * throws `RetriableError` instead of `TypeError` or `RangeError`. Part of
+ * `buffer` may already hold decoded bytes by then.
+ * @returns The index just past the last decoded byte.
+ */
+export function decodeChunk(
+  buffer: Uint8Array_,
+  i: number,
+  o: number,
+  alphabet: Uint8Array,
+  padding: number,
+  retryWs: boolean,
+) {
+  if (retryWs && buffer.length > 0 && (WHITE_SPACE[buffer.at(-1)!])) {
+    throw new RetriableError();
+  }
+
+  const getHextet = (i: number): number => {
+    const char = buffer[i]!;
+    const hextet = alphabet[char] ?? 64;
+    // alphabet.Base64.length
+    if (hextet !== 64) return hextet;
+
+    if (retryWs && WHITE_SPACE[char]) throw new RetriableError();
+    throw new TypeError(
+      `Cannot decode input as base64: Invalid character (${
+        String.fromCharCode(char)
+      })`,
+    );
+  };
+
   for (let x = buffer.length - 2; x < buffer.length; ++x) {
     if (buffer[x] === padding) {
       for (let y = x + 1; y < buffer.length; ++y) {
         if (buffer[y] !== padding) {
+          if (retryWs && WHITE_SPACE[buffer[y]!]) throw new RetriableError();
           throw new TypeError(
             `Cannot decode input as base64: Invalid character (${
               String.fromCharCode(buffer[y]!)
@@ -110,6 +219,7 @@ export function decode(
     }
   }
   if ((buffer.length - o) % 4 === 1) {
+    if (retryWs) throw new RetriableError();
     throw new RangeError(
       `Cannot decode input as base64: Length (${
         buffer.length - o
@@ -119,25 +229,25 @@ export function decode(
 
   i += 3;
   for (; i < buffer.length; i += 4) {
-    const x = (getByte(buffer[i - 3]!, alphabet) << 18) |
-      (getByte(buffer[i - 2]!, alphabet) << 12) |
-      (getByte(buffer[i - 1]!, alphabet) << 6) |
-      getByte(buffer[i]!, alphabet);
+    const x = (getHextet(i - 3) << 18) |
+      (getHextet(i - 2) << 12) |
+      (getHextet(i - 1) << 6) |
+      getHextet(i);
     buffer[o++] = x >> 16;
     buffer[o++] = x >> 8 & 0xFF;
     buffer[o++] = x & 0xFF;
   }
   switch (i) {
     case buffer.length + 1: {
-      const x = (getByte(buffer[i - 3]!, alphabet) << 18) |
-        (getByte(buffer[i - 2]!, alphabet) << 12);
+      const x = (getHextet(i - 3) << 18) |
+        (getHextet(i - 2) << 12);
       buffer[o++] = x >> 16;
       break;
     }
     case buffer.length: {
-      const x = (getByte(buffer[i - 3]!, alphabet) << 18) |
-        (getByte(buffer[i - 2]!, alphabet) << 12) |
-        (getByte(buffer[i - 1]!, alphabet) << 6);
+      const x = (getHextet(i - 3) << 18) |
+        (getHextet(i - 2) << 12) |
+        (getHextet(i - 1) << 6);
       buffer[o++] = x >> 16;
       buffer[o++] = x >> 8 & 0xFF;
       break;
@@ -145,15 +255,3 @@ export function decode(
   }
   return o;
 }
-
-function getByte(char: number, alphabet: Uint8Array): number {
-  const byte = alphabet[char] ?? 64;
-  if (byte === 64) { // alphabet.Base64.length
-    throw new TypeError(
-      `Cannot decode input as base64: Invalid character (${
-        String.fromCharCode(char)
-      })`,
-    );
-  }
-  return byte;
-}
diff --git a/encoding/base64_test.ts b/encoding/base64_test.ts
index 26581c508e17..6eeaa442b18e 100644
--- a/encoding/base64_test.ts
+++ b/encoding/base64_test.ts
@@ -43,3 +43,19 @@ Deno.test("decodeBase64() decodes binary", () => {
     assertEquals(outputBinary, input);
   }
 });
+
+Deno.test("decodeBase64() ignores white space", () => {
+  const ws = "\t\n\f\r ";
+  for (const [input, output] of testsetBinary) {
+    const spaced = ["", ...output, ""].join(ws);
+    const outputBinary = decodeBase64(spaced);
+    assertEquals(outputBinary, input);
+  }
+});
+
+Deno.test("decodeBase64() ignores white space between groups", () => {
+  // White space only between two complete groups: the input neither starts
+  // nor ends with white space.
+  const decoded = decodeBase64("AAEC\r\nAwQF");
+  assertEquals(decoded, new Uint8Array([0, 1, 2, 3, 4, 5]));
+});
diff --git a/encoding/unstable_base64_stream.ts b/encoding/unstable_base64_stream.ts
index 8163ccead19a..1e21b968c602 100644
--- a/encoding/unstable_base64_stream.ts
+++ b/encoding/unstable_base64_stream.ts
@@ -31,10 +31,11 @@ import {
   alphabet,
   type Base64Alphabet,
   calcSizeBase64,
-  decode,
+  decodeChunk,
   encode,
   padding,
   rAlphabet,
+  removeWhiteSpace,
 } from "./_common64.ts";
 import { detach } from "./_common_detach.ts";
 
@@ -171,30 +172,32 @@ export class Base64DecoderStream
     let remainder = 0;
     super({
       transform(chunk, controller) {
-        let output = encode(chunk);
+        let output = removeWhiteSpace(encode(chunk));
         if (remainder) {
           output = detach(output, remainder + output.length)[0];
           output.set(push.subarray(0, remainder));
         }
         remainder = output.length % 4;
         if (remainder) push.set(output.subarray(-remainder));
-        const o = decode(
+        const o = decodeChunk(
           output.subarray(0, -remainder || undefined),
           0,
           0,
           abc,
           padding,
+          false,
         );
         controller.enqueue(output.subarray(0, o));
       },
       flush(controller) {
         if (remainder) {
-          const o = decode(
+          const o = decodeChunk(
             push.subarray(0, remainder),
             0,
             0,
             abc,
             padding,
+            false,
           );
           controller.enqueue(push.subarray(0, o));
         }
diff --git a/encoding/unstable_base64_stream_test.ts b/encoding/unstable_base64_stream_test.ts
index a406e23fc23a..63928662d465 100644
--- a/encoding/unstable_base64_stream_test.ts
+++ b/encoding/unstable_base64_stream_test.ts
@@ -76,3 +76,16 @@ Deno.test("Base64DecoderStream() with raw format", async () => {
     );
   }
 });
+
+Deno.test("Base64DecoderStream() allows white space", async () => {
+  const text = await Deno.readTextFile("./deno.lock");
+
+  const encoded = encodeBase64(text).replaceAll(/.{76}/g, `$&\r\n`);
+
+  const stream = new Blob([encoded]).stream()
+    .pipeThrough(new FixedChunkStream(1021))
+    .pipeThrough(new TextDecoderStream())
+    .pipeThrough(new Base64DecoderStream());
+
+  assertEquals(await toText(stream), text);
+});