Skip to content

feat(encoding): skip whitespace in base64 input #6771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 74 additions & 23 deletions encoding/_common64.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
import type { Uint8Array_ } from "./_types.ts";
export type { Uint8Array_ };

const encoder = new TextEncoder();
export const padding = "=".charCodeAt(0);
export const alphabet: Record<Base64Alphabet, Uint8Array> = {
base64: new TextEncoder()
base64: encoder
.encode("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"),
base64url: new TextEncoder()
base64url: encoder
.encode("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"),
};
export const rAlphabet: Record<Base64Alphabet, Uint8Array> = {
Expand All @@ -19,6 +20,9 @@ alphabet.base64
alphabet.base64url
.forEach((byte, i) => rAlphabet.base64url[byte] = i);

// Byte-indexed lookup table: an entry is 1 when that byte is one of the
// white space characters (tab, line feed, form feed, carriage return, space)
// and 0 for every other byte value.
const WHITE_SPACE = new Uint8Array(256);
encoder.encode("\t\n\f\r ")
  .forEach((byte) => WHITE_SPACE[byte] = 1);

/**
* Options for encoding and decoding base64 strings.
*/
Expand Down Expand Up @@ -87,17 +91,75 @@ export function encode(
return o;
}

/**
 * Compacts `buffer` in place, dropping every byte flagged in `WHITE_SPACE`.
 *
 * Surviving bytes keep their relative order and are shifted towards the start
 * of the buffer. Bytes located past the compacted region are left untouched.
 *
 * @param buffer The bytes to strip. Mutated in place.
 * @returns A view over the leading, white-space-free part of `buffer`.
 */
export function removeWhiteSpace(buffer: Uint8Array_) {
  // `kept` is both the number of bytes retained so far and the next write slot.
  let kept = 0;

  for (let read = 0; read < buffer.length; ++read) {
    const byte = buffer[read]!;
    if (WHITE_SPACE[byte]) continue;
    // The write slot never runs ahead of the read cursor, so no unread byte is
    // ever overwritten.
    buffer[kept++] = byte;
  }

  return buffer.subarray(0, kept);
}

/**
 * Internal sentinel error. `decodeChunk` throws it, when called with
 * `retryWs` set, on input that white space removal might repair (a white space
 * byte, or a length that is 1 modulo 4). The class is not exported.
 */
class RetriableError extends Error {}

/**
 * Decodes base64 bytes held in `buffer` in place, skipping white space.
 *
 * `decodeChunk` writes its output into the same buffer it reads from, so
 * starting a decode and retrying after a white space byte is found would
 * re-read a prefix that has already been overwritten with decoded bytes.
 * White space is therefore detected and stripped before any decoding starts.
 *
 * @param buffer The encoded bytes. Mutated in place.
 * @param i Index at which the encoded input starts.
 * @param o Index at which decoded output is written.
 * @param alphabet Reverse lookup table, passed through to `decodeChunk`.
 * @param padding Byte value of the padding character.
 * @returns The output index one past the last decoded byte.
 * @throws {TypeError} If the input contains a character that is neither in
 * the alphabet nor white space.
 * @throws {RangeError} If the length of the input is invalid.
 */
export function decode(
  buffer: Uint8Array_,
  i: number,
  o: number,
  alphabet: Uint8Array,
  padding: number,
): number {
  // Only the encoded region decides whether stripping is needed; input without
  // white space pays for a single read-only scan.
  let hasWhiteSpace = false;
  for (let index = i; index < buffer.length; ++index) {
    if (WHITE_SPACE[buffer[index]!]) {
      hasWhiteSpace = true;
      break;
    }
  }

  if (hasWhiteSpace) buffer = removeWhiteSpace(buffer);

  // White space has been dealt with, so every remaining problem is a genuine
  // error and must not be reported as retriable.
  return decodeChunk(buffer, i, o, alphabet, padding, false);
}

export function decodeChunk(
buffer: Uint8Array_,
i: number,
o: number,
alphabet: Uint8Array,
padding: number,
retryWs: boolean,
) {
Comment on lines +132 to +139
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this function should have an option object instead of so many args for more clarity.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't agree. Every value is needed and the function isn't exposed to the user.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but it makes it hard to understand what they are for if one is not already familiar with the code.
Alternatively adding tsdoc might also be a solution.

if (retryWs && buffer.length > 0 && (WHITE_SPACE[buffer.at(-1)!])) {
throw new RetriableError();
}

const getHextet = (i: number): number => {
const char = buffer[i]!;
const hextet = alphabet[char] ?? 64;
// alphabet.Base64.length
if (hextet !== 64) return hextet;

if (retryWs && WHITE_SPACE[char]) throw new RetriableError();
throw new TypeError(
`Cannot decode input as base64: Invalid character (${
String.fromCharCode(char)
})`,
);
};

for (let x = buffer.length - 2; x < buffer.length; ++x) {
if (buffer[x] === padding) {
for (let y = x + 1; y < buffer.length; ++y) {
if (buffer[y] !== padding) {
if (retryWs && WHITE_SPACE[buffer[y]!]) throw new RetriableError();
throw new TypeError(
`Cannot decode input as base64: Invalid character (${
String.fromCharCode(buffer[y]!)
Expand All @@ -110,6 +172,7 @@ export function decode(
}
}
if ((buffer.length - o) % 4 === 1) {
if (retryWs) throw new RetriableError();
throw new RangeError(
`Cannot decode input as base64: Length (${
buffer.length - o
Expand All @@ -119,41 +182,29 @@ export function decode(

i += 3;
for (; i < buffer.length; i += 4) {
const x = (getByte(buffer[i - 3]!, alphabet) << 18) |
(getByte(buffer[i - 2]!, alphabet) << 12) |
(getByte(buffer[i - 1]!, alphabet) << 6) |
getByte(buffer[i]!, alphabet);
const x = (getHextet(i - 3) << 18) |
(getHextet(i - 2) << 12) |
(getHextet(i - 1) << 6) |
getHextet(i);
buffer[o++] = x >> 16;
buffer[o++] = x >> 8 & 0xFF;
buffer[o++] = x & 0xFF;
}
switch (i) {
case buffer.length + 1: {
const x = (getByte(buffer[i - 3]!, alphabet) << 18) |
(getByte(buffer[i - 2]!, alphabet) << 12);
const x = (getHextet(i - 3) << 18) |
(getHextet(i - 2) << 12);
buffer[o++] = x >> 16;
break;
}
case buffer.length: {
const x = (getByte(buffer[i - 3]!, alphabet) << 18) |
(getByte(buffer[i - 2]!, alphabet) << 12) |
(getByte(buffer[i - 1]!, alphabet) << 6);
const x = (getHextet(i - 3) << 18) |
(getHextet(i - 2) << 12) |
(getHextet(i - 1) << 6);
buffer[o++] = x >> 16;
buffer[o++] = x >> 8 & 0xFF;
break;
}
}
return o;
}

/**
 * Looks up the value stored for `char` in the reverse `alphabet` table.
 *
 * @param char The encoded byte to translate.
 * @param alphabet Lookup table indexed by byte value.
 * @returns The table entry for `char`.
 * @throws {TypeError} If the entry is 64 (alphabet.Base64.length) or `char`
 * lies outside the table.
 */
function getByte(char: number, alphabet: Uint8Array): number {
  const value = alphabet[char];
  // A lookup past the end of the table is handled like the 64 marker.
  if (value !== undefined && value !== 64) return value;

  throw new TypeError(
    `Cannot decode input as base64: Invalid character (${
      String.fromCharCode(char)
    })`,
  );
}
9 changes: 9 additions & 0 deletions encoding/base64_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,12 @@ Deno.test("decodeBase64() decodes binary", () => {
assertEquals(outputBinary, input);
}
});

Deno.test("decodeBase64() ignores white space", () => {
  // The full run of white space characters the decoder is expected to skip.
  const ws = "\t\n\f\r ";
  for (const [expected, encoded] of testsetBinary) {
    // Put the white space run before the first character and after each one.
    let spaced = ws;
    for (const char of encoded) spaced += char + ws;

    assertEquals(decodeBase64(spaced), expected);
  }
});
11 changes: 7 additions & 4 deletions encoding/unstable_base64_stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ import {
alphabet,
type Base64Alphabet,
calcSizeBase64,
decode,
decodeChunk,
encode,
padding,
rAlphabet,
removeWhiteSpace,
} from "./_common64.ts";
import { detach } from "./_common_detach.ts";

Expand Down Expand Up @@ -171,30 +172,32 @@ export class Base64DecoderStream<T extends "string" | "bytes">
let remainder = 0;
super({
transform(chunk, controller) {
let output = encode(chunk);
let output = removeWhiteSpace(encode(chunk));
if (remainder) {
output = detach(output, remainder + output.length)[0];
output.set(push.subarray(0, remainder));
}
remainder = output.length % 4;
if (remainder) push.set(output.subarray(-remainder));
const o = decode(
const o = decodeChunk(
output.subarray(0, -remainder || undefined),
0,
0,
abc,
padding,
false,
);
controller.enqueue(output.subarray(0, o));
},
flush(controller) {
if (remainder) {
const o = decode(
const o = decodeChunk(
push.subarray(0, remainder),
0,
0,
abc,
padding,
false,
);
controller.enqueue(push.subarray(0, o));
}
Expand Down
13 changes: 13 additions & 0 deletions encoding/unstable_base64_stream_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,16 @@ Deno.test("Base64DecoderStream() with raw format", async () => {
);
}
});

Deno.test("Base64DecoderStream() allows white space", async () => {
  const text = await Deno.readTextFile("./deno.lock");

  // Break the encoded text with CRLF after every 76 characters.
  const encoded = encodeBase64(text).replaceAll(/.{76}/g, `$&\r\n`);

  // Re-chunk the bytes into 1021-byte pieces before they are decoded as text
  // and fed to the base64 decoder.
  const bytes = new Blob([encoded]).stream();
  const chunked = bytes.pipeThrough(new FixedChunkStream(1021));
  const characters = chunked.pipeThrough(new TextDecoderStream());
  const decoded = characters.pipeThrough(new Base64DecoderStream());

  assertEquals(await toText(decoded), text);
});
Loading