-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Work with strings, expose module functor #3
base: main
Are you sure you want to change the base?
Changes from all commits
e3a277a
1a09ca1
679f1a1
e5e1f6a
f9c4e52
27dc455
cae0cac
9081bb7
efe75a3
50ad21e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,81 +5,78 @@ | |
* LICENSE file in the root directory of this source tree. | ||
*) | ||
|
||
(* VLQ (variable-length quantity) encoder | ||
https://en.wikipedia.org/wiki/Variable-length_quantity *) | ||
|
||
module type Config = sig | ||
val shift: int | ||
val char_of_digit: int -> char | ||
val digit_of_char: char -> int | ||
val shift : int | ||
val char_of_int : int -> char | ||
val int_of_char : char -> int | ||
end | ||
|
||
module type S = sig | ||
val encode: Buffer.t -> int -> unit | ||
val decode: char Stream.t -> int | ||
val encode : int -> string | ||
val decode : string -> int | ||
end | ||
|
||
exception Unexpected_eof | ||
exception Invalid_base64 of char | ||
exception Char_of_int_failure of int | ||
exception Int_of_char_failure of char | ||
|
||
module Make (C: Config) = struct | ||
let vlq_base = 1 lsl C.shift | ||
let vlq_base_mask = vlq_base - 1 | ||
let vlq_continuation_bit = vlq_base (* MSB *) | ||
let vlq_continuation_bit = vlq_base | ||
|
||
(** | ||
* Converts from a two-complement value to a value where the sign bit is | ||
* placed in the least significant bit. For example, as decimals: | ||
* 1 becomes 2 (10 binary), -1 becomes 3 (11 binary) | ||
* 2 becomes 4 (100 binary), -2 becomes 5 (101 binary) | ||
*) | ||
(** Converts from a two-complement value to a value where the sign bit is | ||
placed in the least significant bit. For example, as decimals: | ||
1 becomes 2 (10 binary), -1 becomes 3 (11 binary) | ||
2 becomes 4 (100 binary), -2 becomes 5 (101 binary) *) | ||
let vlq_signed_of_int value = | ||
if value < 0 then ((-value) lsl 1) + 1 else (value lsl 1) + 0 | ||
|
||
(* Write the value to the buffer, as multiple characters as necessary *) | ||
let rec encode_vlq buf vlq = | ||
let digit = vlq land vlq_base_mask in | ||
let vlq = vlq lsr C.shift in | ||
if vlq = 0 then Buffer.add_char buf (C.char_of_digit digit) | ||
else begin | ||
(* set the continuation bit *) | ||
Buffer.add_char buf (C.char_of_digit (digit lor vlq_continuation_bit)); | ||
encode_vlq buf vlq | ||
end | ||
match value < 0 with | ||
| true -> ((-value) lsl 1) + 1 | ||
| false -> value lsl 1 | ||
|
||
(* Encodes `value` as a VLQ and writes it to `buf` *) | ||
let encode buf value = | ||
let encode value = | ||
let vlq = vlq_signed_of_int value in | ||
encode_vlq buf vlq | ||
let rec loop vlq encoded = | ||
let digit = vlq land vlq_base_mask in | ||
let vlq = vlq lsr C.shift in | ||
match vlq = 0 with | ||
| true -> encoded ^ Char.escaped (C.char_of_int digit) | ||
| false -> | ||
loop vlq (encoded ^ Char.escaped | ||
(C.char_of_int (digit lor vlq_continuation_bit))) in | ||
loop vlq "" | ||
|
||
let decode = | ||
let rec helper (acc, shift) stream = | ||
let decode value = | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The problem with taking a string is that we don't know how many characters the VLQ is. With a stream, we consume characters until we read a complete VLQ, which mutates the stream so it's ready to read the next one. Given a sourcemap segment as a string like "AKgBiB", how do we split it into ("A", "K", "gB", "iB") to decode it into (0, 5, 16, 17)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see. Thinking about this case: if decode (encode x) = x and
encode (decode y) = y, then mathematically there is an isomorphism, because both functions are bijective. This would be an issue that the sourcemap module would be responsible for (but, for optimizations, we could expose functions with streams and buffers). I'll implement it. |
||
let stream = Stream.of_string value in | ||
let rec loop shift decoded = | ||
let chr = | ||
try Stream.next stream | ||
with Stream.Failure -> raise Unexpected_eof | ||
in | ||
let digit = C.digit_of_char chr in | ||
let continued = (digit land vlq_continuation_bit) != 0 in | ||
let acc = acc + (digit land vlq_base_mask) lsl shift in | ||
if continued then helper (acc, shift + C.shift) stream else acc | ||
in | ||
fun stream -> | ||
let acc = helper (0, 0) stream in | ||
let abs = acc / 2 in | ||
if acc land 1 = 0 then abs else -(abs) | ||
with Stream.Failure -> raise Unexpected_eof in | ||
let digit = C.int_of_char chr in | ||
let decoded = decoded + (digit land vlq_base_mask) lsl shift in | ||
match digit land vlq_continuation_bit with | ||
| 0 -> decoded | ||
| _ -> (* Continuation found *) | ||
loop (shift + C.shift) decoded in | ||
let decoded = loop 0 0 in | ||
let abs = decoded / 2 in | ||
match decoded land 1 with | ||
| 0 -> abs | ||
| _ -> -(abs) | ||
end | ||
|
||
module Base64 = Make (struct | ||
let shift = 5 | ||
let base64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" | ||
|
||
(* Convert a number between 0 and 63 to a base64 char *) | ||
let char_of_digit digit = | ||
if 0 <= digit && digit < String.length base64 | ||
then base64.[digit] | ||
else failwith (Printf.sprintf "Must be between 0 and 63: %d" digit) | ||
let char_of_int digit = | ||
match digit >= 0 && digit < String.length base64 with | ||
| true -> base64.[digit] | ||
| _ -> raise (Char_of_int_failure digit) | ||
|
||
let digit_of_char chr = | ||
try String.index base64 chr | ||
with Not_found -> raise (Invalid_base64 chr) | ||
let int_of_char chr = | ||
match String.index_opt base64 chr with | ||
| Some index -> index | ||
| None -> raise (Int_of_char_failure chr) | ||
end) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
|
||
(executable ( | ||
(name test) | ||
(libraries (vlq oUnit)) | ||
(libraries (vlq ounit)) | ||
)) | ||
|
||
(alias ( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Writing to a buffer was actually a significant perf win for us, so I'd like to leave the ability to do that even if we also expose a `string` API.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree! The source map implementation for `string_of_mappings` writes the encoded value to the result, `buffer` to `buffer`.