diff --git a/README.md b/README.md index 419e8ef..aca3465 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,8 @@ npm install strnum const toNumber = require("strnum"); toNumber(undefined) // undefined -toNumber(null)) //null -toNumber("")) // "" +toNumber(null) //null +toNumber("") // "" toNumber("string"); //"string") toNumber("12,12"); //"12,12") toNumber("12 12"); //"12 12") diff --git a/benchmark.js b/benchmark.js new file mode 100644 index 0000000..078673f --- /dev/null +++ b/benchmark.js @@ -0,0 +1,160 @@ +import { Bench } from 'tinybench' +import toNumber from "./strnum.js" + +const bench = new Bench({ name: 'strnum benchmark', time: 100 }) + +function toNumberBenchmark(str, options) { + bench.add(`${str}${options ? ', ' + JSON.stringify(options) : ''}`, () => { + toNumber(str, options) + }) +} + +toNumberBenchmark(undefined); +toNumberBenchmark(null); +toNumberBenchmark(""); +toNumberBenchmark("string"); +toNumberBenchmark("e89794659669cb7bb967db73a7ea6889c3891727") +toNumberBenchmark("12,12"); +toNumberBenchmark("12 12"); +toNumberBenchmark("12-12"); +toNumberBenchmark("12.12.12"); +toNumberBenchmark("+12"); +toNumberBenchmark("+ 12"); +toNumberBenchmark("12+12"); +toNumberBenchmark("1212+"); +toNumberBenchmark("0x2f"); +toNumberBenchmark("-0x2f"); +toNumberBenchmark("0x2f", { hex: true }); +toNumberBenchmark("-0x2f", { hex: true }); +toNumberBenchmark("0x2f", { hex: false }); +toNumberBenchmark("-0x2f", { hex: false }); +toNumberBenchmark("0xzz"); +toNumberBenchmark("iweraf0x123qwerqwer"); +toNumberBenchmark("1230x55"); +toNumberBenchmark("JVBERi0xLjMNCiXi48"); +toNumberBenchmark("0"); +toNumberBenchmark("00"); +toNumberBenchmark("00.0"); + +toNumberBenchmark("0", { leadingZeros: false }); +toNumberBenchmark("00", { leadingZeros: false }); +toNumberBenchmark("00.0", { leadingZeros: false }); + +toNumberBenchmark("06"); +toNumberBenchmark("06", { leadingZeros: true }); +toNumberBenchmark("06", { leadingZeros: false }); + +toNumberBenchmark("006"); 
+toNumberBenchmark("006", { leadingZeros: true }); +toNumberBenchmark("006", { leadingZeros: false }); + +toNumberBenchmark("000000000000000000000000017717", { leadingZeros: false }); +toNumberBenchmark("000000000000000000000000017717", { leadingZeros: true }); +toNumberBenchmark("0420926189200190257681175017717"); +toNumberBenchmark("20.21.030"); +toNumberBenchmark("0.21.030"); +toNumberBenchmark("0.21."); +toNumberBenchmark("0."); +toNumberBenchmark("+0."); +toNumberBenchmark("-0."); +toNumberBenchmark("1."); +toNumberBenchmark("00.00"); +toNumberBenchmark("0.06"); +toNumberBenchmark("00.6"); +toNumberBenchmark(".006"); +toNumberBenchmark("6.0"); +toNumberBenchmark("06.0"); + +toNumberBenchmark("0.0", { leadingZeros: false }); +toNumberBenchmark("00.00", { leadingZeros: false }); +toNumberBenchmark("0.06", { leadingZeros: false }); +toNumberBenchmark("00.6", { leadingZeros: false }); +toNumberBenchmark(".006", { leadingZeros: false }); +toNumberBenchmark("6.0", { leadingZeros: false }); +toNumberBenchmark("06.0", { leadingZeros: false }); +toNumberBenchmark("+06"); +toNumberBenchmark("-06"); +toNumberBenchmark("-06", { leadingZeros: true }); +toNumberBenchmark("-06", { leadingZeros: false }); + +toNumberBenchmark("-0.0"); +toNumberBenchmark("-00.00"); +toNumberBenchmark("-0.06"); +toNumberBenchmark("-00.6"); +toNumberBenchmark("-.006"); +toNumberBenchmark("-6.0"); +toNumberBenchmark("-06.0"); +toNumberBenchmark("+06.0"); + +toNumberBenchmark("-0.0", { leadingZeros: false }); +toNumberBenchmark("-00.00", { leadingZeros: false }); +toNumberBenchmark("-0.06", { leadingZeros: false }); +toNumberBenchmark("-00.6", { leadingZeros: false }); +toNumberBenchmark("-.006", { leadingZeros: false }); +toNumberBenchmark("-6.0", { leadingZeros: false }); +toNumberBenchmark("-06.0", { leadingZeros: false }); +toNumberBenchmark("020211201030005811824"); +toNumberBenchmark("20211201030005811824"); +toNumberBenchmark("20.211201030005811824"); 
+toNumberBenchmark("0.211201030005811824"); +toNumberBenchmark("01.0e2", { leadingZeros: false }); +toNumberBenchmark("-01.0e2", { leadingZeros: false }); +toNumberBenchmark("01.0e2"); +toNumberBenchmark("-01.0e2"); +toNumberBenchmark("1.0e2"); + +toNumberBenchmark("-1.0e2"); +toNumberBenchmark("1.0e-2"); + +toNumberBenchmark("420926189200190257681175017717"); +toNumberBenchmark("420926189200190257681175017717", { eNotation: false }); + +toNumberBenchmark("1e-2"); +toNumberBenchmark("1e+2"); +toNumberBenchmark("1.e+2"); +toNumberBenchmark("01.0E2", { leadingZeros: false }); +toNumberBenchmark("-01.0E2", { leadingZeros: false }); +toNumberBenchmark("01.0E2"); +toNumberBenchmark("-01.0E2"); +toNumberBenchmark("1.0E2"); + +toNumberBenchmark("-1.0E2"); +toNumberBenchmark("1.0E-2"); + +toNumberBenchmark("E-2"); +toNumberBenchmark("E2"); +toNumberBenchmark("0E2"); +toNumberBenchmark("-0E2"); +toNumberBenchmark("00E2"); +toNumberBenchmark("00E2", { leadingZeros: false }); +toNumberBenchmark("0", { skipLike: /.*/ }); +toNumberBenchmark("+12", { skipLike: /\+[0-9]{10}/ }); +toNumberBenchmark("12+12", { skipLike: /\+[0-9]{10}/ }); +toNumberBenchmark("12+1212121212", { skipLike: /\+[0-9]{10}/ }); +toNumberBenchmark("+1212121212"); +toNumberBenchmark("+1212121212", { skipLike: /\+[0-9]{10}/ }); +toNumberBenchmark("+12 12"); +toNumberBenchmark(" +12 12 "); +toNumberBenchmark(" +1212 "); +toNumberBenchmark("+1212"); +toNumberBenchmark("+12.12"); +toNumberBenchmark("-12.12"); +toNumberBenchmark("-012.12"); + +toNumberBenchmark("Infinity"); +toNumberBenchmark("-Infinity"); +toNumberBenchmark("+Infinity"); +toNumberBenchmark("Infinity", { infinity: true }); +toNumberBenchmark("-Infinity", { infinity: true }); +toNumberBenchmark("+Infinity", { infinity: true }); +toNumberBenchmark("Infinity", { infinity: false }); +toNumberBenchmark("-Infinity", { infinity: false }); +toNumberBenchmark("+Infinity", { infinity: false }); +toNumberBenchmark(" Infinity "); +toNumberBenchmark(" 
-Infinity "); +toNumberBenchmark(" +Infinity "); + +await bench.run() + +console.log(bench.name) +console.table(bench.table()) \ No newline at end of file diff --git a/package.json b/package.json index b60e835..a65bf4c 100644 --- a/package.json +++ b/package.json @@ -4,8 +4,10 @@ "description": "Parse String to Number based on configuration", "type": "module", "main": "strnum.js", + "types": "strnum.d.ts", "scripts": { - "test": "jasmine strnum.test.js" + "test": "jasmine *.test.js", + "types": "tsc -p ." }, "keywords": [ "string", @@ -26,6 +28,8 @@ } ], "devDependencies": { - "jasmine": "^5.6.0" + "jasmine": "^5.6.0", + "tinybench": "^4.0.1", + "typescript": "^5.8.3" } -} +} \ No newline at end of file diff --git a/strnum.d.ts b/strnum.d.ts new file mode 100644 index 0000000..7727c9f --- /dev/null +++ b/strnum.d.ts @@ -0,0 +1,92 @@ +declare module "strnum" { + /** + * @template {*} T + * @param {T} str - The string to convert to a number. + * @param {Options} [options] - Options to control the conversion behavior. + * @returns {number|T} - The converted number or the original value if conversion is not applicable. + */ + export default function toNumber(str: T, options?: Options): number | T; + /** + * @param {string} str - The string to analyze. + * @param {Options} options - Options to control the parsing behavior. + * @returns {number} - A bitmask representing the analysis result of the string. 
+ */ + export function analyzeNumber(str: string, options: Options): number; + export type State = typeof NUMBER | typeof NOT_A_NUMBER | typeof BINARY | typeof DECIMAL | typeof OCTAL | typeof HEX | typeof FLOAT | typeof INTEGER | typeof BIGINT | typeof ZERO | typeof WHITESPACE | typeof BEGIN | typeof END | typeof LEADING_WHITESPACE | typeof TRAILING_WHITESPACE | typeof BEGIN_INTEGER_DIGITS | typeof BEGIN_FRAC_DIGITS | typeof BEGIN_BINARY | typeof BEGIN_HEX | typeof BEGIN_OCTAL | typeof BEGIN_EXPONENT | typeof BEGIN_ZERO | typeof FIRST_DIGIT_ZERO_NOT_LEADING | typeof LEADING_ZEROS | typeof INFINITY | typeof SIGN | typeof EXPONENT_INDICATOR | typeof EXPONENT_SIGN | typeof EXPONENT_DECIMAL; + export type Options = { + /** + * - Whether to allow hexadecimal numbers (e.g., "0x1A"). + */ + hex?: boolean; + /** + * - Whether to allow octal numbers (e.g., "0o17"). + */ + octal?: boolean; + /** + * - Whether to allow binary numbers (e.g., "0b1010"). + */ + binary?: boolean; + /** + * - Whether to allow BigInt numbers (e.g., "123n"). + */ + bigint?: boolean; + /** + * - Whether to allow leading zeros in numbers (e.g., "000123"). + */ + leadingZeros?: boolean; + /** + * - Whether to check if the number is a safe integer. + */ + safeInteger?: boolean; + /** + * - Whether to allow "Infinity" and "-Infinity". + */ + infinity?: boolean; + /** + * - A regular expression to skip certain string patterns. + */ + skipLike?: RegExp; + /** + * - Whether to allow scientific notation (e.g., "1e10"). + */ + eNotation?: boolean; + /** + * - Whether to treat empty strings or strings with whitespace as zero (e.g., " "). + */ + empty?: boolean; + /** + * - Whether to force IEEE 754 compliance for floating-point numbers (e.g. "1234567890.1234567890" => 1234567890.1234567). 
+ */ + ieee754?: boolean; + }; + const NUMBER: 0; + const NOT_A_NUMBER: 1; + const BINARY: 2; + const DECIMAL: 4; + const OCTAL: 8; + const HEX: 16; + const FLOAT: 32; + const INTEGER: 64; + const BIGINT: 128; + const ZERO: 2048; + const WHITESPACE: 512; + const BEGIN: 8192; + const END: 16384; + const LEADING_WHITESPACE: 8704; + const TRAILING_WHITESPACE: 16896; + const BEGIN_INTEGER_DIGITS: 8196; + const BEGIN_FRAC_DIGITS: 8224; + const BEGIN_BINARY: 8194; + const BEGIN_HEX: 8208; + const BEGIN_OCTAL: 8200; + const BEGIN_EXPONENT: 12288; + const BEGIN_ZERO: 10240; + const FIRST_DIGIT_ZERO_NOT_LEADING: 26624; + const LEADING_ZEROS: 10244; + const INFINITY: 256; + const SIGN: 1024; + const EXPONENT_INDICATOR: 4096; + const EXPONENT_SIGN: 5120; + const EXPONENT_DECIMAL: 4100; + export {}; +} diff --git a/strnum.js b/strnum.js index 330da88..2a0c1f2 100644 --- a/strnum.js +++ b/strnum.js @@ -1,129 +1,564 @@ -const hexRegex = /^[-+]?0x[a-fA-F0-9]+$/; -const numRegex = /^([\-\+])?(0*)([0-9]*(\.[0-9]*)?)$/; -// const octRegex = /^0x[a-z0-9]+/; -// const binRegex = /0x[a-z0-9]+/; - - -const consider = { - hex : true, - // oct: false, - leadingZeros: true, - decimalPoint: "\.", - eNotation: true, - //skipLike: /regex/ -}; - -export default function toNumber(str, options = {}){ - options = Object.assign({}, consider, options ); - if(!str || typeof str !== "string" ) return str; - - let trimmedStr = str.trim(); - - if(options.skipLike !== undefined && options.skipLike.test(trimmedStr)) return str; - else if(str==="0") return 0; - else if (options.hex && hexRegex.test(trimmedStr)) { - return parse_int(trimmedStr, 16); - // }else if (options.oct && octRegex.test(str)) { - // return Number.parseInt(val, 8); - }else if (trimmedStr.search(/.+[eE].+/)!== -1) { //eNotation - return resolveEnotation(str,trimmedStr,options); - // }else if (options.parseBin && binRegex.test(str)) { - // return Number.parseInt(val, 2); - }else{ - //separate negative sign, leading zeros, and rest 
/**
 * Conversion options accepted by {@link toNumber} and {@link analyzeNumber}.
 *
 * @typedef {Object} Options
 * @property {boolean} [hex=true] - Whether to allow hexadecimal numbers (e.g., "0x1A").
 * @property {boolean} [octal=false] - Whether to allow octal numbers (e.g., "0o17").
 * @property {boolean} [binary=false] - Whether to allow binary numbers (e.g., "0b1010").
 * @property {boolean} [bigint=false] - Whether to allow BigInt-style literals (e.g., "123n"); the result is still a Number.
 * @property {boolean} [leadingZeros=true] - Whether to allow leading zeros in numbers (e.g., "000123").
 * @property {boolean} [safeInteger=true] - Whether integers outside the safe integer range are returned unchanged.
 * @property {boolean} [infinity=false] - Whether to allow "Infinity", "+Infinity" and "-Infinity".
 * @property {RegExp} [skipLike] - Strings (after trimming) matching this expression are returned unchanged.
 * @property {boolean} [eNotation=true] - Whether to allow scientific notation (e.g., "1e10").
 * @property {boolean} [empty=false] - Declared for the public typings; NOTE(review): nothing in this module reads it.
 * @property {boolean} [ieee754=false] - When true, floats are returned even if digits were lost while parsing.
 */

// ---------------------------------------------------------------------------
// Bit flags. `analyzeNumber` returns a combination of the single-bit flags and
// uses both the single-bit and the composite values as parser states.
// ---------------------------------------------------------------------------

const NUMBER = /** @type {const} */ 0b00000000000000000;
const NOT_A_NUMBER = /** @type {const} */ 0b00000000000000001;

const BINARY = /** @type {const} */ 0b00000000000000010;
const DECIMAL = /** @type {const} */ 0b00000000000000100;
const OCTAL = /** @type {const} */ 0b00000000000001000;
const HEX = /** @type {const} */ 0b00000000000010000;

const FLOAT = /** @type {const} */ 0b00000000000100000;
const INTEGER = /** @type {const} */ 0b00000000001000000;
const BIGINT = /** @type {const} */ 0b00000000010000000;
const INFINITY = /** @type {const} */ 0b00000000100000000;

// Special character classes
const WHITESPACE = /** @type {const} */ 0b00000001000000000;
const SIGN = /** @type {const} */ 0b00000010000000000;
const ZERO = /** @type {const} */ 0b00000100000000000;
const EXPONENT_INDICATOR = /** @type {const} */ 0b00001000000000000; // 'e' or 'E'

// Positional markers
const BEGIN = /** @type {const} */ 0b00010000000000000;
const END = /** @type {const} */ 0b00100000000000000;

const NEGATIVE = /** @type {const} */ 0b01000000000000000;

/**
 * Guards a hand-written composite constant against drifting from its parts.
 *
 * @template {number} T
 * @param {T} value - The literal value written in the source.
 * @param {number} bitmask - The same value expressed as an OR of flags.
 * @returns {T} The value, when both agree.
 * @throws {Error} When the literal and the computed bitmask differ.
 */
function assertBitmask(value, bitmask) {
    if (value !== bitmask) {
        throw new Error(`Expected bitmask ${bitmask}, but got ${value}`);
    }
    return value;
}

// Composite states. Those carrying NOT_A_NUMBER are "incomplete": the input is
// rejected if it ends while the parser is still in one of them.
const LEADING_WHITESPACE = /** @type {const} */ assertBitmask(8705, BEGIN | WHITESPACE | NOT_A_NUMBER);
const TRAILING_WHITESPACE = /** @type {const} */ assertBitmask(16896, END | WHITESPACE);

const BEGIN_SIGN = /** @type {const} */ assertBitmask(9217, BEGIN | SIGN | NOT_A_NUMBER);
const BEGIN_FRAC_DIGITS = /** @type {const} */ assertBitmask(8224, BEGIN | FLOAT);
// A decimal point that was not preceded by any digit (".", "-."): at least one
// fraction digit must follow, otherwise the string is not a number.
const LEADING_DOT = /** @type {const} */ assertBitmask(8225, BEGIN | FLOAT | NOT_A_NUMBER);
const BEGIN_HEX = /** @type {const} */ assertBitmask(8209, BEGIN | HEX | NOT_A_NUMBER);
const BEGIN_OCTAL = /** @type {const} */ assertBitmask(8201, BEGIN | OCTAL | NOT_A_NUMBER);
const BEGIN_BINARY = /** @type {const} */ assertBitmask(8195, BEGIN | BINARY | NOT_A_NUMBER);

const BEGIN_EXPONENT = /** @type {const} */ assertBitmask(12289, EXPONENT_INDICATOR | BEGIN | NOT_A_NUMBER);
const BEGIN_EXPONENT_SIGN = /** @type {const} */ assertBitmask(5121, EXPONENT_INDICATOR | SIGN | NOT_A_NUMBER);
const EXPONENT = /** @type {const} */ assertBitmask(4100, EXPONENT_INDICATOR | DECIMAL);

const BEGIN_ZERO = /** @type {const} */ assertBitmask(10240, BEGIN | ZERO);
const FIRST_DIGIT_ZERO_NOT_LEADING = /** @type {const} */ assertBitmask(26624, ZERO | BEGIN | END);
const LEADING_ZEROS = /** @type {const} */ assertBitmask(10244, ZERO | BEGIN | DECIMAL);

// Prefixed literals ("0b…", "0o…") that need their sign stripped before `+str`.
const REMOVE_TYPE_HINT = /** @type {const} */ assertBitmask(10, BINARY | OCTAL);

/**
 * Every value the parser state variable may hold.
 *
 * @typedef {typeof NUMBER |
 *   typeof NOT_A_NUMBER |
 *   typeof BINARY |
 *   typeof DECIMAL |
 *   typeof OCTAL |
 *   typeof HEX |
 *   typeof FLOAT |
 *   typeof INTEGER |
 *   typeof BIGINT |
 *   typeof BEGIN |
 *   typeof LEADING_WHITESPACE |
 *   typeof TRAILING_WHITESPACE |
 *   typeof BEGIN_FRAC_DIGITS |
 *   typeof LEADING_DOT |
 *   typeof BEGIN_BINARY |
 *   typeof BEGIN_HEX |
 *   typeof BEGIN_OCTAL |
 *   typeof BEGIN_EXPONENT |
 *   typeof BEGIN_ZERO |
 *   typeof FIRST_DIGIT_ZERO_NOT_LEADING |
 *   typeof LEADING_ZEROS |
 *   typeof INFINITY |
 *   typeof BEGIN_SIGN |
 *   typeof EXPONENT_INDICATOR |
 *   typeof BEGIN_EXPONENT_SIGN |
 *   typeof EXPONENT
 * } State
 */

/**
 * Parsed floats whose fractional part has fewer digits than this are returned
 * without comparing them against the input digits.
 */
const FRACTION_CHECK_MIN_DIGITS = 14;

/**
 * The character the runtime uses for scientific notation when it stringifies
 * a large number (determined by stringifying 1e100).
 *
 * @type {"e"|"E"}
 * @constant
 */
const EXP_CHAR = (function detectExponentChar() {
    const bigNumberAsString = '' + 1e100;
    if (bigNumberAsString.indexOf("e") !== -1) {
        return "e";
    }
    if (bigNumberAsString.indexOf("E") !== -1) {
        return "E";
    }
    throw new Error("Cannot determine scientific notation character");
}());

/**
 * The first available `parseInt` implementation. `typeof` probes are used so
 * that a missing global falls through to the next candidate instead of
 * raising a ReferenceError while the module is being loaded.
 *
 * @type {(string: string, radix?: 2|8|10|16) => number}
 */
const parse_int = (function resolveParseInt() {
    if (typeof parseInt === "function") return parseInt;
    if (typeof Number.parseInt === "function") return Number.parseInt;
    if (typeof window !== "undefined" && typeof window.parseInt === "function") return window.parseInt;
    return function unsupportedParseInt() {
        throw new Error("parseInt, Number.parseInt, window.parseInt are not supported");
    };
}());

/**
 * Tells whether a UTF-16 code unit is one of the white space or line
 * terminator characters that `analyzeNumber` accepts around a number.
 *
 * @param {number} code - Result of `String.prototype.charCodeAt`.
 * @returns {boolean}
 */
function isWhiteSpaceCode(code) {
    switch (code) {
        case 0x20: // ' '
        case 0x09: // '\t'
        case 0x0b: // '\v'
        case 0x0c: // '\f'
        case 0x0d: // '\r'
        case 0x0a: // '\n'
        case 0xFEFF: // byte order mark / zero width no-break space
        case 0xA0: // Non-breaking space
        case 0x1680: // Ogham space mark
        case 0x2000: // En quad
        case 0x2001: // Em quad
        case 0x2002: // En space
        case 0x2003: // Em space
        case 0x2004: // Three-per-em space
        case 0x2005: // Four-per-em space
        case 0x2006: // Six-per-em space
        case 0x2007: // Figure space
        case 0x2008: // Punctuation space
        case 0x2009: // Thin space
        case 0x200A: // Hair space
        case 0x2028: // Line separator
        case 0x2029: // Paragraph separator
        case 0x202F: // Narrow no-break space
        case 0x205F: // Medium mathematical space
        case 0x3000: // Ideographic space
            return true;
        default:
            return false;
    }
}

/**
 * Checks that the fractional digits printed for the parsed number are the same
 * digits the input contains, so that converting did not silently drop precision.
 * Anything left in the input after those digits must be zeros or white space.
 *
 * @param {string} str - The original input.
 * @param {string} parsedStr - The parsed number converted back to a string.
 * @returns {boolean} True when the number represents the input digits exactly.
 */
function fractionSurvivedParsing(str, parsedStr) {
    const parsedFracStart = parsedStr.indexOf(".") + 1;
    const parsedFracLength = parsedStr.length - parsedFracStart;

    // Short fractions are accepted as they are.
    if (parsedFracLength < FRACTION_CHECK_MIN_DIGITS) {
        return true;
    }

    const strFracStart = str.indexOf(".") + 1;
    for (let i = 0; i < parsedFracLength; i++) {
        if (parsedStr[parsedFracStart + i] !== str[strFracStart + i]) {
            return false;
        }
    }

    // Start at the first character after the compared digits and stop at the
    // last character of the input (not one past it).
    for (let i = strFracStart + parsedFracLength; i < str.length; i++) {
        const code = str.charCodeAt(i);
        if (code !== 0x30 && !isWhiteSpaceCode(code)) {
            return false;
        }
    }
    return true;
}

/**
 * Converts a numeric string to a number; every other value is returned as is.
 *
 * The input is returned unchanged when it is not a non-empty string, when it
 * does not look like a number under the given options, when it matches
 * `options.skipLike`, when it is an integer outside the safe range (unless
 * `safeInteger` is false), when `eNotation` is false and the number can only
 * be printed in scientific notation, or when a float would lose digits
 * (unless `ieee754` is true).
 *
 * @template {*} T
 * @param {T} str - The string to convert to a number.
 * @param {Options} [options] - Options to control the conversion behavior; `null` is treated like `{}`.
 * @returns {number|T} - The converted number or the original value if conversion is not applicable.
 */
export default function toNumber(str, options = {}) {
    if (!str || typeof str !== "string") {
        return str;
    }

    // A default parameter only covers `undefined`; tolerate `null` as well.
    const opts = options || {};

    const analyzeResult = analyzeNumber(str, opts);
    if ((analyzeResult & NOT_A_NUMBER) !== 0) {
        return str;
    }

    const hasWhitespace = (analyzeResult & WHITESPACE) !== 0;

    if (opts.skipLike != null && opts.skipLike.test(hasWhitespace ? str.trim() : str)) {
        return str;
    }

    if ((analyzeResult & ZERO) !== 0) {
        return 0;
    }

    if ((analyzeResult & INFINITY) !== 0) {
        return (analyzeResult & NEGATIVE) !== 0 ? -Infinity : Infinity;
    }

    let num;
    if ((analyzeResult & BIGINT) !== 0) {
        // parseInt stops at the trailing "n".
        num = parse_int(str, 10);
    } else if ((analyzeResult & SIGN) === 0) {
        num = +str;
    } else if ((analyzeResult & HEX) !== 0) {
        // Unary plus rejects signed hex literals, parseInt accepts them.
        num = parse_int(str, 16);
    } else if ((analyzeResult & REMOVE_TYPE_HINT) !== 0) {
        // Signed "0b…"/"0o…": strip the sign, convert, then re-apply it.
        const unsigned = (hasWhitespace ? str.trim() : str).slice(1);
        num = +unsigned;
        if ((analyzeResult & NEGATIVE) !== 0) {
            num = -num;
        }
    } else {
        num = +str;
    }

    // Never hand NaN back for something that was announced as a number.
    if (num !== num) {
        return str;
    }

    if ((analyzeResult & EXPONENT_INDICATOR) !== 0) {
        return num;
    }

    if ((analyzeResult & FLOAT) === 0) {
        // If the number is out of safe integer range, return the original string
        if (opts.safeInteger !== false && Number.isSafeInteger(num) === false) {
            return str;
        }
        // The number can only be printed in scientific notation.
        if (opts.eNotation === false && ('' + num).indexOf(EXP_CHAR) !== -1) {
            return str;
        }
        return num;
    }

    if (opts.ieee754 === true) {
        return num;
    }
    return fractionSurvivedParsing(str, '' + num) ? num : str;
}

/**
 * Scans a string once and classifies it.
 *
 * The return value is `NOT_A_NUMBER` (1) when the string is not a number under
 * the given options. Otherwise it is an OR of the flags that were observed:
 * `HEX`, `OCTAL`, `BINARY`, `FLOAT`, `BIGINT`, `INFINITY`, `SIGN`, `NEGATIVE`,
 * `WHITESPACE`, `EXPONENT_INDICATOR` and `ZERO` (input is "0"/"00",
 * optionally signed and surrounded by white space).
 *
 * @param {string} str - The string to analyze.
 * @param {Options} [options] - Options to control the parsing behavior.
 * @returns {number} - A bitmask representing the analysis result of the string.
 */
export function analyzeNumber(str, options = {}) {
    const opts = options || {};
    const len = str.length;

    /** @type {State} */
    let state = BEGIN;
    let result = NUMBER;

    // Target states for option dependent transitions. A disabled feature maps
    // to NOT_A_NUMBER, which no later character can recover from.
    const ON_HEX = opts.hex !== false ? BEGIN_HEX : NOT_A_NUMBER;
    const ON_E = opts.eNotation !== false ? BEGIN_EXPONENT : NOT_A_NUMBER;
    const ON_BIGINT = opts.bigint === true ? BIGINT : NOT_A_NUMBER;
    const ON_BINARY = opts.binary === true ? BEGIN_BINARY : NOT_A_NUMBER;
    const ON_OCTAL = opts.octal === true ? BEGIN_OCTAL : NOT_A_NUMBER;
    const ON_LEADING_ZEROS = opts.leadingZeros === false ? FIRST_DIGIT_ZERO_NOT_LEADING : BEGIN_ZERO;
    const ON_INFINITY = opts.infinity === true ? INFINITY : NOT_A_NUMBER;

    for (let pos = 0; pos < len; pos++) {
        switch (str.charCodeAt(pos)) {
            // The digit cases deliberately fall through: each level handles the
            // states that are specific to it and leaves the rest to the next.
            case 0x30: // '0'
                switch (state) {
                    case FIRST_DIGIT_ZERO_NOT_LEADING:
                        return NOT_A_NUMBER;
                    case BEGIN_ZERO:
                        state = LEADING_ZEROS;
                        continue;
                    case LEADING_WHITESPACE:
                    case BEGIN:
                    case BEGIN_SIGN:
                        state = ON_LEADING_ZEROS;
                        continue;
                }
            // falls through
            case 0x31: // '1'
                switch (state) {
                    case BINARY:
                        continue;
                    case BEGIN_BINARY:
                        result |= BINARY;
                        state = BINARY;
                        continue;
                }
            // falls through
            case 0x32: // '2'
            case 0x33: // '3'
            case 0x34: // '4'
            case 0x35: // '5'
            case 0x36: // '6'
            case 0x37: // '7'
                switch (state) {
                    case OCTAL:
                        continue;
                    case BEGIN_OCTAL:
                        result |= OCTAL;
                        state = OCTAL;
                        continue;
                }
            // falls through
            case 0x38: // '8'
            case 0x39: // '9'
                switch (state) {
                    case FLOAT:
                    case DECIMAL:
                    case HEX:
                    case EXPONENT:
                        continue;
                    case BEGIN_SIGN:
                    case BEGIN_ZERO:
                    case LEADING_ZEROS:
                    case BEGIN:
                    case LEADING_WHITESPACE:
                        state = DECIMAL;
                        continue;
                    case BEGIN_HEX:
                        result |= HEX;
                        state = HEX;
                        continue;
                    case BEGIN_EXPONENT:
                    case BEGIN_EXPONENT_SIGN:
                        state = EXPONENT;
                        continue;
                    case BEGIN_FRAC_DIGITS:
                    case LEADING_DOT:
                        result |= FLOAT;
                        state = FLOAT;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x61: // 'a'
            case 0x63: // 'c'
            case 0x64: // 'd'
            case 0x66: // 'f'
            case 0x41: // 'A'
            case 0x43: // 'C'
            case 0x44: // 'D'
            case 0x46: // 'F'
                switch (state) {
                    case HEX:
                        continue;
                    case BEGIN_HEX:
                        result |= HEX;
                        state = HEX;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x62: // 'b'
            case 0x42: // 'B'
                switch (state) {
                    case HEX:
                        continue;
                    case BEGIN_HEX:
                        result |= HEX;
                        state = HEX;
                        continue;
                    case BEGIN_ZERO:
                    case FIRST_DIGIT_ZERO_NOT_LEADING:
                        state = ON_BINARY;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x65: // 'e'
            case 0x45: // 'E'
                switch (state) {
                    case HEX:
                        continue;
                    case BEGIN_HEX:
                        result |= HEX;
                        state = HEX;
                        continue;
                    case BEGIN_ZERO:
                    case FIRST_DIGIT_ZERO_NOT_LEADING:
                    case DECIMAL:
                    case BEGIN_FRAC_DIGITS:
                    case FLOAT:
                        result |= EXPONENT_INDICATOR;
                        state = ON_E;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x2D: // '-'
                switch (state) {
                    case BEGIN:
                    case LEADING_WHITESPACE:
                        result |= SIGN;
                        result |= NEGATIVE;
                        state = BEGIN_SIGN;
                        continue;
                    case BEGIN_EXPONENT:
                        state = BEGIN_EXPONENT_SIGN;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x2B: // '+'
                switch (state) {
                    case BEGIN:
                    case LEADING_WHITESPACE:
                        result |= SIGN;
                        state = BEGIN_SIGN;
                        continue;
                    case BEGIN_EXPONENT:
                        state = BEGIN_EXPONENT_SIGN;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x2E: // '.'
                switch (state) {
                    case BEGIN:
                    case LEADING_WHITESPACE:
                    case BEGIN_SIGN:
                        // No integer digit yet: a fraction digit is mandatory.
                        state = LEADING_DOT;
                        continue;
                    case BEGIN_ZERO:
                    case FIRST_DIGIT_ZERO_NOT_LEADING:
                    case LEADING_ZEROS:
                    case DECIMAL:
                        state = BEGIN_FRAC_DIGITS;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x78: // 'x'
            case 0x58: // 'X'
                switch (state) {
                    case BEGIN_ZERO:
                    case FIRST_DIGIT_ZERO_NOT_LEADING:
                        state = ON_HEX;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x6F: // 'o'
            case 0x4F: // 'O'
                switch (state) {
                    case BEGIN_ZERO:
                    case FIRST_DIGIT_ZERO_NOT_LEADING:
                        state = ON_OCTAL;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x6E: // 'n'
                switch (state) {
                    case DECIMAL:
                        result |= BIGINT;
                        state = ON_BIGINT;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            case 0x49: // 'I'
                switch (state) {
                    case BEGIN:
                    case LEADING_WHITESPACE:
                    case BEGIN_SIGN:
                        // Consume the rest of "Infinity" in place; reading past
                        // the end yields NaN and fails the comparison.
                        if (
                            str.charCodeAt(++pos) === 0x6E && // 'n'
                            str.charCodeAt(++pos) === 0x66 && // 'f'
                            str.charCodeAt(++pos) === 0x69 && // 'i'
                            str.charCodeAt(++pos) === 0x6E && // 'n'
                            str.charCodeAt(++pos) === 0x69 && // 'i'
                            str.charCodeAt(++pos) === 0x74 && // 't'
                            str.charCodeAt(++pos) === 0x79 // 'y'
                        ) {
                            result |= INFINITY;
                            state = ON_INFINITY;
                            continue;
                        }
                    // falls through
                    default:
                        return NOT_A_NUMBER;
                }
            // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#white_space
            case 0x20: // ' '
            case 0x09: // '\t'
            case 0x0b: // '\v'
            case 0x0c: // '\f'
            case 0x0d: // '\r'
            case 0x0a: // '\n'
            case 0xFEFF: // byte order mark / zero width no-break space
            // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5Cp%7BGeneral_Category%3DSpace_Separator%7D
            case 0xA0: // Non-breaking space
            case 0x1680: // Ogham space mark
            case 0x2000: // En quad
            case 0x2001: // Em quad
            case 0x2002: // En space
            case 0x2003: // Em space
            case 0x2004: // Three-per-em space
            case 0x2005: // Four-per-em space
            case 0x2006: // Six-per-em space
            case 0x2007: // Figure space
            case 0x2008: // Punctuation space
            case 0x2009: // Thin space
            case 0x200A: // Hair space
            case 0x2028: // Line separator
            case 0x2029: // Paragraph separator
            case 0x202F: // Narrow no-break space
            case 0x205F: // Medium mathematical space
            case 0x3000: // Ideographic space
                switch (state) {
                    case LEADING_WHITESPACE:
                    case TRAILING_WHITESPACE:
                        continue;
                    case BEGIN:
                        result |= WHITESPACE;
                        state = LEADING_WHITESPACE;
                        continue;
                    case BEGIN_ZERO:
                    case FIRST_DIGIT_ZERO_NOT_LEADING:
                    case LEADING_ZEROS:
                        // "0 " / "00 ": the state is about to be overwritten, so
                        // record the zero in the result before leaving it.
                        result |= ZERO | WHITESPACE;
                        state = TRAILING_WHITESPACE;
                        continue;
                    case BEGIN_FRAC_DIGITS: // "1. "
                    case BINARY:
                    case OCTAL:
                    case DECIMAL:
                    case HEX:
                    case EXPONENT:
                    case BIGINT:
                    case FLOAT:
                    case INFINITY:
                        result |= WHITESPACE;
                        state = TRAILING_WHITESPACE;
                        continue;
                    default:
                        return NOT_A_NUMBER;
                }
            default:
                return NOT_A_NUMBER;
        }
    }

    // Input ended in an incomplete state ("", "+", "0x", "1e", ".", …) or in a
    // feature that is disabled by the options.
    if (state & NOT_A_NUMBER) {
        return NOT_A_NUMBER;
    }
    if (state & ZERO) {
        return result | ZERO;
    }
    return result;
}
a/strnum.test.js +++ b/strnum.test.js @@ -24,6 +24,7 @@ describe("Should convert all the valid numeric strings to number", () => { it("should parse hexadecimal values", () => { expect(toNumber("0x2f")).toEqual(47); expect(toNumber("-0x2f")).toEqual(-47); + expect(toNumber("0x0", { hex : true})).toEqual(0); expect(toNumber("0x2f", { hex : true})).toEqual(47); expect(toNumber("-0x2f", { hex : true})).toEqual(-47); expect(toNumber("0x2f", { hex : false})).toEqual("0x2f"); @@ -31,6 +32,7 @@ describe("Should convert all the valid numeric strings to number", () => { }) it("should not parse strings with 0x embedded", () => { expect(toNumber("0xzz")).toEqual("0xzz"); + expect(toNumber("0x")).toEqual("0x"); expect(toNumber("iweraf0x123qwerqwer")).toEqual("iweraf0x123qwerqwer"); expect(toNumber("1230x55")).toEqual("1230x55"); expect(toNumber("JVBERi0xLjMNCiXi48")).toEqual("JVBERi0xLjMNCiXi48"); @@ -55,7 +57,7 @@ describe("Should convert all the valid numeric strings to number", () => { expect(toNumber("000000000000000000000000017717" , { leadingZeros : false})).toEqual("000000000000000000000000017717"); expect(toNumber("000000000000000000000000017717" , { leadingZeros : true})).toEqual(17717); expect(toNumber("020211201030005811824") ).toEqual("020211201030005811824"); - expect(toNumber("0420926189200190257681175017717") ).toEqual(4.209261892001902e+29); + expect(toNumber("0420926189200190257681175017717", { safeInteger: false }) ).toEqual(4.209261892001902e+29); }) it("invalid floating number", () => { expect(toNumber("20.21.030") ).toEqual("20.21.030"); @@ -110,6 +112,7 @@ describe("Should convert all the valid numeric strings to number", () => { expect(toNumber("20211201030005811824") ).toEqual("20211201030005811824"); expect(toNumber("20.211201030005811824") ).toEqual("20.211201030005811824"); expect(toNumber("0.211201030005811824") ).toEqual("0.211201030005811824"); + expect(toNumber("0.21120103000500000000000 ") ).toEqual(0.211201030005); }); it("scientific notation", 
() => { expect(toNumber("01.0e2" , { leadingZeros : false})).toEqual("01.0e2"); @@ -118,15 +121,26 @@ describe("Should convert all the valid numeric strings to number", () => { expect(toNumber("-01.0e2") ).toEqual(-100); expect(toNumber("1.0e2") ).toEqual(100); + expect(toNumber("1.0e2 ") ).toEqual(100); + expect(toNumber("1.0e02") ).toEqual(100); + expect(toNumber("1.0e002") ).toEqual(100); + expect(toNumber("-1.0e2") ).toEqual(-100); expect(toNumber("1.0e-2")).toEqual(0.01); - expect(toNumber("420926189200190257681175017717") ).toEqual(4.209261892001902e+29); + expect(toNumber("420926189200190257681175017717", { safeInteger: false }) ).toEqual(4.209261892001902e+29); expect(toNumber("420926189200190257681175017717" , { eNotation: false} )).toEqual("420926189200190257681175017717"); expect(toNumber("1e-2")).toEqual(0.01); expect(toNumber("1e+2")).toEqual(100); expect(toNumber("1.e+2")).toEqual(100); + expect(toNumber("1.e++2")).toEqual("1.e++2"); + expect(toNumber("1.e+-2")).toEqual("1.e+-2"); + expect(toNumber("1.e-+2")).toEqual("1.e-+2"); + expect(toNumber("1e++2")).toEqual("1e++2"); + expect(toNumber("1e+-2")).toEqual("1e+-2"); + expect(toNumber("1e-+2")).toEqual("1e-+2"); + expect(toNumber("1e.2")).toEqual("1e.2"); }); it("scientific notation with upper E", () => { @@ -168,6 +182,27 @@ describe("Should convert all the valid numeric strings to number", () => { expect(toNumber("+12.12")).toEqual(12.12); expect(toNumber("-12.12")).toEqual(-12.12); expect(toNumber("-012.12")).toEqual(-12.12); - expect(toNumber("-012.12")).toEqual(-12.12); + }) + it("Infinity", () => { + expect(toNumber("Infinity")).toEqual("Infinity"); + expect(toNumber("-Infinity")).toEqual("-Infinity"); + expect(toNumber("+Infinity")).toEqual("+Infinity"); + expect(toNumber("Infinity", { infinity: true })).toEqual(Infinity); + expect(toNumber("-Infinity", { infinity: true })).toEqual(-Infinity); + expect(toNumber("+Infinity", { infinity: true })).toEqual(+Infinity); + expect(toNumber("Infinity", 
{ infinity: false })).toEqual("Infinity"); + expect(toNumber("-Infinity", { infinity: false })).toEqual("-Infinity"); + expect(toNumber("+Infinity", { infinity: false })).toEqual("+Infinity"); + expect(toNumber(" Infinity ")).toEqual(" Infinity "); + expect(toNumber(" -Infinity ")).toEqual(" -Infinity "); + expect(toNumber(" +Infinity ")).toEqual(" +Infinity "); + expect(toNumber(" Infinity ", { infinity: true })).toEqual(Infinity); + expect(toNumber(" -Infinity ", { infinity: true })).toEqual(-Infinity); + expect(toNumber(" +Infinity ", { infinity: true })).toEqual(Infinity); + }) + + it("bigint", () => { + expect(toNumber("1212n", { bigint: true })).toEqual(1212); + expect(toNumber("-1212n", { bigint: true })).toEqual(-1212); }) }); diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..dfdf787 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "target": "ES2015", + "checkJs": true, + "allowJs": true, + "declaration": true, + "outFile": "strnum.d.ts", + "emitDeclarationOnly": true + }, + "include": [ + "strnum.js" + ] +} \ No newline at end of file