Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ordered-float = { version = "5.0", default-features = false }
rand = { version = "0.9.0", features = ["small_rng"] }
ryu = "1.0"
serde = "1.0"
serde_json = { version = "1.0", default-features = false, features = ["std"] }
serde_json = { version = "1.0", default-features = false, features = ["std", "arbitrary_precision"] }

[dev-dependencies]
goldenfile = "1.8"
Expand All @@ -46,6 +46,7 @@ json-deserializer = "0.4.4"
simd-json = "0.15.0"
mockalloc = "0.1.2"
criterion = "0.5.1"
proptest = "1.7"

[features]
default = ["databend", "serde_json/preserve_order"]
Expand Down
25 changes: 13 additions & 12 deletions src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@ pub(crate) const TRUE_LEVEL: u8 = 3;
pub(crate) const FALSE_LEVEL: u8 = 2;
pub(crate) const EXTENSION_LEVEL: u8 = 1;

pub(crate) const TYPE_STRING: &str = "string";
pub(crate) const TYPE_NULL: &str = "null";
pub(crate) const TYPE_BOOLEAN: &str = "boolean";
pub(crate) const TYPE_NUMBER: &str = "number";
pub(crate) const TYPE_ARRAY: &str = "array";
pub(crate) const TYPE_OBJECT: &str = "object";
pub(crate) const TYPE_DECIMAL: &str = "decimal";
pub(crate) const TYPE_BINARY: &str = "binary";
pub(crate) const TYPE_DATE: &str = "date";
pub(crate) const TYPE_TIMESTAMP: &str = "timestamp";
pub(crate) const TYPE_TIMESTAMP_TZ: &str = "timestamp_tz";
pub(crate) const TYPE_INTERVAL: &str = "interval";
// Upper-case type-name strings. `RawJsonb::type_of` reports numbers as
// `TYPE_INTEGER`, `TYPE_DECIMAL` or `TYPE_DOUBLE` depending on the decoded
// `Number` variant.
pub(crate) const TYPE_STRING: &str = "STRING";
// Note: the null type name is "NULL_VALUE", not "NULL".
pub(crate) const TYPE_NULL: &str = "NULL_VALUE";
pub(crate) const TYPE_BOOLEAN: &str = "BOOLEAN";
// Reported for `Number::Int64` and `Number::UInt64`.
pub(crate) const TYPE_INTEGER: &str = "INTEGER";
pub(crate) const TYPE_ARRAY: &str = "ARRAY";
pub(crate) const TYPE_OBJECT: &str = "OBJECT";
// Reported for `Number::Decimal64`, `Number::Decimal128` and `Number::Decimal256`.
pub(crate) const TYPE_DECIMAL: &str = "DECIMAL";
// Reported for `Number::Float64`.
pub(crate) const TYPE_DOUBLE: &str = "DOUBLE";
// Presumably the names for the extension value kinds (binary, date, timestamp,
// timestamp with timezone, interval) — their use is not visible here; confirm.
pub(crate) const TYPE_BINARY: &str = "BINARY";
pub(crate) const TYPE_DATE: &str = "DATE";
pub(crate) const TYPE_TIMESTAMP: &str = "TIMESTAMP";
pub(crate) const TYPE_TIMESTAMP_TZ: &str = "TIMESTAMP_TZ";
pub(crate) const TYPE_INTERVAL: &str = "INTERVAL";
15 changes: 12 additions & 3 deletions src/core/databend/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,10 @@ impl<'de> Deserializer<'de> {
match num {
Number::Int64(n) => T::from_i64(n).ok_or(Error::UnexpectedType),
Number::UInt64(n) => T::from_u64(n).ok_or(Error::UnexpectedType),
Number::Float64(_) | Number::Decimal128(_) | Number::Decimal256(_) => {
Err(Error::UnexpectedType)
}
Number::Float64(_)
| Number::Decimal64(_)
| Number::Decimal128(_)
| Number::Decimal256(_) => Err(Error::UnexpectedType),
}
}

Expand All @@ -228,6 +229,10 @@ impl<'de> Deserializer<'de> {
Number::Int64(n) => T::from_i64(n).ok_or(Error::UnexpectedType),
Number::UInt64(n) => T::from_u64(n).ok_or(Error::UnexpectedType),
Number::Float64(n) => T::from_f64(n).ok_or(Error::UnexpectedType),
Number::Decimal64(v) => {
let n = v.to_float64();
T::from_f64(n).ok_or(Error::UnexpectedType)
}
Number::Decimal128(v) => {
let n = v.to_float64();
T::from_f64(n).ok_or(Error::UnexpectedType)
Expand Down Expand Up @@ -317,6 +322,10 @@ impl<'de> Deserializer<'de> {
}
}
Number::Float64(i) => visitor.visit_f64(i),
Number::Decimal64(i) => {
let v = i.to_float64();
visitor.visit_f64(v)
}
Number::Decimal128(i) => {
let v = i.to_float64();
visitor.visit_f64(v)
Expand Down
43 changes: 3 additions & 40 deletions src/core/databend/ser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,20 +487,7 @@ impl Serialize for RawJsonb<'_> {
NUMBER_TAG => {
let num = Number::decode(&self.data[payload_start..payload_end])
.map_err(|e| ser::Error::custom(format!("{e}")))?;

match num {
Number::Int64(i) => serializer.serialize_i64(i),
Number::UInt64(i) => serializer.serialize_u64(i),
Number::Float64(i) => serializer.serialize_f64(i),
Number::Decimal128(i) => {
let v = i.to_float64();
serializer.serialize_f64(v)
}
Number::Decimal256(i) => {
let v = i.to_float64();
serializer.serialize_f64(v)
}
}
num.serialize(serializer)
}
STRING_TAG => {
let s = unsafe {
Expand Down Expand Up @@ -539,19 +526,7 @@ impl Serialize for RawJsonb<'_> {
NUMBER_TAG => {
let num = Number::decode(&self.data[payload_start..payload_end])
.map_err(|e| ser::Error::custom(format!("{e}")))?;
match num {
Number::Int64(i) => serialize_seq.serialize_element(&i)?,
Number::UInt64(i) => serialize_seq.serialize_element(&i)?,
Number::Float64(i) => serialize_seq.serialize_element(&i)?,
Number::Decimal128(i) => {
let v = i.to_float64();
serialize_seq.serialize_element(&v)?
}
Number::Decimal256(i) => {
let v = i.to_float64();
serialize_seq.serialize_element(&v)?
}
}
serialize_seq.serialize_element(&num)?;
}
STRING_TAG => {
let s = unsafe {
Expand Down Expand Up @@ -624,19 +599,7 @@ impl Serialize for RawJsonb<'_> {
NUMBER_TAG => {
let num = Number::decode(&self.data[payload_start..payload_end])
.map_err(|e| ser::Error::custom(format!("{e}")))?;
match num {
Number::Int64(i) => serialize_map.serialize_entry(&k, &i)?,
Number::UInt64(i) => serialize_map.serialize_entry(&k, &i)?,
Number::Float64(i) => serialize_map.serialize_entry(&k, &i)?,
Number::Decimal128(i) => {
let v = i.to_float64();
serialize_map.serialize_entry(&k, &v)?
}
Number::Decimal256(i) => {
let v = i.to_float64();
serialize_map.serialize_entry(&k, &v)?
}
}
serialize_map.serialize_entry(&k, &num)?;
}
STRING_TAG => {
let s = unsafe {
Expand Down
47 changes: 31 additions & 16 deletions src/core/databend/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ use crate::extension::Timestamp;
use crate::extension::TimestampTz;
use crate::number::Decimal128;
use crate::number::Decimal256;
use crate::number::Decimal64;
use crate::Number;
use crate::OwnedJsonb;
use crate::RawJsonb;
Expand Down Expand Up @@ -304,19 +305,23 @@ impl Number {
writer.write_all(&v.to_be_bytes())?;
Ok(9)
}
Self::Decimal64(v) => {
writer.write_all(&[NUMBER_DECIMAL])?;
writer.write_all(&v.value.to_be_bytes())?;
writer.write_all(&v.scale.to_be_bytes())?;
Ok(10)
}
Self::Decimal128(v) => {
writer.write_all(&[NUMBER_DECIMAL])?;
writer.write_all(&v.value.to_be_bytes())?;
writer.write_all(&v.precision.to_be_bytes())?;
writer.write_all(&v.scale.to_be_bytes())?;
Ok(19)
Ok(18)
}
Self::Decimal256(v) => {
writer.write_all(&[NUMBER_DECIMAL])?;
writer.write_all(&v.value.to_be_bytes())?;
writer.write_all(&v.precision.to_be_bytes())?;
writer.write_all(&v.scale.to_be_bytes())?;
Ok(35)
Ok(34)
}
}
}
Expand Down Expand Up @@ -353,26 +358,36 @@ impl Number {
},
NUMBER_FLOAT => Number::Float64(f64::from_be_bytes(bytes[1..].try_into().unwrap())),
NUMBER_DECIMAL => match len {
9 => {
let value = i64::from_be_bytes(bytes[1..9].try_into().unwrap());
let scale = u8::from_be_bytes(bytes[9..10].try_into().unwrap());
let dec = Decimal64 { scale, value };
Number::Decimal64(dec)
}
17 => {
let value = i128::from_be_bytes(bytes[1..17].try_into().unwrap());
let scale = u8::from_be_bytes(bytes[17..18].try_into().unwrap());
let dec = Decimal128 { scale, value };
Number::Decimal128(dec)
}
18 => {
// Compatible with deprecated Decimal128 formats, including precision
let value = i128::from_be_bytes(bytes[1..17].try_into().unwrap());
let precision = u8::from_be_bytes(bytes[17..18].try_into().unwrap());
let scale = u8::from_be_bytes(bytes[18..19].try_into().unwrap());
let dec = Decimal128 {
precision,
scale,
value,
};
let dec = Decimal128 { scale, value };
Number::Decimal128(dec)
}
33 => {
let value = i256::from_be_bytes(bytes[1..33].try_into().unwrap());
let scale = u8::from_be_bytes(bytes[33..34].try_into().unwrap());
let dec = Decimal256 { scale, value };
Number::Decimal256(dec)
}
34 => {
// Compatible with deprecated Decimal256 formats, including precision
let value = i256::from_be_bytes(bytes[1..33].try_into().unwrap());
let precision = u8::from_be_bytes(bytes[33..34].try_into().unwrap());
let scale = u8::from_be_bytes(bytes[34..35].try_into().unwrap());
let dec = Decimal256 {
precision,
scale,
value,
};
let dec = Decimal256 { scale, value };
Number::Decimal256(dec)
}
_ => {
Expand Down
41 changes: 41 additions & 0 deletions src/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,35 +29,76 @@ const MONTHS_PER_YEAR: i32 = 12;

const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.6f";

/// Value kinds that go beyond what standard JSON can express.
///
/// Standard JSON is limited to strings, numbers, booleans, null, arrays and
/// objects. The variants below add the specialized representations that
/// database systems and similar applications commonly need.
#[derive(Clone, Debug)]
pub enum ExtensionValue<'a> {
    /// Raw bytes borrowed for `'a`; avoids the base64 encoding that binary
    /// content would otherwise need in standard JSON.
    Binary(&'a [u8]),
    /// A calendar date (year, month, day) with no time component.
    Date(Date),
    /// A microsecond-precision timestamp carrying no timezone information.
    Timestamp(Timestamp),
    /// A microsecond-precision timestamp together with a timezone offset.
    TimestampTz(TimestampTz),
    /// A time interval, used for duration calculations.
    Interval(Interval),
}

/// A calendar date (year, month, day) with no time-of-day component.
///
/// The date is held as a signed number of days relative to the Unix epoch
/// (January 1, 1970), which keeps date arithmetic and comparison cheap.
/// Standard JSON has no native date type; dates are typically carried as
/// ISO 8601 strings there.
///
/// Equality and ordering are derived and therefore follow the day count.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Date {
    /// Day offset from the Unix epoch (January 1, 1970): positive for dates
    /// after the epoch, negative for dates before it.
    pub value: i32,
}

/// A date-and-time value that carries no timezone information.
///
/// The instant is held as a signed number of microseconds relative to the
/// Unix epoch (January 1, 1970 00:00:00 UTC), giving microsecond precision.
/// Standard JSON has no native timestamp type; timestamps are typically
/// carried as ISO 8601 strings there.
///
/// Equality and ordering are derived and therefore follow the microsecond
/// count.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Timestamp {
    /// Microsecond offset from the Unix epoch (January 1, 1970 00:00:00 UTC).
    pub value: i64,
}

/// A timestamp paired with a timezone offset.
///
/// The instant itself is stored in UTC, while `offset` records the local
/// timezone, which makes timezone-aware datetime operations possible.
/// Standard JSON has no native timezone-aware timestamp type.
///
/// Note that the derived ordering compares fields in declaration order:
/// `offset` first, then `value`. Two values with different offsets are
/// therefore ordered by offset, not by the instant they denote.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct TimestampTz {
    /// Timezone offset from UTC, in hours.
    pub offset: i8,
    /// Microsecond offset from the Unix epoch (January 1, 1970 00:00:00 UTC).
    pub value: i64,
}

/// A time interval (duration) split into calendar-aware components.
///
/// Months, days and microseconds are kept as separate fields so that
/// duration calculations can account for calendar irregularities.
/// Standard JSON has no native interval/duration type.
///
/// The derived ordering compares fields in declaration order: `months`,
/// then `days`, then `micros`.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Interval {
    /// Month component of the interval.
    pub months: i32,
    /// Day component of the interval.
    pub days: i32,
    /// Microsecond component of the interval.
    pub micros: i64,
}

Expand Down
3 changes: 3 additions & 0 deletions src/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ impl<'a> From<Value<'a>> for JsonValue {
Number::Int64(v) => JsonValue::Number(v.into()),
Number::UInt64(v) => JsonValue::Number(v.into()),
Number::Float64(v) => JsonValue::Number(JsonNumber::from_f64(v).unwrap()),
Number::Decimal64(v) => {
JsonValue::Number(JsonNumber::from_f64(v.to_float64()).unwrap())
}
Number::Decimal128(v) => {
JsonValue::Number(JsonNumber::from_f64(v.to_float64()).unwrap())
}
Expand Down
20 changes: 11 additions & 9 deletions src/functions/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,27 +66,27 @@ impl RawJsonb<'_> {
/// // Type checking
/// let arr_jsonb = "[1, 2, 3]".parse::<OwnedJsonb>().unwrap();
/// let raw_jsonb = arr_jsonb.as_raw();
/// assert_eq!(raw_jsonb.type_of().unwrap(), "array");
/// assert_eq!(raw_jsonb.type_of().unwrap(), "ARRAY");
///
/// let obj_jsonb = r#"{"a": 1}"#.parse::<OwnedJsonb>().unwrap();
/// let raw_jsonb = obj_jsonb.as_raw();
/// assert_eq!(raw_jsonb.type_of().unwrap(), "object");
/// assert_eq!(raw_jsonb.type_of().unwrap(), "OBJECT");
///
/// let num_jsonb = "1".parse::<OwnedJsonb>().unwrap();
/// let raw_jsonb = num_jsonb.as_raw();
/// assert_eq!(raw_jsonb.type_of().unwrap(), "number");
/// assert_eq!(raw_jsonb.type_of().unwrap(), "INTEGER");
///
/// let string_jsonb = r#""hello""#.parse::<OwnedJsonb>().unwrap();
/// let raw_jsonb = string_jsonb.as_raw();
/// assert_eq!(raw_jsonb.type_of().unwrap(), "string");
/// assert_eq!(raw_jsonb.type_of().unwrap(), "STRING");
///
/// let bool_jsonb = "true".parse::<OwnedJsonb>().unwrap();
/// let raw_jsonb = bool_jsonb.as_raw();
/// assert_eq!(raw_jsonb.type_of().unwrap(), "boolean");
/// assert_eq!(raw_jsonb.type_of().unwrap(), "BOOLEAN");
///
/// let null_jsonb = "null".parse::<OwnedJsonb>().unwrap();
/// let raw_jsonb = null_jsonb.as_raw();
/// assert_eq!(raw_jsonb.type_of().unwrap(), "null");
/// assert_eq!(raw_jsonb.type_of().unwrap(), "NULL_VALUE");
/// ```
pub fn type_of(&self) -> Result<&'static str> {
let jsonb_item_type = self.jsonb_item_type()?;
Expand All @@ -99,9 +99,11 @@ impl RawJsonb<'_> {
JsonbItem::Number(data) => {
let val = Number::decode(data)?;
match val {
Number::Decimal128(_v) => Ok(TYPE_DECIMAL),
Number::Decimal256(_v) => Ok(TYPE_DECIMAL),
_ => Ok(TYPE_NUMBER),
Number::UInt64(_) | Number::Int64(_) => Ok(TYPE_INTEGER),
Number::Decimal64(_)
| Number::Decimal128(_)
| Number::Decimal256(_) => Ok(TYPE_DECIMAL),
Number::Float64(_) => Ok(TYPE_DOUBLE),
}
}
_ => Err(Error::InvalidJsonb),
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ pub use extension::*;
pub use from::*;
pub use number::Decimal128;
pub use number::Decimal256;
pub use number::Decimal64;
pub use number::Number;
pub use owned::to_owned_jsonb;
pub use owned::OwnedJsonb;
Expand Down
Loading