diff --git a/parquet-variant/Cargo.toml b/parquet-variant/Cargo.toml index 838ca7de8885..954bcb24acde 100644 --- a/parquet-variant/Cargo.toml +++ b/parquet-variant/Cargo.toml @@ -37,5 +37,26 @@ arrow-schema = { workspace = true } chrono = { workspace = true } serde_json = "1.0" base64 = "0.22" +rand = { version = "0.9", default-features = false, features = [ + "std", + "std_rng", + "thread_rng", +] } + +[dev-dependencies] +criterion = { version = "0.6", default-features = false } + [lib] + +[[bin]] +name = "object_list_same_schemas" +path = "./perf/object_list_same_schemas.rs" + +[[bin]] +name = "object_list_unknown_schemas" +path = "./perf/object_list_unknown_schemas.rs" + +[[bench]] +name = "builder" +harness = false diff --git a/parquet-variant/benches/builder.rs b/parquet-variant/benches/builder.rs new file mode 100644 index 000000000000..afa2896ed0c5 --- /dev/null +++ b/parquet-variant/benches/builder.rs @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate parquet_variant; + +use criterion::*; + +use parquet_variant::VariantBuilder; +use rand::{ + distr::{uniform::SampleUniform, Alphanumeric}, + rngs::ThreadRng, + Rng, +}; +use std::{hint, ops::Range}; + +fn random(rng: &mut ThreadRng, range: Range) -> T { + rng.random_range::(range) +} + +// generates a string with a 50/50 chance whether it's a short or a long string +fn random_string(rng: &mut ThreadRng) -> String { + let len = rng.random_range::(1..128); + + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +// generates a string guaranteed to be longer than 64 bytes +fn random_long_string(rng: &mut ThreadRng) -> String { + let len = rng.random_range::(65..200); + + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +// Creates an object with field names inserted in reverse lexicographical order +fn bench_object_field_names_reverse_order(c: &mut Criterion) { + c.bench_function("bench_object_field_names_reverse_order", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + + for i in 0..10_000 { + object_builder.insert( + format!("{}", 10_000 - i).as_str(), + random_string(&mut rng).as_str(), + ); + } + + object_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +// Creates a list of objects with the same schema (same field names) +/* + { + name: String, + age: i32, + likes_cilantro: bool, + comments: Long string + dishes: Vec + } +*/ +fn bench_object_list_same_schemas(c: &mut Criterion) { + c.bench_function("bench_object_list_same_schema", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..200 { + let mut object_builder = list_builder.new_object(); + object_builder.insert("name", random_string(&mut rng).as_str()); + object_builder.insert("age", random::(&mut rng, 18..100) as i32); + object_builder.insert("likes_cilantro", rng.random_bool(0.5)); + object_builder.insert("comments", random_long_string(&mut rng).as_str()); + + let mut list_builder = object_builder.new_list("dishes"); + list_builder.append_value(random_string(&mut rng).as_str()); + list_builder.append_value(random_string(&mut rng).as_str()); + list_builder.append_value(random_string(&mut rng).as_str()); + + list_builder.finish(); + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +// Creates a list of variant objects with an undefined schema (random field names) +// values are randomly generated, with an equal distribution to whether it's a String, Object, or List +fn bench_object_list_unknown_schema(c: &mut Criterion) { + c.bench_function("bench_object_list_unknown_schema", |b| { + b.iter(|| { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..200 { + let mut object_builder = list_builder.new_object(); + + for _num_fields in 0..random::(&mut rng, 0..100) { + if rng.random_bool(0.33) { + object_builder.insert( + random_string(&mut rng).as_str(), + random_string(&mut rng).as_str(), + ); + continue; + } + + if rng.random_bool(0.33) { + let mut inner_object_builder = object_builder.new_object("rand_object"); + + for _num_fields in 0..random::(&mut rng, 0..25) { + inner_object_builder.insert( + random_string(&mut rng).as_str(), + random_string(&mut rng).as_str(), + ); + } + inner_object_builder.finish(); + + continue; + } + + let mut inner_list_builder = object_builder.new_list("rand_list"); + + for _num_elements in 0..random::(&mut rng, 0..25) { + inner_list_builder.append_value(random_string(&mut rng).as_str()); + } + + inner_list_builder.finish(); + } + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +criterion_group!( + benches, + bench_object_field_names_reverse_order, + bench_object_list_same_schemas, + bench_object_list_unknown_schema, +); +criterion_main!(benches); diff --git a/parquet-variant/benches/variant_builder.rs b/parquet-variant/benches/variant_builder.rs new file mode 100644 index 000000000000..65eede3baf9e --- /dev/null +++ b/parquet-variant/benches/variant_builder.rs @@ -0,0 +1,401 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate parquet_variant; + +use criterion::*; + +use parquet_variant::VariantBuilder; +use rand::{ + distr::{uniform::SampleUniform, Alphanumeric}, + rngs::ThreadRng, + Rng, +}; +use std::{hint, ops::Range}; + +fn random(rng: &mut ThreadRng, range: Range) -> T { + rng.random_range::(range) +} + +// generates a string with a 50/50 chance whether it's a short or a long string +fn random_string(rng: &mut ThreadRng) -> String { + let len = rng.random_range::(1..128); + + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +struct RandomStringGenerator { + cursor: usize, + table: Vec, +} + +impl RandomStringGenerator { + pub fn new(rng: &mut ThreadRng, capacity: usize) -> Self { + Self { + cursor: 0, + table: vec![random_string(rng); capacity], + } + } + + pub fn next(&mut self) -> &str { + let this = &self.table[self.cursor]; + + self.cursor = (self.cursor + 1) % self.table.len(); + + this + } +} + +// Creates an object with field names inserted in reverse lexicographical order +fn bench_object_field_names_reverse_order(c: &mut Criterion) { + c.bench_function("bench_object_field_names_reverse_order", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 117); + b.iter(|| { + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + + for i in 0..50_000 { + object_builder.insert(format!("{}", 1000 - i).as_str(), string_table.next()); + } + + object_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +// Creates objects with a homogenous schema (same field names) +/* + { + name: String, + age: i32, + likes_cilantro: bool, + comments: Long string + dishes: Vec + } +*/ +fn bench_object_same_schema(c: &mut Criterion) { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 117); + + c.bench_function("bench_object_same_schema", |b| { + b.iter(|| { + for _ in 0..25_000 { + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + object_builder.insert("name", string_table.next()); + object_builder.insert("age", random::(&mut rng, 18..100) as i32); + object_builder.insert("likes_cilantro", rng.random_bool(0.5)); + object_builder.insert("comments", string_table.next()); + + let mut inner_list_builder = object_builder.new_list("dishes"); + inner_list_builder.append_value(string_table.next()); + inner_list_builder.append_value(string_table.next()); + inner_list_builder.append_value(string_table.next()); + + inner_list_builder.finish(); + object_builder.finish(); + + hint::black_box(variant.finish()); + } + }) + }); +} + +// Creates a list of objects with the same schema (same field names) +/* + { + name: String, + age: i32, + likes_cilantro: bool, + comments: Long string + dishes: Vec + } +*/ +fn bench_object_list_same_schema(c: &mut Criterion) { + c.bench_function("bench_object_list_same_schema", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 101); + + b.iter(|| { + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..25_000 { + let mut object_builder = list_builder.new_object(); + object_builder.insert("name", string_table.next()); + object_builder.insert("age", random::(&mut rng, 18..100) as i32); + object_builder.insert("likes_cilantro", rng.random_bool(0.5)); + object_builder.insert("comments", string_table.next()); + + let mut list_builder = object_builder.new_list("dishes"); + list_builder.append_value(string_table.next()); + list_builder.append_value(string_table.next()); + list_builder.append_value(string_table.next()); + + list_builder.finish(); + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +// Creates variant objects with an undefined schema (random field names) +// values are randomly generated, with an equal distribution to whether it's a String, Object, or List +fn bench_object_unknown_schema(c: &mut Criterion) { + c.bench_function("bench_object_unknown_schema", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 1001); + + b.iter(|| { + for _ in 0..200 { + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + + for _num_fields in 0..random::(&mut rng, 0..100) { + if rng.random_bool(0.33) { + let key = string_table.next(); + object_builder.insert(key, key); + continue; + } + + if rng.random_bool(0.5) { + let mut inner_object_builder = object_builder.new_object("rand_object"); + + for _num_fields in 0..random::(&mut rng, 0..25) { + let key = string_table.next(); + inner_object_builder.insert(key, key); + } + inner_object_builder.finish(); + + continue; + } + + let mut inner_list_builder = object_builder.new_list("rand_list"); + + for _num_elements in 0..random::(&mut rng, 0..25) { + inner_list_builder.append_value(string_table.next()); + } + + inner_list_builder.finish(); + } + object_builder.finish(); + hint::black_box(variant.finish()); + } + }) + }); +} + +// Creates a list of variant objects with an undefined schema (random field names) +// values are randomly generated, with an equal distribution to whether it's a String, Object, or List +fn bench_object_list_unknown_schema(c: &mut Criterion) { + c.bench_function("bench_object_list_unknown_schema", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 1001); + + b.iter(|| { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..200 { + let mut object_builder = list_builder.new_object(); + + for _num_fields in 0..random::(&mut rng, 0..100) { + let key = string_table.next(); + + if rng.random_bool(0.33) { + object_builder.insert(key, key); + continue; + } + + if rng.random_bool(0.5) { + let mut inner_object_builder = object_builder.new_object("rand_object"); + + for _num_fields in 0..random::(&mut rng, 0..25) { + let key = string_table.next(); + inner_object_builder.insert(key, key); + } + inner_object_builder.finish(); + + continue; + } + + let mut inner_list_builder = object_builder.new_list("rand_list"); + + for _num_elements in 0..random::(&mut rng, 0..25) { + inner_list_builder.append_value(key); + } + + inner_list_builder.finish(); + } + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +// Creates objects with a homogenous schema (same field names) +/* + { + "id": &[u8], // Following are common across all objects + "span_id: &[u8], + "created": u32, + "ended": u32, + "span_name": String, + + "attributees": { + // following fields are randomized + } + } +*/ +fn bench_object_partially_same_schema(c: &mut Criterion) { + c.bench_function("bench_object_partially_same_schema", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 117); + + b.iter(|| { + let mut rng = rand::rng(); + + for _ in 0..200 { + let mut variant = VariantBuilder::new(); + let mut object_builder = variant.new_object(); + + object_builder.insert( + "id", + random::(&mut rng, 0..i128::MAX) + .to_le_bytes() + .as_slice(), + ); + + object_builder.insert( + "span_id", + random::(&mut rng, 0..i128::MAX) + .to_le_bytes() + .as_slice(), + ); + + object_builder.insert("created", random::(&mut rng, 0..u32::MAX) as i32); + object_builder.insert("ended", random::(&mut rng, 0..u32::MAX) as i32); + object_builder.insert("span_name", string_table.next()); + + { + let mut inner_object_builder = object_builder.new_object("attributes"); + + for _num_fields in 0..random::(&mut rng, 0..100) { + let key = string_table.next(); + inner_object_builder.insert(key, key); + } + inner_object_builder.finish(); + } + + object_builder.finish(); + hint::black_box(variant.finish()); + } + }) + }); +} + +// Creates a list of variant objects with a partially homogenous schema (similar field names) +/* + { + "id": &[u8], // Following are common across all objects + "span_id: &[u8], + "created": u32, + "ended": u32, + "span_name": String, + + "attributees": { + // following fields are randomized + } + } +*/ +fn bench_object_list_partially_same_schema(c: &mut Criterion) { + c.bench_function("bench_object_list_partially_same_schema", |b| { + let mut rng = rand::rng(); + let mut string_table = RandomStringGenerator::new(&mut rng, 117); + + b.iter(|| { + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..100 { + let mut object_builder = list_builder.new_object(); + + object_builder.insert( + "id", + random::(&mut rng, 0..i128::MAX) + .to_le_bytes() + .as_slice(), + ); + + object_builder.insert( + "span_id", + random::(&mut rng, 0..i128::MAX) + .to_le_bytes() + .as_slice(), + ); + + object_builder.insert("created", random::(&mut rng, 0..u32::MAX) as i32); + object_builder.insert("ended", random::(&mut rng, 0..u32::MAX) as i32); + object_builder.insert("span_name", string_table.next()); + + { + let mut inner_object_builder = object_builder.new_object("attributes"); + + for _num_fields in 0..random::(&mut rng, 0..100) { + let key = string_table.next(); + inner_object_builder.insert(key, key); + } + inner_object_builder.finish(); + } + + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); + }) + }); +} + +criterion_group!( + benches, + bench_object_field_names_reverse_order, + bench_object_same_schema, + bench_object_list_same_schema, + bench_object_unknown_schema, + bench_object_list_unknown_schema, + bench_object_partially_same_schema, + bench_object_list_partially_same_schema +); + +criterion_main!(benches); diff --git a/parquet-variant/perf/object_list_same_schemas.rs b/parquet-variant/perf/object_list_same_schemas.rs new file mode 100644 index 000000000000..1224d8e5c083 --- /dev/null +++ b/parquet-variant/perf/object_list_same_schemas.rs @@ -0,0 +1,59 @@ +use std::{hint, ops::Range}; + +use parquet_variant::VariantBuilder; +use rand::{ + distr::{uniform::SampleUniform, Alphanumeric}, + rngs::ThreadRng, + Rng, +}; + +fn random(rng: &mut ThreadRng, range: Range) -> T { + rng.random_range::(range) +} + +// generates a string with a 50/50 chance whether it's a short or a long string +fn random_string(rng: &mut ThreadRng) -> String { + let len = rng.random_range::(1..128); + + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +// generates a string guaranteed to be longer than 64 bytes +fn random_long_string(rng: &mut ThreadRng) -> String { + let len = rng.random_range::(65..200); + + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +fn main() { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..25_000 { + let mut object_builder = list_builder.new_object(); + object_builder.insert("name", random_string(&mut rng).as_str()); + object_builder.insert("age", random::(&mut rng, 18..100) as i32); + object_builder.insert("likes_cilantro", rng.random_bool(0.5)); + object_builder.insert("comments", random_long_string(&mut rng).as_str()); + + let mut list_builder = object_builder.new_list("dishes"); + list_builder.append_value(random_string(&mut rng).as_str()); + list_builder.append_value(random_string(&mut rng).as_str()); + list_builder.append_value(random_string(&mut rng).as_str()); + + list_builder.finish(); + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); +} diff --git a/parquet-variant/perf/object_list_unknown_schemas.rs b/parquet-variant/perf/object_list_unknown_schemas.rs new file mode 100644 index 000000000000..eca17a4eb2c9 --- /dev/null +++ b/parquet-variant/perf/object_list_unknown_schemas.rs @@ -0,0 +1,70 @@ +use std::{hint, ops::Range}; + +use parquet_variant::VariantBuilder; +use rand::{ + distr::{uniform::SampleUniform, Alphanumeric}, + rngs::ThreadRng, + Rng, +}; + +fn random(rng: &mut ThreadRng, range: Range) -> T { + rng.random_range::(range) +} + +// generates a string with a 50/50 chance whether it's a short or a long string +fn random_string(rng: &mut ThreadRng) -> String { + let len = rng.random_range::(1..128); + + rng.sample_iter(&Alphanumeric) + .take(len) + .map(char::from) + .collect() +} + +fn main() { + let mut rng = rand::rng(); + + let mut variant = VariantBuilder::new(); + + let mut list_builder = variant.new_list(); + + for _ in 0..1_000 { + let mut object_builder = list_builder.new_object(); + + for _num_fields in 0..random::(&mut rng, 0..100) { + if rng.random_bool(0.33) { + object_builder.insert( + random_string(&mut rng).as_str(), + random_string(&mut rng).as_str(), + ); + continue; + } + + if rng.random_bool(0.33) { + let mut inner_object_builder = object_builder.new_object("rand_object"); + + for _num_fields in 0..random::(&mut rng, 0..25) { + inner_object_builder.insert( + random_string(&mut rng).as_str(), + random_string(&mut rng).as_str(), + ); + } + inner_object_builder.finish(); + + continue; + } + + let mut inner_list_builder = object_builder.new_list("rand_list"); + + for _num_elements in 0..random::(&mut rng, 0..25) { + inner_list_builder.append_value(random_string(&mut rng).as_str()); + } + + inner_list_builder.finish(); + } + object_builder.finish(); + } + + list_builder.finish(); + hint::black_box(variant.finish()); +} diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index fda15c2b4336..74e4b5492bce 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -16,7 +16,7 @@ // under the License. use crate::decoder::{VariantBasicType, VariantPrimitiveType}; use crate::{ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8}; -use std::collections::BTreeMap; +use std::collections::HashMap; const BASIC_TYPE_BITS: u8 = 2; const UNIX_EPOCH_DATE: chrono::NaiveDate = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); @@ -233,14 +233,14 @@ impl ValueBuffer { #[derive(Default)] struct MetadataBuilder { - field_name_to_id: BTreeMap, + field_name_to_id: HashMap, field_names: Vec, } impl MetadataBuilder { /// Upsert field name to dictionary, return its ID fn upsert_field_name(&mut self, field_name: &str) -> u32 { - use std::collections::btree_map::Entry; + use std::collections::hash_map::Entry; match self.field_name_to_id.entry(field_name.to_string()) { Entry::Occupied(entry) => *entry.get(), Entry::Vacant(entry) => { @@ -256,6 +256,10 @@ impl MetadataBuilder { self.field_names.len() } + fn field_name(&self, i: usize) -> &str { + &self.field_names[i] + } + fn metadata_size(&self) -> usize { self.field_names.iter().map(|k| k.len()).sum() } @@ -567,7 +571,8 @@ impl<'a> ListBuilder<'a> { pub struct ObjectBuilder<'a, 'b> { parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder, - fields: BTreeMap, // (field_id, offset) + fields: Vec<(u32, usize)>, // (field_id, offset) + field_id_to_index: HashMap, // (field_id, index to `fields`) buffer: ValueBuffer, /// Is there a pending list or object that needs to be finalized? pending: Option<(&'b str, usize)>, @@ -578,19 +583,35 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { Self { parent_buffer, metadata_builder, - fields: BTreeMap::new(), + fields: Vec::new(), + field_id_to_index: HashMap::new(), buffer: ValueBuffer::default(), pending: None, } } + fn upsert_field(&mut self, field_id: u32, field_start: usize) { + use std::collections::hash_map::Entry; + + match self.field_id_to_index.entry(field_id) { + Entry::Occupied(occupied_entry) => { + let i = *occupied_entry.get(); + self.fields[i] = (field_id, field_start); + } + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(self.fields.len()); + self.fields.push((field_id, field_start)); + } + } + } + fn check_pending_field(&mut self) { let Some((field_name, field_start)) = self.pending.as_ref() else { return; }; let field_id = self.metadata_builder.upsert_field_name(field_name); - self.fields.insert(field_id, *field_start); + self.upsert_field(field_id, *field_start); self.pending = None; } @@ -605,7 +626,7 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { let field_id = self.metadata_builder.upsert_field_name(key); let field_start = self.buffer.offset(); - self.fields.insert(field_id, field_start); + self.upsert_field(field_id, field_start); self.buffer.append_non_nested_value(value); } @@ -643,16 +664,15 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { let num_fields = self.fields.len(); let is_large = num_fields > u8::MAX as usize; - let field_ids_by_sorted_field_name = self - .metadata_builder - .field_name_to_id - .iter() - .filter_map(|(_, id)| self.fields.contains_key(id).then_some(*id)) - .collect::>(); + self.fields.sort_by(|a, b| { + let key_a = &self.metadata_builder.field_name(a.0 as usize); + let key_b = &self.metadata_builder.field_name(b.0 as usize); + key_a.cmp(key_b) + }); - let max_id = self.fields.keys().last().copied().unwrap_or(0) as usize; + let max_id = self.fields.iter().map(|&(id, _)| id).max().unwrap_or(0); - let id_size = int_size(max_id); + let id_size = int_size(max_id as usize); let offset_size = int_size(data_size); // Write header @@ -664,13 +684,12 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { ); // Write field IDs (sorted order) - for id in &field_ids_by_sorted_field_name { - write_offset(self.parent_buffer.inner_mut(), *id as usize, id_size); + for &(id, _) in &self.fields { + write_offset(self.parent_buffer.inner_mut(), id as usize, id_size); } // Write field offsets - for id in &field_ids_by_sorted_field_name { - let &offset = self.fields.get(id).unwrap(); + for &(_, offset) in &self.fields { write_offset(self.parent_buffer.inner_mut(), offset, offset_size); } @@ -861,75 +880,6 @@ mod tests { assert_eq!(field_ids, vec![1, 2, 0]); } - #[test] - fn test_object_and_metadata_ordering() { - let mut builder = VariantBuilder::new(); - - let mut obj = builder.new_object(); - - obj.insert("zebra", "stripes"); // ID = 0 - obj.insert("apple", "red"); // ID = 1 - - { - // fields_map is ordered by insertion order (field id) - let fields_map = obj.fields.keys().copied().collect::>(); - assert_eq!(fields_map, vec![0, 1]); - - // dict is ordered by field names - let dict_metadata = obj - .metadata_builder - .field_name_to_id - .iter() - .map(|(f, i)| (f.as_str(), *i)) - .collect::>(); - - assert_eq!(dict_metadata, vec![("apple", 1), ("zebra", 0)]); - - // dict_keys is ordered by insertion order (field id) - let dict_keys = obj - .metadata_builder - .field_names - .iter() - .map(|k| k.as_str()) - .collect::>(); - assert_eq!(dict_keys, vec!["zebra", "apple"]); - } - - obj.insert("banana", "yellow"); // ID = 2 - - { - // fields_map is ordered by insertion order (field id) - let fields_map = obj.fields.keys().copied().collect::>(); - assert_eq!(fields_map, vec![0, 1, 2]); - - // dict is ordered by field names - let dict_metadata = obj - .metadata_builder - .field_name_to_id - .iter() - .map(|(f, i)| (f.as_str(), *i)) - .collect::>(); - - assert_eq!( - dict_metadata, - vec![("apple", 1), ("banana", 2), ("zebra", 0)] - ); - - // dict_keys is ordered by insertion order (field id) - let dict_keys = obj - .metadata_builder - .field_names - .iter() - .map(|k| k.as_str()) - .collect::>(); - assert_eq!(dict_keys, vec!["zebra", "apple", "banana"]); - } - - obj.finish(); - - builder.finish(); - } - #[test] fn test_duplicate_fields_in_object() { let mut builder = VariantBuilder::new(); @@ -1242,8 +1192,10 @@ mod tests { /* { "c": { + "b": false, "c": "a" - } + }, + "b": false, } */ @@ -1253,10 +1205,17 @@ mod tests { let mut outer_object_builder = builder.new_object(); { let mut inner_object_builder = outer_object_builder.new_object("c"); + inner_object_builder.insert("b", false); inner_object_builder.insert("c", "a"); + inner_object_builder.finish(); } + outer_object_builder.insert("b", false); + + // note, we can't guarantee an Objects field is sorted by field id. + assert_eq!(outer_object_builder.fields, vec![(1, 0), (0, 10)]); + outer_object_builder.finish(); } @@ -1264,15 +1223,17 @@ mod tests { let variant = Variant::try_new(&metadata, &value).unwrap(); let outer_object = variant.as_object().unwrap(); - assert_eq!(outer_object.len(), 1); - assert_eq!(outer_object.field_name(0).unwrap(), "c"); + assert_eq!(outer_object.len(), 2); + assert_eq!(outer_object.field_name(0).unwrap(), "b"); - let inner_object_variant = outer_object.field(0).unwrap(); + let inner_object_variant = outer_object.field(1).unwrap(); let inner_object = inner_object_variant.as_object().unwrap(); - assert_eq!(inner_object.len(), 1); - assert_eq!(inner_object.field_name(0).unwrap(), "c"); - assert_eq!(inner_object.field(0).unwrap(), Variant::from("a")); + assert_eq!(inner_object.len(), 2); + assert_eq!(inner_object.field_name(0).unwrap(), "b"); + assert_eq!(inner_object.field(0).unwrap(), Variant::from(false)); + assert_eq!(inner_object.field_name(1).unwrap(), "c"); + assert_eq!(inner_object.field(1).unwrap(), Variant::from("a")); } #[test] diff --git a/profile.json.gz b/profile.json.gz new file mode 100644 index 000000000000..d76385376432 Binary files /dev/null and b/profile.json.gz differ