From 453569cd41fdd170b458d39c61252d991c3de105 Mon Sep 17 00:00:00 2001 From: Jialun Zhang Date: Thu, 26 Dec 2024 16:14:33 +0800 Subject: [PATCH 1/2] force to remove wrong commit user --- immut/array/array.mbt | 128 ++++-------- immut/array/operation.mbt | 38 ++++ immut/array/tree.mbt | 424 ++++++++++++++++++++++++++++++++++++-- immut/array/types.mbt | 8 +- 4 files changed, 496 insertions(+), 102 deletions(-) diff --git a/immut/array/array.mbt b/immut/array/array.mbt index 44fb4a7c9..4728b7ab6 100644 --- a/immut/array/array.mbt +++ b/immut/array/array.mbt @@ -49,24 +49,34 @@ pub fn iter[A](self : T[A]) -> Iter[A] { }) } +///| + ///| pub fn T::from_iter[A](iter : Iter[A]) -> T[A] { iter.fold(init=new(), fn(arr, e) { arr.push(e) }) } ///| +/// pub fn length[A](self : T[A]) -> Int { self.size } ///| +/// pub fn copy[A](self : T[A]) -> T[A] { fn copy(t : Tree[A]) -> Tree[A] { match t { Leaf(l) => Leaf(l.copy()) Empty => Empty - Node(node) => - Node(FixedArray::makei(node.length(), fn(i) { copy(node[i]) })) + Node(node, sizes) => + Node( + FixedArray::makei(node.length(), fn(i) { copy(node[i]) }), + match sizes { + Some(sizes) => Some(FixedArray::copy(sizes)) + None => None + }, + ) } } @@ -74,6 +84,7 @@ pub fn copy[A](self : T[A]) -> T[A] { } ///| +/// /// Get a value at the given index. /// /// # Examples @@ -92,6 +103,7 @@ pub fn op_get[A](self : T[A], index : Int) -> A { } ///| +/// /// Set a value at the given index (immutable). /// /// # Example @@ -100,25 +112,15 @@ pub fn op_get[A](self : T[A], index : Int) -> A { /// assert_eq!(v.set(1, 10), @array.of([1, 10, 3, 4, 5])) /// ``` pub fn set[A](self : T[A], index : Int, value : A) -> T[A] { - fn set(i : Int, e, s, t : Tree[A]) -> Tree[A] { - match t { - Leaf(l) => Leaf(immutable_set(l, i & bitmask, e)) - Node(node) => { - let idx = shr_as_uint(i, s) & bitmask - Node(immutable_set(node, idx, set(i, e, s - num_bits, node[idx]))) - } - Empty => abort("Index out of bounds") - } - } - { - tree: set(index, value, self.shift, self.tree), + tree: self.tree.set(index, self.shift, value), size: self.size, shift: self.shift, } } ///| +/// /// Push a value to the end of the array. /// /// # Example @@ -127,22 +129,12 @@ pub fn set[A](self : T[A], index : Int, value : A) -> T[A] { /// assert_eq!(v.push(4), @array.of([1, 2, 3, 4])) /// ``` pub fn push[A](self : T[A], value : A) -> T[A] { - if self.size == (branching_factor << self.shift) { - { - tree: Node([self.tree, new_branch([value], self.shift)]), - size: self.size + 1, - shift: self.shift + num_bits, - } - } else { - { - tree: self.tree.add(self.size, self.shift, value), - size: self.size + 1, - shift: self.shift, - } - } + let (tree, shift) = self.tree.push_end(self.shift, value) + { tree, size: self.size + 1, shift } } ///| +/// /// Create a persistent array from an array. /// /// # Example @@ -155,6 +147,7 @@ pub fn T::from_array[A](arr : Array[A]) -> T[A] { } ///| +/// /// Iterate over the array. /// /// # Example @@ -165,18 +158,11 @@ pub fn T::from_array[A](arr : Array[A]) -> T[A] { /// assert_eq!(arr, [1, 2, 3, 4, 5]) /// ``` pub fn each[A](self : T[A], f : (A) -> Unit) -> Unit { - fn go(t : Tree[A]) -> Unit { - match t { - Empty => () - Leaf(l) => l.each(f) - Node(n) => n.each(fn(t) { go(t) }) - } - } - - go(self.tree) + self.tree.each(f) } ///| +/// /// Iterate over the array with index. /// /// # Example @@ -187,33 +173,17 @@ pub fn each[A](self : T[A], f : (A) -> Unit) -> Unit { /// assert_eq!(arr, [0, 2, 6, 12, 20]) /// ``` pub fn eachi[A](self : T[A], f : (Int, A) -> Unit) -> Unit { - fn go(t : Tree[A], shift : Int, start : Int) -> Unit { - match t { - Empty => () - Leaf(l) => - for i = 0; i < l.length(); i = i + 1 { - f(start + i, l[i]) - } - Node(n) => { - let child_shift = shift - num_bits - let mut start = start - for i = 0; i < n.length(); i = i + 1 { - go(n[i], child_shift, start) - start += 1 << shift - } - } - } - } - - go(self.tree, self.shift, 0) + self.tree.eachi(f, self.shift, 0) } ///| +/// pub impl[A : Eq] Eq for T[A] with op_equal(self, other) { self.size == other.size && self.tree == other.tree } ///| +/// /// Fold the array. /// /// # Example @@ -222,18 +192,11 @@ pub impl[A : Eq] Eq for T[A] with op_equal(self, other) { /// assert_eq!(v.fold(fn(a, b) { a + b }, init=0), 15) /// ``` pub fn fold[A, B](self : T[A], init~ : B, f : (B, A) -> B) -> B { - fn go(t : Tree[A], acc : B) -> B { - match t { - Empty => acc - Leaf(l) => l.fold(f, init=acc) - Node(n) => n.fold(fn(t, acc) { go(acc, t) }, init=acc) - } - } - - go(self.tree, init) + self.tree.fold(init, f) } ///| +/// /// Fold the array in reverse order. /// /// # Example @@ -242,18 +205,11 @@ pub fn fold[A, B](self : T[A], init~ : B, f : (B, A) -> B) -> B { /// assert_eq!(v.rev_fold(fn(a, b) { a + b }, init=0), 15) /// ``` pub fn rev_fold[A, B](self : T[A], init~ : B, f : (B, A) -> B) -> B { - fn go(t : Tree[A], acc : B) -> B { - match t { - Empty => acc - Leaf(l) => l.rev_fold(f, init=acc) - Node(n) => n.rev_fold(fn(t, acc) { go(acc, t) }, init=acc) - } - } - - go(self.tree, init) + self.tree.rev_fold(init, f) } ///| +/// /// Fold the array from left to right. /// /// # Example @@ -267,6 +223,7 @@ pub fn fold_left[A](self : T[A], f : (A, A) -> A, init~ : A) -> A { } ///| +/// /// Fold the array from right to left. /// /// # Example @@ -280,6 +237,7 @@ pub fn fold_right[A](self : T[A], f : (A, A) -> A, init~ : A) -> A { } ///| +/// /// Map a function over the array. /// /// # Example @@ -288,18 +246,11 @@ pub fn fold_right[A](self : T[A], f : (A, A) -> A, init~ : A) -> A { /// assert_eq!(v.map(fn(e) { e * 2 }), @array.of([2, 4, 6, 8, 10])) /// ``` pub fn map[A, B](self : T[A], f : (A) -> B) -> T[B] { - fn go(t : Tree[A]) -> Tree[B] { - match t { - Empty => Empty - Leaf(l) => Leaf(l.map(f)) - Node(n) => Node(FixedArray::makei(n.length(), fn(i) { go(n[i]) })) - } - } - - { tree: go(self.tree), size: self.size, shift: self.shift } + { tree: self.tree.map(f), size: self.size, shift: self.shift } } ///| +/// fn new_by_leaves[A](len : Int, gen_leaf : (Int, Int) -> FixedArray[A]) -> T[A] { fn tree(cap, len, s) -> Tree[A] { if cap == branching_factor { @@ -317,7 +268,9 @@ fn new_by_leaves[A](len : Int, gen_leaf : (Int, Int) -> FixedArray[A]) -> T[A] { tree(cap / branching_factor, len, i) } - Node(FixedArray::makei(child_count, child)) + // Use None here because the implementation of `new_by_leaves` ensures that the tree is full + // and we can use radix indexing. + Node(FixedArray::makei(child_count, child), None) } } @@ -353,23 +306,27 @@ test "new_by_leaves" { } ///| +/// /// Create a persistent array with a given length and value. pub fn T::make[A](len : Int, value : A) -> T[A] { new_by_leaves(len, fn(_s, l) { FixedArray::make(l, value) }) } ///| +/// /// Create a persistent array with a given length and a function to generate values. pub fn T::makei[A](len : Int, f : (Int) -> A) -> T[A] { new_by_leaves(len, fn(s, l) { FixedArray::makei(l, fn(i) { f(s + i) }) }) } ///| +/// pub fn T::of[A](arr : FixedArray[A]) -> T[A] { makei(arr.length(), fn(i) { arr[i] }) } ///| +/// pub impl[X : @quickcheck.Arbitrary] @quickcheck.Arbitrary for T[X] with arbitrary( size, rs @@ -378,6 +335,7 @@ pub impl[X : @quickcheck.Arbitrary] @quickcheck.Arbitrary for T[X] with arbitrar } ///| +/// pub impl[A : Hash] Hash for T[A] with hash_combine(self, hasher) { for e in self { hasher.combine(e) @@ -404,8 +362,8 @@ test "mix" { v2.each(fn(e) { ct = ct + e }) inspect!(ct, content="14850") v2 = v2.map(fn(e) { e * 2 }) - let ct1 = fold(v2, fn(a, b) { a + b }, init=0) - let ct2 = rev_fold(v2, fn(a, b) { a + b }, init=0) + let ct1 = v2.fold(fn(a, b) { a + b }, init=0) + let ct2 = v2.rev_fold(fn(a, b) { a + b }, init=0) inspect!(ct1, content="19800") inspect!(ct2, content="19800") inspect!(v.tree.is_empty_tree(), content="false") diff --git a/immut/array/operation.mbt b/immut/array/operation.mbt index d61d360a1..9cf85783b 100644 --- a/immut/array/operation.mbt +++ b/immut/array/operation.mbt @@ -13,6 +13,7 @@ // limitations under the License. ///| +/// Set the value at the given index. This operation is O(n). fn immutable_set[T](arr : FixedArray[T], i : Int, v : T) -> FixedArray[T] { let arr = arr.copy() arr[i] = v @@ -20,6 +21,7 @@ fn immutable_set[T](arr : FixedArray[T], i : Int, v : T) -> FixedArray[T] { } ///| +/// Add an element to the end of the array. This operation is O(n). fn immutable_push[T](arr : FixedArray[T], val : T) -> FixedArray[T] { let len = arr.length() let new_arr = FixedArray::make(len + 1, val) @@ -32,3 +34,39 @@ fn immutable_push[T](arr : FixedArray[T], val : T) -> FixedArray[T] { fn shr_as_uint(x : Int, y : Int) -> Int { (x.reinterpret_as_uint() >> y).reinterpret_as_int() } + +///| +/// Given an index and a shift, return the index of the branch that contains the given index. +fn radix_indexing(index : Int, shift : Int) -> Int { + shr_as_uint(index, shift) & bitmask +} + +///| +/// +/// Get the index of the branch that contains the given index. +/// For example, if the sizes are [0, 3, 6, 10] and the index is 5, the function should return 2. +fn get_branch_index(sizes : FixedArray[Int], index : Int) -> Int { + let mut lo = 0 + let mut hi = sizes.length() + while LINEAR_THRESHOLD < hi - lo { + let mid = (lo + hi) / 2 + if sizes[mid] <= index { + lo = mid + } else { + hi = mid + } + } + while sizes[lo] <= index { + lo += 1 + } + lo +} + +///| +/// Copy the sizes array. +fn copy_sizes(sizes : FixedArray[Int]?) -> FixedArray[Int]? { + match sizes { + Some(sizes) => Some(sizes.copy()) + None => None + } +} diff --git a/immut/array/tree.mbt b/immut/array/tree.mbt index 7301ca4c0..f1be860bc 100644 --- a/immut/array/tree.mbt +++ b/immut/array/tree.mbt @@ -16,11 +16,16 @@ let num_bits = 5 ///| +/// Invariant: `branching_factor` is a power of 2. let branching_factor : Int = 1 << num_bits ///| let bitmask : Int = branching_factor - 1 +///| +/// The threshold for switching to a linear search. +const LINEAR_THRESHOLD : Int = 4 + ///| fn Tree::empty[T]() -> Tree[T] { Tree::Empty @@ -30,7 +35,7 @@ fn Tree::empty[T]() -> Tree[T] { fn get_first[T](self : Tree[T]) -> T { match self { Leaf(leaf) => leaf[0] - Node(node) => get_first(node[0]) + Node(node, _) => get_first(node[0]) // TODO: prove that this should always be non-zero Empty => abort("Index out of bounds") } } @@ -39,17 +44,73 @@ fn get_first[T](self : Tree[T]) -> T { fn get_last[T](self : Tree[T]) -> T { match self { Leaf(leaf) => leaf[leaf.length() - 1] - Node(node) => get_last(node[node.length() - 1]) + Node(node, _) => get_last(node[node.length() - 1]) // TODO: prove that this should always be non-zero Empty => abort("Index out of bounds") } } ///| fn get[T](self : Tree[T], index : Int, shift : Int) -> T { + fn get_radix(node : Tree[T], shift : Int) -> T { + match node { + Leaf(leaf) => leaf[index & bitmask] + Node(node, None) => + get_radix(node[radix_indexing(index, shift)], shift - num_bits) + Node(_, Some(_)) => + abort("Unreachable: Node should not have sizes in get_radix") + Empty => abort("Index out of bounds") + } + } + + match self { + Leaf(leaf) => leaf[index] + Node(children, Some(sizes)) => { + let branch_index = get_branch_index(sizes, index) + let sub_index = index - sizes[branch_index] + get(children[branch_index], sub_index, shift - num_bits) + } + Node(_, None) => get_radix(self, shift) + Empty => abort("Index out of bounds") + } +} + +///| +fn set[T](self : Tree[T], index : Int, shift : Int, value : T) -> Tree[T] { + fn set_radix(node : Tree[T], shift : Int) -> Tree[T] { + match node { + Leaf(leaf) => Leaf(immutable_set(leaf, index & bitmask, value)) + Node(node, None) => { + let sub_idx = radix_indexing(index, shift) + Node( + immutable_set( + node, + sub_idx, + set_radix(node[radix_indexing(index, shift)], shift - num_bits), + ), + None, + ) + } + Node(_, Some(_)) => + abort("Unreachable: Node should not have sizes in set_radix") + Empty => abort("Index out of bounds") + } + } + match self { - Leaf(leaf) => leaf[index & bitmask] - Node(node) => - get(node[shr_as_uint(index, shift) & bitmask], index, shift - num_bits) + Leaf(leaf) => Leaf(immutable_set(leaf, index & bitmask, value)) + Node(children, Some(sizes)) => { + let branch_index = get_branch_index(sizes, index) + let sub_index = index - sizes[branch_index] + Node( + immutable_set( + children, + branch_index, + children[branch_index].set(sub_index, shift - num_bits, value), + ), + Some(sizes.copy()), + ) + } + Node(_children, None) => set_radix(self, shift) Empty => abort("Index out of bounds") } } @@ -63,25 +124,356 @@ fn is_empty_tree[T](self : Tree[T]) -> Bool { } ///| -fn new_branch[T](leaf : FixedArray[T], shift : Int) -> Tree[T] { +/// Create a new tree with a single leaf. Note that the resulting tree is a left-skewed tree. +fn new_branch_left[T](leaf : FixedArray[T], shift : Int) -> Tree[T] { match shift { 0 => Leaf(leaf) - s => Node([new_branch(leaf, s - num_bits)]) + s => Node([new_branch_left(leaf, s - num_bits)], None) // size is None because we can use radix indexing + } +} + +///| +/// Push a value to the end of the tree. +/// Precondition: +/// - The height of `self` = `shift` / `num_bits` (the height starts from 0). +/// - `length` is the number of elements in the tree. +fn push_end[T](self : Tree[T], shift : Int, value : T) -> (Tree[T], Int) { + fn update_sizes_last(sizes : FixedArray[Int]?) -> FixedArray[Int]? { + match sizes { + Some(sizes) => { + let new_sizes = sizes.copy() + new_sizes[new_sizes.length() - 1] += 1 + Some(new_sizes) + } + None => None + } + } + + fn push_sizes_last(sizes : FixedArray[Int]?) -> FixedArray[Int]? { + match sizes { + Some(sizes) => Some(immutable_push(sizes, 1)) + None => None + } + } + + fn worker(node : Tree[T], shift : Int) -> Tree[T]? { + match node { + Leaf(leaf) => { + if shift != 0 { + abort( + "Unreachable: Leaf should not have a non-zero shift, which means we have not reached the bottom of the tree", + ) + } + if leaf.length() < branching_factor { + Some(Leaf(immutable_push(leaf, value))) + } else { + None + } + } + Node(nodes, sizes) => { + let len = nodes.length() + match worker(nodes[len - 1], shift - num_bits) { + // We have successfully pushed the value, now duplicate its ancestor nodes. + Some(new_node) => { + let new_nodes = nodes.copy() + new_nodes[len - 1] = new_node + Some(Node(new_nodes, update_sizes_last(sizes))) + } + // We need to create a new node to push the value. + None => + if len < branching_factor { + Some( + Node( + immutable_push( + nodes, + new_branch_left([value], shift - num_bits), + ), + push_sizes_last(sizes), + ), + ) + } else { + None + } + } + } + Empty => Some(Leaf([value])) + } + } + + match worker(self, shift) { + Some(new_tree) => (new_tree, shift) + None => { + let new_branch = new_branch_left([value], shift) + ( + match self { + Leaf(_leaf) => Node([self, new_branch], None) + Node(_nodes, Some(sizes)) => + Node([self, new_branch], Some([sizes[sizes.length() - 1], 1])) + Node(_nodes, None) => Node([self, new_branch], None) + Empty => + abort( + "Unreachable: Empty tree should have fallen into the Some(new_tree) branch", + ) + }, + shift + num_bits, + ) + } } } ///| -fn add[T](self : Tree[T], index : Int, shift : Int, value : T) -> Tree[T] { +/// +/// For each element in the tree, apply the function `f`. +fn each[A](self : Tree[A], f : (A) -> Unit) -> Unit { match self { - Leaf(l) => Leaf(immutable_push(l, value)) - Node(n) => { - let idx = shr_as_uint(index, shift) & bitmask - if idx < n.length() { - Node(immutable_set(n, idx, n[idx].add(index, shift - num_bits, value))) - } else { - Node(immutable_push(n, new_branch([value], shift - num_bits))) + Empty => () + Leaf(l) => l.each(f) + Node(ns, _) => ns.each(fn(t) { t.each(f) }) + } +} + +///| +/// +/// For each element in the tree, apply the function `f` with the index of the element. +fn eachi[A]( + self : Tree[A], + f : (Int, A) -> Unit, + shift : Int, + start : Int +) -> Unit { + match self { + Empty => () + Leaf(l) => + for i = 0; i < l.length(); i = i + 1 { + f(start + i, l[i]) + } + Node(ns, None) => { + let child_shift = shift - num_bits + let mut start = start + for i = 0; i < ns.length(); i = i + 1 { + ns[i].eachi(f, child_shift, start) + start += 1 << shift + } + } + Node(ns, Some(sizes)) => { + let child_shift = shift - num_bits + let mut start = start + for i = 0; i < ns.length(); i = i + 1 { + ns[i].eachi(f, child_shift, start) + start += sizes[i] } } - Empty => Leaf([value]) + } +} + +///| +/// Fold the tree. +fn fold[A, B](self : Tree[A], acc : B, f : (B, A) -> B) -> B { + match self { + Empty => acc + Leaf(l) => l.fold(f, init=acc) + Node(n, _) => n.fold(fn(acc, t) { t.fold(acc, f) }, init=acc) + } +} + +///| +/// Fold the tree in reverse order. +fn rev_fold[A, B](self : Tree[A], acc : B, f : (B, A) -> B) -> B { + match self { + Empty => acc + Leaf(l) => l.rev_fold(f, init=acc) + Node(n, _) => n.rev_fold(fn(acc, t) { t.rev_fold(acc, f) }, init=acc) + } +} + +///| +/// Map the tree. +fn map[A, B](self : Tree[A], f : (A) -> B) -> Tree[B] { + match self { + Empty => Empty + Leaf(l) => Leaf(l.map(f)) + Node(n, szs) => + Node( + FixedArray::makei(n.length(), fn(i) { n[i].map(f) }), + copy_sizes(szs), + ) + } +} + +///| +fn concat[A]( + left : Tree[A], + left_shift : Int, + right : Tree[A], + right_shift : Int, + top : Bool +) -> (Tree[A], Int) { + if left_shift > right_shift { + let (c, c_shift) = concat( + left.right_child(), + left_shift - num_bits, + right, + right_shift, + false, + ) + return rebalance(left, left_shift, c, c_shift, Empty, 0, false), + } else if right_shift > left_shift { + let (c, c_shift) = concat( + left, + left_shift, + right.left_child(), + right_shift - num_bits, + false, + ) + return rebalance(Empty, 0, c, c_shift, right, right_shift, false) + } + else { + + if left_shift == 0 { + let left_elems = left.leaf_elements() + let right_elems = right.leaf_elements() + let left_len = left_elems.length() + let right_len = right_elems.length() + let len = left_len+right_len + let children = { + if top && len <= branching_factor{ + [Leaf(FixedArray::makei(len, fn (i: Int) { if i < left_len {left_elems[i]} else {right_elems[i-left_len]}}))] + } + else { + [left, right] + } + } + return Node(children, None) + } else { + let c = concat(left.right_child(), left_child - num_bits, right.left_child(), right_child - num_bits, false) + return rebalance() + } + } +} + +///| +/// Given three `Node`s of the same height, rebalance them into two. +fn rebalance[A]( + left : Tree[A], + center : Tree[A], + right : Tree[A], + shift : Int, + top : Bool +) -> Tree[A] { + let t = merge(left, center, right) + +} + +///| +/// Given three trees of the same height (if not `Empty`), merge them into one. +/// `left` and `right` might be `Node` or `Empty`. +/// `center` is always a `Node`. +/// The resulting array might be longer than `branching_factor`, +/// which will be handled by `rebalance` later. +fn merge[A]( + left : Tree[A], + center : Tree[A], + right : Tree[A] +) -> FixedArray[Tree[A]] { + if left.is_leaf() || not(center.is_node()) || right.is_leaf() { + abort("Unreachable: input to merge is invalid") + } + fn get_children(self : Tree[A]) -> FixedArray[Tree[A]] { + match self { + Node(children, _) => children + Empty => [] + Leaf(_) => abort("Unreachable") + } + } + + let left_children = get_children(left) + let center_children = get_children(center) + let right_children = get_children(right) + let left_len = left_children.length() + let center_len = center_children.length() + let right_len = right_children.length() + FixedArray::makei(left_len + center_len + right_len, fn(i) { + if i < left_len { + left_children[i] + } else if i < left_len + center_len { + center_children[i - left_len] + } else { + right_children[i - left_len - center_len] + } + }) +} + +///| +fn concat_plan[A](t : FixedArray[Tree[A]]) -> (FixedArray[Int], Int) { + let c = FixedArray::make(t.length(), fn(i : Int) { t[i].num_children() }) + let S = c.fold(init=0, fn(acc, x) { acc + x }) + // round up to the nearest integer of S/branching_factor + let opt_len = (S + branching_factor - 1) / branching_factor + let mut n = t.length() + let mut i = 0 + + +} + +///| +fn is_node[A](self : Tree[A]) -> Bool { + match self { + Node(_, _) => true + _ => false + } +} + +///| +fn is_leaf[A](self : Tree[A]) -> Bool { + match self { + Leaf(_) => true + _ => false + } +} + +///| +fn is_empty[A](self : Tree[A]) -> Bool { + match self { + Empty => true + _ => false + } +} + +///| +/// Get the rightmost child of a tree node. Abort if +/// it is not a `Node`. +fn right_child[A](self : Tree[A]) -> Tree[A] { + match self { + Node(children, _) => children[-1] + Leaf(_) | Empty => abort("Should not get children on non-`Node`s") + } +} + +///| +/// Get the leftmost child of a tree node. Abort if +/// it is not a `Node`. +fn left_child[A](self : Tree[A]) -> Tree[A] { + match self { + Node(children, _) => children[0] + Leaf(_) | Empty => abort("Should not get children on non-`Node`s") + } +} + +///| +/// Get the leaf contents. Abort if it is not a `Leaf`. +fn leaf_elements[A](self : Tree[A]) -> FixedArray[A] { + match self { + Leaf(children) => children + _ => abort("Should not call `get_leaf_elements` on non-leaf nodes") + } +} + +///| +/// Get the length of the current node, not the total number of elements in the tree. +fn num_children[A](self : Tree[A]) -> Int { + match self { + Empty => 0 + Leaf(l) => l.length() + Node(children, _) => children.length() } } diff --git a/immut/array/types.mbt b/immut/array/types.mbt index 8e9a9cb1d..e9688c7d4 100644 --- a/immut/array/types.mbt +++ b/immut/array/types.mbt @@ -13,6 +13,10 @@ // limitations under the License. ///| +/// Invariants: +/// - `shift` = tree height * `num_bits`. When it is 0, we are at the leaf level. +/// - `size` = the number of elements in the tree. +/// - `shift` is not used when `tree` is `Empty`. struct T[A] { tree : Tree[A] size : Int @@ -20,8 +24,10 @@ struct T[A] { } ///| +/// Invariants: +/// - For `Node`, the sizes array is `None` if the tree is full, i.e., we can use radix indexing. priv enum Tree[A] { Empty - Node(FixedArray[Tree[A]]) + Node(FixedArray[Tree[A]], FixedArray[Int]?) // (Subtrees, Sizes of subtrees) Leaf(FixedArray[A]) } derive(Eq, Show) From c6b5a25e757bbe420d47edd0c2bc27b0b8a29223 Mon Sep 17 00:00:00 2001 From: Jialun Zhang Date: Thu, 26 Dec 2024 17:31:22 +0800 Subject: [PATCH 2/2] sync --- immut/array/tree.mbt | 48 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/immut/array/tree.mbt b/immut/array/tree.mbt index f1be860bc..1770bd044 100644 --- a/immut/array/tree.mbt +++ b/immut/array/tree.mbt @@ -26,6 +26,14 @@ let bitmask : Int = branching_factor - 1 /// The threshold for switching to a linear search. const LINEAR_THRESHOLD : Int = 4 +///| +/// The $e_{max}$ parameter of the search step invariant. +const E_MAX : Int = 2 + +///| +/// $e_{max} / 2$. +let e_max_2 : Int = E_MAX / 2 + ///| fn Tree::empty[T]() -> Tree[T] { Tree::Empty @@ -405,14 +413,35 @@ fn merge[A]( ///| fn concat_plan[A](t : FixedArray[Tree[A]]) -> (FixedArray[Int], Int) { - let c = FixedArray::make(t.length(), fn(i : Int) { t[i].num_children() }) - let S = c.fold(init=0, fn(acc, x) { acc + x }) + let mut node_counts = FixedArray::makei(t.length(), fn { + i => t[i].num_children() + }) + let total_nodes = node_counts.fold(init=0, fn { acc, x => acc + x }) // round up to the nearest integer of S/branching_factor - let opt_len = (S + branching_factor - 1) / branching_factor - let mut n = t.length() + let opt_len = (total_nodes + branching_factor - 1) / branching_factor + let mut new_len = t.length() let mut i = 0 - + while opt_len + e_max_2 < new_len { + // Skip over all nodes satisfying the invariant. + while node_counts[i] > branching_factor - e_max_2 { + i += 1 + } + // Found short node, so redistribute over the next nodes + let mut remaining_nodes = node_counts[i] + while remaining_nodes > 0 { + let min_size = min(remaining_nodes + node_counts[i + 1], branching_factor) + node_counts[i] = min_size + remaining_nodes = remaining_nodes + node_counts[i + 1] - min_size + i += 1 + } + for j = i; j < new_len - 1; j = j + 1 { + node_counts[j] = node_counts[j + 1] + } + new_len -= 1 + i -= 1 + } + return (node_counts, new_len) } ///| @@ -477,3 +506,12 @@ fn num_children[A](self : Tree[A]) -> Int { Node(children, _) => children.length() } } + +///| +fn min(a : Int, b : Int) -> Int { + if a < b { + a + } else { + b + } +}